stealing more hacks from acorn in the name of speed

This commit is contained in:
Mihai Bazon
2012-10-11 13:00:58 +03:00
parent f4584af42c
commit fb5c01c073
2 changed files with 158 additions and 186 deletions

View File

@@ -126,17 +126,18 @@ var UNICODE = {
connector_punctuation: new RegExp("[\\u005F\\u203F\\u2040\\u2054\\uFE33\\uFE34\\uFE4D-\\uFE4F\\uFF3F]") connector_punctuation: new RegExp("[\\u005F\\u203F\\u2040\\u2054\\uFE33\\uFE34\\uFE4D-\\uFE4F\\uFF3F]")
}; };
function is_letter(ch) { function is_letter(code) {
return UNICODE.letter.test(ch); return (code >= 97 && code <= 122)
|| (code >= 65 && code <= 90)
|| (code >= 0xaa && UNICODE.letter.test(String.fromCharCode(code)));
}; };
function is_digit(ch) { function is_digit(code) {
ch = ch.charCodeAt(0); return code >= 48 && code <= 57; //XXX: find out if "UnicodeDigit" means something else than 0..9
return ch >= 48 && ch <= 57; //XXX: find out if "UnicodeDigit" means something else than 0..9
}; };
function is_alphanumeric_char(ch) { function is_alphanumeric_char(code) {
return is_digit(ch) || is_letter(ch); return is_digit(code) || is_letter(code);
}; };
function is_unicode_combining_mark(ch) { function is_unicode_combining_mark(ch) {
@@ -148,21 +149,21 @@ function is_unicode_connector_punctuation(ch) {
}; };
function is_identifier(name) { function is_identifier(name) {
return /^[a-z_$][a-z0-9_$]*$/i.test(name) return /^[a-z_$][a-z0-9_$]*$/i.test(name) && !RESERVED_WORDS(name);
&& !RESERVED_WORDS(name)
}; };
function is_identifier_start(ch) { function is_identifier_start(code) {
return ch == "$" || ch == "_" || is_letter(ch); return code == 36 || code == 95 || is_letter(code);
}; };
function is_identifier_char(ch) { function is_identifier_char(ch) {
return is_identifier_start(ch) var code = ch.charCodeAt(0);
return is_identifier_start(code)
|| is_digit(code)
|| code == 8204 // \u200c: zero-width non-joiner <ZWNJ>
|| code == 8205 // \u200d: zero-width joiner <ZWJ> (in my ECMA-262 PDF, this is also 200c)
|| is_unicode_combining_mark(ch) || is_unicode_combining_mark(ch)
|| is_digit(ch)
|| is_unicode_connector_punctuation(ch) || is_unicode_connector_punctuation(ch)
|| ch == "\u200c" // zero-width non-joiner <ZWNJ>
|| ch == "\u200d" // zero-width joiner <ZWJ> (in my ECMA-262 PDF, this is also 200c)
; ;
}; };
@@ -280,11 +281,9 @@ function tokenizer($TEXT, filename) {
}; };
function read_while(pred) { function read_while(pred) {
var ret = "", ch = peek(), i = 0; var ret = "", ch, i = 0;
while (ch && pred(ch, i++)) { while ((ch = peek()) && pred(ch, i++))
ret += next(); ret += next();
ch = peek();
}
return ret; return ret;
}; };
@@ -295,29 +294,22 @@ function tokenizer($TEXT, filename) {
function read_num(prefix) { function read_num(prefix) {
var has_e = false, after_e = false, has_x = false, has_dot = prefix == "."; var has_e = false, after_e = false, has_x = false, has_dot = prefix == ".";
var num = read_while(function(ch, i){ var num = read_while(function(ch, i){
if (ch == "x" || ch == "X") { var code = ch.charCodeAt(0);
if (has_x) return false; switch (code) {
return has_x = true; case 120: case 88: // xX
return has_x ? false : (has_x = true);
case 101: case 69: // eE
return has_x ? true : has_e ? false : (has_e = after_e = true);
case 45: // -
return after_e || (i == 0 && !prefix);
case 43: // +
return after_e;
case (after_e = false, 46): // .
return (!has_dot && !has_x && !has_e) ? (has_dot = true) : false;
} }
if (!has_x && (ch == "E" || ch == "e")) { return is_alphanumeric_char(code);
if (has_e) return false;
return has_e = after_e = true;
}
if (ch == "-") {
if (after_e || (i == 0 && !prefix)) return true;
return false;
}
if (ch == "+") return after_e;
after_e = false;
if (ch == ".") {
if (!has_dot && !has_x && !has_e)
return has_dot = true;
return false;
}
return is_alphanumeric_char(ch);
}); });
if (prefix) if (prefix) num = prefix + num;
num = prefix + num;
var valid = parse_js_number(num); var valid = parse_js_number(num);
if (!isNaN(valid)) { if (!isNaN(valid)) {
return token("num", valid); return token("num", valid);
@@ -328,17 +320,17 @@ function tokenizer($TEXT, filename) {
function read_escaped_char(in_string) { function read_escaped_char(in_string) {
var ch = next(true, in_string); var ch = next(true, in_string);
switch (ch) { switch (ch.charCodeAt(0)) {
case "n" : return "\n"; case 110 : return "\n";
case "r" : return "\r"; case 114 : return "\r";
case "t" : return "\t"; case 116 : return "\t";
case "b" : return "\b"; case 98 : return "\b";
case "v" : return "\u000b"; case 118 : return "\u000b"; // \v
case "f" : return "\f"; case 102 : return "\f";
case "0" : return "\0"; case 48 : return "\0";
case "x" : return String.fromCharCode(hex_bytes(2)); case 120 : return String.fromCharCode(hex_bytes(2)); // \x
case "u" : return String.fromCharCode(hex_bytes(4)); case 117 : return String.fromCharCode(hex_bytes(4)); // \u
case "\n": return ""; case 10 : return ""; // newline
default : return ch; default : return ch;
} }
}; };
@@ -354,8 +346,7 @@ function tokenizer($TEXT, filename) {
return num; return num;
}; };
function read_string() { var read_string = with_eof_error("Unterminated string constant", function(){
return with_eof_error("Unterminated string constant", function(){
var quote = next(), ret = ""; var quote = next(), ret = "";
for (;;) { for (;;) {
var ch = next(true); var ch = next(true);
@@ -382,7 +373,6 @@ function tokenizer($TEXT, filename) {
} }
return token("string", ret); return token("string", ret);
}); });
};
function read_line_comment() { function read_line_comment() {
next(); next();
@@ -397,17 +387,20 @@ function tokenizer($TEXT, filename) {
return token("comment1", ret, true); return token("comment1", ret, true);
}; };
function read_multiline_comment() { var read_multiline_comment = with_eof_error("Unterminated multiline comment", function(){
next(); next();
return with_eof_error("Unterminated multiline comment", function(){ var i = find("*/", true);
var i = find("*/", true), var text = S.text.substring(S.pos, i);
text = S.text.substring(S.pos, i); var a = text.split("\n"), n = a.length;
// update stream position
S.pos = i + 2; S.pos = i + 2;
S.line += text.split("\n").length - 1; S.line += n - 1;
if (n > 1) S.col = a[n - 1].length;
else S.col += a[n - 1].length;
S.col += 2;
S.newline_before = S.newline_before || text.indexOf("\n") >= 0; S.newline_before = S.newline_before || text.indexOf("\n") >= 0;
return token("comment2", text, true); return token("comment2", text, true);
}); });
};
function read_name() { function read_name() {
var backslash = false, name = "", ch, escaped = false, hex; var backslash = false, name = "", ch, escaped = false, hex;
@@ -432,8 +425,7 @@ function tokenizer($TEXT, filename) {
return name; return name;
}; };
function read_regexp(regexp) { var read_regexp = with_eof_error("Unterminated regular expression", function(regexp){
return with_eof_error("Unterminated regular expression", function(){
var prev_backslash = false, ch, in_class = false; var prev_backslash = false, ch, in_class = false;
while ((ch = next(true))) if (prev_backslash) { while ((ch = next(true))) if (prev_backslash) {
regexp += "\\" + ch; regexp += "\\" + ch;
@@ -454,7 +446,6 @@ function tokenizer($TEXT, filename) {
var mods = read_name(); var mods = read_name();
return token("regexp", new RegExp(regexp, mods)); return token("regexp", new RegExp(regexp, mods));
}); });
};
function read_operator(prefix) { function read_operator(prefix) {
function grow(op) { function grow(op) {
@@ -488,7 +479,7 @@ function tokenizer($TEXT, filename) {
function handle_dot() { function handle_dot() {
next(); next();
return is_digit(peek()) return is_digit(peek().charCodeAt(0))
? read_num(".") ? read_num(".")
: token("punc", "."); : token("punc", ".");
}; };
@@ -502,13 +493,15 @@ function tokenizer($TEXT, filename) {
}; };
function with_eof_error(eof_error, cont) { function with_eof_error(eof_error, cont) {
return function(x) {
try { try {
return cont(); return cont(x);
} catch(ex) { } catch(ex) {
if (ex === EX_EOF) parse_error(eof_error); if (ex === EX_EOF) parse_error(eof_error);
else throw ex; else throw ex;
} }
}; };
};
function next_token(force_regexp) { function next_token(force_regexp) {
if (force_regexp != null) if (force_regexp != null)
@@ -517,13 +510,16 @@ function tokenizer($TEXT, filename) {
start_token(); start_token();
var ch = peek(); var ch = peek();
if (!ch) return token("eof"); if (!ch) return token("eof");
if (is_digit(ch)) return read_num(); var code = ch.charCodeAt(0);
if (ch == '"' || ch == "'") return read_string(); switch (code) {
case 34: case 39: return read_string();
case 46: return handle_dot();
case 47: return handle_slash();
}
if (is_digit(code)) return read_num();
if (PUNC_CHARS(ch)) return token("punc", next()); if (PUNC_CHARS(ch)) return token("punc", next());
if (ch == ".") return handle_dot();
if (ch == "/") return handle_slash();
if (OPERATOR_CHARS(ch)) return read_operator(); if (OPERATOR_CHARS(ch)) return read_operator();
if (ch == "\\" || is_identifier_start(ch)) return read_word(); if (code == 92 || is_identifier_start(code)) return read_word();
parse_error("Unexpected character '" + ch + "'"); parse_error("Unexpected character '" + ch + "'");
}; };
@@ -538,7 +534,7 @@ function tokenizer($TEXT, filename) {
/* -----[ Parser (constants) ]----- */ /* -----[ Parser (constants) ]----- */
var UNARY_PREFIX = array_to_hash([ var UNARY_PREFIX = makePredicate([
"typeof", "typeof",
"void", "void",
"delete", "delete",
@@ -550,19 +546,9 @@ var UNARY_PREFIX = array_to_hash([
"+" "+"
]); ]);
var UNARY_POSTFIX = array_to_hash([ "--", "++" ]); var UNARY_POSTFIX = makePredicate([ "--", "++" ]);
var ASSIGNMENT = (function(a, ret, i){ var ASSIGNMENT = makePredicate([ "=", "+=", "-=", "/=", "*=", "%=", ">>=", "<<=", ">>>=", "|=", "^=", "&=" ]);
while (i < a.length) {
ret[a[i]] = a[i];
i++;
}
return ret;
})(
[ "=", "+=", "-=", "/=", "*=", "%=", ">>=", "<<=", ">>>=", "|=", "^=", "&=" ],
{},
0
);
var PRECEDENCE = (function(a, ret){ var PRECEDENCE = (function(a, ret){
for (var i = 0, n = 1; i < a.length; ++i, ++n) { for (var i = 0, n = 1; i < a.length; ++i, ++n) {
@@ -680,7 +666,7 @@ function parse($TEXT, options) {
function parenthesised() { function parenthesised() {
expect("("); expect("(");
var exp = expression(); var exp = expression(true);
expect(")"); expect(")");
return exp; return exp;
}; };
@@ -688,7 +674,7 @@ function parse($TEXT, options) {
function embed_tokens(parser) { function embed_tokens(parser) {
return function() { return function() {
var start = S.token; var start = S.token;
var expr = parser.apply(this, arguments); var expr = parser();
var end = prev(); var end = prev();
expr.start = start; expr.start = start;
expr.end = end; expr.end = end;
@@ -697,6 +683,7 @@ function parse($TEXT, options) {
}; };
var statement = embed_tokens(function() { var statement = embed_tokens(function() {
var tmp;
if (is("operator", "/") || is("operator", "/=")) { if (is("operator", "/") || is("operator", "/=")) {
S.peeked = null; S.peeked = null;
S.token = S.input(S.token.value.substr(1)); // force regexp S.token = S.input(S.token.value.substr(1)); // force regexp
@@ -738,7 +725,7 @@ function parse($TEXT, options) {
} }
case "keyword": case "keyword":
switch (prog1(S.token.value, next)) { switch (tmp = S.token.value, next(), tmp) {
case "break": case "break":
return break_cont(AST_Break); return break_cont(AST_Break);
@@ -752,7 +739,7 @@ function parse($TEXT, options) {
case "do": case "do":
return new AST_Do({ return new AST_Do({
body : in_loop(statement), body : in_loop(statement),
condition : (expect_token("keyword", "while"), prog1(parenthesised, semicolon)) condition : (expect_token("keyword", "while"), tmp = parenthesised(), semicolon(), tmp)
}); });
case "while": case "while":
@@ -778,30 +765,30 @@ function parse($TEXT, options) {
? (next(), null) ? (next(), null)
: can_insert_semicolon() : can_insert_semicolon()
? null ? null
: prog1(expression, semicolon) ) : (tmp = expression(true), semicolon(), tmp) )
}); });
case "switch": case "switch":
return new AST_Switch({ return new AST_Switch({
expression : parenthesised(), expression : parenthesised(),
body : switch_body_() body : in_loop(switch_body_)
}); });
case "throw": case "throw":
if (S.token.nlb) if (S.token.nlb)
croak("Illegal newline after 'throw'"); croak("Illegal newline after 'throw'");
return new AST_Throw({ return new AST_Throw({
value: prog1(expression, semicolon) value: (tmp = expression(true), semicolon(), tmp)
}); });
case "try": case "try":
return try_(); return try_();
case "var": case "var":
return prog1(var_, semicolon); return tmp = var_(), semicolon(), tmp;
case "const": case "const":
return prog1(const_, semicolon); return tmp = const_(), semicolon(), tmp;
case "with": case "with":
return new AST_With({ return new AST_With({
@@ -831,8 +818,8 @@ function parse($TEXT, options) {
return new AST_LabeledStatement({ body: stat, label: label }); return new AST_LabeledStatement({ body: stat, label: label });
}; };
function simple_statement() { function simple_statement(tmp) {
return new AST_SimpleStatement({ body: prog1(expression, semicolon) }); return new AST_SimpleStatement({ body: (tmp = expression(true), semicolon(), tmp) });
}; };
function break_cont(type) { function break_cont(type) {
@@ -869,9 +856,9 @@ function parse($TEXT, options) {
function regular_for(init) { function regular_for(init) {
expect(";"); expect(";");
var test = is("punc", ";") ? null : expression(); var test = is("punc", ";") ? null : expression(true);
expect(";"); expect(";");
var step = is("punc", ")") ? null : expression(); var step = is("punc", ")") ? null : expression(true);
expect(")"); expect(")");
return new AST_For({ return new AST_For({
init : init, init : init,
@@ -883,7 +870,7 @@ function parse($TEXT, options) {
function for_in(init) { function for_in(init) {
var lhs = init instanceof AST_Var ? init.definitions[0].name : null; var lhs = init instanceof AST_Var ? init.definitions[0].name : null;
var obj = expression(); var obj = expression(true);
expect(")"); expect(")");
return new AST_ForIn({ return new AST_ForIn({
init : init, init : init,
@@ -911,10 +898,8 @@ function parse($TEXT, options) {
next(); next();
return a; return a;
})(true, []), })(true, []),
body: embed_tokens(function(){ body: (function(loop, labels){
++S.in_function; ++S.in_function;
var loop = S.in_loop;
var labels = S.labels;
S.in_directives = true; S.in_directives = true;
S.in_loop = 0; S.in_loop = 0;
S.labels = []; S.labels = [];
@@ -923,7 +908,7 @@ function parse($TEXT, options) {
S.in_loop = loop; S.in_loop = loop;
S.labels = labels; S.labels = labels;
return a; return a;
})() })(S.in_loop, S.labels)
}); });
}; };
@@ -951,17 +936,17 @@ function parse($TEXT, options) {
return a; return a;
}; };
var switch_body_ = curry(in_loop, function(){ function switch_body_() {
expect("{"); expect("{");
var a = [], cur = null, branch = null; var a = [], cur = null, branch = null, tmp;
while (!is("punc", "}")) { while (!is("punc", "}")) {
if (is("eof")) unexpected(); if (is("eof")) unexpected();
if (is("keyword", "case")) { if (is("keyword", "case")) {
if (branch) branch.end = prev(); if (branch) branch.end = prev();
cur = []; cur = [];
branch = new AST_Case({ branch = new AST_Case({
start : prog1(S.token, next), start : (tmp = S.token, next(), tmp),
expression : expression(), expression : expression(true),
body : cur body : cur
}); });
a.push(branch); a.push(branch);
@@ -971,9 +956,9 @@ function parse($TEXT, options) {
if (branch) branch.end = prev(); if (branch) branch.end = prev();
cur = []; cur = [];
branch = new AST_Default({ branch = new AST_Default({
start : prog1(S.token, next, curry(expect, ":")), start : (tmp = S.token, next(), expect(":"), tmp),
body : cur body : cur
}) });
a.push(branch); a.push(branch);
} }
else { else {
@@ -984,7 +969,7 @@ function parse($TEXT, options) {
if (branch) branch.end = prev(); if (branch) branch.end = prev();
next(); next();
return a; return a;
}); };
function try_() { function try_() {
var body = block_(), bcatch = null, bfinally = null; var body = block_(), bcatch = null, bfinally = null;
@@ -1110,7 +1095,7 @@ function parse($TEXT, options) {
switch (start.value) { switch (start.value) {
case "(": case "(":
next(); next();
var ex = expression(); var ex = expression(true);
ex.start = start; ex.start = start;
ex.end = S.token; ex.end = S.token;
expect(")"); expect(")");
@@ -1201,6 +1186,7 @@ function parse($TEXT, options) {
}); });
function as_property_name() { function as_property_name() {
var tmp;
switch (S.token.type) { switch (S.token.type) {
case "num": case "num":
case "string": case "string":
@@ -1208,19 +1194,20 @@ function parse($TEXT, options) {
case "operator": case "operator":
case "keyword": case "keyword":
case "atom": case "atom":
return prog1(S.token.value, next); return (tmp = S.token.value, next(), tmp);
default: default:
unexpected(); unexpected();
} }
}; };
function as_name() { function as_name() {
var tmp;
switch (S.token.type) { switch (S.token.type) {
case "name": case "name":
case "operator": case "operator":
case "keyword": case "keyword":
case "atom": case "atom":
return prog1(S.token.value, next); return (tmp = S.token.value, next(), tmp);
default: default:
unexpected(); unexpected();
} }
@@ -1254,7 +1241,7 @@ function parse($TEXT, options) {
} }
if (is("punc", "[")) { if (is("punc", "[")) {
next(); next();
var prop = expression(); var prop = expression(true);
expect("]"); expect("]");
return subscripts(new AST_Sub({ return subscripts(new AST_Sub({
start : start, start : start,
@@ -1276,17 +1263,17 @@ function parse($TEXT, options) {
}; };
var maybe_unary = function(allow_calls) { var maybe_unary = function(allow_calls) {
var start = S.token; var start = S.token, tmp;
if (is("operator") && UNARY_PREFIX[S.token.value]) { if (is("operator") && UNARY_PREFIX(S.token.value)) {
var ex = make_unary(AST_UnaryPrefix, var ex = make_unary(AST_UnaryPrefix,
prog1(S.token.value, next), (tmp = S.token.value, next(), tmp),
maybe_unary(allow_calls)); maybe_unary(allow_calls));
ex.start = start; ex.start = start;
ex.end = prev(); ex.end = prev();
return ex; return ex;
} }
var val = expr_atom(allow_calls); var val = expr_atom(allow_calls);
while (is("operator") && UNARY_POSTFIX[S.token.value] && !S.token.nlb) { while (is("operator") && UNARY_POSTFIX(S.token.value) && !S.token.nlb) {
val = make_unary(AST_UnaryPostfix, S.token.value, val); val = make_unary(AST_UnaryPostfix, S.token.value, val);
val.start = start; val.start = start;
val.end = S.token; val.end = S.token;
@@ -1357,13 +1344,13 @@ function parse($TEXT, options) {
var maybe_assign = function(no_in) { var maybe_assign = function(no_in) {
var start = S.token; var start = S.token;
var left = maybe_conditional(no_in), val = S.token.value; var left = maybe_conditional(no_in), val = S.token.value;
if (is("operator") && ASSIGNMENT[val]) { if (is("operator") && ASSIGNMENT(val)) {
if (is_assignable(left)) { if (is_assignable(left)) {
next(); next();
return new AST_Assign({ return new AST_Assign({
start : start, start : start,
left : left, left : left,
operator : ASSIGNMENT[val], operator : val,
right : maybe_assign(no_in), right : maybe_assign(no_in),
end : peek() end : peek()
}); });
@@ -1374,8 +1361,6 @@ function parse($TEXT, options) {
}; };
var expression = function(commas, no_in) { var expression = function(commas, no_in) {
if (arguments.length == 0)
commas = true;
var start = S.token; var start = S.token;
var expr = maybe_assign(no_in); var expr = maybe_assign(no_in);
if (commas && is("punc", ",")) { if (commas && is("punc", ",")) {

View File

@@ -43,19 +43,6 @@
"use strict"; "use strict";
function curry(f) {
var args = slice(arguments, 1);
return function() { return f.apply(this, args.concat(slice(arguments))); };
};
function prog1(ret) {
if (ret instanceof Function)
ret = ret();
for (var i = 1, n = arguments.length; --n > 0; ++i)
arguments[i]();
return ret;
};
function array_to_hash(a) { function array_to_hash(a) {
var ret = Object.create(null); var ret = Object.create(null);
for (var i = 0; i < a.length; ++i) for (var i = 0; i < a.length; ++i)