stealing more hacks from acorn in the name of speed

This commit is contained in:
Mihai Bazon
2012-10-11 13:00:58 +03:00
parent f4584af42c
commit fb5c01c073
2 changed files with 158 additions and 186 deletions

View File

@@ -126,17 +126,18 @@ var UNICODE = {
connector_punctuation: new RegExp("[\\u005F\\u203F\\u2040\\u2054\\uFE33\\uFE34\\uFE4D-\\uFE4F\\uFF3F]")
};
function is_letter(ch) {
return UNICODE.letter.test(ch);
function is_letter(code) {
return (code >= 97 && code <= 122)
|| (code >= 65 && code <= 90)
|| (code >= 0xaa && UNICODE.letter.test(String.fromCharCode(code)));
};
function is_digit(ch) {
ch = ch.charCodeAt(0);
return ch >= 48 && ch <= 57; //XXX: find out if "UnicodeDigit" means something else than 0..9
function is_digit(code) {
return code >= 48 && code <= 57; //XXX: find out if "UnicodeDigit" means something else than 0..9
};
function is_alphanumeric_char(ch) {
return is_digit(ch) || is_letter(ch);
function is_alphanumeric_char(code) {
return is_digit(code) || is_letter(code);
};
function is_unicode_combining_mark(ch) {
@@ -148,21 +149,21 @@ function is_unicode_connector_punctuation(ch) {
};
function is_identifier(name) {
return /^[a-z_$][a-z0-9_$]*$/i.test(name)
&& !RESERVED_WORDS(name)
return /^[a-z_$][a-z0-9_$]*$/i.test(name) && !RESERVED_WORDS(name);
};
function is_identifier_start(ch) {
return ch == "$" || ch == "_" || is_letter(ch);
function is_identifier_start(code) {
return code == 36 || code == 95 || is_letter(code);
};
function is_identifier_char(ch) {
return is_identifier_start(ch)
var code = ch.charCodeAt(0);
return is_identifier_start(code)
|| is_digit(code)
|| code == 8204 // \u200c: zero-width non-joiner <ZWNJ>
|| code == 8205 // \u200d: zero-width joiner <ZWJ> (in my ECMA-262 PDF, this is also 200c)
|| is_unicode_combining_mark(ch)
|| is_digit(ch)
|| is_unicode_connector_punctuation(ch)
|| ch == "\u200c" // zero-width non-joiner <ZWNJ>
|| ch == "\u200d" // zero-width joiner <ZWJ> (in my ECMA-262 PDF, this is also 200c)
;
};
@@ -280,11 +281,9 @@ function tokenizer($TEXT, filename) {
};
function read_while(pred) {
var ret = "", ch = peek(), i = 0;
while (ch && pred(ch, i++)) {
var ret = "", ch, i = 0;
while ((ch = peek()) && pred(ch, i++))
ret += next();
ch = peek();
}
return ret;
};
@@ -295,29 +294,22 @@ function tokenizer($TEXT, filename) {
function read_num(prefix) {
var has_e = false, after_e = false, has_x = false, has_dot = prefix == ".";
var num = read_while(function(ch, i){
if (ch == "x" || ch == "X") {
if (has_x) return false;
return has_x = true;
var code = ch.charCodeAt(0);
switch (code) {
case 120: case 88: // xX
return has_x ? false : (has_x = true);
case 101: case 69: // eE
return has_x ? true : has_e ? false : (has_e = after_e = true);
case 45: // -
return after_e || (i == 0 && !prefix);
case 43: // +
return after_e;
case (after_e = false, 46): // .
return (!has_dot && !has_x && !has_e) ? (has_dot = true) : false;
}
if (!has_x && (ch == "E" || ch == "e")) {
if (has_e) return false;
return has_e = after_e = true;
}
if (ch == "-") {
if (after_e || (i == 0 && !prefix)) return true;
return false;
}
if (ch == "+") return after_e;
after_e = false;
if (ch == ".") {
if (!has_dot && !has_x && !has_e)
return has_dot = true;
return false;
}
return is_alphanumeric_char(ch);
return is_alphanumeric_char(code);
});
if (prefix)
num = prefix + num;
if (prefix) num = prefix + num;
var valid = parse_js_number(num);
if (!isNaN(valid)) {
return token("num", valid);
@@ -328,17 +320,17 @@ function tokenizer($TEXT, filename) {
function read_escaped_char(in_string) {
var ch = next(true, in_string);
switch (ch) {
case "n" : return "\n";
case "r" : return "\r";
case "t" : return "\t";
case "b" : return "\b";
case "v" : return "\u000b";
case "f" : return "\f";
case "0" : return "\0";
case "x" : return String.fromCharCode(hex_bytes(2));
case "u" : return String.fromCharCode(hex_bytes(4));
case "\n": return "";
switch (ch.charCodeAt(0)) {
case 110 : return "\n";
case 114 : return "\r";
case 116 : return "\t";
case 98 : return "\b";
case 118 : return "\u000b"; // \v
case 102 : return "\f";
case 48 : return "\0";
case 120 : return String.fromCharCode(hex_bytes(2)); // \x
case 117 : return String.fromCharCode(hex_bytes(4)); // \u
case 10 : return ""; // newline
default : return ch;
}
};
@@ -354,35 +346,33 @@ function tokenizer($TEXT, filename) {
return num;
};
function read_string() {
return with_eof_error("Unterminated string constant", function(){
var quote = next(), ret = "";
for (;;) {
var ch = next(true);
if (ch == "\\") {
// read OctalEscapeSequence (XXX: deprecated if "strict mode")
// https://github.com/mishoo/UglifyJS/issues/178
var octal_len = 0, first = null;
ch = read_while(function(ch){
if (ch >= "0" && ch <= "7") {
if (!first) {
first = ch;
return ++octal_len;
}
else if (first <= "3" && octal_len <= 2) return ++octal_len;
else if (first >= "4" && octal_len <= 1) return ++octal_len;
var read_string = with_eof_error("Unterminated string constant", function(){
var quote = next(), ret = "";
for (;;) {
var ch = next(true);
if (ch == "\\") {
// read OctalEscapeSequence (XXX: deprecated if "strict mode")
// https://github.com/mishoo/UglifyJS/issues/178
var octal_len = 0, first = null;
ch = read_while(function(ch){
if (ch >= "0" && ch <= "7") {
if (!first) {
first = ch;
return ++octal_len;
}
return false;
});
if (octal_len > 0) ch = String.fromCharCode(parseInt(ch, 8));
else ch = read_escaped_char(true);
}
else if (ch == quote) break;
ret += ch;
else if (first <= "3" && octal_len <= 2) return ++octal_len;
else if (first >= "4" && octal_len <= 1) return ++octal_len;
}
return false;
});
if (octal_len > 0) ch = String.fromCharCode(parseInt(ch, 8));
else ch = read_escaped_char(true);
}
return token("string", ret);
});
};
else if (ch == quote) break;
ret += ch;
}
return token("string", ret);
});
function read_line_comment() {
next();
@@ -397,17 +387,20 @@ function tokenizer($TEXT, filename) {
return token("comment1", ret, true);
};
function read_multiline_comment() {
var read_multiline_comment = with_eof_error("Unterminated multiline comment", function(){
next();
return with_eof_error("Unterminated multiline comment", function(){
var i = find("*/", true),
text = S.text.substring(S.pos, i);
S.pos = i + 2;
S.line += text.split("\n").length - 1;
S.newline_before = S.newline_before || text.indexOf("\n") >= 0;
return token("comment2", text, true);
});
};
var i = find("*/", true);
var text = S.text.substring(S.pos, i);
var a = text.split("\n"), n = a.length;
// update stream position
S.pos = i + 2;
S.line += n - 1;
if (n > 1) S.col = a[n - 1].length;
else S.col += a[n - 1].length;
S.col += 2;
S.newline_before = S.newline_before || text.indexOf("\n") >= 0;
return token("comment2", text, true);
});
function read_name() {
var backslash = false, name = "", ch, escaped = false, hex;
@@ -432,29 +425,27 @@ function tokenizer($TEXT, filename) {
return name;
};
function read_regexp(regexp) {
return with_eof_error("Unterminated regular expression", function(){
var prev_backslash = false, ch, in_class = false;
while ((ch = next(true))) if (prev_backslash) {
regexp += "\\" + ch;
prev_backslash = false;
} else if (ch == "[") {
in_class = true;
regexp += ch;
} else if (ch == "]" && in_class) {
in_class = false;
regexp += ch;
} else if (ch == "/" && !in_class) {
break;
} else if (ch == "\\") {
prev_backslash = true;
} else {
regexp += ch;
}
var mods = read_name();
return token("regexp", new RegExp(regexp, mods));
});
};
var read_regexp = with_eof_error("Unterminated regular expression", function(regexp){
var prev_backslash = false, ch, in_class = false;
while ((ch = next(true))) if (prev_backslash) {
regexp += "\\" + ch;
prev_backslash = false;
} else if (ch == "[") {
in_class = true;
regexp += ch;
} else if (ch == "]" && in_class) {
in_class = false;
regexp += ch;
} else if (ch == "/" && !in_class) {
break;
} else if (ch == "\\") {
prev_backslash = true;
} else {
regexp += ch;
}
var mods = read_name();
return token("regexp", new RegExp(regexp, mods));
});
function read_operator(prefix) {
function grow(op) {
@@ -488,7 +479,7 @@ function tokenizer($TEXT, filename) {
function handle_dot() {
next();
return is_digit(peek())
return is_digit(peek().charCodeAt(0))
? read_num(".")
: token("punc", ".");
};
@@ -502,12 +493,14 @@ function tokenizer($TEXT, filename) {
};
function with_eof_error(eof_error, cont) {
try {
return cont();
} catch(ex) {
if (ex === EX_EOF) parse_error(eof_error);
else throw ex;
}
return function(x) {
try {
return cont(x);
} catch(ex) {
if (ex === EX_EOF) parse_error(eof_error);
else throw ex;
}
};
};
function next_token(force_regexp) {
@@ -517,13 +510,16 @@ function tokenizer($TEXT, filename) {
start_token();
var ch = peek();
if (!ch) return token("eof");
if (is_digit(ch)) return read_num();
if (ch == '"' || ch == "'") return read_string();
var code = ch.charCodeAt(0);
switch (code) {
case 34: case 39: return read_string();
case 46: return handle_dot();
case 47: return handle_slash();
}
if (is_digit(code)) return read_num();
if (PUNC_CHARS(ch)) return token("punc", next());
if (ch == ".") return handle_dot();
if (ch == "/") return handle_slash();
if (OPERATOR_CHARS(ch)) return read_operator();
if (ch == "\\" || is_identifier_start(ch)) return read_word();
if (code == 92 || is_identifier_start(code)) return read_word();
parse_error("Unexpected character '" + ch + "'");
};
@@ -538,7 +534,7 @@ function tokenizer($TEXT, filename) {
/* -----[ Parser (constants) ]----- */
var UNARY_PREFIX = array_to_hash([
var UNARY_PREFIX = makePredicate([
"typeof",
"void",
"delete",
@@ -550,19 +546,9 @@ var UNARY_PREFIX = array_to_hash([
"+"
]);
var UNARY_POSTFIX = array_to_hash([ "--", "++" ]);
var UNARY_POSTFIX = makePredicate([ "--", "++" ]);
var ASSIGNMENT = (function(a, ret, i){
while (i < a.length) {
ret[a[i]] = a[i];
i++;
}
return ret;
})(
[ "=", "+=", "-=", "/=", "*=", "%=", ">>=", "<<=", ">>>=", "|=", "^=", "&=" ],
{},
0
);
var ASSIGNMENT = makePredicate([ "=", "+=", "-=", "/=", "*=", "%=", ">>=", "<<=", ">>>=", "|=", "^=", "&=" ]);
var PRECEDENCE = (function(a, ret){
for (var i = 0, n = 1; i < a.length; ++i, ++n) {
@@ -680,7 +666,7 @@ function parse($TEXT, options) {
function parenthesised() {
expect("(");
var exp = expression();
var exp = expression(true);
expect(")");
return exp;
};
@@ -688,7 +674,7 @@ function parse($TEXT, options) {
function embed_tokens(parser) {
return function() {
var start = S.token;
var expr = parser.apply(this, arguments);
var expr = parser();
var end = prev();
expr.start = start;
expr.end = end;
@@ -697,6 +683,7 @@ function parse($TEXT, options) {
};
var statement = embed_tokens(function() {
var tmp;
if (is("operator", "/") || is("operator", "/=")) {
S.peeked = null;
S.token = S.input(S.token.value.substr(1)); // force regexp
@@ -738,7 +725,7 @@ function parse($TEXT, options) {
}
case "keyword":
switch (prog1(S.token.value, next)) {
switch (tmp = S.token.value, next(), tmp) {
case "break":
return break_cont(AST_Break);
@@ -752,7 +739,7 @@ function parse($TEXT, options) {
case "do":
return new AST_Do({
body : in_loop(statement),
condition : (expect_token("keyword", "while"), prog1(parenthesised, semicolon))
condition : (expect_token("keyword", "while"), tmp = parenthesised(), semicolon(), tmp)
});
case "while":
@@ -778,30 +765,30 @@ function parse($TEXT, options) {
? (next(), null)
: can_insert_semicolon()
? null
: prog1(expression, semicolon) )
: (tmp = expression(true), semicolon(), tmp) )
});
case "switch":
return new AST_Switch({
expression : parenthesised(),
body : switch_body_()
body : in_loop(switch_body_)
});
case "throw":
if (S.token.nlb)
croak("Illegal newline after 'throw'");
return new AST_Throw({
value: prog1(expression, semicolon)
value: (tmp = expression(true), semicolon(), tmp)
});
case "try":
return try_();
case "var":
return prog1(var_, semicolon);
return tmp = var_(), semicolon(), tmp;
case "const":
return prog1(const_, semicolon);
return tmp = const_(), semicolon(), tmp;
case "with":
return new AST_With({
@@ -831,8 +818,8 @@ function parse($TEXT, options) {
return new AST_LabeledStatement({ body: stat, label: label });
};
function simple_statement() {
return new AST_SimpleStatement({ body: prog1(expression, semicolon) });
function simple_statement(tmp) {
return new AST_SimpleStatement({ body: (tmp = expression(true), semicolon(), tmp) });
};
function break_cont(type) {
@@ -869,9 +856,9 @@ function parse($TEXT, options) {
function regular_for(init) {
expect(";");
var test = is("punc", ";") ? null : expression();
var test = is("punc", ";") ? null : expression(true);
expect(";");
var step = is("punc", ")") ? null : expression();
var step = is("punc", ")") ? null : expression(true);
expect(")");
return new AST_For({
init : init,
@@ -883,7 +870,7 @@ function parse($TEXT, options) {
function for_in(init) {
var lhs = init instanceof AST_Var ? init.definitions[0].name : null;
var obj = expression();
var obj = expression(true);
expect(")");
return new AST_ForIn({
init : init,
@@ -911,10 +898,8 @@ function parse($TEXT, options) {
next();
return a;
})(true, []),
body: embed_tokens(function(){
body: (function(loop, labels){
++S.in_function;
var loop = S.in_loop;
var labels = S.labels;
S.in_directives = true;
S.in_loop = 0;
S.labels = [];
@@ -923,7 +908,7 @@ function parse($TEXT, options) {
S.in_loop = loop;
S.labels = labels;
return a;
})()
})(S.in_loop, S.labels)
});
};
@@ -951,17 +936,17 @@ function parse($TEXT, options) {
return a;
};
var switch_body_ = curry(in_loop, function(){
function switch_body_() {
expect("{");
var a = [], cur = null, branch = null;
var a = [], cur = null, branch = null, tmp;
while (!is("punc", "}")) {
if (is("eof")) unexpected();
if (is("keyword", "case")) {
if (branch) branch.end = prev();
cur = [];
branch = new AST_Case({
start : prog1(S.token, next),
expression : expression(),
start : (tmp = S.token, next(), tmp),
expression : expression(true),
body : cur
});
a.push(branch);
@@ -971,9 +956,9 @@ function parse($TEXT, options) {
if (branch) branch.end = prev();
cur = [];
branch = new AST_Default({
start : prog1(S.token, next, curry(expect, ":")),
start : (tmp = S.token, next(), expect(":"), tmp),
body : cur
})
});
a.push(branch);
}
else {
@@ -984,7 +969,7 @@ function parse($TEXT, options) {
if (branch) branch.end = prev();
next();
return a;
});
};
function try_() {
var body = block_(), bcatch = null, bfinally = null;
@@ -1110,7 +1095,7 @@ function parse($TEXT, options) {
switch (start.value) {
case "(":
next();
var ex = expression();
var ex = expression(true);
ex.start = start;
ex.end = S.token;
expect(")");
@@ -1201,6 +1186,7 @@ function parse($TEXT, options) {
});
function as_property_name() {
var tmp;
switch (S.token.type) {
case "num":
case "string":
@@ -1208,19 +1194,20 @@ function parse($TEXT, options) {
case "operator":
case "keyword":
case "atom":
return prog1(S.token.value, next);
return (tmp = S.token.value, next(), tmp);
default:
unexpected();
}
};
function as_name() {
var tmp;
switch (S.token.type) {
case "name":
case "operator":
case "keyword":
case "atom":
return prog1(S.token.value, next);
return (tmp = S.token.value, next(), tmp);
default:
unexpected();
}
@@ -1254,7 +1241,7 @@ function parse($TEXT, options) {
}
if (is("punc", "[")) {
next();
var prop = expression();
var prop = expression(true);
expect("]");
return subscripts(new AST_Sub({
start : start,
@@ -1276,17 +1263,17 @@ function parse($TEXT, options) {
};
var maybe_unary = function(allow_calls) {
var start = S.token;
if (is("operator") && UNARY_PREFIX[S.token.value]) {
var start = S.token, tmp;
if (is("operator") && UNARY_PREFIX(S.token.value)) {
var ex = make_unary(AST_UnaryPrefix,
prog1(S.token.value, next),
(tmp = S.token.value, next(), tmp),
maybe_unary(allow_calls));
ex.start = start;
ex.end = prev();
return ex;
}
var val = expr_atom(allow_calls);
while (is("operator") && UNARY_POSTFIX[S.token.value] && !S.token.nlb) {
while (is("operator") && UNARY_POSTFIX(S.token.value) && !S.token.nlb) {
val = make_unary(AST_UnaryPostfix, S.token.value, val);
val.start = start;
val.end = S.token;
@@ -1357,13 +1344,13 @@ function parse($TEXT, options) {
var maybe_assign = function(no_in) {
var start = S.token;
var left = maybe_conditional(no_in), val = S.token.value;
if (is("operator") && ASSIGNMENT[val]) {
if (is("operator") && ASSIGNMENT(val)) {
if (is_assignable(left)) {
next();
return new AST_Assign({
start : start,
left : left,
operator : ASSIGNMENT[val],
operator : val,
right : maybe_assign(no_in),
end : peek()
});
@@ -1374,8 +1361,6 @@ function parse($TEXT, options) {
};
var expression = function(commas, no_in) {
if (arguments.length == 0)
commas = true;
var start = S.token;
var expr = maybe_assign(no_in);
if (commas && is("punc", ",")) {

View File

@@ -43,19 +43,6 @@
"use strict";
// Partial application: returns a function that invokes `f` with the
// arguments supplied to `curry` (everything after `f`) followed by
// whatever arguments the returned function itself receives.  The
// `this` value of the returned function is forwarded to `f`.
// NOTE(review): `slice` is a helper defined outside this block; it is
// presumably an array-like -> array slicer -- confirm against its definition.
function curry(f) {
    var bound = slice(arguments, 1);
    return function() {
        var rest = slice(arguments);
        return f.apply(this, bound.concat(rest));
    };
};
// Evaluates the first argument, runs the remaining arguments for their
// side effects, and returns the first value.
//   - If `ret` is a function it is called (with no arguments) and its
//     result becomes the return value; otherwise `ret` is used as-is.
//   - Every following argument is then called in order, left to right.
function prog1(ret) {
    var result = (ret instanceof Function) ? ret() : ret;
    var count = arguments.length;
    for (var idx = 1; idx < count; idx++) {
        // Invoked as a member of `arguments` on purpose: this keeps the
        // callee's `this` bound to the arguments object.
        arguments[idx]();
    }
    return result;
};
function array_to_hash(a) {
var ret = Object.create(null);
for (var i = 0; i < a.length; ++i)