stealing more hacks from acorn in the name of speed

2012-10-11 13:00:58 +03:00
parent f4584af42c
commit fb5c01c073
2 changed files with 158 additions and 186 deletions
--- a/lib/parse.js
+++ b/lib/parse.js
@@ -126,17 +126,18 @@ var UNICODE = {
    connector_punctuation: new RegExp("[\\u005F\\u203F\\u2040\\u2054\\uFE33\\uFE34\\uFE4D-\\uFE4F\\uFF3F]")
 };
-function is_letter(ch) {
+function is_letter(code) {
-    return UNICODE.letter.test(ch);
+    return (code >= 97 && code <= 122)
        || (code >= 65 && code <= 90)
        || (code >= 0xaa && UNICODE.letter.test(String.fromCharCode(code)));
 };
-function is_digit(ch) {
+function is_digit(code) {
-    ch = ch.charCodeAt(0);
+    return code >= 48 && code <= 57; //XXX: find out if "UnicodeDigit" means something else than 0..9
    return ch >= 48 && ch <= 57; //XXX: find out if "UnicodeDigit" means something else than 0..9
 };
-function is_alphanumeric_char(ch) {
+function is_alphanumeric_char(code) {
-    return is_digit(ch) || is_letter(ch);
+    return is_digit(code) || is_letter(code);
 };
 function is_unicode_combining_mark(ch) {
@@ -148,21 +149,21 @@ function is_unicode_connector_punctuation(ch) {
 };
 function is_identifier(name) {
-    return /^[a-z_$][a-z0-9_$]*$/i.test(name)
+    return /^[a-z_$][a-z0-9_$]*$/i.test(name) && !RESERVED_WORDS(name);
        && !RESERVED_WORDS(name)
 };
-function is_identifier_start(ch) {
+function is_identifier_start(code) {
-    return ch == "$" || ch == "_" || is_letter(ch);
+    return code == 36 || code == 95 || is_letter(code);
 };
 function is_identifier_char(ch) {
-    return is_identifier_start(ch)
+    var code = ch.charCodeAt(0);
    return is_identifier_start(code)
        || is_digit(code)
        || code == 8204 // \u200c: zero-width non-joiner <ZWNJ>
        || code == 8205 // \u200d: zero-width joiner <ZWJ> (in my ECMA-262 PDF, this is also 200c)
        || is_unicode_combining_mark(ch)
        || is_digit(ch)
        || is_unicode_connector_punctuation(ch)
        || ch == "\u200c" // zero-width non-joiner <ZWNJ>
        || ch == "\u200d" // zero-width joiner <ZWJ> (in my ECMA-262 PDF, this is also 200c)
    ;
 };
@@ -280,11 +281,9 @@ function tokenizer($TEXT, filename) {
    };
    function read_while(pred) {
-        var ret = "", ch = peek(), i = 0;
+        var ret = "", ch, i = 0;
-        while (ch && pred(ch, i++)) {
+        while ((ch = peek()) && pred(ch, i++))
            ret += next();
            ch = peek();
        }
        return ret;
    };
@@ -295,29 +294,22 @@ function tokenizer($TEXT, filename) {
    function read_num(prefix) {
        var has_e = false, after_e = false, has_x = false, has_dot = prefix == ".";
        var num = read_while(function(ch, i){
-            if (ch == "x" || ch == "X") {
+            var code = ch.charCodeAt(0);
-                if (has_x) return false;
+            switch (code) {
-                return has_x = true;
+              case 120: case 88: // xX
                return has_x ? false : (has_x = true);
              case 101: case 69: // eE
                return has_x ? true : has_e ? false : (has_e = after_e = true);
              case 45: // -
                return after_e || (i == 0 && !prefix);
              case 43: // +
                return after_e;
              case (after_e = false, 46): // .
                return (!has_dot && !has_x && !has_e) ? (has_dot = true) : false;
            }
-            if (!has_x && (ch == "E" || ch == "e")) {
+            return is_alphanumeric_char(code);
                if (has_e) return false;
                return has_e = after_e = true;
            }
            if (ch == "-") {
                if (after_e || (i == 0 && !prefix)) return true;
                return false;
            }
            if (ch == "+") return after_e;
            after_e = false;
            if (ch == ".") {
                if (!has_dot && !has_x && !has_e)
                    return has_dot = true;
                return false;
            }
            return is_alphanumeric_char(ch);
        });
-        if (prefix)
+        if (prefix) num = prefix + num;
            num = prefix + num;
        var valid = parse_js_number(num);
        if (!isNaN(valid)) {
            return token("num", valid);
@@ -328,17 +320,17 @@ function tokenizer($TEXT, filename) {
    function read_escaped_char(in_string) {
        var ch = next(true, in_string);
-        switch (ch) {
+        switch (ch.charCodeAt(0)) {
-          case "n" : return "\n";
+          case 110 : return "\n";
-          case "r" : return "\r";
+          case 114 : return "\r";
-          case "t" : return "\t";
+          case 116 : return "\t";
-          case "b" : return "\b";
+          case 98  : return "\b";
-          case "v" : return "\u000b";
+          case 118 : return "\u000b"; // \v
-          case "f" : return "\f";
+          case 102 : return "\f";
-          case "0" : return "\0";
+          case 48  : return "\0";
-          case "x" : return String.fromCharCode(hex_bytes(2));
+          case 120 : return String.fromCharCode(hex_bytes(2)); // \x
-          case "u" : return String.fromCharCode(hex_bytes(4));
+          case 117 : return String.fromCharCode(hex_bytes(4)); // \u
-          case "\n": return "";
+          case 10  : return ""; // newline
          default  : return ch;
        }
    };
@@ -354,8 +346,7 @@ function tokenizer($TEXT, filename) {
        return num;
    };
-    function read_string() {
+    var read_string = with_eof_error("Unterminated string constant", function(){
        return with_eof_error("Unterminated string constant", function(){
        var quote = next(), ret = "";
        for (;;) {
            var ch = next(true);
@@ -382,7 +373,6 @@ function tokenizer($TEXT, filename) {
        }
        return token("string", ret);
    });
    };
    function read_line_comment() {
        next();
@@ -397,17 +387,20 @@ function tokenizer($TEXT, filename) {
        return token("comment1", ret, true);
    };
-    function read_multiline_comment() {
+    var read_multiline_comment = with_eof_error("Unterminated multiline comment", function(){
        next();
-        return with_eof_error("Unterminated multiline comment", function(){
+        var i = find("*/", true);
-            var i = find("*/", true),
+        var text = S.text.substring(S.pos, i);
-            text = S.text.substring(S.pos, i);
+        var a = text.split("\n"), n = a.length;
        // update stream position
        S.pos = i + 2;
-            S.line += text.split("\n").length - 1;
+        S.line += n - 1;
        if (n > 1) S.col = a[n - 1].length;
        else S.col += a[n - 1].length;
        S.col += 2;
        S.newline_before = S.newline_before || text.indexOf("\n") >= 0;
        return token("comment2", text, true);
    });
    };
    function read_name() {
        var backslash = false, name = "", ch, escaped = false, hex;
@@ -432,8 +425,7 @@ function tokenizer($TEXT, filename) {
        return name;
    };
-    function read_regexp(regexp) {
+    var read_regexp = with_eof_error("Unterminated regular expression", function(regexp){
        return with_eof_error("Unterminated regular expression", function(){
        var prev_backslash = false, ch, in_class = false;
        while ((ch = next(true))) if (prev_backslash) {
            regexp += "\\" + ch;
@@ -454,7 +446,6 @@ function tokenizer($TEXT, filename) {
        var mods = read_name();
        return token("regexp", new RegExp(regexp, mods));
    });
    };
    function read_operator(prefix) {
        function grow(op) {
@@ -488,7 +479,7 @@ function tokenizer($TEXT, filename) {
    function handle_dot() {
        next();
-        return is_digit(peek())
+        return is_digit(peek().charCodeAt(0))
            ? read_num(".")
            : token("punc", ".");
    };
@@ -502,13 +493,15 @@ function tokenizer($TEXT, filename) {
    };
    function with_eof_error(eof_error, cont) {
        return function(x) {
            try {
-            return cont();
+                return cont(x);
            } catch(ex) {
                if (ex === EX_EOF) parse_error(eof_error);
                else throw ex;
            }
        };
    };
    function next_token(force_regexp) {
        if (force_regexp != null)
@@ -517,13 +510,16 @@ function tokenizer($TEXT, filename) {
        start_token();
        var ch = peek();
        if (!ch) return token("eof");
-        if (is_digit(ch)) return read_num();
+        var code = ch.charCodeAt(0);
-        if (ch == '"' || ch == "'") return read_string();
+        switch (code) {
          case 34: case 39: return read_string();
          case 46: return handle_dot();
          case 47: return handle_slash();
        }
        if (is_digit(code)) return read_num();
        if (PUNC_CHARS(ch)) return token("punc", next());
        if (ch == ".") return handle_dot();
        if (ch == "/") return handle_slash();
        if (OPERATOR_CHARS(ch)) return read_operator();
-        if (ch == "\\" || is_identifier_start(ch)) return read_word();
+        if (code == 92 || is_identifier_start(code)) return read_word();
        parse_error("Unexpected character '" + ch + "'");
    };
@@ -538,7 +534,7 @@ function tokenizer($TEXT, filename) {
 /* -----[ Parser (constants) ]----- */
-var UNARY_PREFIX = array_to_hash([
+var UNARY_PREFIX = makePredicate([
    "typeof",
    "void",
    "delete",
@@ -550,19 +546,9 @@ var UNARY_PREFIX = array_to_hash([
    "+"
 ]);
-var UNARY_POSTFIX = array_to_hash([ "--", "++" ]);
+var UNARY_POSTFIX = makePredicate([ "--", "++" ]);
-var ASSIGNMENT = (function(a, ret, i){
+var ASSIGNMENT = makePredicate([ "=", "+=", "-=", "/=", "*=", "%=", ">>=", "<<=", ">>>=", "|=", "^=", "&=" ]);
    while (i < a.length) {
        ret[a[i]] = a[i];
        i++;
    }
    return ret;
 })(
    [ "=", "+=", "-=", "/=", "*=", "%=", ">>=", "<<=", ">>>=", "|=", "^=", "&=" ],
    {},
    0
 );
 var PRECEDENCE = (function(a, ret){
    for (var i = 0, n = 1; i < a.length; ++i, ++n) {
@@ -680,7 +666,7 @@ function parse($TEXT, options) {
    function parenthesised() {
        expect("(");
-        var exp = expression();
+        var exp = expression(true);
        expect(")");
        return exp;
    };
@@ -688,7 +674,7 @@ function parse($TEXT, options) {
    function embed_tokens(parser) {
        return function() {
            var start = S.token;
-            var expr = parser.apply(this, arguments);
+            var expr = parser();
            var end = prev();
            expr.start = start;
            expr.end = end;
@@ -697,6 +683,7 @@ function parse($TEXT, options) {
    };
    var statement = embed_tokens(function() {
        var tmp;
        if (is("operator", "/") || is("operator", "/=")) {
            S.peeked = null;
            S.token = S.input(S.token.value.substr(1)); // force regexp
@@ -738,7 +725,7 @@ function parse($TEXT, options) {
            }
          case "keyword":
-            switch (prog1(S.token.value, next)) {
+            switch (tmp = S.token.value, next(), tmp) {
              case "break":
                return break_cont(AST_Break);
@@ -752,7 +739,7 @@ function parse($TEXT, options) {
              case "do":
                return new AST_Do({
                    body      : in_loop(statement),
-                    condition : (expect_token("keyword", "while"), prog1(parenthesised, semicolon))
+                    condition : (expect_token("keyword", "while"), tmp = parenthesised(), semicolon(), tmp)
                });
              case "while":
@@ -778,30 +765,30 @@ function parse($TEXT, options) {
                             ? (next(), null)
                             : can_insert_semicolon()
                             ? null
-                             : prog1(expression, semicolon) )
+                             : (tmp = expression(true), semicolon(), tmp) )
                });
              case "switch":
                return new AST_Switch({
                    expression : parenthesised(),
-                    body       : switch_body_()
+                    body       : in_loop(switch_body_)
                });
              case "throw":
                if (S.token.nlb)
                    croak("Illegal newline after 'throw'");
                return new AST_Throw({
-                    value: prog1(expression, semicolon)
+                    value: (tmp = expression(true), semicolon(), tmp)
                });
              case "try":
                return try_();
              case "var":
-                return prog1(var_, semicolon);
+                return tmp = var_(), semicolon(), tmp;
              case "const":
-                return prog1(const_, semicolon);
+                return tmp = const_(), semicolon(), tmp;
              case "with":
                return new AST_With({
@@ -831,8 +818,8 @@ function parse($TEXT, options) {
        return new AST_LabeledStatement({ body: stat, label: label });
    };
-    function simple_statement() {
+    function simple_statement(tmp) {
-        return new AST_SimpleStatement({ body: prog1(expression, semicolon) });
+        return new AST_SimpleStatement({ body: (tmp = expression(true), semicolon(), tmp) });
    };
    function break_cont(type) {
@@ -869,9 +856,9 @@ function parse($TEXT, options) {
    function regular_for(init) {
        expect(";");
-        var test = is("punc", ";") ? null : expression();
+        var test = is("punc", ";") ? null : expression(true);
        expect(";");
-        var step = is("punc", ")") ? null : expression();
+        var step = is("punc", ")") ? null : expression(true);
        expect(")");
        return new AST_For({
            init      : init,
@@ -883,7 +870,7 @@ function parse($TEXT, options) {
    function for_in(init) {
        var lhs = init instanceof AST_Var ? init.definitions[0].name : null;
-        var obj = expression();
+        var obj = expression(true);
        expect(")");
        return new AST_ForIn({
            init   : init,
@@ -911,10 +898,8 @@ function parse($TEXT, options) {
                next();
                return a;
            })(true, []),
-            body: embed_tokens(function(){
+            body: (function(loop, labels){
                ++S.in_function;
                var loop = S.in_loop;
                var labels = S.labels;
                S.in_directives = true;
                S.in_loop = 0;
                S.labels = [];
@@ -923,7 +908,7 @@ function parse($TEXT, options) {
                S.in_loop = loop;
                S.labels = labels;
                return a;
-            })()
+            })(S.in_loop, S.labels)
        });
    };
@@ -951,17 +936,17 @@ function parse($TEXT, options) {
        return a;
    };
-    var switch_body_ = curry(in_loop, function(){
+    function switch_body_() {
        expect("{");
-        var a = [], cur = null, branch = null;
+        var a = [], cur = null, branch = null, tmp;
        while (!is("punc", "}")) {
            if (is("eof")) unexpected();
            if (is("keyword", "case")) {
                if (branch) branch.end = prev();
                cur = [];
                branch = new AST_Case({
-                    start      : prog1(S.token, next),
+                    start      : (tmp = S.token, next(), tmp),
-                    expression : expression(),
+                    expression : expression(true),
                    body       : cur
                });
                a.push(branch);
@@ -971,9 +956,9 @@ function parse($TEXT, options) {
                if (branch) branch.end = prev();
                cur = [];
                branch = new AST_Default({
-                    start : prog1(S.token, next, curry(expect, ":")),
+                    start : (tmp = S.token, next(), expect(":"), tmp),
                    body  : cur
-                })
+                });
                a.push(branch);
            }
            else {
@@ -984,7 +969,7 @@ function parse($TEXT, options) {
        if (branch) branch.end = prev();
        next();
        return a;
-    });
+    };
    function try_() {
        var body = block_(), bcatch = null, bfinally = null;
@@ -1110,7 +1095,7 @@ function parse($TEXT, options) {
            switch (start.value) {
              case "(":
                next();
-                var ex = expression();
+                var ex = expression(true);
                ex.start = start;
                ex.end = S.token;
                expect(")");
@@ -1201,6 +1186,7 @@ function parse($TEXT, options) {
    });
    function as_property_name() {
        var tmp;
        switch (S.token.type) {
          case "num":
          case "string":
@@ -1208,19 +1194,20 @@ function parse($TEXT, options) {
          case "operator":
          case "keyword":
          case "atom":
-            return prog1(S.token.value, next);
+            return (tmp = S.token.value, next(), tmp);
          default:
            unexpected();
        }
    };
    function as_name() {
        var tmp;
        switch (S.token.type) {
          case "name":
          case "operator":
          case "keyword":
          case "atom":
-            return prog1(S.token.value, next);
+            return (tmp = S.token.value, next(), tmp);
          default:
            unexpected();
        }
@@ -1254,7 +1241,7 @@ function parse($TEXT, options) {
        }
        if (is("punc", "[")) {
            next();
-            var prop = expression();
+            var prop = expression(true);
            expect("]");
            return subscripts(new AST_Sub({
                start      : start,
@@ -1276,17 +1263,17 @@ function parse($TEXT, options) {
    };
    var maybe_unary = function(allow_calls) {
-        var start = S.token;
+        var start = S.token, tmp;
-        if (is("operator") && UNARY_PREFIX[S.token.value]) {
+        if (is("operator") && UNARY_PREFIX(S.token.value)) {
            var ex = make_unary(AST_UnaryPrefix,
-                                prog1(S.token.value, next),
+                                (tmp = S.token.value, next(), tmp),
                                maybe_unary(allow_calls));
            ex.start = start;
            ex.end = prev();
            return ex;
        }
        var val = expr_atom(allow_calls);
-        while (is("operator") && UNARY_POSTFIX[S.token.value] && !S.token.nlb) {
+        while (is("operator") && UNARY_POSTFIX(S.token.value) && !S.token.nlb) {
            val = make_unary(AST_UnaryPostfix, S.token.value, val);
            val.start = start;
            val.end = S.token;
@@ -1357,13 +1344,13 @@ function parse($TEXT, options) {
    var maybe_assign = function(no_in) {
        var start = S.token;
        var left = maybe_conditional(no_in), val = S.token.value;
-        if (is("operator") && ASSIGNMENT[val]) {
+        if (is("operator") && ASSIGNMENT(val)) {
            if (is_assignable(left)) {
                next();
                return new AST_Assign({
                    start    : start,
                    left     : left,
-                    operator : ASSIGNMENT[val],
+                    operator : val,
                    right    : maybe_assign(no_in),
                    end      : peek()
                });
@@ -1374,8 +1361,6 @@ function parse($TEXT, options) {
    };
    var expression = function(commas, no_in) {
        if (arguments.length == 0)
            commas = true;
        var start = S.token;
        var expr = maybe_assign(no_in);
        if (commas && is("punc", ",")) {
--- a/lib/utils.js
+++ b/lib/utils.js
@@ -43,19 +43,6 @@
 "use strict";
 function curry(f) {
    var args = slice(arguments, 1);
    return function() { return f.apply(this, args.concat(slice(arguments))); };
 };
 function prog1(ret) {
    if (ret instanceof Function)
        ret = ret();
    for (var i = 1, n = arguments.length; --n > 0; ++i)
        arguments[i]();
    return ret;
 };
 function array_to_hash(a) {
    var ret = Object.create(null);
    for (var i = 0; i < a.length; ++i)