Steal more hacks from acorn in the name of speed: char-code dispatch in the tokenizer, inlined prog1/curry, makePredicate lookups

2012-10-11 13:00:58 +03:00
parent f4584af42c
commit fb5c01c073
2 changed files with 158 additions and 186 deletions
--- a/lib/parse.js
+++ b/lib/parse.js
@@ -126,17 +126,18 @@ var UNICODE = {
    connector_punctuation: new RegExp("[\\u005F\\u203F\\u2040\\u2054\\uFE33\\uFE34\\uFE4D-\\uFE4F\\uFF3F]")
 };

-function is_letter(ch) {
-    return UNICODE.letter.test(ch);
+function is_letter(code) {
+    return (code >= 97 && code <= 122)
+        || (code >= 65 && code <= 90)
+        || (code >= 0xaa && UNICODE.letter.test(String.fromCharCode(code)));
 };

-function is_digit(ch) {
-    ch = ch.charCodeAt(0);
-    return ch >= 48 && ch <= 57; //XXX: find out if "UnicodeDigit" means something else than 0..9
+function is_digit(code) {
+    return code >= 48 && code <= 57; //XXX: find out if "UnicodeDigit" means something else than 0..9
 };

-function is_alphanumeric_char(ch) {
-    return is_digit(ch) || is_letter(ch);
+function is_alphanumeric_char(code) {
+    return is_digit(code) || is_letter(code);
 };

 function is_unicode_combining_mark(ch) {
@@ -148,21 +149,21 @@ function is_unicode_connector_punctuation(ch) {
 };

 function is_identifier(name) {
-    return /^[a-z_$][a-z0-9_$]*$/i.test(name)
-        && !RESERVED_WORDS(name)
+    return /^[a-z_$][a-z0-9_$]*$/i.test(name) && !RESERVED_WORDS(name);
 };

-function is_identifier_start(ch) {
-    return ch == "$" || ch == "_" || is_letter(ch);
+function is_identifier_start(code) {
+    return code == 36 || code == 95 || is_letter(code);
 };

 function is_identifier_char(ch) {
-    return is_identifier_start(ch)
+    var code = ch.charCodeAt(0);
+    return is_identifier_start(code)
+        || is_digit(code)
+        || code == 8204 // \u200c: zero-width non-joiner <ZWNJ>
+        || code == 8205 // \u200d: zero-width joiner <ZWJ> (in my ECMA-262 PDF, this is also 200c)
        || is_unicode_combining_mark(ch)
-        || is_digit(ch)
        || is_unicode_connector_punctuation(ch)
-        || ch == "\u200c" // zero-width non-joiner <ZWNJ>
-        || ch == "\u200d" // zero-width joiner <ZWJ> (in my ECMA-262 PDF, this is also 200c)
    ;
 };

@@ -280,11 +281,9 @@ function tokenizer($TEXT, filename) {
    };

    function read_while(pred) {
-        var ret = "", ch = peek(), i = 0;
-        while (ch && pred(ch, i++)) {
+        var ret = "", ch, i = 0;
+        while ((ch = peek()) && pred(ch, i++))
            ret += next();
-            ch = peek();
-        }
        return ret;
    };

@@ -295,29 +294,22 @@ function tokenizer($TEXT, filename) {
    function read_num(prefix) {
        var has_e = false, after_e = false, has_x = false, has_dot = prefix == ".";
        var num = read_while(function(ch, i){
-            if (ch == "x" || ch == "X") {
-                if (has_x) return false;
-                return has_x = true;
+            var code = ch.charCodeAt(0);
+            switch (code) {
+              case 120: case 88: // xX
+                return has_x ? false : (has_x = true);
+              case 101: case 69: // eE
+                return has_x ? true : has_e ? false : (has_e = after_e = true);
+              case 45: // -
+                return after_e || (i == 0 && !prefix);
+              case 43: // +
+                return after_e;
+              case (after_e = false, 46): // .
+                return (!has_dot && !has_x && !has_e) ? (has_dot = true) : false;
            }
-            if (!has_x && (ch == "E" || ch == "e")) {
-                if (has_e) return false;
-                return has_e = after_e = true;
-            }
-            if (ch == "-") {
-                if (after_e || (i == 0 && !prefix)) return true;
-                return false;
-            }
-            if (ch == "+") return after_e;
-            after_e = false;
-            if (ch == ".") {
-                if (!has_dot && !has_x && !has_e)
-                    return has_dot = true;
-                return false;
-            }
-            return is_alphanumeric_char(ch);
+            return is_alphanumeric_char(code);
        });
-        if (prefix)
-            num = prefix + num;
+        if (prefix) num = prefix + num;
        var valid = parse_js_number(num);
        if (!isNaN(valid)) {
            return token("num", valid);
@@ -328,17 +320,17 @@ function tokenizer($TEXT, filename) {

    function read_escaped_char(in_string) {
        var ch = next(true, in_string);
-        switch (ch) {
-          case "n" : return "\n";
-          case "r" : return "\r";
-          case "t" : return "\t";
-          case "b" : return "\b";
-          case "v" : return "\u000b";
-          case "f" : return "\f";
-          case "0" : return "\0";
-          case "x" : return String.fromCharCode(hex_bytes(2));
-          case "u" : return String.fromCharCode(hex_bytes(4));
-          case "\n": return "";
+        switch (ch.charCodeAt(0)) {
+          case 110 : return "\n";
+          case 114 : return "\r";
+          case 116 : return "\t";
+          case 98  : return "\b";
+          case 118 : return "\u000b"; // \v
+          case 102 : return "\f";
+          case 48  : return "\0";
+          case 120 : return String.fromCharCode(hex_bytes(2)); // \x
+          case 117 : return String.fromCharCode(hex_bytes(4)); // \u
+          case 10  : return ""; // newline
          default  : return ch;
        }
    };
@@ -354,35 +346,33 @@ function tokenizer($TEXT, filename) {
        return num;
    };

-    function read_string() {
-        return with_eof_error("Unterminated string constant", function(){
-            var quote = next(), ret = "";
-            for (;;) {
-                var ch = next(true);
-                if (ch == "\\") {
-                    // read OctalEscapeSequence (XXX: deprecated if "strict mode")
-                    // https://github.com/mishoo/UglifyJS/issues/178
-                    var octal_len = 0, first = null;
-                    ch = read_while(function(ch){
-                        if (ch >= "0" && ch <= "7") {
-                            if (!first) {
-                                first = ch;
-                                return ++octal_len;
-                            }
-                            else if (first <= "3" && octal_len <= 2) return ++octal_len;
-                            else if (first >= "4" && octal_len <= 1) return ++octal_len;
+    var read_string = with_eof_error("Unterminated string constant", function(){
+        var quote = next(), ret = "";
+        for (;;) {
+            var ch = next(true);
+            if (ch == "\\") {
+                // read OctalEscapeSequence (XXX: deprecated if "strict mode")
+                // https://github.com/mishoo/UglifyJS/issues/178
+                var octal_len = 0, first = null;
+                ch = read_while(function(ch){
+                    if (ch >= "0" && ch <= "7") {
+                        if (!first) {
+                            first = ch;
+                            return ++octal_len;
                        }
-                        return false;
-                    });
-                    if (octal_len > 0) ch = String.fromCharCode(parseInt(ch, 8));
-                    else ch = read_escaped_char(true);
-                }
-                else if (ch == quote) break;
-                ret += ch;
+                        else if (first <= "3" && octal_len <= 2) return ++octal_len;
+                        else if (first >= "4" && octal_len <= 1) return ++octal_len;
+                    }
+                    return false;
+                });
+                if (octal_len > 0) ch = String.fromCharCode(parseInt(ch, 8));
+                else ch = read_escaped_char(true);
            }
-            return token("string", ret);
-        });
-    };
+            else if (ch == quote) break;
+            ret += ch;
+        }
+        return token("string", ret);
+    });

    function read_line_comment() {
        next();
@@ -397,17 +387,20 @@ function tokenizer($TEXT, filename) {
        return token("comment1", ret, true);
    };

-    function read_multiline_comment() {
+    var read_multiline_comment = with_eof_error("Unterminated multiline comment", function(){
        next();
-        return with_eof_error("Unterminated multiline comment", function(){
-            var i = find("*/", true),
-            text = S.text.substring(S.pos, i);
-            S.pos = i + 2;
-            S.line += text.split("\n").length - 1;
-            S.newline_before = S.newline_before || text.indexOf("\n") >= 0;
-            return token("comment2", text, true);
-        });
-    };
+        var i = find("*/", true);
+        var text = S.text.substring(S.pos, i);
+        var a = text.split("\n"), n = a.length;
+        // update stream position
+        S.pos = i + 2;
+        S.line += n - 1;
+        if (n > 1) S.col = a[n - 1].length;
+        else S.col += a[n - 1].length;
+        S.col += 2;
+        S.newline_before = S.newline_before || text.indexOf("\n") >= 0;
+        return token("comment2", text, true);
+    });

    function read_name() {
        var backslash = false, name = "", ch, escaped = false, hex;
@@ -432,29 +425,27 @@ function tokenizer($TEXT, filename) {
        return name;
    };

-    function read_regexp(regexp) {
-        return with_eof_error("Unterminated regular expression", function(){
-            var prev_backslash = false, ch, in_class = false;
-            while ((ch = next(true))) if (prev_backslash) {
-                regexp += "\\" + ch;
-                prev_backslash = false;
-            } else if (ch == "[") {
-                in_class = true;
-                regexp += ch;
-            } else if (ch == "]" && in_class) {
-                in_class = false;
-                regexp += ch;
-            } else if (ch == "/" && !in_class) {
-                break;
-            } else if (ch == "\\") {
-                prev_backslash = true;
-            } else {
-                regexp += ch;
-            }
-            var mods = read_name();
-            return token("regexp", new RegExp(regexp, mods));
-        });
-    };
+    var read_regexp = with_eof_error("Unterminated regular expression", function(regexp){
+        var prev_backslash = false, ch, in_class = false;
+        while ((ch = next(true))) if (prev_backslash) {
+            regexp += "\\" + ch;
+            prev_backslash = false;
+        } else if (ch == "[") {
+            in_class = true;
+            regexp += ch;
+        } else if (ch == "]" && in_class) {
+            in_class = false;
+            regexp += ch;
+        } else if (ch == "/" && !in_class) {
+            break;
+        } else if (ch == "\\") {
+            prev_backslash = true;
+        } else {
+            regexp += ch;
+        }
+        var mods = read_name();
+        return token("regexp", new RegExp(regexp, mods));
+    });

    function read_operator(prefix) {
        function grow(op) {
@@ -488,7 +479,7 @@ function tokenizer($TEXT, filename) {

    function handle_dot() {
        next();
-        return is_digit(peek())
+        return is_digit(peek().charCodeAt(0))
            ? read_num(".")
            : token("punc", ".");
    };
@@ -502,12 +493,14 @@ function tokenizer($TEXT, filename) {
    };

    function with_eof_error(eof_error, cont) {
-        try {
-            return cont();
-        } catch(ex) {
-            if (ex === EX_EOF) parse_error(eof_error);
-            else throw ex;
-        }
+        return function(x) {
+            try {
+                return cont(x);
+            } catch(ex) {
+                if (ex === EX_EOF) parse_error(eof_error);
+                else throw ex;
+            }
+        };
    };

    function next_token(force_regexp) {
@@ -517,13 +510,16 @@ function tokenizer($TEXT, filename) {
        start_token();
        var ch = peek();
        if (!ch) return token("eof");
-        if (is_digit(ch)) return read_num();
-        if (ch == '"' || ch == "'") return read_string();
+        var code = ch.charCodeAt(0);
+        switch (code) {
+          case 34: case 39: return read_string();
+          case 46: return handle_dot();
+          case 47: return handle_slash();
+        }
+        if (is_digit(code)) return read_num();
        if (PUNC_CHARS(ch)) return token("punc", next());
-        if (ch == ".") return handle_dot();
-        if (ch == "/") return handle_slash();
        if (OPERATOR_CHARS(ch)) return read_operator();
-        if (ch == "\\" || is_identifier_start(ch)) return read_word();
+        if (code == 92 || is_identifier_start(code)) return read_word();
        parse_error("Unexpected character '" + ch + "'");
    };

@@ -538,7 +534,7 @@ function tokenizer($TEXT, filename) {

 /* -----[ Parser (constants) ]----- */

-var UNARY_PREFIX = array_to_hash([
+var UNARY_PREFIX = makePredicate([
    "typeof",
    "void",
    "delete",
@@ -550,19 +546,9 @@ var UNARY_PREFIX = array_to_hash([
    "+"
 ]);

-var UNARY_POSTFIX = array_to_hash([ "--", "++" ]);
+var UNARY_POSTFIX = makePredicate([ "--", "++" ]);

-var ASSIGNMENT = (function(a, ret, i){
-    while (i < a.length) {
-        ret[a[i]] = a[i];
-        i++;
-    }
-    return ret;
-})(
-    [ "=", "+=", "-=", "/=", "*=", "%=", ">>=", "<<=", ">>>=", "|=", "^=", "&=" ],
-    {},
-    0
-);
+var ASSIGNMENT = makePredicate([ "=", "+=", "-=", "/=", "*=", "%=", ">>=", "<<=", ">>>=", "|=", "^=", "&=" ]);

 var PRECEDENCE = (function(a, ret){
    for (var i = 0, n = 1; i < a.length; ++i, ++n) {
@@ -680,7 +666,7 @@ function parse($TEXT, options) {

    function parenthesised() {
        expect("(");
-        var exp = expression();
+        var exp = expression(true);
        expect(")");
        return exp;
    };
@@ -688,7 +674,7 @@ function parse($TEXT, options) {
    function embed_tokens(parser) {
        return function() {
            var start = S.token;
-            var expr = parser.apply(this, arguments);
+            var expr = parser();
            var end = prev();
            expr.start = start;
            expr.end = end;
@@ -697,6 +683,7 @@ function parse($TEXT, options) {
    };

    var statement = embed_tokens(function() {
+        var tmp;
        if (is("operator", "/") || is("operator", "/=")) {
            S.peeked = null;
            S.token = S.input(S.token.value.substr(1)); // force regexp
@@ -738,7 +725,7 @@ function parse($TEXT, options) {
            }

          case "keyword":
-            switch (prog1(S.token.value, next)) {
+            switch (tmp = S.token.value, next(), tmp) {
              case "break":
                return break_cont(AST_Break);

@@ -752,7 +739,7 @@ function parse($TEXT, options) {
              case "do":
                return new AST_Do({
                    body      : in_loop(statement),
-                    condition : (expect_token("keyword", "while"), prog1(parenthesised, semicolon))
+                    condition : (expect_token("keyword", "while"), tmp = parenthesised(), semicolon(), tmp)
                });

              case "while":
@@ -778,30 +765,30 @@ function parse($TEXT, options) {
                             ? (next(), null)
                             : can_insert_semicolon()
                             ? null
-                             : prog1(expression, semicolon) )
+                             : (tmp = expression(true), semicolon(), tmp) )
                });

              case "switch":
                return new AST_Switch({
                    expression : parenthesised(),
-                    body       : switch_body_()
+                    body       : in_loop(switch_body_)
                });

              case "throw":
                if (S.token.nlb)
                    croak("Illegal newline after 'throw'");
                return new AST_Throw({
-                    value: prog1(expression, semicolon)
+                    value: (tmp = expression(true), semicolon(), tmp)
                });

              case "try":
                return try_();

              case "var":
-                return prog1(var_, semicolon);
+                return tmp = var_(), semicolon(), tmp;

              case "const":
-                return prog1(const_, semicolon);
+                return tmp = const_(), semicolon(), tmp;

              case "with":
                return new AST_With({
@@ -831,8 +818,8 @@ function parse($TEXT, options) {
        return new AST_LabeledStatement({ body: stat, label: label });
    };

-    function simple_statement() {
-        return new AST_SimpleStatement({ body: prog1(expression, semicolon) });
+    function simple_statement(tmp) {
+        return new AST_SimpleStatement({ body: (tmp = expression(true), semicolon(), tmp) });
    };

    function break_cont(type) {
@@ -869,9 +856,9 @@ function parse($TEXT, options) {

    function regular_for(init) {
        expect(";");
-        var test = is("punc", ";") ? null : expression();
+        var test = is("punc", ";") ? null : expression(true);
        expect(";");
-        var step = is("punc", ")") ? null : expression();
+        var step = is("punc", ")") ? null : expression(true);
        expect(")");
        return new AST_For({
            init      : init,
@@ -883,7 +870,7 @@ function parse($TEXT, options) {

    function for_in(init) {
        var lhs = init instanceof AST_Var ? init.definitions[0].name : null;
-        var obj = expression();
+        var obj = expression(true);
        expect(")");
        return new AST_ForIn({
            init   : init,
@@ -911,10 +898,8 @@ function parse($TEXT, options) {
                next();
                return a;
            })(true, []),
-            body: embed_tokens(function(){
+            body: (function(loop, labels){
                ++S.in_function;
-                var loop = S.in_loop;
-                var labels = S.labels;
                S.in_directives = true;
                S.in_loop = 0;
                S.labels = [];
@@ -923,7 +908,7 @@ function parse($TEXT, options) {
                S.in_loop = loop;
                S.labels = labels;
                return a;
-            })()
+            })(S.in_loop, S.labels)
        });
    };

@@ -951,17 +936,17 @@ function parse($TEXT, options) {
        return a;
    };

-    var switch_body_ = curry(in_loop, function(){
+    function switch_body_() {
        expect("{");
-        var a = [], cur = null, branch = null;
+        var a = [], cur = null, branch = null, tmp;
        while (!is("punc", "}")) {
            if (is("eof")) unexpected();
            if (is("keyword", "case")) {
                if (branch) branch.end = prev();
                cur = [];
                branch = new AST_Case({
-                    start      : prog1(S.token, next),
-                    expression : expression(),
+                    start      : (tmp = S.token, next(), tmp),
+                    expression : expression(true),
                    body       : cur
                });
                a.push(branch);
@@ -971,9 +956,9 @@ function parse($TEXT, options) {
                if (branch) branch.end = prev();
                cur = [];
                branch = new AST_Default({
-                    start : prog1(S.token, next, curry(expect, ":")),
+                    start : (tmp = S.token, next(), expect(":"), tmp),
                    body  : cur
-                })
+                });
                a.push(branch);
            }
            else {
@@ -984,7 +969,7 @@ function parse($TEXT, options) {
        if (branch) branch.end = prev();
        next();
        return a;
-    });
+    };

    function try_() {
        var body = block_(), bcatch = null, bfinally = null;
@@ -1110,7 +1095,7 @@ function parse($TEXT, options) {
            switch (start.value) {
              case "(":
                next();
-                var ex = expression();
+                var ex = expression(true);
                ex.start = start;
                ex.end = S.token;
                expect(")");
@@ -1201,6 +1186,7 @@ function parse($TEXT, options) {
    });

    function as_property_name() {
+        var tmp;
        switch (S.token.type) {
          case "num":
          case "string":
@@ -1208,19 +1194,20 @@ function parse($TEXT, options) {
          case "operator":
          case "keyword":
          case "atom":
-            return prog1(S.token.value, next);
+            return (tmp = S.token.value, next(), tmp);
          default:
            unexpected();
        }
    };

    function as_name() {
+        var tmp;
        switch (S.token.type) {
          case "name":
          case "operator":
          case "keyword":
          case "atom":
-            return prog1(S.token.value, next);
+            return (tmp = S.token.value, next(), tmp);
          default:
            unexpected();
        }
@@ -1254,7 +1241,7 @@ function parse($TEXT, options) {
        }
        if (is("punc", "[")) {
            next();
-            var prop = expression();
+            var prop = expression(true);
            expect("]");
            return subscripts(new AST_Sub({
                start      : start,
@@ -1276,17 +1263,17 @@ function parse($TEXT, options) {
    };

    var maybe_unary = function(allow_calls) {
-        var start = S.token;
-        if (is("operator") && UNARY_PREFIX[S.token.value]) {
+        var start = S.token, tmp;
+        if (is("operator") && UNARY_PREFIX(S.token.value)) {
            var ex = make_unary(AST_UnaryPrefix,
-                                prog1(S.token.value, next),
+                                (tmp = S.token.value, next(), tmp),
                                maybe_unary(allow_calls));
            ex.start = start;
            ex.end = prev();
            return ex;
        }
        var val = expr_atom(allow_calls);
-        while (is("operator") && UNARY_POSTFIX[S.token.value] && !S.token.nlb) {
+        while (is("operator") && UNARY_POSTFIX(S.token.value) && !S.token.nlb) {
            val = make_unary(AST_UnaryPostfix, S.token.value, val);
            val.start = start;
            val.end = S.token;
@@ -1357,13 +1344,13 @@ function parse($TEXT, options) {
    var maybe_assign = function(no_in) {
        var start = S.token;
        var left = maybe_conditional(no_in), val = S.token.value;
-        if (is("operator") && ASSIGNMENT[val]) {
+        if (is("operator") && ASSIGNMENT(val)) {
            if (is_assignable(left)) {
                next();
                return new AST_Assign({
                    start    : start,
                    left     : left,
-                    operator : ASSIGNMENT[val],
+                    operator : val,
                    right    : maybe_assign(no_in),
                    end      : peek()
                });
@@ -1374,8 +1361,6 @@ function parse($TEXT, options) {
    };

    var expression = function(commas, no_in) {
-        if (arguments.length == 0)
-            commas = true;
        var start = S.token;
        var expr = maybe_assign(no_in);
        if (commas && is("punc", ",")) {
--- a/lib/utils.js
+++ b/lib/utils.js
@@ -43,19 +43,6 @@

 "use strict";

-function curry(f) {
-    var args = slice(arguments, 1);
-    return function() { return f.apply(this, args.concat(slice(arguments))); };
-};
-
-function prog1(ret) {
-    if (ret instanceof Function)
-        ret = ret();
-    for (var i = 1, n = arguments.length; --n > 0; ++i)
-        arguments[i]();
-    return ret;
-};
-
 function array_to_hash(a) {
    var ret = Object.create(null);
    for (var i = 0; i < a.length; ++i)