WIP

2012-08-15 13:32:37 +03:00
parent 861e26a666
commit c0ba9e2986
5 changed files with 112 additions and 419 deletions
--- a/lib/test.js
+++ b/lib/test.js
@@ -1,346 +1,5 @@
-var func = function tokenizer($TEXT) {
-
-    var S = {
-        text            : $TEXT.replace(/\r\n?|[\n\u2028\u2029]/g, "\n").replace(/^\uFEFF/, ''),
-        pos             : 0,
-        tokpos          : 0,
-        line            : 0,
-        tokline         : 0,
-        col             : 0,
-        tokcol          : 0,
-        newline_before  : false,
-        regex_allowed   : false,
-        comments_before : []
-    };
-
-    function peek() { return S.text.charAt(S.pos); };
-
-    function next(signal_eof, in_string) {
-        var ch = S.text.charAt(S.pos++);
-        if (signal_eof && !ch)
-            throw EX_EOF;
-        if (ch == "\n") {
-            S.newline_before = S.newline_before || !in_string;
-            ++S.line;
-            S.col = 0;
-        } else {
-            ++S.col;
-        }
-        return ch;
-    };
-
-    function eof() {
-        return !S.peek();
-    };
-
-    function find(what, signal_eof) {
-        var pos = S.text.indexOf(what, S.pos);
-        if (signal_eof && pos == -1) throw EX_EOF;
-        return pos;
-    };
-
-    function start_token() {
-        S.tokline = S.line;
-        S.tokcol = S.col;
-        S.tokpos = S.pos;
-    };
-
-    function token(type, value, is_comment) {
-        S.regex_allowed = ((type == "operator" && !HOP(UNARY_POSTFIX, value)) ||
-                           (type == "keyword" && HOP(KEYWORDS_BEFORE_EXPRESSION, value)) ||
-                           (type == "punc" && HOP(PUNC_BEFORE_EXPRESSION, value)));
-        var ret = {
-            type   : type,
-            value  : value,
-            line   : S.tokline,
-            col    : S.tokcol,
-            pos    : S.tokpos,
-            endpos : S.pos,
-            nlb    : S.newline_before
-        };
-        if (!is_comment) {
-            ret.comments_before = S.comments_before;
-            S.comments_before = [];
-            // make note of any newlines in the comments that came before
-            for (var i = 0, len = ret.comments_before.length; i < len; i++) {
-                ret.nlb = ret.nlb || ret.comments_before[i].nlb;
-            }
-        }
-        S.newline_before = false;
-        return new AST_Token(ret);
-    };
-
-    function skip_whitespace() {
-        while (HOP(WHITESPACE_CHARS, peek()))
-            next();
-    };
-
-    function read_while(pred) {
-        var ret = "", ch = peek(), i = 0;
-        while (ch && pred(ch, i++)) {
-            ret += next();
-            ch = peek();
-        }
-        return ret;
-    };
-
-    function parse_error(err) {
-        js_error(err, S.tokline, S.tokcol, S.tokpos);
-    };
-
-    function read_num(prefix) {
-        var has_e = false, after_e = false, has_x = false, has_dot = prefix == ".";
-        var num = read_while(function(ch, i){
-            if (ch == "x" || ch == "X") {
-                if (has_x) return false;
-                return has_x = true;
-            }
-            if (!has_x && (ch == "E" || ch == "e")) {
-                if (has_e) return false;
-                return has_e = after_e = true;
-            }
-            if (ch == "-") {
-                if (after_e || (i == 0 && !prefix)) return true;
-                return false;
-            }
-            if (ch == "+") return after_e;
-            after_e = false;
-            if (ch == ".") {
-                if (!has_dot && !has_x && !has_e)
-                    return has_dot = true;
-                return false;
-            }
-            return is_alphanumeric_char(ch);
-        });
-        if (prefix)
-            num = prefix + num;
-        var valid = parse_js_number(num);
-        if (!isNaN(valid)) {
-            return token("num", valid);
-        } else {
-            parse_error("Invalid syntax: " + num);
-        }
-    };
-
-    function read_escaped_char(in_string) {
-        var ch = next(true, in_string);
-        switch (ch) {
-          case "n" : return "\n";
-          case "r" : return "\r";
-          case "t" : return "\t";
-          case "b" : return "\b";
-          case "v" : return "\u000b";
-          case "f" : return "\f";
-          case "0" : return "\0";
-          case "x" : return String.fromCharCode(hex_bytes(2));
-          case "u" : return String.fromCharCode(hex_bytes(4));
-          case "\n": return "";
-          default  : return ch;
-        }
-    };
-
-    function hex_bytes(n) {
-        var num = 0;
-        for (; n > 0; --n) {
-            var digit = parseInt(next(true), 16);
-            if (isNaN(digit))
-                parse_error("Invalid hex-character pattern in string");
-            num = (num << 4) | digit;
-        }
-        return num;
-    };
-
-    function read_string() {
-        return with_eof_error("Unterminated string constant", function(){
-            var quote = next(), ret = "";
-            for (;;) {
-                var ch = next(true);
-                if (ch == "\\") {
-                    // read OctalEscapeSequence (XXX: deprecated if "strict mode")
-                    // https://github.com/mishoo/UglifyJS/issues/178
-                    var octal_len = 0, first = null;
-                    ch = read_while(function(ch){
-                        if (ch >= "0" && ch <= "7") {
-                            if (!first) {
-                                first = ch;
-                                return ++octal_len;
-                            }
-                            else if (first <= "3" && octal_len <= 2) return ++octal_len;
-                            else if (first >= "4" && octal_len <= 1) return ++octal_len;
-                        }
-                        return false;
-                    });
-                    if (octal_len > 0) ch = String.fromCharCode(parseInt(ch, 8));
-                    else ch = read_escaped_char(true);
-                }
-                else if (ch == quote) break;
-                ret += ch;
-            }
-            return token("string", ret);
-        });
-    };
-
-    function read_line_comment() {
-        next();
-        var i = find("\n"), ret;
-        if (i == -1) {
-            ret = S.text.substr(S.pos);
-            S.pos = S.text.length;
-        } else {
-            ret = S.text.substring(S.pos, i);
-            S.pos = i;
-        }
-        return token("comment1", ret, true);
-    };
-
-    function read_multiline_comment() {
-        next();
-        return with_eof_error("Unterminated multiline comment", function(){
-            var i = find("*/", true),
-            text = S.text.substring(S.pos, i);
-            S.pos = i + 2;
-            S.line += text.split("\n").length - 1;
-            S.newline_before = S.newline_before || text.indexOf("\n") >= 0;
-
-            // https://github.com/mishoo/UglifyJS/issues/#issue/100
-            if (/^@cc_on/i.test(text)) {
-                warn("WARNING: at line " + S.line);
-                warn("*** Found \"conditional comment\": " + text);
-                warn("*** UglifyJS DISCARDS ALL COMMENTS.  This means your code might no longer work properly in Internet Explorer.");
-            }
-
-            return token("comment2", text, true);
-        });
-    };
-
-    function read_name() {
-        var backslash = false, name = "", ch, escaped = false, hex;
-        while ((ch = peek()) != null) {
-            if (!backslash) {
-                if (ch == "\\") escaped = backslash = true, next();
-                else if (is_identifier_char(ch)) name += next();
-                else break;
-            }
-            else {
-                if (ch != "u") parse_error("Expecting UnicodeEscapeSequence -- uXXXX");
-                ch = read_escaped_char();
-                if (!is_identifier_char(ch)) parse_error("Unicode char: " + ch.charCodeAt(0) + " is not valid in identifier");
-                name += ch;
-                backslash = false;
-            }
-        }
-        if (HOP(KEYWORDS, name) && escaped) {
-            hex = name.charCodeAt(0).toString(16).toUpperCase();
-            name = "\\u" + "0000".substr(hex.length) + hex + name.slice(1);
-        }
-        return name;
-    };
-
-    function read_regexp(regexp) {
-        return with_eof_error("Unterminated regular expression", function(){
-            var prev_backslash = false, ch, in_class = false;
-            while ((ch = next(true))) if (prev_backslash) {
-                regexp += "\\" + ch;
-                prev_backslash = false;
-            } else if (ch == "[") {
-                in_class = true;
-                regexp += ch;
-            } else if (ch == "]" && in_class) {
-                in_class = false;
-                regexp += ch;
-            } else if (ch == "/" && !in_class) {
-                break;
-            } else if (ch == "\\") {
-                prev_backslash = true;
-            } else {
-                regexp += ch;
-            }
-            var mods = read_name();
-            return token("regexp", [ regexp, mods ]);
-        });
-    };
-
-    function read_operator(prefix) {
-        function grow(op) {
-            if (!peek()) return op;
-            var bigger = op + peek();
-            if (HOP(OPERATORS, bigger)) {
-                next();
-                return grow(bigger);
-            } else {
-                return op;
-            }
-        };
-        return token("operator", grow(prefix || next()));
-    };
-
-    function handle_slash() {
-        next();
-        var regex_allowed = S.regex_allowed;
-        switch (peek()) {
-          case "/":
-            S.comments_before.push(read_line_comment());
-            S.regex_allowed = regex_allowed;
-            return next_token();
-          case "*":
-            S.comments_before.push(read_multiline_comment());
-            S.regex_allowed = regex_allowed;
-            return next_token();
-        }
-        return S.regex_allowed ? read_regexp("") : read_operator("/");
-    };
-
-    function handle_dot() {
-        next();
-        return is_digit(peek())
-            ? read_num(".")
-            : token("punc", ".");
-    };
-
-    function read_word() {
-        var word = read_name();
-        return HOP(KEYWORDS_ATOM, word)
-            ? token("atom", word)
-            : !HOP(KEYWORDS, word)
-            ? token("name", word)
-            : HOP(OPERATORS, word)
-            ? token("operator", word)
-            : token("keyword", word);
-    };
-
-    function with_eof_error(eof_error, cont) {
-        try {
-            return cont();
-        } catch(ex) {
-            if (ex === EX_EOF) parse_error(eof_error);
-            else throw ex;
-        }
-    };
-
-    function next_token(force_regexp) {
-        if (force_regexp != null)
-            return read_regexp(force_regexp);
-        skip_whitespace();
-        start_token();
-        var ch = peek();
-        if (!ch) return token("eof");
-        if (is_digit(ch)) return read_num();
-        if (ch == '"' || ch == "'") return read_string();
-        if (HOP(PUNC_CHARS, ch)) return token("punc", next());
-        if (ch == ".") return handle_dot();
-        if (ch == "/") return handle_slash();
-        if (HOP(OPERATOR_CHARS, ch)) return read_operator();
-        if (ch == "\\" || is_identifier_start(ch)) return read_word();
-        parse_error("Unexpected character '" + ch + "'");
-    };
-
-    next_token.context = function(nc) {
-        if (nc) S = nc;
-        return S;
-    };
-
-    return next_token;
-
+var func = function TEST () {
+    
 };

 console.time("parse");