WIP
This commit is contained in:
345
lib/test.js
345
lib/test.js
@@ -1,346 +1,5 @@
|
||||
var func = function tokenizer($TEXT) {
|
||||
|
||||
var S = {
|
||||
text : $TEXT.replace(/\r\n?|[\n\u2028\u2029]/g, "\n").replace(/^\uFEFF/, ''),
|
||||
pos : 0,
|
||||
tokpos : 0,
|
||||
line : 0,
|
||||
tokline : 0,
|
||||
col : 0,
|
||||
tokcol : 0,
|
||||
newline_before : false,
|
||||
regex_allowed : false,
|
||||
comments_before : []
|
||||
};
|
||||
|
||||
function peek() { return S.text.charAt(S.pos); };
|
||||
|
||||
function next(signal_eof, in_string) {
|
||||
var ch = S.text.charAt(S.pos++);
|
||||
if (signal_eof && !ch)
|
||||
throw EX_EOF;
|
||||
if (ch == "\n") {
|
||||
S.newline_before = S.newline_before || !in_string;
|
||||
++S.line;
|
||||
S.col = 0;
|
||||
} else {
|
||||
++S.col;
|
||||
}
|
||||
return ch;
|
||||
};
|
||||
|
||||
function eof() {
|
||||
return !S.peek();
|
||||
};
|
||||
|
||||
function find(what, signal_eof) {
|
||||
var pos = S.text.indexOf(what, S.pos);
|
||||
if (signal_eof && pos == -1) throw EX_EOF;
|
||||
return pos;
|
||||
};
|
||||
|
||||
function start_token() {
|
||||
S.tokline = S.line;
|
||||
S.tokcol = S.col;
|
||||
S.tokpos = S.pos;
|
||||
};
|
||||
|
||||
function token(type, value, is_comment) {
|
||||
S.regex_allowed = ((type == "operator" && !HOP(UNARY_POSTFIX, value)) ||
|
||||
(type == "keyword" && HOP(KEYWORDS_BEFORE_EXPRESSION, value)) ||
|
||||
(type == "punc" && HOP(PUNC_BEFORE_EXPRESSION, value)));
|
||||
var ret = {
|
||||
type : type,
|
||||
value : value,
|
||||
line : S.tokline,
|
||||
col : S.tokcol,
|
||||
pos : S.tokpos,
|
||||
endpos : S.pos,
|
||||
nlb : S.newline_before
|
||||
};
|
||||
if (!is_comment) {
|
||||
ret.comments_before = S.comments_before;
|
||||
S.comments_before = [];
|
||||
// make note of any newlines in the comments that came before
|
||||
for (var i = 0, len = ret.comments_before.length; i < len; i++) {
|
||||
ret.nlb = ret.nlb || ret.comments_before[i].nlb;
|
||||
}
|
||||
}
|
||||
S.newline_before = false;
|
||||
return new AST_Token(ret);
|
||||
};
|
||||
|
||||
function skip_whitespace() {
|
||||
while (HOP(WHITESPACE_CHARS, peek()))
|
||||
next();
|
||||
};
|
||||
|
||||
function read_while(pred) {
|
||||
var ret = "", ch = peek(), i = 0;
|
||||
while (ch && pred(ch, i++)) {
|
||||
ret += next();
|
||||
ch = peek();
|
||||
}
|
||||
return ret;
|
||||
};
|
||||
|
||||
function parse_error(err) {
|
||||
js_error(err, S.tokline, S.tokcol, S.tokpos);
|
||||
};
|
||||
|
||||
function read_num(prefix) {
|
||||
var has_e = false, after_e = false, has_x = false, has_dot = prefix == ".";
|
||||
var num = read_while(function(ch, i){
|
||||
if (ch == "x" || ch == "X") {
|
||||
if (has_x) return false;
|
||||
return has_x = true;
|
||||
}
|
||||
if (!has_x && (ch == "E" || ch == "e")) {
|
||||
if (has_e) return false;
|
||||
return has_e = after_e = true;
|
||||
}
|
||||
if (ch == "-") {
|
||||
if (after_e || (i == 0 && !prefix)) return true;
|
||||
return false;
|
||||
}
|
||||
if (ch == "+") return after_e;
|
||||
after_e = false;
|
||||
if (ch == ".") {
|
||||
if (!has_dot && !has_x && !has_e)
|
||||
return has_dot = true;
|
||||
return false;
|
||||
}
|
||||
return is_alphanumeric_char(ch);
|
||||
});
|
||||
if (prefix)
|
||||
num = prefix + num;
|
||||
var valid = parse_js_number(num);
|
||||
if (!isNaN(valid)) {
|
||||
return token("num", valid);
|
||||
} else {
|
||||
parse_error("Invalid syntax: " + num);
|
||||
}
|
||||
};
|
||||
|
||||
function read_escaped_char(in_string) {
|
||||
var ch = next(true, in_string);
|
||||
switch (ch) {
|
||||
case "n" : return "\n";
|
||||
case "r" : return "\r";
|
||||
case "t" : return "\t";
|
||||
case "b" : return "\b";
|
||||
case "v" : return "\u000b";
|
||||
case "f" : return "\f";
|
||||
case "0" : return "\0";
|
||||
case "x" : return String.fromCharCode(hex_bytes(2));
|
||||
case "u" : return String.fromCharCode(hex_bytes(4));
|
||||
case "\n": return "";
|
||||
default : return ch;
|
||||
}
|
||||
};
|
||||
|
||||
function hex_bytes(n) {
|
||||
var num = 0;
|
||||
for (; n > 0; --n) {
|
||||
var digit = parseInt(next(true), 16);
|
||||
if (isNaN(digit))
|
||||
parse_error("Invalid hex-character pattern in string");
|
||||
num = (num << 4) | digit;
|
||||
}
|
||||
return num;
|
||||
};
|
||||
|
||||
function read_string() {
|
||||
return with_eof_error("Unterminated string constant", function(){
|
||||
var quote = next(), ret = "";
|
||||
for (;;) {
|
||||
var ch = next(true);
|
||||
if (ch == "\\") {
|
||||
// read OctalEscapeSequence (XXX: deprecated if "strict mode")
|
||||
// https://github.com/mishoo/UglifyJS/issues/178
|
||||
var octal_len = 0, first = null;
|
||||
ch = read_while(function(ch){
|
||||
if (ch >= "0" && ch <= "7") {
|
||||
if (!first) {
|
||||
first = ch;
|
||||
return ++octal_len;
|
||||
}
|
||||
else if (first <= "3" && octal_len <= 2) return ++octal_len;
|
||||
else if (first >= "4" && octal_len <= 1) return ++octal_len;
|
||||
}
|
||||
return false;
|
||||
});
|
||||
if (octal_len > 0) ch = String.fromCharCode(parseInt(ch, 8));
|
||||
else ch = read_escaped_char(true);
|
||||
}
|
||||
else if (ch == quote) break;
|
||||
ret += ch;
|
||||
}
|
||||
return token("string", ret);
|
||||
});
|
||||
};
|
||||
|
||||
function read_line_comment() {
|
||||
next();
|
||||
var i = find("\n"), ret;
|
||||
if (i == -1) {
|
||||
ret = S.text.substr(S.pos);
|
||||
S.pos = S.text.length;
|
||||
} else {
|
||||
ret = S.text.substring(S.pos, i);
|
||||
S.pos = i;
|
||||
}
|
||||
return token("comment1", ret, true);
|
||||
};
|
||||
|
||||
function read_multiline_comment() {
|
||||
next();
|
||||
return with_eof_error("Unterminated multiline comment", function(){
|
||||
var i = find("*/", true),
|
||||
text = S.text.substring(S.pos, i);
|
||||
S.pos = i + 2;
|
||||
S.line += text.split("\n").length - 1;
|
||||
S.newline_before = S.newline_before || text.indexOf("\n") >= 0;
|
||||
|
||||
// https://github.com/mishoo/UglifyJS/issues/#issue/100
|
||||
if (/^@cc_on/i.test(text)) {
|
||||
warn("WARNING: at line " + S.line);
|
||||
warn("*** Found \"conditional comment\": " + text);
|
||||
warn("*** UglifyJS DISCARDS ALL COMMENTS. This means your code might no longer work properly in Internet Explorer.");
|
||||
}
|
||||
|
||||
return token("comment2", text, true);
|
||||
});
|
||||
};
|
||||
|
||||
function read_name() {
|
||||
var backslash = false, name = "", ch, escaped = false, hex;
|
||||
while ((ch = peek()) != null) {
|
||||
if (!backslash) {
|
||||
if (ch == "\\") escaped = backslash = true, next();
|
||||
else if (is_identifier_char(ch)) name += next();
|
||||
else break;
|
||||
}
|
||||
else {
|
||||
if (ch != "u") parse_error("Expecting UnicodeEscapeSequence -- uXXXX");
|
||||
ch = read_escaped_char();
|
||||
if (!is_identifier_char(ch)) parse_error("Unicode char: " + ch.charCodeAt(0) + " is not valid in identifier");
|
||||
name += ch;
|
||||
backslash = false;
|
||||
}
|
||||
}
|
||||
if (HOP(KEYWORDS, name) && escaped) {
|
||||
hex = name.charCodeAt(0).toString(16).toUpperCase();
|
||||
name = "\\u" + "0000".substr(hex.length) + hex + name.slice(1);
|
||||
}
|
||||
return name;
|
||||
};
|
||||
|
||||
function read_regexp(regexp) {
|
||||
return with_eof_error("Unterminated regular expression", function(){
|
||||
var prev_backslash = false, ch, in_class = false;
|
||||
while ((ch = next(true))) if (prev_backslash) {
|
||||
regexp += "\\" + ch;
|
||||
prev_backslash = false;
|
||||
} else if (ch == "[") {
|
||||
in_class = true;
|
||||
regexp += ch;
|
||||
} else if (ch == "]" && in_class) {
|
||||
in_class = false;
|
||||
regexp += ch;
|
||||
} else if (ch == "/" && !in_class) {
|
||||
break;
|
||||
} else if (ch == "\\") {
|
||||
prev_backslash = true;
|
||||
} else {
|
||||
regexp += ch;
|
||||
}
|
||||
var mods = read_name();
|
||||
return token("regexp", [ regexp, mods ]);
|
||||
});
|
||||
};
|
||||
|
||||
function read_operator(prefix) {
|
||||
function grow(op) {
|
||||
if (!peek()) return op;
|
||||
var bigger = op + peek();
|
||||
if (HOP(OPERATORS, bigger)) {
|
||||
next();
|
||||
return grow(bigger);
|
||||
} else {
|
||||
return op;
|
||||
}
|
||||
};
|
||||
return token("operator", grow(prefix || next()));
|
||||
};
|
||||
|
||||
function handle_slash() {
|
||||
next();
|
||||
var regex_allowed = S.regex_allowed;
|
||||
switch (peek()) {
|
||||
case "/":
|
||||
S.comments_before.push(read_line_comment());
|
||||
S.regex_allowed = regex_allowed;
|
||||
return next_token();
|
||||
case "*":
|
||||
S.comments_before.push(read_multiline_comment());
|
||||
S.regex_allowed = regex_allowed;
|
||||
return next_token();
|
||||
}
|
||||
return S.regex_allowed ? read_regexp("") : read_operator("/");
|
||||
};
|
||||
|
||||
function handle_dot() {
|
||||
next();
|
||||
return is_digit(peek())
|
||||
? read_num(".")
|
||||
: token("punc", ".");
|
||||
};
|
||||
|
||||
function read_word() {
|
||||
var word = read_name();
|
||||
return HOP(KEYWORDS_ATOM, word)
|
||||
? token("atom", word)
|
||||
: !HOP(KEYWORDS, word)
|
||||
? token("name", word)
|
||||
: HOP(OPERATORS, word)
|
||||
? token("operator", word)
|
||||
: token("keyword", word);
|
||||
};
|
||||
|
||||
function with_eof_error(eof_error, cont) {
|
||||
try {
|
||||
return cont();
|
||||
} catch(ex) {
|
||||
if (ex === EX_EOF) parse_error(eof_error);
|
||||
else throw ex;
|
||||
}
|
||||
};
|
||||
|
||||
function next_token(force_regexp) {
|
||||
if (force_regexp != null)
|
||||
return read_regexp(force_regexp);
|
||||
skip_whitespace();
|
||||
start_token();
|
||||
var ch = peek();
|
||||
if (!ch) return token("eof");
|
||||
if (is_digit(ch)) return read_num();
|
||||
if (ch == '"' || ch == "'") return read_string();
|
||||
if (HOP(PUNC_CHARS, ch)) return token("punc", next());
|
||||
if (ch == ".") return handle_dot();
|
||||
if (ch == "/") return handle_slash();
|
||||
if (HOP(OPERATOR_CHARS, ch)) return read_operator();
|
||||
if (ch == "\\" || is_identifier_start(ch)) return read_word();
|
||||
parse_error("Unexpected character '" + ch + "'");
|
||||
};
|
||||
|
||||
next_token.context = function(nc) {
|
||||
if (nc) S = nc;
|
||||
return S;
|
||||
};
|
||||
|
||||
return next_token;
|
||||
|
||||
var func = function TEST () {
|
||||
|
||||
};
|
||||
|
||||
console.time("parse");
|
||||
|
||||
Reference in New Issue
Block a user