using makePredicate from acorn to generate functions that test whether a

string is a keyword, reserved word, etc.

speeds up the parser a bit, though not spectacularly — still far from acorn.
This commit is contained in:
Mihai Bazon
2012-10-11 11:52:05 +03:00
parent 172aa7a93c
commit f4584af42c
2 changed files with 63 additions and 97 deletions

View File

@@ -44,89 +44,24 @@
"use strict"; "use strict";
var KEYWORDS = array_to_hash([ var KEYWORDS = 'break case catch const continue debugger default delete do else finally for function if in instanceof new return switch throw try typeof var void while with';
"break", var KEYWORDS_ATOM = 'false null true';
"case", var RESERVED_WORDS = 'abstract boolean byte char class double enum export extends final float goto implements import int interface long native package private protected public short static super synchronized this throws transient volatile'
"catch", + " " + KEYWORDS_ATOM + " " + KEYWORDS;
"const", var KEYWORDS_BEFORE_EXPRESSION = 'return new delete throw else case';
"continue",
"debugger",
"default",
"delete",
"do",
"else",
"finally",
"for",
"function",
"if",
"in",
"instanceof",
"new",
"return",
"switch",
"throw",
"try",
"typeof",
"var",
"void",
"while",
"with"
]);
var RESERVED_WORDS = array_to_hash([ KEYWORDS = makePredicate(KEYWORDS);
"abstract", RESERVED_WORDS = makePredicate(RESERVED_WORDS);
"boolean", KEYWORDS_BEFORE_EXPRESSION = makePredicate(KEYWORDS_BEFORE_EXPRESSION);
"byte", KEYWORDS_ATOM = makePredicate(KEYWORDS_ATOM);
"char",
"class",
"double",
"enum",
"export",
"extends",
"final",
"float",
"goto",
"implements",
"import",
"int",
"interface",
"long",
"native",
"package",
"private",
"protected",
"public",
"short",
"static",
"super",
"synchronized",
"throws",
"transient",
"volatile"
]);
var KEYWORDS_BEFORE_EXPRESSION = array_to_hash([ var OPERATOR_CHARS = makePredicate(characters("+-*&%=<>!?|~^"));
"return",
"new",
"delete",
"throw",
"else",
"case"
]);
var KEYWORDS_ATOM = array_to_hash([
"false",
"null",
"true"
]);
var OPERATOR_CHARS = array_to_hash(characters("+-*&%=<>!?|~^"));
var RE_HEX_NUMBER = /^0x[0-9a-f]+$/i; var RE_HEX_NUMBER = /^0x[0-9a-f]+$/i;
var RE_OCT_NUMBER = /^0[0-7]+$/; var RE_OCT_NUMBER = /^0[0-7]+$/;
var RE_DEC_NUMBER = /^\d*\.?\d*(?:e[+-]?\d*(?:\d\.?|\.?\d)\d*)?$/i; var RE_DEC_NUMBER = /^\d*\.?\d*(?:e[+-]?\d*(?:\d\.?|\.?\d)\d*)?$/i;
var OPERATORS = array_to_hash([ var OPERATORS = makePredicate([
"in", "in",
"instanceof", "instanceof",
"typeof", "typeof",
@@ -173,13 +108,13 @@ var OPERATORS = array_to_hash([
"||" "||"
]); ]);
var WHITESPACE_CHARS = array_to_hash(characters(" \u00a0\n\r\t\f\u000b\u200b\u180e\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u202f\u205f\u3000")); var WHITESPACE_CHARS = makePredicate(characters(" \u00a0\n\r\t\f\u000b\u200b\u180e\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u202f\u205f\u3000"));
var PUNC_BEFORE_EXPRESSION = array_to_hash(characters("[{(,.;:")); var PUNC_BEFORE_EXPRESSION = makePredicate(characters("[{(,.;:"));
var PUNC_CHARS = array_to_hash(characters("[]{}(),;:")); var PUNC_CHARS = makePredicate(characters("[]{}(),;:"));
var REGEXP_MODIFIERS = array_to_hash(characters("gmsiy")); var REGEXP_MODIFIERS = makePredicate(characters("gmsiy"));
/* -----[ Tokenizer ]----- */ /* -----[ Tokenizer ]----- */
@@ -214,10 +149,7 @@ function is_unicode_connector_punctuation(ch) {
function is_identifier(name) { function is_identifier(name) {
return /^[a-z_$][a-z0-9_$]*$/i.test(name) return /^[a-z_$][a-z0-9_$]*$/i.test(name)
&& name != "this" && !RESERVED_WORDS(name)
&& !KEYWORDS_ATOM[name]
&& !RESERVED_WORDS[name]
&& !KEYWORDS[name];
}; };
function is_identifier_start(ch) { function is_identifier_start(ch) {
@@ -318,8 +250,8 @@ function tokenizer($TEXT, filename) {
function token(type, value, is_comment) { function token(type, value, is_comment) {
S.regex_allowed = ((type == "operator" && !UNARY_POSTFIX[value]) || S.regex_allowed = ((type == "operator" && !UNARY_POSTFIX[value]) ||
(type == "keyword" && KEYWORDS_BEFORE_EXPRESSION[value]) || (type == "keyword" && KEYWORDS_BEFORE_EXPRESSION(value)) ||
(type == "punc" && PUNC_BEFORE_EXPRESSION[value])); (type == "punc" && PUNC_BEFORE_EXPRESSION(value)));
var ret = { var ret = {
type : type, type : type,
value : value, value : value,
@@ -343,7 +275,7 @@ function tokenizer($TEXT, filename) {
}; };
function skip_whitespace() { function skip_whitespace() {
while (WHITESPACE_CHARS[peek()]) while (WHITESPACE_CHARS(peek()))
next(); next();
}; };
@@ -493,7 +425,7 @@ function tokenizer($TEXT, filename) {
backslash = false; backslash = false;
} }
} }
if (KEYWORDS[name] && escaped) { if (KEYWORDS(name) && escaped) {
hex = name.charCodeAt(0).toString(16).toUpperCase(); hex = name.charCodeAt(0).toString(16).toUpperCase();
name = "\\u" + "0000".substr(hex.length) + hex + name.slice(1); name = "\\u" + "0000".substr(hex.length) + hex + name.slice(1);
} }
@@ -528,7 +460,7 @@ function tokenizer($TEXT, filename) {
function grow(op) { function grow(op) {
if (!peek()) return op; if (!peek()) return op;
var bigger = op + peek(); var bigger = op + peek();
if (OPERATORS[bigger]) { if (OPERATORS(bigger)) {
next(); next();
return grow(bigger); return grow(bigger);
} else { } else {
@@ -563,12 +495,9 @@ function tokenizer($TEXT, filename) {
function read_word() { function read_word() {
var word = read_name(); var word = read_name();
return KEYWORDS_ATOM[word] return KEYWORDS_ATOM(word) ? token("atom", word)
? token("atom", word) : !KEYWORDS(word) ? token("name", word)
: !KEYWORDS[word] : OPERATORS(word) ? token("operator", word)
? token("name", word)
: OPERATORS[word]
? token("operator", word)
: token("keyword", word); : token("keyword", word);
}; };
@@ -590,10 +519,10 @@ function tokenizer($TEXT, filename) {
if (!ch) return token("eof"); if (!ch) return token("eof");
if (is_digit(ch)) return read_num(); if (is_digit(ch)) return read_num();
if (ch == '"' || ch == "'") return read_string(); if (ch == '"' || ch == "'") return read_string();
if (PUNC_CHARS[ch]) return token("punc", next()); if (PUNC_CHARS(ch)) return token("punc", next());
if (ch == ".") return handle_dot(); if (ch == ".") return handle_dot();
if (ch == "/") return handle_slash(); if (ch == "/") return handle_slash();
if (OPERATOR_CHARS[ch]) return read_operator(); if (OPERATOR_CHARS(ch)) return read_operator();
if (ch == "\\" || is_identifier_start(ch)) return read_word(); if (ch == "\\" || is_identifier_start(ch)) return read_word();
parse_error("Unexpected character '" + ch + "'"); parse_error("Unexpected character '" + ch + "'");
}; };

View File

@@ -214,3 +214,40 @@ function set_intersection(a, b) {
return b.indexOf(el) >= 0; return b.indexOf(el) >= 0;
}); });
}; };
// this function is taken from Acorn [1], written by Marijn Haverbeke
// [1] https://github.com/marijnh/acorn
//
// Compiles a word list (array, or single space-separated string) into a
// fast membership predicate: the returned function takes a string and
// reports whether it is one of the words.  Instead of a hash lookup, the
// predicate is code generated with `new Function` that dispatches via
// `switch`, which is faster in the tokenizer's hot path.  The input is
// fully controlled (literal keyword lists), so `new Function` is safe here.
function makePredicate(words) {
    if (!(words instanceof Array)) words = words.split(" ");
    var f = "", cats = [];
    // Group the words into categories of equal length.
    out: for (var i = 0; i < words.length; ++i) {
        for (var j = 0; j < cats.length; ++j)
            if (cats[j][0].length == words[i].length) {
                cats[j].push(words[i]);
                continue out;
            }
        cats.push([ words[i] ]);
    }
    // Append code to `f` that compares `str` against every word in `arr`,
    // always ending in a `return` so generated cases never fall through.
    function compareTo(arr) {
        if (arr.length == 1) {
            f += "return str === " + JSON.stringify(arr[0]) + ";";
            return;
        }
        f += "switch(str){";
        for (var i = 0; i < arr.length; ++i) f += "case " + JSON.stringify(arr[i]) + ":";
        f += "return true}return false;";
    }
    // When there are more than three length categories, an outer
    // switch first dispatches on the lengths, to save on comparisons.
    if (cats.length > 3) {
        cats.sort(function(a, b) { return b.length - a.length; });
        f += "switch(str.length){";
        for (var i = 0; i < cats.length; ++i) {
            var cat = cats[i];
            f += "case " + cat[0].length + ":";
            compareTo(cat);
        }
        // Fix: without this, strings whose length matches no category made
        // the predicate return `undefined` instead of `false`, so the two
        // code paths disagreed on the return type.
        f += "}return false;";
    // Otherwise, simply generate a flat `switch` statement.
    } else {
        compareTo(words);
    }
    return new Function("str", f);
};