using makePredicate from acorn to generate functions that test whether a

string is a keyword, reserved word, etc.

speeds up the parser a bit, though not spectacularly; still far from acorn.
This commit is contained in:
Mihai Bazon
2012-10-11 11:52:05 +03:00
parent 172aa7a93c
commit f4584af42c
2 changed files with 63 additions and 97 deletions

View File

@@ -44,89 +44,24 @@
"use strict";
var KEYWORDS = array_to_hash([
"break",
"case",
"catch",
"const",
"continue",
"debugger",
"default",
"delete",
"do",
"else",
"finally",
"for",
"function",
"if",
"in",
"instanceof",
"new",
"return",
"switch",
"throw",
"try",
"typeof",
"var",
"void",
"while",
"with"
]);
var KEYWORDS = 'break case catch const continue debugger default delete do else finally for function if in instanceof new return switch throw try typeof var void while with';
var KEYWORDS_ATOM = 'false null true';
var RESERVED_WORDS = 'abstract boolean byte char class double enum export extends final float goto implements import int interface long native package private protected public short static super synchronized this throws transient volatile'
+ " " + KEYWORDS_ATOM + " " + KEYWORDS;
var KEYWORDS_BEFORE_EXPRESSION = 'return new delete throw else case';
var RESERVED_WORDS = array_to_hash([
"abstract",
"boolean",
"byte",
"char",
"class",
"double",
"enum",
"export",
"extends",
"final",
"float",
"goto",
"implements",
"import",
"int",
"interface",
"long",
"native",
"package",
"private",
"protected",
"public",
"short",
"static",
"super",
"synchronized",
"throws",
"transient",
"volatile"
]);
KEYWORDS = makePredicate(KEYWORDS);
RESERVED_WORDS = makePredicate(RESERVED_WORDS);
KEYWORDS_BEFORE_EXPRESSION = makePredicate(KEYWORDS_BEFORE_EXPRESSION);
KEYWORDS_ATOM = makePredicate(KEYWORDS_ATOM);
var KEYWORDS_BEFORE_EXPRESSION = array_to_hash([
"return",
"new",
"delete",
"throw",
"else",
"case"
]);
var KEYWORDS_ATOM = array_to_hash([
"false",
"null",
"true"
]);
var OPERATOR_CHARS = array_to_hash(characters("+-*&%=<>!?|~^"));
var OPERATOR_CHARS = makePredicate(characters("+-*&%=<>!?|~^"));
var RE_HEX_NUMBER = /^0x[0-9a-f]+$/i;
var RE_OCT_NUMBER = /^0[0-7]+$/;
var RE_DEC_NUMBER = /^\d*\.?\d*(?:e[+-]?\d*(?:\d\.?|\.?\d)\d*)?$/i;
var OPERATORS = array_to_hash([
var OPERATORS = makePredicate([
"in",
"instanceof",
"typeof",
@@ -173,13 +108,13 @@ var OPERATORS = array_to_hash([
"||"
]);
var WHITESPACE_CHARS = array_to_hash(characters(" \u00a0\n\r\t\f\u000b\u200b\u180e\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u202f\u205f\u3000"));
var WHITESPACE_CHARS = makePredicate(characters(" \u00a0\n\r\t\f\u000b\u200b\u180e\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u202f\u205f\u3000"));
var PUNC_BEFORE_EXPRESSION = array_to_hash(characters("[{(,.;:"));
var PUNC_BEFORE_EXPRESSION = makePredicate(characters("[{(,.;:"));
var PUNC_CHARS = array_to_hash(characters("[]{}(),;:"));
var PUNC_CHARS = makePredicate(characters("[]{}(),;:"));
var REGEXP_MODIFIERS = array_to_hash(characters("gmsiy"));
var REGEXP_MODIFIERS = makePredicate(characters("gmsiy"));
/* -----[ Tokenizer ]----- */
@@ -214,10 +149,7 @@ function is_unicode_connector_punctuation(ch) {
function is_identifier(name) {
return /^[a-z_$][a-z0-9_$]*$/i.test(name)
&& name != "this"
&& !KEYWORDS_ATOM[name]
&& !RESERVED_WORDS[name]
&& !KEYWORDS[name];
&& !RESERVED_WORDS(name)
};
function is_identifier_start(ch) {
@@ -318,8 +250,8 @@ function tokenizer($TEXT, filename) {
function token(type, value, is_comment) {
S.regex_allowed = ((type == "operator" && !UNARY_POSTFIX[value]) ||
(type == "keyword" && KEYWORDS_BEFORE_EXPRESSION[value]) ||
(type == "punc" && PUNC_BEFORE_EXPRESSION[value]));
(type == "keyword" && KEYWORDS_BEFORE_EXPRESSION(value)) ||
(type == "punc" && PUNC_BEFORE_EXPRESSION(value)));
var ret = {
type : type,
value : value,
@@ -343,7 +275,7 @@ function tokenizer($TEXT, filename) {
};
function skip_whitespace() {
while (WHITESPACE_CHARS[peek()])
while (WHITESPACE_CHARS(peek()))
next();
};
@@ -493,7 +425,7 @@ function tokenizer($TEXT, filename) {
backslash = false;
}
}
if (KEYWORDS[name] && escaped) {
if (KEYWORDS(name) && escaped) {
hex = name.charCodeAt(0).toString(16).toUpperCase();
name = "\\u" + "0000".substr(hex.length) + hex + name.slice(1);
}
@@ -528,7 +460,7 @@ function tokenizer($TEXT, filename) {
function grow(op) {
if (!peek()) return op;
var bigger = op + peek();
if (OPERATORS[bigger]) {
if (OPERATORS(bigger)) {
next();
return grow(bigger);
} else {
@@ -563,12 +495,9 @@ function tokenizer($TEXT, filename) {
function read_word() {
var word = read_name();
return KEYWORDS_ATOM[word]
? token("atom", word)
: !KEYWORDS[word]
? token("name", word)
: OPERATORS[word]
? token("operator", word)
return KEYWORDS_ATOM(word) ? token("atom", word)
: !KEYWORDS(word) ? token("name", word)
: OPERATORS(word) ? token("operator", word)
: token("keyword", word);
};
@@ -590,10 +519,10 @@ function tokenizer($TEXT, filename) {
if (!ch) return token("eof");
if (is_digit(ch)) return read_num();
if (ch == '"' || ch == "'") return read_string();
if (PUNC_CHARS[ch]) return token("punc", next());
if (PUNC_CHARS(ch)) return token("punc", next());
if (ch == ".") return handle_dot();
if (ch == "/") return handle_slash();
if (OPERATOR_CHARS[ch]) return read_operator();
if (OPERATOR_CHARS(ch)) return read_operator();
if (ch == "\\" || is_identifier_start(ch)) return read_word();
parse_error("Unexpected character '" + ch + "'");
};

View File

@@ -214,3 +214,40 @@ function set_intersection(a, b) {
return b.indexOf(el) >= 0;
});
};
// this function is taken from Acorn [1], written by Marijn Haverbeke
// [1] https://github.com/marijnh/acorn
//
// Compiles a list of words into a membership-test function.
//
// `words` is either an array of strings or a single string of
// space-separated words.  The returned function takes one argument,
// `str`, and returns `true` when `str` is strictly equal to one of the
// words and `false` otherwise (including for non-string arguments).
//
// The predicate is built by generating JavaScript source (`switch`
// statements over the words) and compiling it with `new Function`.
// NOTE: the word list ends up inside generated code, so it must come
// from trusted, program-defined tables -- never from external input.
function makePredicate(words) {
    if (!Array.isArray(words)) words = words.split(" ");
    // An empty list would otherwise generate `switch(str){return true}`,
    // which is a syntax error; nothing can match, so always answer false.
    if (words.length == 0) return function(str) { return false; };
    var f = "", cats = [];
    // Group the words into categories of equal length.
    out: for (var i = 0; i < words.length; ++i) {
        for (var j = 0; j < cats.length; ++j)
            if (cats[j][0].length == words[i].length) {
                cats[j].push(words[i]);
                continue out;
            }
        cats.push([words[i]]);
    }
    // Produce a string literal for the generated source.  JSON.stringify
    // leaves U+2028 / U+2029 unescaped, and ES5 engines treat those as
    // line terminators inside string literals, so escape them explicitly.
    function quote(word) {
        return JSON.stringify(word)
            .replace(/\u2028/g, "\\u2028")
            .replace(/\u2029/g, "\\u2029");
    }
    // Append code to `f` that returns whether `str` is one of `arr`.
    function compareTo(arr) {
        if (arr.length == 1) return f += "return str === " + quote(arr[0]) + ";";
        f += "switch(str){";
        for (var i = 0; i < arr.length; ++i) f += "case " + quote(arr[i]) + ":";
        f += "return true}return false;";
    }
    // When there are more than three length categories, an outer
    // switch first dispatches on the lengths, to save on comparisons.
    if (cats.length > 3) {
        // Emit the categories holding the most words first.
        cats.sort(function(a, b) { return b.length - a.length; });
        // Guard so that null/undefined yield false instead of throwing on
        // `.length`, matching what the flat `switch` below does.
        f += "if(typeof str!==\"string\")return false;";
        f += "switch(str.length){";
        for (var k = 0; k < cats.length; ++k) {
            var cat = cats[k];
            f += "case " + cat[0].length + ":";
            compareTo(cat);
        }
        // No category has this length: answer false, not undefined.
        f += "}return false;";
    // Otherwise, simply generate a flat `switch` statement.
    } else {
        compareTo(words);
    }
    return new Function("str", f);
};