Using makePredicate from Acorn to generate functions that test whether a
string is a keyword, reserved word, etc. speeds up the parser a bit, though not spectacularly — still far from Acorn.
This commit is contained in:
123
lib/parse.js
123
lib/parse.js
@@ -44,89 +44,24 @@
|
|||||||
|
|
||||||
"use strict";
|
"use strict";
|
||||||
|
|
||||||
var KEYWORDS = array_to_hash([
|
var KEYWORDS = 'break case catch const continue debugger default delete do else finally for function if in instanceof new return switch throw try typeof var void while with';
|
||||||
"break",
|
var KEYWORDS_ATOM = 'false null true';
|
||||||
"case",
|
var RESERVED_WORDS = 'abstract boolean byte char class double enum export extends final float goto implements import int interface long native package private protected public short static super synchronized this throws transient volatile'
|
||||||
"catch",
|
+ " " + KEYWORDS_ATOM + " " + KEYWORDS;
|
||||||
"const",
|
var KEYWORDS_BEFORE_EXPRESSION = 'return new delete throw else case';
|
||||||
"continue",
|
|
||||||
"debugger",
|
|
||||||
"default",
|
|
||||||
"delete",
|
|
||||||
"do",
|
|
||||||
"else",
|
|
||||||
"finally",
|
|
||||||
"for",
|
|
||||||
"function",
|
|
||||||
"if",
|
|
||||||
"in",
|
|
||||||
"instanceof",
|
|
||||||
"new",
|
|
||||||
"return",
|
|
||||||
"switch",
|
|
||||||
"throw",
|
|
||||||
"try",
|
|
||||||
"typeof",
|
|
||||||
"var",
|
|
||||||
"void",
|
|
||||||
"while",
|
|
||||||
"with"
|
|
||||||
]);
|
|
||||||
|
|
||||||
var RESERVED_WORDS = array_to_hash([
|
KEYWORDS = makePredicate(KEYWORDS);
|
||||||
"abstract",
|
RESERVED_WORDS = makePredicate(RESERVED_WORDS);
|
||||||
"boolean",
|
KEYWORDS_BEFORE_EXPRESSION = makePredicate(KEYWORDS_BEFORE_EXPRESSION);
|
||||||
"byte",
|
KEYWORDS_ATOM = makePredicate(KEYWORDS_ATOM);
|
||||||
"char",
|
|
||||||
"class",
|
|
||||||
"double",
|
|
||||||
"enum",
|
|
||||||
"export",
|
|
||||||
"extends",
|
|
||||||
"final",
|
|
||||||
"float",
|
|
||||||
"goto",
|
|
||||||
"implements",
|
|
||||||
"import",
|
|
||||||
"int",
|
|
||||||
"interface",
|
|
||||||
"long",
|
|
||||||
"native",
|
|
||||||
"package",
|
|
||||||
"private",
|
|
||||||
"protected",
|
|
||||||
"public",
|
|
||||||
"short",
|
|
||||||
"static",
|
|
||||||
"super",
|
|
||||||
"synchronized",
|
|
||||||
"throws",
|
|
||||||
"transient",
|
|
||||||
"volatile"
|
|
||||||
]);
|
|
||||||
|
|
||||||
var KEYWORDS_BEFORE_EXPRESSION = array_to_hash([
|
var OPERATOR_CHARS = makePredicate(characters("+-*&%=<>!?|~^"));
|
||||||
"return",
|
|
||||||
"new",
|
|
||||||
"delete",
|
|
||||||
"throw",
|
|
||||||
"else",
|
|
||||||
"case"
|
|
||||||
]);
|
|
||||||
|
|
||||||
var KEYWORDS_ATOM = array_to_hash([
|
|
||||||
"false",
|
|
||||||
"null",
|
|
||||||
"true"
|
|
||||||
]);
|
|
||||||
|
|
||||||
var OPERATOR_CHARS = array_to_hash(characters("+-*&%=<>!?|~^"));
|
|
||||||
|
|
||||||
var RE_HEX_NUMBER = /^0x[0-9a-f]+$/i;
|
var RE_HEX_NUMBER = /^0x[0-9a-f]+$/i;
|
||||||
var RE_OCT_NUMBER = /^0[0-7]+$/;
|
var RE_OCT_NUMBER = /^0[0-7]+$/;
|
||||||
var RE_DEC_NUMBER = /^\d*\.?\d*(?:e[+-]?\d*(?:\d\.?|\.?\d)\d*)?$/i;
|
var RE_DEC_NUMBER = /^\d*\.?\d*(?:e[+-]?\d*(?:\d\.?|\.?\d)\d*)?$/i;
|
||||||
|
|
||||||
var OPERATORS = array_to_hash([
|
var OPERATORS = makePredicate([
|
||||||
"in",
|
"in",
|
||||||
"instanceof",
|
"instanceof",
|
||||||
"typeof",
|
"typeof",
|
||||||
@@ -173,13 +108,13 @@ var OPERATORS = array_to_hash([
|
|||||||
"||"
|
"||"
|
||||||
]);
|
]);
|
||||||
|
|
||||||
var WHITESPACE_CHARS = array_to_hash(characters(" \u00a0\n\r\t\f\u000b\u200b\u180e\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u202f\u205f\u3000"));
|
var WHITESPACE_CHARS = makePredicate(characters(" \u00a0\n\r\t\f\u000b\u200b\u180e\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u202f\u205f\u3000"));
|
||||||
|
|
||||||
var PUNC_BEFORE_EXPRESSION = array_to_hash(characters("[{(,.;:"));
|
var PUNC_BEFORE_EXPRESSION = makePredicate(characters("[{(,.;:"));
|
||||||
|
|
||||||
var PUNC_CHARS = array_to_hash(characters("[]{}(),;:"));
|
var PUNC_CHARS = makePredicate(characters("[]{}(),;:"));
|
||||||
|
|
||||||
var REGEXP_MODIFIERS = array_to_hash(characters("gmsiy"));
|
var REGEXP_MODIFIERS = makePredicate(characters("gmsiy"));
|
||||||
|
|
||||||
/* -----[ Tokenizer ]----- */
|
/* -----[ Tokenizer ]----- */
|
||||||
|
|
||||||
@@ -214,10 +149,7 @@ function is_unicode_connector_punctuation(ch) {
|
|||||||
|
|
||||||
function is_identifier(name) {
|
function is_identifier(name) {
|
||||||
return /^[a-z_$][a-z0-9_$]*$/i.test(name)
|
return /^[a-z_$][a-z0-9_$]*$/i.test(name)
|
||||||
&& name != "this"
|
&& !RESERVED_WORDS(name)
|
||||||
&& !KEYWORDS_ATOM[name]
|
|
||||||
&& !RESERVED_WORDS[name]
|
|
||||||
&& !KEYWORDS[name];
|
|
||||||
};
|
};
|
||||||
|
|
||||||
function is_identifier_start(ch) {
|
function is_identifier_start(ch) {
|
||||||
@@ -318,8 +250,8 @@ function tokenizer($TEXT, filename) {
|
|||||||
|
|
||||||
function token(type, value, is_comment) {
|
function token(type, value, is_comment) {
|
||||||
S.regex_allowed = ((type == "operator" && !UNARY_POSTFIX[value]) ||
|
S.regex_allowed = ((type == "operator" && !UNARY_POSTFIX[value]) ||
|
||||||
(type == "keyword" && KEYWORDS_BEFORE_EXPRESSION[value]) ||
|
(type == "keyword" && KEYWORDS_BEFORE_EXPRESSION(value)) ||
|
||||||
(type == "punc" && PUNC_BEFORE_EXPRESSION[value]));
|
(type == "punc" && PUNC_BEFORE_EXPRESSION(value)));
|
||||||
var ret = {
|
var ret = {
|
||||||
type : type,
|
type : type,
|
||||||
value : value,
|
value : value,
|
||||||
@@ -343,7 +275,7 @@ function tokenizer($TEXT, filename) {
|
|||||||
};
|
};
|
||||||
|
|
||||||
function skip_whitespace() {
|
function skip_whitespace() {
|
||||||
while (WHITESPACE_CHARS[peek()])
|
while (WHITESPACE_CHARS(peek()))
|
||||||
next();
|
next();
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -493,7 +425,7 @@ function tokenizer($TEXT, filename) {
|
|||||||
backslash = false;
|
backslash = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (KEYWORDS[name] && escaped) {
|
if (KEYWORDS(name) && escaped) {
|
||||||
hex = name.charCodeAt(0).toString(16).toUpperCase();
|
hex = name.charCodeAt(0).toString(16).toUpperCase();
|
||||||
name = "\\u" + "0000".substr(hex.length) + hex + name.slice(1);
|
name = "\\u" + "0000".substr(hex.length) + hex + name.slice(1);
|
||||||
}
|
}
|
||||||
@@ -528,7 +460,7 @@ function tokenizer($TEXT, filename) {
|
|||||||
function grow(op) {
|
function grow(op) {
|
||||||
if (!peek()) return op;
|
if (!peek()) return op;
|
||||||
var bigger = op + peek();
|
var bigger = op + peek();
|
||||||
if (OPERATORS[bigger]) {
|
if (OPERATORS(bigger)) {
|
||||||
next();
|
next();
|
||||||
return grow(bigger);
|
return grow(bigger);
|
||||||
} else {
|
} else {
|
||||||
@@ -563,12 +495,9 @@ function tokenizer($TEXT, filename) {
|
|||||||
|
|
||||||
function read_word() {
|
function read_word() {
|
||||||
var word = read_name();
|
var word = read_name();
|
||||||
return KEYWORDS_ATOM[word]
|
return KEYWORDS_ATOM(word) ? token("atom", word)
|
||||||
? token("atom", word)
|
: !KEYWORDS(word) ? token("name", word)
|
||||||
: !KEYWORDS[word]
|
: OPERATORS(word) ? token("operator", word)
|
||||||
? token("name", word)
|
|
||||||
: OPERATORS[word]
|
|
||||||
? token("operator", word)
|
|
||||||
: token("keyword", word);
|
: token("keyword", word);
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -590,10 +519,10 @@ function tokenizer($TEXT, filename) {
|
|||||||
if (!ch) return token("eof");
|
if (!ch) return token("eof");
|
||||||
if (is_digit(ch)) return read_num();
|
if (is_digit(ch)) return read_num();
|
||||||
if (ch == '"' || ch == "'") return read_string();
|
if (ch == '"' || ch == "'") return read_string();
|
||||||
if (PUNC_CHARS[ch]) return token("punc", next());
|
if (PUNC_CHARS(ch)) return token("punc", next());
|
||||||
if (ch == ".") return handle_dot();
|
if (ch == ".") return handle_dot();
|
||||||
if (ch == "/") return handle_slash();
|
if (ch == "/") return handle_slash();
|
||||||
if (OPERATOR_CHARS[ch]) return read_operator();
|
if (OPERATOR_CHARS(ch)) return read_operator();
|
||||||
if (ch == "\\" || is_identifier_start(ch)) return read_word();
|
if (ch == "\\" || is_identifier_start(ch)) return read_word();
|
||||||
parse_error("Unexpected character '" + ch + "'");
|
parse_error("Unexpected character '" + ch + "'");
|
||||||
};
|
};
|
||||||
|
|||||||
37
lib/utils.js
37
lib/utils.js
@@ -214,3 +214,40 @@ function set_intersection(a, b) {
|
|||||||
return b.indexOf(el) >= 0;
|
return b.indexOf(el) >= 0;
|
||||||
});
|
});
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// this function is taken from Acorn [1], written by Marijn Haverbeke
|
||||||
|
// [1] https://github.com/marijnh/acorn
|
||||||
|
function makePredicate(words) {
|
||||||
|
if (!(words instanceof Array)) words = words.split(" ");
|
||||||
|
var f = "", cats = [];
|
||||||
|
out: for (var i = 0; i < words.length; ++i) {
|
||||||
|
for (var j = 0; j < cats.length; ++j)
|
||||||
|
if (cats[j][0].length == words[i].length) {
|
||||||
|
cats[j].push(words[i]);
|
||||||
|
continue out;
|
||||||
|
}
|
||||||
|
cats.push([words[i]]);
|
||||||
|
}
|
||||||
|
function compareTo(arr) {
|
||||||
|
if (arr.length == 1) return f += "return str === " + JSON.stringify(arr[0]) + ";";
|
||||||
|
f += "switch(str){";
|
||||||
|
for (var i = 0; i < arr.length; ++i) f += "case " + JSON.stringify(arr[i]) + ":";
|
||||||
|
f += "return true}return false;";
|
||||||
|
}
|
||||||
|
// When there are more than three length categories, an outer
|
||||||
|
// switch first dispatches on the lengths, to save on comparisons.
|
||||||
|
if (cats.length > 3) {
|
||||||
|
cats.sort(function(a, b) {return b.length - a.length;});
|
||||||
|
f += "switch(str.length){";
|
||||||
|
for (var i = 0; i < cats.length; ++i) {
|
||||||
|
var cat = cats[i];
|
||||||
|
f += "case " + cat[0].length + ":";
|
||||||
|
compareTo(cat);
|
||||||
|
}
|
||||||
|
f += "}";
|
||||||
|
// Otherwise, simply generate a flat `switch` statement.
|
||||||
|
} else {
|
||||||
|
compareTo(words);
|
||||||
|
}
|
||||||
|
return new Function("str", f);
|
||||||
|
};
|
||||||
|
|||||||
Reference in New Issue
Block a user