Use makePredicate from Acorn to generate functions that test whether a
string is a keyword, a reserved word, etc. This speeds up the parser a bit, though not spectacularly; it is still far from Acorn.
This commit is contained in:
123
lib/parse.js
123
lib/parse.js
@@ -44,89 +44,24 @@
|
||||
|
||||
"use strict";
|
||||
|
||||
var KEYWORDS = array_to_hash([
|
||||
"break",
|
||||
"case",
|
||||
"catch",
|
||||
"const",
|
||||
"continue",
|
||||
"debugger",
|
||||
"default",
|
||||
"delete",
|
||||
"do",
|
||||
"else",
|
||||
"finally",
|
||||
"for",
|
||||
"function",
|
||||
"if",
|
||||
"in",
|
||||
"instanceof",
|
||||
"new",
|
||||
"return",
|
||||
"switch",
|
||||
"throw",
|
||||
"try",
|
||||
"typeof",
|
||||
"var",
|
||||
"void",
|
||||
"while",
|
||||
"with"
|
||||
]);
|
||||
var KEYWORDS = 'break case catch const continue debugger default delete do else finally for function if in instanceof new return switch throw try typeof var void while with';
|
||||
var KEYWORDS_ATOM = 'false null true';
|
||||
var RESERVED_WORDS = 'abstract boolean byte char class double enum export extends final float goto implements import int interface long native package private protected public short static super synchronized this throws transient volatile'
|
||||
+ " " + KEYWORDS_ATOM + " " + KEYWORDS;
|
||||
var KEYWORDS_BEFORE_EXPRESSION = 'return new delete throw else case';
|
||||
|
||||
var RESERVED_WORDS = array_to_hash([
|
||||
"abstract",
|
||||
"boolean",
|
||||
"byte",
|
||||
"char",
|
||||
"class",
|
||||
"double",
|
||||
"enum",
|
||||
"export",
|
||||
"extends",
|
||||
"final",
|
||||
"float",
|
||||
"goto",
|
||||
"implements",
|
||||
"import",
|
||||
"int",
|
||||
"interface",
|
||||
"long",
|
||||
"native",
|
||||
"package",
|
||||
"private",
|
||||
"protected",
|
||||
"public",
|
||||
"short",
|
||||
"static",
|
||||
"super",
|
||||
"synchronized",
|
||||
"throws",
|
||||
"transient",
|
||||
"volatile"
|
||||
]);
|
||||
KEYWORDS = makePredicate(KEYWORDS);
|
||||
RESERVED_WORDS = makePredicate(RESERVED_WORDS);
|
||||
KEYWORDS_BEFORE_EXPRESSION = makePredicate(KEYWORDS_BEFORE_EXPRESSION);
|
||||
KEYWORDS_ATOM = makePredicate(KEYWORDS_ATOM);
|
||||
|
||||
var KEYWORDS_BEFORE_EXPRESSION = array_to_hash([
|
||||
"return",
|
||||
"new",
|
||||
"delete",
|
||||
"throw",
|
||||
"else",
|
||||
"case"
|
||||
]);
|
||||
|
||||
var KEYWORDS_ATOM = array_to_hash([
|
||||
"false",
|
||||
"null",
|
||||
"true"
|
||||
]);
|
||||
|
||||
var OPERATOR_CHARS = array_to_hash(characters("+-*&%=<>!?|~^"));
|
||||
var OPERATOR_CHARS = makePredicate(characters("+-*&%=<>!?|~^"));
|
||||
|
||||
var RE_HEX_NUMBER = /^0x[0-9a-f]+$/i;
|
||||
var RE_OCT_NUMBER = /^0[0-7]+$/;
|
||||
var RE_DEC_NUMBER = /^\d*\.?\d*(?:e[+-]?\d*(?:\d\.?|\.?\d)\d*)?$/i;
|
||||
|
||||
var OPERATORS = array_to_hash([
|
||||
var OPERATORS = makePredicate([
|
||||
"in",
|
||||
"instanceof",
|
||||
"typeof",
|
||||
@@ -173,13 +108,13 @@ var OPERATORS = array_to_hash([
|
||||
"||"
|
||||
]);
|
||||
|
||||
var WHITESPACE_CHARS = array_to_hash(characters(" \u00a0\n\r\t\f\u000b\u200b\u180e\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u202f\u205f\u3000"));
|
||||
var WHITESPACE_CHARS = makePredicate(characters(" \u00a0\n\r\t\f\u000b\u200b\u180e\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u202f\u205f\u3000"));
|
||||
|
||||
var PUNC_BEFORE_EXPRESSION = array_to_hash(characters("[{(,.;:"));
|
||||
var PUNC_BEFORE_EXPRESSION = makePredicate(characters("[{(,.;:"));
|
||||
|
||||
var PUNC_CHARS = array_to_hash(characters("[]{}(),;:"));
|
||||
var PUNC_CHARS = makePredicate(characters("[]{}(),;:"));
|
||||
|
||||
var REGEXP_MODIFIERS = array_to_hash(characters("gmsiy"));
|
||||
var REGEXP_MODIFIERS = makePredicate(characters("gmsiy"));
|
||||
|
||||
/* -----[ Tokenizer ]----- */
|
||||
|
||||
@@ -214,10 +149,7 @@ function is_unicode_connector_punctuation(ch) {
|
||||
|
||||
function is_identifier(name) {
|
||||
return /^[a-z_$][a-z0-9_$]*$/i.test(name)
|
||||
&& name != "this"
|
||||
&& !KEYWORDS_ATOM[name]
|
||||
&& !RESERVED_WORDS[name]
|
||||
&& !KEYWORDS[name];
|
||||
&& !RESERVED_WORDS(name)
|
||||
};
|
||||
|
||||
function is_identifier_start(ch) {
|
||||
@@ -318,8 +250,8 @@ function tokenizer($TEXT, filename) {
|
||||
|
||||
function token(type, value, is_comment) {
|
||||
S.regex_allowed = ((type == "operator" && !UNARY_POSTFIX[value]) ||
|
||||
(type == "keyword" && KEYWORDS_BEFORE_EXPRESSION[value]) ||
|
||||
(type == "punc" && PUNC_BEFORE_EXPRESSION[value]));
|
||||
(type == "keyword" && KEYWORDS_BEFORE_EXPRESSION(value)) ||
|
||||
(type == "punc" && PUNC_BEFORE_EXPRESSION(value)));
|
||||
var ret = {
|
||||
type : type,
|
||||
value : value,
|
||||
@@ -343,7 +275,7 @@ function tokenizer($TEXT, filename) {
|
||||
};
|
||||
|
||||
function skip_whitespace() {
|
||||
while (WHITESPACE_CHARS[peek()])
|
||||
while (WHITESPACE_CHARS(peek()))
|
||||
next();
|
||||
};
|
||||
|
||||
@@ -493,7 +425,7 @@ function tokenizer($TEXT, filename) {
|
||||
backslash = false;
|
||||
}
|
||||
}
|
||||
if (KEYWORDS[name] && escaped) {
|
||||
if (KEYWORDS(name) && escaped) {
|
||||
hex = name.charCodeAt(0).toString(16).toUpperCase();
|
||||
name = "\\u" + "0000".substr(hex.length) + hex + name.slice(1);
|
||||
}
|
||||
@@ -528,7 +460,7 @@ function tokenizer($TEXT, filename) {
|
||||
function grow(op) {
|
||||
if (!peek()) return op;
|
||||
var bigger = op + peek();
|
||||
if (OPERATORS[bigger]) {
|
||||
if (OPERATORS(bigger)) {
|
||||
next();
|
||||
return grow(bigger);
|
||||
} else {
|
||||
@@ -563,12 +495,9 @@ function tokenizer($TEXT, filename) {
|
||||
|
||||
function read_word() {
|
||||
var word = read_name();
|
||||
return KEYWORDS_ATOM[word]
|
||||
? token("atom", word)
|
||||
: !KEYWORDS[word]
|
||||
? token("name", word)
|
||||
: OPERATORS[word]
|
||||
? token("operator", word)
|
||||
return KEYWORDS_ATOM(word) ? token("atom", word)
|
||||
: !KEYWORDS(word) ? token("name", word)
|
||||
: OPERATORS(word) ? token("operator", word)
|
||||
: token("keyword", word);
|
||||
};
|
||||
|
||||
@@ -590,10 +519,10 @@ function tokenizer($TEXT, filename) {
|
||||
if (!ch) return token("eof");
|
||||
if (is_digit(ch)) return read_num();
|
||||
if (ch == '"' || ch == "'") return read_string();
|
||||
if (PUNC_CHARS[ch]) return token("punc", next());
|
||||
if (PUNC_CHARS(ch)) return token("punc", next());
|
||||
if (ch == ".") return handle_dot();
|
||||
if (ch == "/") return handle_slash();
|
||||
if (OPERATOR_CHARS[ch]) return read_operator();
|
||||
if (OPERATOR_CHARS(ch)) return read_operator();
|
||||
if (ch == "\\" || is_identifier_start(ch)) return read_word();
|
||||
parse_error("Unexpected character '" + ch + "'");
|
||||
};
|
||||
|
||||
37
lib/utils.js
37
lib/utils.js
@@ -214,3 +214,40 @@ function set_intersection(a, b) {
|
||||
return b.indexOf(el) >= 0;
|
||||
});
|
||||
};
|
||||
|
||||
// this function is taken from Acorn [1], written by Marijn Haverbeke
|
||||
// [1] https://github.com/marijnh/acorn
|
||||
/**
 * Compile a fixed word list into a membership-test function.
 *
 * The predicate is generated as source text and compiled with `new Function`,
 * so membership is decided by `switch` statements instead of a hash lookup.
 * Only pass trusted, constant word lists: every word is embedded in the
 * generated code (escaped through JSON.stringify).
 *
 * @param {string|Array} words - either an array of words, or a single string
 *     of words separated by one space each.
 * @returns {Function} a function `(str)` returning `true` when `str` is
 *     strictly equal to one of the words and `false` otherwise. When the
 *     words span more than three distinct lengths the generated code reads
 *     `str.length`, so `str` must not be `null` or `undefined` in that case.
 */
function makePredicate(words) {
    if (!(words instanceof Array)) words = words.split(" ");

    // An empty list would generate `switch(str){return true}`, which is not
    // valid JavaScript; nothing can match, so answer `false` directly.
    if (words.length == 0) return new Function("str", "return false;");

    // Bucket the words by length; each bucket holds words of equal length.
    var cats = [];
    out: for (var i = 0; i < words.length; ++i) {
        for (var j = 0; j < cats.length; ++j) {
            if (cats[j][0].length == words[i].length) {
                cats[j].push(words[i]);
                continue out;
            }
        }
        cats.push([words[i]]);
    }

    // Returns code that tests `str` against every word in `arr` and always
    // ends in a `return`, so it never falls through into following code.
    function compareTo(arr) {
        if (arr.length == 1) return "return str === " + JSON.stringify(arr[0]) + ";";
        var code = "switch(str){";
        for (var k = 0; k < arr.length; ++k) code += "case " + JSON.stringify(arr[k]) + ":";
        return code + "return true}return false;";
    }

    var f;
    // When there are more than three length categories, an outer
    // switch first dispatches on the lengths, to save on comparisons.
    if (cats.length > 3) {
        // Buckets with the most words are emitted first.
        cats.sort(function(a, b) { return b.length - a.length; });
        f = "switch(str.length){";
        for (var c = 0; c < cats.length; ++c) {
            f += "case " + cats[c][0].length + ":" + compareTo(cats[c]);
        }
        // A length that matches no bucket must yield `false`, not `undefined`.
        f += "}return false;";
    // Otherwise, simply generate a flat `switch` statement.
    } else {
        f = compareTo(words);
    }
    return new Function("str", f);
};
|
||||
|
||||
Reference in New Issue
Block a user