init repo

This commit is contained in:
Mihai Bazon
2012-05-27 14:09:01 +03:00
commit 562b12f021
5 changed files with 2445 additions and 0 deletions

297
lib/ast.js Normal file
View File

@@ -0,0 +1,297 @@
// Build and return a constructor named AST_<type>.
//
//   type    - node type name; stored as TYPE on the constructor and prototype.
//   props   - whitespace-separated property names, or a falsy value for none.
//   methods - optional object whose own properties are copied to the prototype.
//   base    - constructor to inherit from.  When the 4th argument is omitted
//             entirely AST_Node is used; an explicit null means "no base".
//
// The generated constructor takes one optional `props` object and copies only
// the declared properties (own ones followed by the base's) onto `this`.
function DEFNODE(type, props, methods, base) {
    if (arguments.length < 4) {
        base = AST_Node;
    }
    props = props ? props.split(/\s+/) : [];
    if (base && base.PROPS) {
        props = props.concat(base.PROPS);
    }

    // The assignments are emitted from the last property to the first.
    var assignments = "";
    for (var idx = props.length - 1; idx >= 0; idx--) {
        assignments += "this." + props[idx] + " = props." + props[idx] + ";";
    }
    // new Function is used so that the constructor gets a real AST_<type> name.
    var source = "return function AST_" + type + "(props){ if (props) { " + assignments + " } }";
    var ctor = new Function(source)();

    if (base) {
        ctor.prototype = new base;
    }
    ctor.prototype.CTOR = ctor;
    ctor.PROPS = props;
    if (type) {
        ctor.TYPE = type;
        ctor.prototype.TYPE = type;
    }
    if (methods) {
        for (var key in methods) {
            if (HOP(methods, key)) {
                ctor.prototype[key] = methods[key];
            }
        }
    }
    return ctor;
}
// Token record.  The explicit null base means it does not derive from
// AST_Node and therefore carries no start/end props.
var AST_Token = DEFNODE("Token", "type value line col pos endpos nlb", {}, null);

// Root of the node hierarchy; every DEFNODE call that omits its 4th argument
// derives from this and so inherits `start` and `end`.
var AST_Node = DEFNODE("Node", "start end", {}, null);

var AST_Directive = DEFNODE("Directive", "value", {});

var AST_Debugger = DEFNODE("Debugger", null, {});

var AST_Parenthesized = DEFNODE("Parenthesized", "expression", {
    documentation: "Represents an expression which is always parenthesized. Used for the " +
        "conditions in IF/WHILE."
});

var AST_Bracketed = DEFNODE("Bracketed", "body", {
    documentation: "Represents a block of statements that are always included in brackets. " +
        "Used for bodies of FUNCTION/TRY/CATCH/THROW/SWITCH."
});
/* -----[ loops ]----- */

// Base for statements that may carry a label; contributes `label` and `body`.
var AST_LabeledStatement = DEFNODE("LabeledStatement", "label body", {});

// Plain statement; adds nothing beyond the inherited label/body.
var AST_Statement = DEFNODE("Statement", null, {}, AST_LabeledStatement);

// Loop nodes: each adds its own header fields to the inherited label/body.
var AST_Do = DEFNODE("Do", "condition", {}, AST_LabeledStatement);
var AST_While = DEFNODE("While", "condition", {}, AST_LabeledStatement);
var AST_For = DEFNODE("For", "init condition step", {}, AST_LabeledStatement);
var AST_ForIn = DEFNODE("ForIn", "init name object", {}, AST_LabeledStatement);

var AST_With = DEFNODE("With", "expression body", {});

// break / continue share a base that holds the optional target label.
var AST_LoopControl = DEFNODE("LoopControl", "label", {});
var AST_Break = DEFNODE("Break", null, {}, AST_LoopControl);
var AST_Continue = DEFNODE("Continue", null, {}, AST_LoopControl);
/* -----[ functions ]----- */

// Inheritance chain: Scope <- Toplevel, and Scope <- Lambda <- Function <- Defun.
var AST_Scope = DEFNODE("Scope", "identifiers body", {});
var AST_Toplevel = DEFNODE("Toplevel", null, {}, AST_Scope);
var AST_Lambda = DEFNODE("Lambda", "name argnames", {}, AST_Scope);
var AST_Function = DEFNODE("Function", null, {}, AST_Lambda);
var AST_Defun = DEFNODE("Defun", null, {}, AST_Function);
/* -----[ JUMPS ]----- */

// return / throw share a base holding the (possibly null) `value`.
var AST_Jump = DEFNODE("Jump", "value");
var AST_Return = DEFNODE("Return", null, {}, AST_Jump);
var AST_Throw = DEFNODE("Throw", null, {}, AST_Jump);
/* -----[ IF ]----- */

// `alternative` is the else branch.
var AST_If = DEFNODE("If", "condition consequent alternative", {});
/* -----[ SWITCH ]----- */

// The switch statement itself: discriminant plus inherited label/body.
var AST_Switch = DEFNODE("Switch", "expression", {}, AST_LabeledStatement);

// The braced part of a switch.
var AST_SwitchBlock = DEFNODE("SwitchBlock", null, {}, AST_Bracketed);

// default / case share a base holding the branch `body`.
var AST_SwitchBranch = DEFNODE("SwitchBranch", "body", {});
var AST_Default = DEFNODE("Default", null, {}, AST_SwitchBranch);
var AST_Case = DEFNODE("Case", "expression", {}, AST_SwitchBranch);
/* -----[ EXCEPTIONS ]----- */

// btry / bcatch / bfinally hold the three parts of a try statement.
var AST_Try = DEFNODE("Try", "btry bcatch bfinally", {});
var AST_Catch = DEFNODE("Catch", "argname body", {});
var AST_Finally = DEFNODE("Finally", null, {}, AST_Bracketed);
/* -----[ VAR/CONST ]----- */

// var / const share a base holding the list of definitions.
var AST_Definitions = DEFNODE("Definitions", "definitions", {});
var AST_Var = DEFNODE("Var", null, {}, AST_Definitions);
var AST_Const = DEFNODE("Const", null, {}, AST_Definitions);

// A single `name [= value]` entry of a definitions list.
var AST_VarDef = DEFNODE("VarDef", "name value", {});
/* -----[ OTHER ]----- */

// Calls; `new` reuses the same expression/args shape.
var AST_Call = DEFNODE("Call", "expression args", {});
var AST_New = DEFNODE("New", null, {}, AST_Call);

// Comma expression.
var AST_Seq = DEFNODE("Seq", "first second", {});

// Property access: Dot and Sub share the expression/property shape.
var AST_PropAccess = DEFNODE("PropAccess", "expression property", {});
var AST_Dot = DEFNODE("Dot", null, {}, AST_PropAccess);
var AST_Sub = DEFNODE("Sub", null, {}, AST_PropAccess);

// Unary operators, prefix and postfix.
var AST_Unary = DEFNODE("Unary", "operator expression", {});
var AST_UnaryPrefix = DEFNODE("UnaryPrefix", null, {}, AST_Unary);
var AST_UnaryPostfix = DEFNODE("UnaryPostfix", null, {}, AST_Unary);

var AST_Binary = DEFNODE("Binary", "left operator right", {});

var AST_Conditional = DEFNODE("Conditional", "condition consequent alternative", {});

var AST_Assign = DEFNODE("Assign", "left operator right", {});
/* -----[ LITERALS ]----- */

// NOTE(review): the TYPE string here is "Regexp" while the variable is
// AST_RegExp (likewise "ObjectKeyval" vs AST_ObjectKeyVal below); the strings
// are left untouched because TYPE is visible to callers.
var AST_RegExp = DEFNODE("Regexp", "pattern mods", {});

var AST_Array = DEFNODE("Array", "elements", {});

var AST_Object = DEFNODE("Object", "properties", {});

// Object literal entries: plain key/value pairs plus setters and getters.
var AST_ObjectProperty = DEFNODE("ObjectProperty");
var AST_ObjectKeyVal = DEFNODE("ObjectKeyval", "key value", {}, AST_ObjectProperty);
var AST_ObjectSetter = DEFNODE("ObjectSetter", "name func", {}, AST_ObjectProperty);
var AST_ObjectGetter = DEFNODE("ObjectGetter", "name func", {}, AST_ObjectProperty);

var AST_Symbol = DEFNODE("Symbol", "name", {});

var AST_String = DEFNODE("String", "value", {});

var AST_Number = DEFNODE("Number", "value", {});

var AST_Boolean = DEFNODE("Boolean", "value", {});

// Value-less constants share the Atom base.
var AST_Atom = DEFNODE("Atom", null, {});
var AST_Null = DEFNODE("Null", null, {}, AST_Atom);
var AST_Undefined = DEFNODE("Undefined", null, {}, AST_Atom);
var AST_False = DEFNODE("False", null, {}, AST_Atom);
var AST_True = DEFNODE("True", null, {}, AST_Atom);

5
lib/index.html Normal file
View File

@@ -0,0 +1,5 @@
<!-- Scratch page that loads the library scripts in dependency order:
     utils.js comes first because ast.js calls HOP() while defining nodes.
     NOTE(review): test.js presumably relies on globals from parse.js
     (e.g. parse, tokenizer); confirm against parse.js. -->
<script src="utils.js"></script>
<script src="ast.js"></script>
<script src="parse.js"></script>
<script src="test.js"></script>

1353
lib/parse.js Normal file

File diff suppressed because it is too large Load Diff

752
lib/test.js Normal file
View File

@@ -0,0 +1,752 @@
var func = function parse($TEXT, exigent_mode) {
// Mutable parser state shared by all helpers in this closure.
//   input         - token source: built with tokenizer() when $TEXT is a
//                   string, otherwise $TEXT itself is used as-is
//   token         - current token
//   prev          - token that was current before the last next()
//   peeked        - one-token lookahead filled by peek(), or null
//   in_function   - function nesting depth; `return` is rejected at 0
//   in_directives - true while only string tokens or ";" have been seen
//   in_loop       - loop/switch nesting depth, checked by break/continue
//   labels        - stack of labels currently in scope
var S = {
input : typeof $TEXT == "string" ? tokenizer($TEXT, true) : $TEXT,
token : null,
prev : null,
peeked : null,
in_function : 0,
in_directives : true,
in_loop : 0,
labels : []
};
// Prime the parser with the first token (next() is hoisted).
S.token = next();
// Ask is_token() whether the current token matches `type` / `value`.
function is(type, value) {
    var current = S.token;
    return is_token(current, type, value);
}
// Return the token after the current one without consuming it.  The token is
// read from the input once and cached in S.peeked until next() takes it.
function peek() {
    if (!S.peeked) {
        S.peeked = S.input();
    }
    return S.peeked;
}
// Advance to the next token and return it.  A token cached by peek() is used
// first; otherwise a fresh one is read from the input.
function next() {
    var lookahead = S.peeked;
    S.prev = S.token;
    if (lookahead) {
        S.token = lookahead;
        S.peeked = null;
    } else {
        S.token = S.input();
    }
    // The directive prologue lasts only while tokens are strings or ";".
    if (S.in_directives) {
        S.in_directives = S.token.type == "string" || is("punc", ";");
    }
    return S.token;
}
// Token that was current before the most recent next().
function prev() {
    var previous = S.prev;
    return previous;
}
// Report a parse error through js_error().  Any of line/col/pos that is
// null or undefined is filled in from the tokenizer's current context.
function croak(msg, line, col, pos) {
    var ctx = S.input.context();
    if (line == null) line = ctx.tokline;
    if (col == null) col = ctx.tokcol;
    if (pos == null) pos = ctx.tokpos;
    js_error(msg, line, col, pos);
}
// Report `msg` at the line/column recorded on `token`.
function token_error(token, msg) {
    var line = token.line, col = token.col;
    croak(msg, line, col);
}
// Report an "Unexpected token" error for `token`, or for the current token
// when none is given.
function unexpected(token) {
    var culprit = token == null ? S.token : token;
    token_error(culprit, "Unexpected token: " + culprit.type + " (" + culprit.value + ")");
}
// Require the current token to match type/val.  On a match, consume it and
// return the following token; otherwise report an error at the current token.
function expect_token(type, val) {
    if (!is(type, val)) {
        token_error(S.token, "Unexpected token " + S.token.type + ", expected " + type);
        return undefined;
    }
    return next();
}
// Shorthand for expecting a punctuation token.
function expect(punc) {
    return expect_token("punc", punc);
}
// Whether a missing ";" may be tolerated at the current position: never in
// exigent mode; otherwise when the current token has its `nlb` flag set, or
// is end of input, or is a closing brace.
function can_insert_semicolon() {
    if (exigent_mode) {
        return false;
    }
    return S.token.nlb || is("eof") || is("punc", "}");
}
// Consume a ";" if present; otherwise complain unless a semicolon may be
// inserted here.
function semicolon() {
    if (is("punc", ";")) {
        next();
        return;
    }
    if (!can_insert_semicolon()) {
        unexpected();
    }
}
// Parse "(" expression ")" and return the inner expression.
function parenthesised() {
    expect("(");
    var inner = expression();
    expect(")");
    return inner;
}
// Wrap a parser function so the node it returns is stamped with token
// positions: `start` is the current token at the moment the wrapper is
// invoked, and `end` is the last token consumed once `parser` has returned.
function embed_tokens(parser) {
    return function() {
        var first = S.token;
        var node = parser.apply(this, arguments);
        var last = prev();
        node.start = first;
        node.end = last;
        return node;
    };
}
// Parse one statement, dispatching on the type (and for punctuation and
// keywords, the value) of the current token.  The returned node gets its
// start/end tokens from embed_tokens.
var statement = embed_tokens(function() {
    // A statement cannot begin with a division operator, so "/" or "/=" here
    // is re-read from the input with the rest of the operator text passed in.
    if (is("operator", "/") || is("operator", "/=")) {
        S.peeked = null;
        S.token = S.input(S.token.value.substr(1)); // force regexp
    }
    switch (S.token.type) {
      case "string":
        var dir = S.in_directives, stat = simple_statement();
        // XXXv2: decide how to fix directives
        // if (dir && stat instanceof AST_String && !is("punc", ","))
        //     return new AST_Directive({ value: stat.value });
        return stat;
      case "num":
      case "regexp":
      case "operator":
      case "atom":
        return simple_statement();
      case "name":
        // `name :` starts a labeled statement; anything else is an expression.
        return is_token(peek(), "punc", ":")
            ? labeled_statement()
            : simple_statement();
      case "punc":
        switch (S.token.value) {
          case "{":
            return new AST_Statement({ body: block_() });
          case "[":
          case "(":
            return simple_statement();
          case ";":
            next();
            return new AST_Statement();
          default:
            unexpected();
        }
      case "keyword":
        // The keyword is consumed before its handler runs.
        switch (prog1(S.token.value, next)) {
          case "break":
            return break_cont(AST_Break);
          case "continue":
            return break_cont(AST_Continue);
          case "debugger":
            semicolon();
            return new AST_Debugger();
          case "do":
            return new AST_Do({
                body      : in_loop(statement),
                // Fixed: the token type is "keyword" and its value "while";
                // the old one-argument call asked for a token of *type*
                // "while", which made every do-while fail to parse.
                condition : (expect_token("keyword", "while"), prog1(parenthesised, semicolon))
            });
          case "while":
            return new AST_While({
                condition : parenthesised(),
                body      : in_loop(statement)
            });
          case "for":
            return for_();
          case "function":
            return function_(true);
          case "if":
            return if_();
          case "return":
            if (S.in_function == 0)
                croak("'return' outside of function");
            return new AST_Return({
                value: ( is("punc", ";")
                         ? (next(), null)
                         : can_insert_semicolon()
                         ? null
                         : prog1(expression, semicolon) )
            });
          case "switch":
            return new AST_Switch({
                expression : parenthesised(),
                body       : switch_block_()
            });
          case "throw":
            if (S.token.nlb)
                croak("Illegal newline after 'throw'");
            return new AST_Throw({
                value: prog1(expression, semicolon)
            });
          case "try":
            return try_();
          case "var":
            return prog1(var_, semicolon);
          case "const":
            return prog1(const_, semicolon);
          case "with":
            return new AST_With({
                expression : parenthesised(),
                body       : statement()
            });
          default:
            unexpected();
        }
    }
});
// Parse `label: statement`.  The label is on the S.labels stack while the
// inner statement is parsed, and is then recorded on the resulting node.
function labeled_statement() {
    var label_name = S.token.value;
    next();
    expect(":");
    S.labels.push(label_name);
    var first_token = S.token;
    var labeled = statement();
    // Exigent mode only accepts statements derived from AST_LabeledStatement.
    if (exigent_mode && !(labeled instanceof AST_LabeledStatement)) {
        unexpected(first_token);
    }
    S.labels.pop();
    labeled.label = label_name;
    return labeled;
}
// Parse an expression statement: an expression followed by a (possibly
// inserted) semicolon.
function simple_statement() {
    var expr = expression();
    semicolon();
    return new AST_Statement({ body: expr });
}
// Parse the rest of a break/continue statement (the keyword is already
// consumed).  `type` is the node constructor to instantiate.  A label must be
// one currently in scope; without a label we must be inside a loop or switch.
function break_cont(type) {
    var target = null;
    if (!can_insert_semicolon() && is("name")) {
        target = S.token.value;
    }
    if (target != null) {
        next();
        if (!member(target, S.labels)) {
            croak("Label " + target + " without matching loop or statement");
        }
    } else if (S.in_loop == 0) {
        croak(type.TYPE + " not inside a loop or switch");
    }
    semicolon();
    return new type({ label: target });
}
// Parse the head of a `for` statement (keyword already consumed) and hand
// over to regular_for() or for_in() depending on what follows the init part.
function for_() {
    expect("(");
    if (is("punc", ";")) {
        return regular_for(null);
    }
    var init;
    if (is("keyword", "var")) {
        next();
        init = var_(true);
    } else {
        init = expression(true, true);
    }
    if (!is("operator", "in")) {
        return regular_for(init);
    }
    if (init instanceof AST_Var && init.definitions.length > 1) {
        croak("Only one variable declaration allowed in for..in loop");
    }
    next();
    return for_in(init);
}
// Parse `; test ; step ) body` of a classic for loop.  `init` was already
// parsed by for_(); missing test/step parts are stored as null.
function regular_for(init) {
    var test = null, step = null;
    expect(";");
    if (!is("punc", ";")) {
        test = expression();
    }
    expect(";");
    if (!is("punc", ")")) {
        step = expression();
    }
    expect(")");
    return new AST_For({
        init      : init,
        condition : test,
        step      : step,
        body      : in_loop(statement)
    });
}
// Parse `object ) body` of a for..in loop; `init` (already parsed by for_)
// is either an AST_Var or an arbitrary expression.
//
// The loop variable is stored under `name`, which is the property that
// AST_ForIn declares ("init name object").  It used to be passed as `lhs`,
// which the generated constructor silently ignored.
function for_in(init) {
    var lhs = init instanceof AST_Var ? init.definitions[0].name : init;
    var obj = expression();
    expect(")");
    return new AST_ForIn({
        init   : init,
        name   : lhs,
        object : obj,
        body   : in_loop(statement)
    });
}
// Parse a function after the `function` keyword: optional name, argument
// list and body.  `in_statement` selects AST_Defun (where a name is
// mandatory) over AST_Function.
var function_ = function(in_statement) {
    var name = null;
    if (is("name")) {
        name = as_symbol();
    }
    if (in_statement && !name) {
        unexpected();
    }
    expect("(");
    var ctor = in_statement ? AST_Defun : AST_Function;

    // Comma-separated argument names up to the closing paren.
    var argnames = [];
    for (var first = true; !is("punc", ")"); first = false) {
        if (!first) expect(",");
        argnames.push(as_symbol());
    }
    next();

    // The body is parsed with fresh loop/directive state; the enclosing
    // loop depth is restored afterwards.
    var parse_body = embed_tokens(function() {
        ++S.in_function;
        var outer_loop = S.in_loop;
        S.in_directives = true;
        S.in_loop = 0;
        var stmts = block_();
        --S.in_function;
        S.in_loop = outer_loop;
        return new AST_Bracketed({ body: stmts });
    });

    return new ctor({
        name     : name,
        argnames : argnames,
        body     : parse_body()
    });
};
// Parse `( condition ) statement [else statement]` (keyword already consumed).
function if_() {
    var test = parenthesised();
    var then_branch = statement();
    var else_branch = null;
    if (is("keyword", "else")) {
        next();
        else_branch = statement();
    }
    return new AST_If({
        condition   : test,
        consequent  : then_branch,
        alternative : else_branch
    });
}
// Parse `{ statement* }` and return the statements as an array.
function block_() {
    expect("{");
    var stmts = [];
    for (;;) {
        if (is("punc", "}")) break;
        if (is("eof")) unexpected();
        stmts.push(statement());
    }
    next();
    return stmts;
}
// Parse the braced body of a switch.  It runs inside in_loop() so that a bare
// `break` is accepted.  Statements are appended to the most recent case or
// default branch; a statement before any branch is an error.
var switch_block_ = embed_tokens(curry(in_loop, function() {
    expect("{");
    var branches = [], current = null;
    for (;;) {
        if (is("punc", "}")) break;
        if (is("eof")) unexpected();
        if (is("keyword", "case")) {
            next();
            current = [];
            branches.push(new AST_Case({ expression: expression(), body: current }));
            expect(":");
        } else if (is("keyword", "default")) {
            next();
            expect(":");
            current = [];
            branches.push(new AST_Default({ body: current }));
        } else {
            if (!current) unexpected();
            current.push(statement());
        }
    }
    next();
    return new AST_SwitchBlock({ body: branches });
}));
// Parse the rest of a try statement (keyword already consumed): the
// protected block, then an optional catch clause and an optional finally
// clause.  At least one of the two must be present.
function try_() {
    var body = new AST_Bracketed({ body: block_() });
    var bcatch = null, bfinally = null;
    if (is("keyword", "catch")) {
        next();
        expect("(");
        // as_symbol() already advances past the identifier, so the current
        // token is now the closing paren.  The extra next() that used to
        // follow skipped it and made every `catch (e)` fail at expect(")").
        var name = as_symbol();
        expect(")");
        bcatch = new AST_Catch({
            argname : name,
            body    : new AST_Bracketed({ body: block_() })
        });
    }
    if (is("keyword", "finally")) {
        next();
        bfinally = new AST_Finally({ body: block_() });
    }
    if (!bcatch && !bfinally)
        croak("Missing catch/finally blocks");
    return new AST_Try({
        btry     : body,
        bcatch   : bcatch,
        bfinally : bfinally
    });
}
// Parse a comma-separated list of `name [= value]` definitions and return
// them as AST_VarDef nodes.  `no_in` is passed on to the initializer
// expression parser.
function vardefs(no_in) {
    var defs = [];
    while (true) {
        var first = S.token;
        var name = as_symbol();
        var init = null;
        if (is("operator", "=")) {
            next();
            init = expression(false, no_in);
        }
        defs.push(new AST_VarDef({
            start : first,
            name  : name,
            value : init,
            end   : prev()
        }));
        if (!is("punc", ",")) {
            return defs;
        }
        next();
    }
}
// Parse the definitions of a `var` statement (keyword already consumed).
var var_ = embed_tokens(function(no_in) {
    var defs = vardefs(no_in);
    return new AST_Var({ definitions: defs });
});
// Parse the definitions of a `const` statement (keyword already consumed).
var const_ = embed_tokens(function() {
    var defs = vardefs();
    return new AST_Const({ definitions: defs });
});
// Parse a `new` expression (operator already consumed): the constructor
// expression, an optional argument list, then any trailing subscripts/calls.
var new_ = embed_tokens(function() {
    var callee = expr_atom(false);
    var args = [];
    if (is("punc", "(")) {
        next();
        args = expr_list(")");
    }
    var node = new AST_New({
        expression : callee,
        args       : args
    });
    return subscripts(node, true);
});
// Turn the current atomic token (name, number, string, regexp or atom) into
// the matching AST node and advance past it.
//
// Changes from the previous version:
//  - the `undefined` atom now yields AST_Undefined (NOTE(review): this
//    assumes the tokenizer can classify `undefined` as an atom; confirm
//    against parse.js);
//  - a token that maps to no node is reported through unexpected() instead
//    of returning undefined, which callers would then dereference.
function as_atom_node() {
    var tok = S.token, ret;
    switch (tok.type) {
      case "name":
        return as_symbol();
      case "num":
        ret = new AST_Number({ start: tok, end: tok, value: tok.value });
        break;
      case "string":
        ret = new AST_String({ start: tok, end: tok, value: tok.value });
        break;
      case "regexp":
        ret = new AST_RegExp({ start: tok, end: tok, pattern: tok.value[0], mods: tok.value[1] });
        break;
      case "atom":
        switch (tok.value) {
          case "false":
            ret = new AST_False({ start: tok, end: tok });
            break;
          case "true":
            ret = new AST_True({ start: tok, end: tok });
            break;
          case "null":
            ret = new AST_Null({ start: tok, end: tok });
            break;
          case "undefined":
            ret = new AST_Undefined({ start: tok, end: tok });
            break;
        }
        break;
    }
    if (!ret)
        unexpected(tok);
    next();
    return ret;
}
// Parse a primary expression: `new ...`, a parenthesised expression, an
// array or object literal, a function expression, or an atomic token, each
// followed by whatever subscripts()/calls are allowed by `allow_calls`.
var expr_atom = function(allow_calls) {
    if (is("operator", "new")) {
        next();
        return new_();
    }
    if (is("punc")) {
        var opener = S.token.value;
        if (opener === "(") {
            next();
            var inner = expression();
            expect(")");
            return subscripts(inner, allow_calls);
        }
        if (opener === "[") {
            next();
            return subscripts(array_(), allow_calls);
        }
        if (opener === "{") {
            next();
            return subscripts(object_(), allow_calls);
        }
        unexpected();
    }
    if (is("keyword", "function")) {
        // The start token is captured before the keyword is consumed so the
        // node spans the whole function expression.
        var start = S.token;
        next();
        var fn = function_(false);
        fn.start = start;
        fn.end = prev();
        return subscripts(fn, allow_calls);
    }
    if (HOP(ATOMIC_START_TOKEN, S.token.type)) {
        return subscripts(as_atom_node(), allow_calls);
    }
    unexpected();
};
// Parse a comma-separated expression list up to and including the `closing`
// punctuation, returning the parsed items.
//   allow_trailing_comma - accept `a, b, <closing>`
//   allow_empty          - a comma where an item is expected yields an
//                          AST_Undefined placeholder
function expr_list(closing, allow_trailing_comma, allow_empty) {
    var items = [];
    for (var first = true; !is("punc", closing); first = false) {
        if (!first) expect(",");
        if (allow_trailing_comma && is("punc", closing)) break;
        if (is("punc", ",") && allow_empty) {
            // The comma itself is consumed by the next iteration.
            items.push(new AST_Undefined({ start: S.token, end: S.token }));
        } else {
            items.push(expression(false));
        }
    }
    next();
    return items;
}
// Parse the elements of an array literal (the "[" is already consumed).
// Holes are always allowed; a trailing comma only outside exigent mode.
function array_() {
    var elements = expr_list("]", !exigent_mode, true);
    return new AST_Array({ elements: elements });
}
// Parse the properties of an object literal (the "{" is already consumed).
// A property whose key token is the name `get` or `set` and is not followed
// by ":" is parsed as an accessor; everything else is `key : value`.
var object_ = embed_tokens(function() {
    var first = true, properties = [];
    while (!is("punc", "}")) {
        if (first) first = false; else expect(",");
        if (!exigent_mode && is("punc", "}"))
            // allow trailing comma
            break;
        var start = S.token;
        var type = start.type;
        var name = as_property_name();
        if (type == "name" && !is("punc", ":")) {
            var accessor = null;
            if (name.name == "get") accessor = AST_ObjectGetter;
            else if (name.name == "set") accessor = AST_ObjectSetter;
            if (accessor) {
                properties.push(new accessor({
                    start : start,
                    name  : name,
                    func  : function_(false),
                    end   : prev()
                }));
                continue;
            }
        }
        expect(":");
        properties.push(new AST_ObjectKeyVal({
            start : start,
            key   : name,
            value : expression(false),
            end   : prev()
        }));
    }
    next();
    return new AST_Object({ properties: properties });
});
// Read an object-literal key: numbers and strings are accepted in addition
// to everything as_name() accepts.
function as_property_name() {
    var kind = S.token.type;
    if (kind === "num" || kind === "string") {
        return as_symbol(true);
    }
    return as_name();
}
// Read a property name after "." (or as an object key): names, operators,
// keywords and atoms are all accepted; anything else is an error.
function as_name() {
    var kind = S.token.type;
    if (kind === "name" || kind === "operator" || kind === "keyword" || kind === "atom") {
        return as_symbol(true);
    }
    unexpected();
}
// Build an AST_Symbol from the current token and advance past it.  Unless
// `noerror` is set, a token that is not a name is reported first.
function as_symbol(noerror) {
    if (!noerror && !is("name")) {
        croak("Name expected");
    }
    var tok = S.token;
    var sym = new AST_Symbol({
        name  : String(tok.value),
        start : tok,
        end   : tok
    });
    next();
    return sym;
}
// Parse any chain of `.name`, `[expr]` and (when `allow_calls` is set)
// `(args)` that follows `expr`, wrapping it in the corresponding nodes.
// Once a call has been parsed, further calls are always allowed.
var subscripts = embed_tokens(function(expr, allow_calls) {
    var node;
    if (is("punc", ".")) {
        next();
        node = new AST_Dot({
            expression : expr,
            property   : as_name()
        });
        return subscripts(node, allow_calls);
    }
    if (is("punc", "[")) {
        next();
        var index = expression();
        expect("]");
        node = new AST_Sub({
            expression : expr,
            property   : index
        });
        return subscripts(node, allow_calls);
    }
    if (allow_calls && is("punc", "(")) {
        next();
        node = new AST_Call({
            expression : expr,
            args       : expr_list(")")
        });
        return subscripts(node, true);
    }
    return expr;
});
// Parse an expression with optional prefix operators in front and postfix
// operators behind.  A postfix operator is only taken when the operator
// token does not have its `nlb` flag set.
var maybe_unary = embed_tokens(function(allow_calls) {
    if (is("operator") && HOP(UNARY_PREFIX, S.token.value)) {
        var op = S.token.value;
        next();
        var operand = maybe_unary(allow_calls);
        return make_unary(AST_UnaryPrefix, op, operand);
    }
    var result = expr_atom(allow_calls);
    while (is("operator") && HOP(UNARY_POSTFIX, S.token.value) && !S.token.nlb) {
        result = make_unary(AST_UnaryPostfix, S.token.value, result);
        next();
    }
    return result;
});
// Build a unary node of class `ctor`.  ++ and -- additionally require an
// assignable operand.
function make_unary(ctor, op, expr) {
    var mutates = op == "++" || op == "--";
    if (mutates && !is_assignable(expr)) {
        croak("Invalid use of " + op + " operator");
    }
    return new ctor({ operator: op, expression: expr });
}
// Precedence-climbing parser for binary operators.  `left` is the operand
// parsed so far; an operator is only consumed when its precedence exceeds
// `min_prec`.  With `no_in` set, `in` is not treated as an operator.
var expr_op = embed_tokens(function(left, min_prec, no_in) {
    var op = null;
    if (is("operator")) {
        op = S.token.value;
        if (op == "in" && no_in) op = null;
    }
    var prec = op == null ? null : PRECEDENCE[op];
    if (prec == null || !(prec > min_prec)) {
        return left;
    }
    next();
    var right = expr_op(maybe_unary(true), prec, no_in);
    var combined = new AST_Binary({
        left     : left,
        operator : op,
        right    : right
    });
    return expr_op(combined, min_prec, no_in);
});
// Parse a unary expression and any binary operators that follow it.
function expr_ops(no_in) {
    var first_operand = maybe_unary(true);
    return expr_op(first_operand, 0, no_in);
}
// Parse a binary-operator expression and, if a "?" follows, the rest of a
// conditional expression.
var maybe_conditional = embed_tokens(function(no_in) {
    var test = expr_ops(no_in);
    if (!is("operator", "?")) {
        return test;
    }
    next();
    var when_true = expression(false);
    expect(":");
    var when_false = expression(false, no_in);
    return new AST_Conditional({
        condition   : test,
        consequent  : when_true,
        alternative : when_false
    });
});
// Whether `expr` may appear as the target of an assignment or of ++/--.
// Everything is accepted outside exigent mode.  In exigent mode the target
// must be a property access (AST_Dot / AST_Sub), a call (AST_Call, which
// includes AST_New) or a symbol other than `this`.
//
// The old implementation inspected `expr[0]`, a leftover from an
// array-shaped AST; node objects have no such element, so in exigent mode
// every assignment was rejected.
function is_assignable(expr) {
    if (!exigent_mode) return true;
    if (expr instanceof AST_Symbol)
        return expr.name != "this";
    return expr instanceof AST_PropAccess || expr instanceof AST_Call;
}
// Parse a conditional expression and, if an assignment operator follows,
// the (right-associative) assignment built on top of it.
var maybe_assign = embed_tokens(function(no_in) {
    var target = maybe_conditional(no_in);
    var op = S.token.value;
    if (!(is("operator") && HOP(ASSIGNMENT, op))) {
        return target;
    }
    if (!is_assignable(target)) {
        croak("Invalid assignment");
        return target;
    }
    next();
    return new AST_Assign({
        left     : target,
        operator : ASSIGNMENT[op],
        right    : maybe_assign(no_in)
    });
});
// Parse an expression.  `commas` controls whether a comma sequence is
// accepted and defaults to true when the function is called without any
// arguments; `no_in` is passed down to disable the `in` operator.
var expression = embed_tokens(function(commas, no_in) {
    if (arguments.length == 0) {
        commas = true;
    }
    var head = maybe_assign(no_in);
    if (!commas || !is("punc", ",")) {
        return head;
    }
    next();
    return new AST_Seq({
        first  : head,
        second : expression(true, no_in)
    });
});
// Run `cont` with the loop nesting counter raised by one and return its
// result.
function in_loop(cont) {
    ++S.in_loop;
    var result = cont();
    --S.in_loop;
    return result;
}
// Parse statements until end of input and wrap them in the root node.
return new AST_Toplevel({
body: (function(a){
while (!is("eof"))
a.push(statement());
return a;
})([])
});
};
// Smoke test: time a parse of the source text of `func` and log the result.
// NOTE(review): the name `parse` of the function expression above is only
// bound inside that function, so this call presumably resolves to a global
// `parse` defined in parse.js; confirm.
console.time("parse");
var ast = parse(func.toString());
console.timeEnd("parse");
console.log(ast);
// var moo = 1, i, man = moo + bar;
// try {
// loop: while (/foobar/.test(bar)) {
// alert(bar);
// continue loop;
// }
// } finally {
// return crap;
// }

38
lib/utils.js Normal file
View File

@@ -0,0 +1,38 @@
// Partially apply `f`: the returned function calls `f` with the arguments
// given here followed by its own arguments, keeping its own `this`.
function curry(f) {
    var bound = slice(arguments, 1);
    return function() {
        var rest = slice(arguments);
        return f.apply(this, bound.concat(rest));
    };
}
// Evaluate the first argument (calling it when it is a function), then call
// every remaining argument in order for its side effects, and return the
// first value.
function prog1(ret) {
    var result = ret instanceof Function ? ret() : ret;
    for (var i = 1; i < arguments.length; i++) {
        arguments[i]();
    }
    return result;
}
// Turn a list of keys into an object mapping each key to true.
function array_to_hash(a) {
    var hash = {};
    var idx = 0;
    while (idx < a.length) {
        hash[a[idx]] = true;
        idx += 1;
    }
    return hash;
}
// Copy an array-like into a real array, starting at index `start`
// (0 when omitted or falsy).
function slice(a, start) {
    var from = start || 0;
    return Array.prototype.slice.call(a, from);
}
// Split a string into an array of single-character strings.
function characters(str) {
    var separator = "";
    return str.split(separator);
}
// Whether `array` contains an element loosely equal (==) to `name`.
function member(name, array) {
    for (var idx = 0; idx < array.length; idx++) {
        if (array[idx] == name) {
            return true;
        }
    }
    return false;
}
// "Has own property": safe even when `obj` shadows or lacks hasOwnProperty.
function HOP(obj, prop) {
    var hasOwn = Object.prototype.hasOwnProperty;
    return hasOwn.call(obj, prop);
}