fixed some mess with symbols/scope

- all symbols now have a `thedef` property which is a SymbolDef object,
  instead of the `uniq` that we had before (pointing to the first occurrence
  of the name as declaration).

- for undeclared symbols we still create a SymbolDef object in the toplevel
  scope but mark it "undeclared"

- we can now call figure_out_scope after squeezing, which is useful in order
  not to mangle names that were dropped by the squeezer
This commit is contained in:
Mihai Bazon
2012-09-11 15:42:28 +03:00
parent da407d46c6
commit 8e82d8d94c
6 changed files with 204 additions and 150 deletions

View File

@@ -73,6 +73,7 @@ UglifyJS.base54.sort();
files.forEach(do_file_3);
if (ARGS.v) {
sys.error("BASE54 digits: " + UglifyJS.base54.get());
//sys.error("Frequency: " + sys.inspect(UglifyJS.base54.freq()));
}
output = output.get();

View File

@@ -549,11 +549,11 @@ var AST_ObjectGetter = DEFNODE("ObjectGetter", null, {
$documentation: "An object getter property",
}, AST_ObjectProperty);
var AST_Symbol = DEFNODE("Symbol", "scope name global undeclared", {
var AST_Symbol = DEFNODE("Symbol", "scope name thedef", {
$documentation: "Base class for all symbols",
});
var AST_SymbolDeclaration = DEFNODE("SymbolDeclaration", "references", {
var AST_SymbolDeclaration = DEFNODE("SymbolDeclaration", null, {
$documentation: "A declaration symbol (symbol in var/const, function name or argument, symbol in catch)",
}, AST_Symbol);
@@ -581,7 +581,7 @@ var AST_Label = DEFNODE("Label", null, {
$documentation: "Symbol naming a label (declaration)",
}, AST_SymbolDeclaration);
var AST_SymbolRef = DEFNODE("SymbolRef", "symbol", {
var AST_SymbolRef = DEFNODE("SymbolRef", null, {
$documentation: "Reference to some symbol (not definition/declaration)",
}, AST_Symbol);

View File

@@ -884,16 +884,9 @@ function OutputStream(options) {
self.value._do_print(output, true);
});
DEFPRINT(AST_Symbol, function(self, output){
output.print_name(self.name);
});
DEFPRINT(AST_SymbolDeclaration, function(self, output){
var def = self.definition();
output.print_name(def.mangled_name || def.name);
});
DEFPRINT(AST_SymbolRef, function(self, output){
var def = self.symbol;
output.print_name(def ? def.mangled_name || def.name : self.name);
});
DEFPRINT(AST_This, function(self, output){
output.print("this");
});

View File

@@ -41,6 +41,26 @@
***********************************************************************/
function SymbolDef(scope, orig) {
this.name = orig.name;
this.orig = [ orig ];
this.scope = scope;
this.references = [];
this.global = false;
this.mangled_name = null;
this.undeclared = false;
};
SymbolDef.prototype = {
unmangleable: function() {
return this.global || this.undeclared || this.scope.uses_eval || this.scope.uses_with;
},
mangle: function() {
if (!this.mangled_name && !this.unmangleable())
this.mangled_name = this.scope.next_mangled();
}
};
AST_Toplevel.DEFMETHOD("figure_out_scope", function(){
// This does what ast_add_scope did in UglifyJS v1.
//
@@ -76,7 +96,10 @@ AST_Toplevel.DEFMETHOD("figure_out_scope", function(){
delete labels[l.name];
return true; // no descend again
}
if (node instanceof AST_SymbolDeclaration) {
if (node instanceof AST_Symbol) {
node.scope = scope;
}
if (node instanceof AST_Label) {
node.init_scope_vars();
}
if (node instanceof AST_SymbolLambda) {
@@ -88,7 +111,7 @@ AST_Toplevel.DEFMETHOD("figure_out_scope", function(){
// scope when we encounter the AST_Defun node (which is
// instanceof AST_Scope) but we get to the symbol a bit
// later.
scope.parent_scope.def_function(node);
(node.scope = scope.parent_scope).def_function(node);
}
else if (node instanceof AST_SymbolVar) {
scope.def_variable(node);
@@ -102,9 +125,6 @@ AST_Toplevel.DEFMETHOD("figure_out_scope", function(){
// identifier in the enclosing scope)
scope.def_variable(node);
}
else if (node instanceof AST_SymbolRef) {
node.scope = scope;
}
if (node instanceof AST_LabelRef) {
var sym = labels[node.name];
if (!sym) throw new Error("Undefined label " + node.name);
@@ -115,6 +135,7 @@ AST_Toplevel.DEFMETHOD("figure_out_scope", function(){
// pass 2: find back references and eval
var func = null;
var globals = self.globals = {};
var tw = new TreeWalker(function(node, descend){
if (node instanceof AST_Lambda) {
var prev_func = func;
@@ -124,18 +145,28 @@ AST_Toplevel.DEFMETHOD("figure_out_scope", function(){
return true;
}
if (node instanceof AST_SymbolRef) {
var sym = node.scope.find_variable(node);
node.reference(sym);
var name = node.name;
var sym = node.scope.find_variable(name);
if (!sym) {
if (node.name == "eval") {
for (var s = node.scope;
s && !s.uses_eval;
s = s.parent_scope) s.uses_eval = true;
var g;
if (HOP(globals, name)) {
g = globals[name];
} else {
g = new SymbolDef(self, node);
g.undeclared = true;
}
if (node.name == "arguments") {
node.thedef = g;
if (name == "eval") {
for (var s = node.scope; s && !s.uses_eval; s = s.parent_scope)
s.uses_eval = true;
}
if (name == "arguments") {
func.uses_arguments = true;
}
} else {
node.thedef = sym;
}
node.reference();
}
});
self.walk(tw);
@@ -156,128 +187,47 @@ AST_Lambda.DEFMETHOD("init_scope_vars", function(){
this.uses_arguments = false;
});
AST_SymbolDeclaration.DEFMETHOD("init_scope_vars", function(){
AST_SymbolRef.DEFMETHOD("reference", function() {
var def = this.definition();
def.references.push(this);
var orig_scope = def.scope, s = this.scope;
while (s) {
push_uniq(s.enclosed, def);
s = s.parent_scope;
}
});
AST_Label.DEFMETHOD("init_scope_vars", function(){
this.references = [];
});
AST_Toplevel.DEFMETHOD("scope_warnings", function(options){
options = defaults(options, {
undeclared : false, // this makes a lot of noise
unreferenced : true,
assign_to_global : true,
func_arguments : true,
nested_defuns : true,
eval : true
});
var tw = new TreeWalker(function(node){
if (options.undeclared
&& node instanceof AST_SymbolRef
&& node.undeclared)
{
// XXX: this also warns about JS standard names,
// i.e. Object, Array, parseInt etc. Should add a list of
// exceptions.
AST_Node.warn("Undeclared symbol: {name} [{line},{col}]", {
name: node.name,
line: node.start.line,
col: node.start.col
});
}
if (options.assign_to_global)
{
var sym = null;
if (node instanceof AST_Assign && node.left instanceof AST_SymbolRef)
sym = node.left;
else if (node instanceof AST_ForIn && node.init instanceof AST_SymbolRef)
sym = node.init;
if (sym
&& (sym.undeclared
|| (sym.symbol.global
&& sym.scope !== sym.symbol.scope))) {
AST_Node.warn("{msg}: {name} [{line},{col}]", {
msg: sym.undeclared ? "Accidental global?" : "Assignment to global",
name: sym.name,
line: sym.start.line,
col: sym.start.col
});
}
}
if (options.eval
&& node instanceof AST_SymbolRef
&& node.undeclared
&& node.name == "eval") {
AST_Node.warn("Eval is used [{line},{col}]", node.start);
}
if (options.unreferenced
&& node instanceof AST_SymbolDeclaration
&& node.unreferenced()) {
AST_Node.warn("{type} {name} is declared but not referenced [{line},{col}]", {
type: node instanceof AST_Label ? "Label" : "Symbol",
name: node.name,
line: node.start.line,
col: node.start.col
});
}
if (options.func_arguments
&& node instanceof AST_Lambda
&& node.uses_arguments) {
AST_Node.warn("arguments used in function {name} [{line},{col}]", {
name: node.name ? node.name.name : "anonymous",
line: node.start.line,
col: node.start.col
});
}
if (options.nested_defuns
&& node instanceof AST_Defun
&& !(tw.parent() instanceof AST_Scope)) {
AST_Node.warn("Function {name} declared in nested statement [{line},{col}]", {
name: node.name.name,
line: node.start.line,
col: node.start.col
});
}
});
this.walk(tw);
});
AST_SymbolRef.DEFMETHOD("reference", function(symbol) {
if (symbol) {
this.symbol = symbol;
var origin = symbol.scope;
symbol.references.push(this);
for (var s = this.scope; s; s = s.parent_scope) {
push_uniq(s.enclosed, symbol);
if (s === origin) break;
}
} else {
this.undeclared = true;
for (var s = this.scope; s; s = s.parent_scope) {
push_uniq(s.enclosed, this);
}
}
AST_LabelRef.DEFMETHOD("reference", function(def){
this.thedef = def;
def.references.push(this);
});
AST_Scope.DEFMETHOD("find_variable", function(name){
if (name instanceof AST_Symbol) name = name.name;
return HOP(this.variables, name)
? this.variables[name]
: ((this.name && this.name.name == name && this.name)
|| (this.parent_scope && this.parent_scope.find_variable(name)));
: (this.parent_scope && this.parent_scope.find_variable(name));
});
AST_Scope.DEFMETHOD("def_function", function(symbol){
this.functions[symbol.name] = symbol;
this.def_variable(symbol);
this.functions[symbol.name] = this.def_variable(symbol);
});
AST_Scope.DEFMETHOD("def_variable", function(symbol){
symbol.global = !this.parent_scope;
var def;
if (!HOP(this.variables, symbol.name)) {
this.variables[symbol.name] = symbol;
def = new SymbolDef(this, symbol);
this.variables[symbol.name] = def;
def.global = !this.parent_scope;
} else {
symbol.uniq = this.variables[symbol.name];
def = this.variables[symbol.name];
def.orig.push(symbol);
}
symbol.scope = this;
return symbol.thedef = def;
});
AST_Scope.DEFMETHOD("next_mangled", function(){
@@ -297,33 +247,29 @@ AST_Scope.DEFMETHOD("next_mangled", function(){
}
});
AST_SymbolDeclaration.DEFMETHOD("unmangleable", function(){
return this.global || this.scope.uses_eval || this.scope.uses_with;
AST_Symbol.DEFMETHOD("unmangleable", function(){
return this.definition().unmangleable();
});
// labels are always mangleable
AST_Label.DEFMETHOD("unmangleable", function(){
return false;
});
AST_SymbolDeclaration.DEFMETHOD("mangle", function(){
if (this.uniq && this.uniq !== this) {
this.uniq.mangle();
}
else if (!(this.mangled_name || this.unmangleable())) {
this.mangled_name = this.scope.next_mangled();
}
});
AST_Label.DEFMETHOD("mangle", function(){
throw new Error("Don't call this");
});
AST_SymbolDeclaration.DEFMETHOD("unreferenced", function(){
AST_Symbol.DEFMETHOD("unreferenced", function(){
return this.definition().references.length == 0;
});
AST_SymbolDeclaration.DEFMETHOD("definition", function(){
return this.uniq || this;
AST_Symbol.DEFMETHOD("undeclared", function(){
return this.definition().undeclared;
});
AST_Symbol.DEFMETHOD("definition", function(){
return this.thedef;
});
AST_Symbol.DEFMETHOD("global", function(){
return this.definition().global;
});
AST_Toplevel.DEFMETHOD("mangle_names", function(){
@@ -332,6 +278,7 @@ AST_Toplevel.DEFMETHOD("mangle_names", function(){
// present (and for AST_SymbolRef-s it'll use the mangled name of
// the AST_SymbolDeclaration that it points to).
var lname = -1;
var to_mangle = [];
var tw = new TreeWalker(function(node, descend){
if (node instanceof AST_LabeledStatement) {
// lname is incremented when we get to the AST_Label
@@ -347,7 +294,7 @@ AST_Toplevel.DEFMETHOD("mangle_names", function(){
for (var i in a) if (HOP(a, i)) {
var symbol = a[i];
if (!(is_setget && symbol instanceof AST_SymbolLambda))
symbol.mangle();
to_mangle.push(symbol);
}
return;
}
@@ -359,6 +306,16 @@ AST_Toplevel.DEFMETHOD("mangle_names", function(){
}
});
this.walk(tw);
// strangely, if we try to give more frequently used variables
// shorter name, the size after gzip seems to be higher! so leave
// this commented out I guess...
//
// to_mangle = mergeSort(to_mangle, function(a, b){
// return b.references.length - a.references.length;
// });
to_mangle.forEach(function(def){ def.mangle() });
});
AST_Toplevel.DEFMETHOD("compute_char_frequency", function(){
@@ -419,9 +376,7 @@ AST_Toplevel.DEFMETHOD("compute_char_frequency", function(){
base54.consider("catch");
else if (node instanceof AST_Finally)
base54.consider("finally");
else if (node instanceof AST_SymbolDeclaration && node.unmangleable())
base54.consider(node.name);
else if (node instanceof AST_SymbolRef && !node.uniq && !(node instanceof AST_LabelRef))
else if (node instanceof AST_Symbol && node.unmangleable())
base54.consider(node.name);
else if (node instanceof AST_Unary || node instanceof AST_Binary)
base54.consider(node.operator);
@@ -442,12 +397,12 @@ var base54 = (function() {
base54.consider = function(str){
for (var i = str.length; --i >= 0;) {
var ch = str.charAt(i);
if (string.indexOf(ch))
if (string.indexOf(ch) >= 0)
++frequency[ch];
}
};
base54.sort = function() {
chars.sort(function(a, b){
chars = mergeSort(chars, function(a, b){
if (is_digit(a) && !is_digit(b)) return 1;
if (is_digit(b) && !is_digit(a)) return -1;
return frequency[b] - frequency[a];
@@ -456,6 +411,7 @@ var base54 = (function() {
base54.reset = reset;
reset();
base54.get = function(){ return chars };
base54.freq = function(){ return frequency };
function base54(num) {
var ret = "", base = 54;
do {
@@ -467,3 +423,82 @@ var base54 = (function() {
};
return base54;
})();
AST_Toplevel.DEFMETHOD("scope_warnings", function(options){
options = defaults(options, {
undeclared : false, // this makes a lot of noise
unreferenced : true,
assign_to_global : true,
func_arguments : true,
nested_defuns : true,
eval : true
});
var tw = new TreeWalker(function(node){
if (options.undeclared
&& node instanceof AST_SymbolRef
&& node.undeclared())
{
// XXX: this also warns about JS standard names,
// i.e. Object, Array, parseInt etc. Should add a list of
// exceptions.
AST_Node.warn("Undeclared symbol: {name} [{line},{col}]", {
name: node.name,
line: node.start.line,
col: node.start.col
});
}
if (options.assign_to_global)
{
var sym = null;
if (node instanceof AST_Assign && node.left instanceof AST_SymbolRef)
sym = node.left;
else if (node instanceof AST_ForIn && node.init instanceof AST_SymbolRef)
sym = node.init;
if (sym
&& (sym.undeclared()
|| (sym.global() && sym.scope !== sym.definition().scope))) {
AST_Node.warn("{msg}: {name} [{line},{col}]", {
msg: sym.undeclared() ? "Accidental global?" : "Assignment to global",
name: sym.name,
line: sym.start.line,
col: sym.start.col
});
}
}
if (options.eval
&& node instanceof AST_SymbolRef
&& node.undeclared()
&& node.name == "eval") {
AST_Node.warn("Eval is used [{line},{col}]", node.start);
}
if (options.unreferenced
&& node instanceof AST_SymbolDeclaration
&& node.unreferenced()) {
AST_Node.warn("{type} {name} is declared but not referenced [{line},{col}]", {
type: node instanceof AST_Label ? "Label" : "Symbol",
name: node.name,
line: node.start.line,
col: node.start.col
});
}
if (options.func_arguments
&& node instanceof AST_Lambda
&& node.uses_arguments) {
AST_Node.warn("arguments used in function {name} [{line},{col}]", {
name: node.name ? node.name.name : "anonymous",
line: node.start.line,
col: node.start.col
});
}
if (options.nested_defuns
&& node instanceof AST_Defun
&& !(tw.parent() instanceof AST_Scope)) {
AST_Node.warn("Function {name} declared in nested statement [{line},{col}]", {
name: node.name.name,
line: node.start.line,
col: node.start.col
});
}
});
this.walk(tw);
});

View File

@@ -151,3 +151,27 @@ function string_template(text, props) {
return props[p];
});
};
function mergeSort(array, cmp) {
if (array.length < 2) return array.slice();
function merge(a, b) {
var r = [], ai = 0, bi = 0, i = 0;
while (ai < a.length && bi < b.length) {
cmp(a[ai], b[bi]) <= 0
? r[i++] = a[ai++]
: r[i++] = b[bi++];
}
if (ai < a.length) r.push.apply(r, a.slice(ai));
if (bi < b.length) r.push.apply(r, b.slice(bi));
return r;
};
function _ms(a) {
if (a.length <= 1)
return a;
var m = Math.floor(a.length / 2), left = a.slice(0, m), right = a.slice(m);
left = _ms(left);
right = _ms(right);
return merge(left, right);
};
return _ms(array);
};

View File

@@ -16,6 +16,7 @@ ast.compute_char_frequency();
UglifyJS.base54.sort();
ast.figure_out_scope();
ast.scope_warnings();
ast.mangle_names();
sys.error(UglifyJS.base54.get());