adaptive base54 digits depending on char frequency (WIP)

This commit is contained in:
Mihai Bazon
2012-09-10 18:25:52 +03:00
parent a41e6cfabb
commit 6b9aeb5325
4 changed files with 138 additions and 71 deletions

View File

@@ -70,7 +70,12 @@ var output = UglifyJS.OutputStream({
source_map: SOURCE_MAP
});
files.forEach(do_file);
files = files.map(do_file_1);
files = files.map(do_file_2);
files.forEach(do_file_3);
if (ARGS.v) {
sys.error("BASE54 digits: " + UglifyJS.base54.get());
}
output = output.get();
@@ -100,7 +105,7 @@ if (ARGS.stats) {
/* -----[ functions ]----- */
function do_file(file) {
function do_file_1(file) {
if (ARGS.v) {
sys.error("Compressing " + file);
}
@@ -112,13 +117,27 @@ function do_file(file) {
time_it("scope", function(){
ast.figure_out_scope();
});
time_it("mangle", function(){
ast.mangle_names();
});
time_it("squeeze", function(){
var compressor = UglifyJS.Compressor({});
ast = ast.squeeze(compressor);
});
ast.filename = file;
return ast;
}
// Second pass over an already-parsed file: has the AST report its character
// usage to the base54 frequency table (via compute_char_frequency) and hands
// the very same AST back, so the function can serve as a `map` callback.
function do_file_2(ast) {
    // Scope analysis is not repeated in this pass; the figure_out_scope()
    // call that sat here is disabled.
    function gather_frequencies() {
        ast.compute_char_frequency();
    }
    // The work is still timed under the "scope" label.
    time_it("scope", gather_frequencies);
    return ast;
}
function do_file_3(ast) {
var file = ast.filename;
time_it("mangle", function(){
ast.mangle_names();
});
time_it("generate", function(){
if (SOURCE_MAP) {
if (ARGS.p != null) {

View File

@@ -296,14 +296,19 @@ AST_Scope.DEFMETHOD("next_mangled", function(){
}
});
// Tells whether this declaration must keep its original name.  Yields the
// first truthy value among the symbol's `global` flag, its scope's
// `uses_eval` flag and its scope's `uses_with` flag (or the last of these
// when none is truthy).
AST_SymbolDeclaration.DEFMETHOD("unmangleable", function(){
    var is_global = this.global;
    if (is_global) return is_global;
    // The scope is only consulted for non-global symbols.
    var scope = this.scope;
    return scope.uses_eval || scope.uses_with;
});
// For labels, unmangleable() is unconditionally false: neither a `global`
// flag nor the scope's eval/with flags are consulted.
AST_Label.DEFMETHOD("unmangleable", function(){
return false;
});
AST_SymbolDeclaration.DEFMETHOD("mangle", function(){
if (this.uniq) {
if (this.uniq && this.uniq !== this) {
this.uniq.mangle();
}
else if (!(this.global
|| this.scope.uses_eval
|| this.scope.uses_with
|| this.mangled_name)) {
else if (!(this.mangled_name || this.unmangleable())) {
this.mangled_name = this.scope.next_mangled();
}
});
@@ -354,3 +359,103 @@ AST_Toplevel.DEFMETHOD("mangle_names", function(){
});
this.walk(tw);
});
// Walks the whole tree and reports to `base54.consider` the text associated
// with each node: keyword spellings, printed constants, directive values,
// object keys, property names, operators and the names of symbols that meet
// the conditions below.  Once the walk is over, `base54.sort()` is called so
// the digit table reflects the collected counts.
AST_Toplevel.DEFMETHOD("compute_char_frequency", function(){
    // Builders for the rule table.  A rule is a [matches(node), report(node)]
    // pair.
    function is_a(type) {
        return function(node){ return node instanceof type };
    }
    function keyword(text) {
        return function(){ base54.consider(text) };
    }
    // Passes the raw property value along, without any string coercion.
    function field(prop) {
        return function(node){ base54.consider(node[prop]) };
    }
    function prefixed(prefix, prop) {
        return function(node){ base54.consider(prefix + node[prop]) };
    }

    // Order matters: only the FIRST matching rule fires for a node, exactly
    // like an if / else-if chain.
    var rules = [
        [ is_a(AST_Constant), function(node){ base54.consider(node.print_to_string()) } ],
        [ is_a(AST_Debugger), keyword("debugger") ],
        [ is_a(AST_Directive), field("value") ],
        [ is_a(AST_While), keyword("while") ],
        [ is_a(AST_Do), keyword("dowhile") ],
        [ is_a(AST_If), function(node){
            base54.consider("if");
            if (node.alternative) base54.consider("else");
        } ],
        [ is_a(AST_Var), keyword("var") ],
        [ is_a(AST_Const), keyword("const") ],
        [ is_a(AST_Lambda), keyword("function") ],
        [ is_a(AST_For), keyword("for") ],
        [ is_a(AST_ForIn), keyword("forin") ],
        [ is_a(AST_Switch), keyword("switch") ],
        [ is_a(AST_Case), keyword("case") ],
        [ is_a(AST_Default), keyword("default") ],
        [ is_a(AST_With), keyword("with") ],
        [ is_a(AST_ObjectSetter), prefixed("set", "key") ],
        [ is_a(AST_ObjectGetter), prefixed("get", "key") ],
        [ is_a(AST_ObjectKeyVal), field("key") ],
        [ is_a(AST_New), keyword("new") ],
        [ is_a(AST_This), keyword("this") ],
        [ is_a(AST_Try), keyword("try") ],
        [ is_a(AST_Catch), keyword("catch") ],
        [ is_a(AST_Finally), keyword("finally") ],
        // Declarations count only when they are unmangleable.
        [ function(node){
            return node instanceof AST_SymbolDeclaration && node.unmangleable();
        }, field("name") ],
        // References count only when they have no `uniq` and are not label
        // references.
        [ function(node){
            return node instanceof AST_SymbolRef
                && !node.uniq
                && !(node instanceof AST_LabelRef);
        }, field("name") ],
        [ function(node){
            return node instanceof AST_Unary || node instanceof AST_Binary;
        }, field("operator") ],
        [ is_a(AST_Dot), field("property") ]
    ];

    var tw = new TreeWalker(function(node){
        for (var i = 0; i < rules.length; ++i) {
            var rule = rules[i];
            if (rule[0](node)) {
                rule[1](node);
                break;
            }
        }
        // Deliberately no return value, same as the if / else-if form.
    });
    this.walk(tw);
    base54.sort();
});
// Adaptive digit table for generated names.
//
//   base54(num)          -> name for the non-negative integer `num`.  The first
//                           character is picked from the first 54 entries of
//                           the table, every following character from all 64.
//   base54.consider(str) -> bumps the usage count of every table character
//                           that occurs in `str`; other characters are ignored.
//   base54.sort()        -> reorders the table by descending count, keeping the
//                           ten decimal digits after all other characters (so
//                           the first 54 entries never contain a digit).
//   base54.reset()       -> restores the initial order and zeroes the counts.
//   base54.get()         -> the live table (an array of 64 one-char strings).
//
// Relies on `is_digit`, which is defined outside this block.
var base54 = (function() {
    var string = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ$_0123456789";
    var chars, frequency;
    function reset() {
        frequency = {};
        chars = string.split("");
        chars.forEach(function(ch){ frequency[ch] = 0 });
    }
    base54.consider = function(str){
        for (var i = str.length; --i >= 0;) {
            var ch = str.charAt(i);
            // indexOf() returns -1 (truthy) for a miss and 0 (falsy) for "a",
            // so a bare truthiness test skips "a" and counts foreign
            // characters; compare against 0 explicitly.
            if (string.indexOf(ch) >= 0)
                ++frequency[ch];
        }
    };
    base54.sort = function() {
        chars.sort(function(a, b){
            // Digits always go after non-digits, whatever their counts.
            if (is_digit(a) && !is_digit(b)) return 1;
            if (is_digit(b) && !is_digit(a)) return -1;
            return frequency[b] - frequency[a];
        });
    };
    base54.reset = reset;
    reset();
    base54.get = function(){ return chars };
    function base54(num) {
        var ret = "", base = 54;
        do {
            ret += chars[num % base];
            num = Math.floor(num / base);
            // Only the leading character is limited to the first 54 entries.
            base = 64;
        } while (num > 0);
        return ret;
    }
    return base54;
})();

View File

@@ -141,23 +141,6 @@ var MAP = (function(){
return MAP;
})();
// XXX: this digit order was tuned on jQuery.  It seems to serve many other
// scripts well too (better than plain alphabetical order, at least), but
// ideally it would adapt to the script being processed.
var BASE54_DIGITS = "etnrisouaflchpdvmgybwESxTNCkLAOM_DPHBjFIqRUzWXV$JKQGYZ0516372984";
// Plain alphabetical alternative, kept for reference:
//var BASE54_DIGITS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ$_0123456789";

// Encodes `num` as a name built from BASE54_DIGITS, least significant digit
// first.  The first character is taken from the first 54 digits only; each
// later character is taken from all 64.
function base54(num) {
    var name = "";
    var radix = 54;
    for (;;) {
        name += BASE54_DIGITS.charAt(num % radix);
        num = Math.floor(num / radix);
        radix = 64;
        if (!(num > 0)) break;
    }
    return name;
}
function push_uniq(array, el) {
if (array.indexOf(el) < 0)
array.push(el);

View File

@@ -8,47 +8,7 @@ var UglifyJS = require("../tools/node");
// Reads the file named by the first command-line argument and pushes it
// through parse -> scope -> mangle -> compress -> generate, timing each step
// with time_it.
var filename = process.argv[2];
var code = fs.readFileSync(filename, "utf8");
// time_it returns whatever the callback returns, so `ast` is the parse result.
var ast = time_it("parse", function() {
return UglifyJS.parse(code);
});
time_it("scope", function(){
// calling figure_out_scope is a prerequisite for mangle_names,
// scope_warnings and compress
//
// perhaps figure_out_scope should be called automatically by the
// parser, but there might be instances where the functionality is
// not needed.
ast.figure_out_scope();
});
// Runs outside of any timed step.
ast.scope_warnings();
time_it("mangle", function(){
ast.mangle_names();
});
time_it("compress", function(){
// All compressor options are currently commented out, so the compressor
// is built from an empty options object.
var compressor = new UglifyJS.Compressor({
// sequences : true,
// properties : true,
// dead_code : true,
// keep_comps : true,
// drop_debugger : true,
// unsafe : true,
// warnings : true
});
// squeeze() hands back the tree to continue with; `ast` is replaced by it.
ast = ast.squeeze(compressor);
});
// Print the final tree into a beautifying output stream and emit the result
// through sys.puts.
var stream = UglifyJS.OutputStream({ beautify: true });
time_it("generate", function(){
ast.print(stream);
});
sys.puts(stream.get());
// Runs `cont()` and returns its result.  Whether `cont` returns or throws,
// a line of the form "// <name>: <seconds> sec." (seconds with three
// decimals) is handed to sys.debug afterwards; exceptions still propagate.
function time_it(name, cont) {
    var started = new Date().getTime();
    try {
        return cont();
    } finally {
        var elapsed_ms = new Date().getTime() - started;
        var seconds = (elapsed_ms / 1000).toFixed(3);
        sys.debug("// " + name + ": " + seconds + " sec.");
    }
}
// Parse the input, run figure_out_scope, collect character frequencies and
// print the resulting base54 digit order as a comma-separated list.
var ast = UglifyJS.parse(code);
ast.figure_out_scope();
ast.compute_char_frequency();
console.log(UglifyJS.base54.get().join(","));