Don't mix strings with directives in output

* Don't interpret strings with escaped content as directive
 * Don't interpret strings after empty statement as directive
 * Adapt output to prevent strings being represent as directive
 * Introduce UGLIFY_DEBUG to allow internal testing like EXPECT_DIRECTIVE
This commit is contained in:
Anthony Van de Gejuchte
2016-06-13 21:11:08 +02:00
committed by Richard van Velzen
parent d7971ba0e4
commit 2149bfb707
8 changed files with 361 additions and 15 deletions

View File

@@ -452,7 +452,7 @@ can pass additional arguments that control the code output:
objects
- `space-colon` (default `true`) -- insert a space after the colon signs
- `ascii-only` (default `false`) -- escape Unicode characters in strings and
regexps
regexps (affects directives with non-ascii characters becoming invalid)
- `inline-script` (default `false`) -- escape the slash in occurrences of
`</script` in strings
- `width` (default 80) -- only takes effect when beautification is on, this

View File

@@ -43,6 +43,8 @@
"use strict";
var EXPECT_DIRECTIVE = /^$|[;{][\s\n]*$/;
function OutputStream(options) {
options = defaults(options, {
@@ -354,7 +356,18 @@ function OutputStream(options) {
force_semicolon : force_semicolon,
to_ascii : to_ascii,
print_name : function(name) { print(make_name(name)) },
print_string : function(str, quote) { print(encode_string(str, quote)) },
print_string : function(str, quote, escape_directive) {
var encoded = encode_string(str, quote);
if (escape_directive === true && encoded.indexOf("\\") === -1) {
// Insert semicolons to break directive prologue
if (!EXPECT_DIRECTIVE.test(OUTPUT)) {
force_semicolon();
}
force_semicolon();
}
print(encoded);
},
encode_string : encode_string,
next_indent : next_indent,
with_indent : with_indent,
with_block : with_block,
@@ -386,6 +399,7 @@ function OutputStream(options) {
};
var use_asm = false;
var in_directive = false;
AST_Node.DEFMETHOD("print", function(stream, force_parens){
var self = this, generator = self._codegen, prev_use_asm = use_asm;
@@ -642,9 +656,16 @@ function OutputStream(options) {
/* -----[ statements ]----- */
function display_body(body, is_toplevel, output) {
function display_body(body, is_toplevel, output, allow_directives) {
var last = body.length - 1;
in_directive = allow_directives;
body.forEach(function(stmt, i){
if (in_directive === true && !(stmt instanceof AST_Directive ||
stmt instanceof AST_EmptyStatement ||
(stmt instanceof AST_SimpleStatement && stmt.body instanceof AST_String)
)) {
in_directive = false;
}
if (!(stmt instanceof AST_EmptyStatement)) {
output.indent();
stmt.print(output);
@@ -653,7 +674,14 @@ function OutputStream(options) {
if (is_toplevel) output.newline();
}
}
if (in_directive === true &&
stmt instanceof AST_SimpleStatement &&
stmt.body instanceof AST_String
) {
in_directive = false;
}
});
in_directive = false;
};
AST_StatementWithBody.DEFMETHOD("_do_print_body", function(output){
@@ -665,7 +693,7 @@ function OutputStream(options) {
output.semicolon();
});
DEFPRINT(AST_Toplevel, function(self, output){
display_body(self.body, true, output);
display_body(self.body, true, output, true);
output.print("");
});
DEFPRINT(AST_LabeledStatement, function(self, output){
@@ -677,9 +705,9 @@ function OutputStream(options) {
self.body.print(output);
output.semicolon();
});
function print_bracketed(body, output) {
function print_bracketed(body, output, allow_directives) {
if (body.length > 0) output.with_block(function(){
display_body(body, false, output);
display_body(body, false, output, allow_directives);
});
else output.print("{}");
};
@@ -779,7 +807,7 @@ function OutputStream(options) {
});
});
output.space();
print_bracketed(self.body, output);
print_bracketed(self.body, output, true);
});
DEFPRINT(AST_Lambda, function(self, output){
self._do_print(output);
@@ -1185,7 +1213,7 @@ function OutputStream(options) {
output.print(self.getValue());
});
DEFPRINT(AST_String, function(self, output){
output.print_string(self.getValue(), self.quote);
output.print_string(self.getValue(), self.quote, in_directive);
});
DEFPRINT(AST_Number, function(self, output){
if (use_asm && self.start.raw != null) {

View File

@@ -815,9 +815,10 @@ function parse($TEXT, options) {
handle_regexp();
switch (S.token.type) {
case "string":
if (S.in_directives) {
if (is_token(peek(), "punc", ";") || peek().nlb) {
S.input.add_directive(S.token.raw.substr(1, S.token.raw.length - 2));
var dir = false;
if (S.in_directives === true) {
if ((is_token(peek(), "punc", ";") || peek().nlb) && S.token.raw.indexOf("\\") === -1) {
S.input.add_directive(S.token.value);
} else {
S.in_directives = false;
}
@@ -855,6 +856,7 @@ function parse($TEXT, options) {
case "(":
return simple_statement();
case ";":
S.in_directives = false;
next();
return new AST_EmptyStatement();
default:

View File

@@ -58,4 +58,313 @@ describe("Directives", function() {
assert.strictEqual(tokenizer.has_directive("use asm"), false);
assert.strictEqual(tokenizer.has_directive("use thing"), false);
});
it("Should know which strings are directive and which ones are not", function() {
var test_directive = function(tokenizer, test) {
test.directives.map(function(directive) {
assert.strictEqual(tokenizer.has_directive(directive), true, directive + " in " + test.input);
});
test.non_directives.map(function(fake_directive) {
assert.strictEqual(tokenizer.has_directive(fake_directive), false, fake_directive + " in " + test.input);
});
}
var tests = [
{
input: '"use strict"\n',
directives: ["use strict"],
non_directives: ["use asm"]
},
{
input: '"use\\\nstrict";',
directives: [],
non_directives: ["use strict", "use\nstrict", "use \nstrict", "use asm"]
},
{
input: '"use strict"\n"use asm"\n"use bar"\n',
directives: ["use strict", "use asm", "use bar"],
non_directives: ["use foo", "use\\x20strict"]
},
{
input: '"use \\\nstrict";"use strict";',
directives: [],
non_directives: ["use strict", "use\nstrict", "use \nstrict", "use asm"]
},
{
input: '"\\76";',
directives: [],
non_directives: [">", "\\76"]
},
{
input: '"use strict"', // no ; or newline
directives: [],
non_directives: ["use strict", "use\nstrict", "use \nstrict", "use asm"]
},
{
input: ';"use strict"',
directives: [],
non_directives: ["use strict", "use\nstrict", "use \nstrict", "use asm"]
},
// Duplicate above code but put it in a function
{
input: 'function foo() {"use strict"\n',
directives: ["use strict"],
non_directives: ["use asm"]
},
{
input: 'function foo() {"use\\\nstrict";',
directives: [],
non_directives: ["use strict", "use\nstrict", "use \nstrict", "use asm"]
},
{
input: 'function foo() {"use strict"\n"use asm"\n"use bar"\n',
directives: ["use strict", "use asm", "use bar"],
non_directives: ["use foo", "use\\x20strict"]
},
{
input: 'function foo() {"use \\\nstrict";"use strict";',
directives: [],
non_directives: ["use strict", "use\nstrict", "use \nstrict", "use asm"]
},
{
input: 'var foo = function() {"\\76";',
directives: [],
non_directives: [">", "\\76"]
},
{
input: 'var foo = function() {"use strict"', // no ; or newline
directives: [],
non_directives: ["use strict", "use\nstrict", "use \nstrict", "use asm"]
},
{
input: 'var foo = function() {;"use strict"',
directives: [],
non_directives: ["use strict", "use\nstrict", "use \nstrict", "use asm"]
},
// Special cases
{
input: '"1";"2";"3";"4";;"5"',
directives: ["1", "2", "3", "4"],
non_directives: ["5", "6", "use strict", "use asm"]
},
{
input: 'if(1){"use strict";',
directives: [],
non_directives: ["use strict", "use\nstrict", "use \nstrict", "use asm"]
},
{
input: '"use strict";try{"use asm";',
directives: ["use strict"],
non_directives: ["use\nstrict", "use \nstrict", "use asm"]
}
];
for (var i = 0; i < tests.length; i++) {
// Fail parser deliberately to get state at failure
var tokenizer = uglify.tokenizer(tests[i].input + "]", "foo.js");
try {
var parser = uglify.parse(tokenizer);
throw new Error("Expected parser to fail");
} catch (e) {
assert.strictEqual(e instanceof uglify.JS_Parse_Error, true);
assert.strictEqual(e.message, "Unexpected token: punc (])");
}
test_directive(tokenizer, tests[i]);
}
});
it("Should test EXPECT_DIRECTIVE RegExp", function() {
var tests = [
["", true],
["'test';", true],
["'test';;", true],
["'tests';\n", true],
["'tests'", false],
["'tests'; \n\t", true],
["'tests';\n\n", true],
["\n\n\"use strict\";\n\n", true]
];
for (var i = 0; i < tests.length; i++) {
assert.strictEqual(uglify.EXPECT_DIRECTIVE.test(tests[i][0]), tests[i][1], tests[i][0]);
}
});
it("Should only print 2 semicolons spread over 2 lines in beautify mode", function() {
assert.strictEqual(
uglify.minify(
'"use strict";\'use strict\';"use strict";"use strict";;\'use strict\';console.log(\'use strict\');',
{fromString: true, output: {beautify: true, quote_style: 3}, compress: false}
).code,
'"use strict";\n\n\'use strict\';\n\n"use strict";\n\n"use strict";\n\n;\'use strict\';\n\nconsole.log(\'use strict\');'
);
});
it("Should not add double semicolons in non-scoped block statements to avoid strings becoming directives", function() {
var tests = [
[
'{"use\x20strict"}',
'{"use strict"}'
],
[
'function foo(){"use\x20strict";}', // Valid place for directives
'function foo(){"use strict"}'
],
[
'try{"use\x20strict"}catch(e){}finally{"use\x20strict"}',
'try{"use strict"}catch(e){}finally{"use strict"}'
],
[
'if(1){"use\x20strict"} else {"use strict"}',
'if(1){"use strict"}else{"use strict"}'
]
];
for (var i = 0; i < tests.length; i++) {
assert.strictEqual(
uglify.minify(tests[i][0], {fromString: true, quote_style: 3, compress: false}).code,
tests[i][1],
tests[i][0]
);
}
});
it("Should add double semicolon when relying on automatic semicolon insertion", function() {
var code = uglify.minify('"use strict";"use\\x20strict";',
{fromString: true, output: {semicolons: false}, compress: false}
).code;
assert.strictEqual(code, '"use strict";;"use strict"\n');
});
it("Should check quote style of directives", function() {
var tests = [
// 0. Prefer double quotes, unless string contains more double quotes than single quotes
[
'"testing something";',
0,
'"testing something";'
],
[
"'use strict';",
0,
'"use strict";'
],
[
'"\\\'use strict\\\'";', // Not a directive as it contains quotes
0,
';"\'use strict\'";',
],
[
"'\"use strict\"';",
0,
"'\"use strict\"';",
],
// 1. Always use single quote
[
'"testing something";',
1,
"'testing something';"
],
[
"'use strict';",
1,
"'use strict';"
],
[
'"\'use strict\'";',
1,
// Intentionally causes directive breakage at cost of less logic, usage should be rare anyway
"'\\'use strict\\'';",
],
[
"'\\'use strict\\'';", // Not a valid directive
1,
"'\\'use strict\\'';" // But no ; necessary as directive stays invalid
],
[
"'\"use strict\"';",
1,
"'\"use strict\"';",
],
// 2. Always use double quote
[
'"testing something";',
2,
'"testing something";'
],
[
"'use strict';",
2,
'"use strict";'
],
[
'"\'use strict\'";',
2,
"\"'use strict'\";",
],
[
"'\"use strict\"';",
2,
// Intentionally causes directive breakage at cost of less logic, usage should be rare anyway
'"\\\"use strict\\\"";',
],
[
'"\\"use strict\\"";', // Not a valid directive
2,
'"\\"use strict\\"";' // But no ; necessary as directive stays invalid
],
// 3. Always use original
[
'"testing something";',
3,
'"testing something";'
],
[
"'use strict';",
3,
"'use strict';",
],
[
'"\'use strict\'";',
3,
'"\'use strict\'";',
],
[
"'\"use strict\"';",
3,
"'\"use strict\"';",
],
];
for (var i = 0; i < tests.length; i++) {
assert.strictEqual(
uglify.minify(tests[i][0], {fromString: true, output:{quote_style: tests[i][1]}, compress: false}).code,
tests[i][2],
tests[i][0] + " using mode " + tests[i][1]
);
}
});
it("Should be able to compress without side effects", function() {
// NOTE: the "use asm" directive disables any optimisation after being defined
var tests = [
[
'"use strict";"use strict";"use strict";"use foo";"use strict";;"use sloppy";doSomething("foo");',
'"use strict";"use foo";doSomething("foo");'
],
[
// Nothing gets optimised in the compressor because "use asm" is the first statement
'"use asm";"use\\x20strict";1+1;',
'"use asm";;"use strict";1+1;' // Yet, the parser noticed that "use strict" wasn't a directive
]
];
for (var i = 0; i < tests.length; i++) {
assert.strictEqual(
uglify.minify(tests[i][0], {fromString: true, compress: {collapse_vars: true, side_effects: true}}).code,
tests[i][1],
tests[i][0]
);
}
});
});

View File

@@ -59,13 +59,13 @@ describe("String literals", function() {
it("Should not throw error outside strict mode if string contains escaped octalIntegerLiteral", function() {
var tests = [
['"\\76";', '">";'],
['"\\76";', ';">";'],
['"\\0"', '"\\0";'],
['"\\08"', '"\\08";'],
['"\\008"', '"\\08";'],
['"\\0008"', '"\\08";'],
['"use strict" === "use strict";\n"\\76";', '"use strict"==="use strict";">";'],
// ['"use\\\n strict";\n"\\07";', '"use\\\n strict";\n"\\u0007";'] // TODO No way to store this content literally yet as directive
['"use\\\n strict";\n"\\07";', ';"use strict";"\07";']
];
for (var test in tests) {

View File

@@ -1,5 +1,7 @@
#! /usr/bin/env node
global.UGLIFY_DEBUG = true;
var U = require("../tools/node");
var path = require("path");
var fs = require("fs");

View File

@@ -17,3 +17,7 @@ exports["string_template"] = string_template;
exports["tokenizer"] = tokenizer;
exports["is_identifier"] = is_identifier;
exports["SymbolDef"] = SymbolDef;
if (DEBUG) {
exports["EXPECT_DIRECTIVE"] = EXPECT_DIRECTIVE;
}

View File

@@ -25,11 +25,12 @@ var FILES = exports.FILES = [
var UglifyJS = exports;
new Function("MOZ_SourceMap", "exports", FILES.map(function(file){
new Function("MOZ_SourceMap", "exports", "DEBUG", FILES.map(function(file){
return fs.readFileSync(file, "utf8");
}).join("\n\n"))(
require("source-map"),
UglifyJS
UglifyJS,
!!global.UGLIFY_DEBUG
);
UglifyJS.AST_Node.warn_function = function(txt) {