extend support for Unicode (#4228)

This commit is contained in:
Alex Lam S.L
2020-10-19 02:34:17 +01:00
committed by GitHub
parent 96bf7fceab
commit 8ecaa40c6e
5 changed files with 175 additions and 148 deletions

View File

@@ -1,35 +0,0 @@
ascii_only_true: {
options = {}
beautify = {
ascii_only : true,
ie8 : false,
beautify : false,
}
input: {
function f() {
return "\x000\x001\x007\x008\x00" +
"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" +
"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" +
"\x20\x21\x22\x23 ... \x7d\x7e\x7f\x80\x81 ... \xfe\xff\u0fff\uffff";
}
}
expect_exact: 'function f(){return"\\x000\\x001\\x007\\x008\\0"+"\\0\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\b\\t\\n\\v\\f\\r\\x0e\\x0f"+"\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a\\x1b\\x1c\\x1d\\x1e\\x1f"+\' !"# ... }~\\x7f\\x80\\x81 ... \\xfe\\xff\\u0fff\\uffff\'}'
}
ascii_only_false: {
options = {}
beautify = {
ascii_only : false,
ie8 : false,
beautify : false,
}
input: {
function f() {
return "\x000\x001\x007\x008\x00" +
"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" +
"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" +
"\x20\x21\x22\x23 ... \x7d\x7e\x7f\x80\x81 ... \xfe\xff\u0fff\uffff";
}
}
expect_exact: 'function f(){return"\\x000\\x001\\x007\\x008\\0"+"\\0\x01\x02\x03\x04\x05\x06\x07\\b\\t\\n\\v\\f\\r\x0e\x0f"+"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"+\' !"# ... }~\x7f\x80\x81 ... \xfe\xff\u0fff\uffff\'}'
}

View File

@@ -1,3 +1,37 @@
ascii_only_false: {
options = {}
beautify = {
ascii_only: false,
}
input: {
console.log(
"\x000\x001\x007\x008\x00",
"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
"\x20\x21\x22\x23 ... \x7d\x7e\x7f\x80\x81 ... \xfe\xff\u0fff\uffff"
);
}
expect_exact: 'console.log("\\x000\\x001\\x007\\x008\\0","\\0\x01\x02\x03\x04\x05\x06\x07\\b\\t\\n\\v\\f\\r\x0e\x0f","\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",\' !"# ... }~\x7f\x80\x81 ... \xfe\xff\u0fff\uffff\');'
expect_stdout: true
}
ascii_only_true: {
options = {}
beautify = {
ascii_only: true,
}
input: {
console.log(
"\x000\x001\x007\x008\x00",
"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
"\x20\x21\x22\x23 ... \x7d\x7e\x7f\x80\x81 ... \xfe\xff\u0fff\uffff"
);
}
expect_exact: 'console.log("\\x000\\x001\\x007\\x008\\0","\\0\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\b\\t\\n\\v\\f\\r\\x0e\\x0f","\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a\\x1b\\x1c\\x1d\\x1e\\x1f",\' !"# ... }~\\x7f\\x80\\x81 ... \\xfe\\xff\\u0fff\\uffff\');'
expect_stdout: true
}
unicode_parse_variables: {
options = {}
input: {
@@ -141,3 +175,35 @@ issue_2569: {
}
expect_exact: 'new RegExp("[\\udc42-\\udcaa\\udd74-\\udd96\\ude45-\\ude4f\\udea3-\\udecc]");'
}
surrogate_pair: {
beautify = {
ascii_only: false,
}
input: {
var \u{2f800} = {
\u{2f801}: "\u{100000}",
};
\u{2f800}.\u{2f802} = "\u{100001}";
console.log(typeof \u{2f800}, \u{2f800}.\u{2f801}, \u{2f800}["\u{2f802}"]);
}
expect_exact: 'var \ud87e\udc00={"\ud87e\udc01":"\udbc0\udc00"};\ud87e\udc00.\ud87e\udc02="\udbc0\udc01";console.log(typeof \ud87e\udc00,\ud87e\udc00.\ud87e\udc01,\ud87e\udc00["\ud87e\udc02"]);'
expect_stdout: "object \udbc0\udc00 \udbc0\udc01"
node_version: ">=4"
}
surrogate_pair_ascii: {
beautify = {
ascii_only: true,
}
input: {
var \u{2f800} = {
\u{2f801}: "\u{100000}",
};
\u{2f800}.\u{2f802} = "\u{100001}";
console.log(typeof \u{2f800}, \u{2f800}.\u{2f801}, \u{2f800}["\u{2f802}"]);
}
expect_exact: 'var \\u{2f800}={"\\ud87e\\udc01":"\\udbc0\\udc00"};\\u{2f800}.\\u{2f802}="\\udbc0\\udc01";console.log(typeof \\u{2f800},\\u{2f800}.\\u{2f801},\\u{2f800}["\\ud87e\\udc02"]);'
expect_stdout: "object \udbc0\udc00 \udbc0\udc01"
node_version: ">=4"
}

View File

@@ -1,64 +1,58 @@
var assert = require("assert");
var run_code = require("../sandbox").run_code;
var UglifyJS = require("../node");
describe("String literals", function() {
it("Should throw syntax error if a string literal contains a newline", function() {
var inputs = [
[
"'\n'",
"'\r'",
'"\r\n"',
"'\u2028'",
'"\u2029"'
];
var test = function(input) {
return function() {
'"\u2029"',
].forEach(function(input) {
assert.throws(function() {
var ast = UglifyJS.parse(input);
};
};
var error = function(e) {
return e instanceof UglifyJS.JS_Parse_Error
&& e.message === "Unterminated string constant";
};
for (var input in inputs) {
assert.throws(test(inputs[input]), error);
}
}, function(e) {
return e instanceof UglifyJS.JS_Parse_Error
&& e.message === "Unterminated string constant";
});
});
});
it("Should handle line continuation correctly", function() {
[
'"\\\r"',
'"\\\n"',
'"\\\r\n"',
].forEach(function(str) {
var code = "console.log(" + str + ");";
var result = UglifyJS.minify(code);
if (result.error) throw result.error;
assert.strictEqual(run_code(result.code), run_code(code));
});
});
it("Should not throw syntax error if a string has a line continuation", function() {
var output = UglifyJS.parse('var a = "a\\\nb";').print_to_string();
assert.equal(output, 'var a="ab";');
var ast = UglifyJS.parse('var a = "a\\\nb";');
assert.equal(ast.print_to_string(), 'var a="ab";');
});
it("Should throw error in strict mode if string contains escaped octalIntegerLiteral", function() {
var inputs = [
[
'"use strict";\n"\\76";',
'"use strict";\nvar foo = "\\76";',
'"use strict";\n"\\1";',
'"use strict";\n"\\07";',
'"use strict";\n"\\011"'
];
var test = function(input) {
return function() {
'"use strict";\n"\\011"',
].forEach(function(input) {
assert.throws(function() {
var output = UglifyJS.parse(input);
}
};
var error = function(e) {
return e instanceof UglifyJS.JS_Parse_Error
&& e.message === "Legacy octal escape sequences are not allowed in strict mode";
}
for (var input in inputs) {
assert.throws(test(inputs[input]), error);
}
}, function(e) {
return e instanceof UglifyJS.JS_Parse_Error
&& e.message === "Legacy octal escape sequences are not allowed in strict mode";
});
});
});
it("Should not throw error outside strict mode if string contains escaped octalIntegerLiteral", function() {
var tests = [
[
[ ';"\\76";', ';">";' ],
[ ';"\\0";', ';"\\0";' ],
[ ';"\\08"', ';"\\x008";' ],
@@ -66,19 +60,15 @@ describe("String literals", function() {
[ ';"\\0008"', ';"\\x008";' ],
[ ';"use\\\n strict";\n"\\07";', ';"use strict";"\07";' ],
[ '"use strict" === "use strict";\n"\\76";', '"use strict"==="use strict";">";' ],
];
for (var test in tests) {
var output = UglifyJS.parse(tests[test][0]).print_to_string();
assert.equal(output, tests[test][1]);
}
].forEach(function(test) {
var ast = UglifyJS.parse(test[0]);
assert.equal(ast.print_to_string(), test[1]);
});
});
it("Should not throw error when digit is 8 or 9", function() {
assert.equal(UglifyJS.parse('"use strict";;"\\08"').print_to_string(), '"use strict";;"\\x008";');
assert.equal(UglifyJS.parse('"use strict";;"\\09"').print_to_string(), '"use strict";;"\\x009";');
});
it("Should not unescape unpaired surrogates", function() {
var code = [];
for (var i = 0; i <= 0xF; i++) {
@@ -115,4 +105,33 @@ describe("String literals", function() {
assert.ok(code.length > ascii.code.length);
assert.strictEqual(eval(code), eval(ascii.code));
});
it("Should reject invalid Unicode escape sequence", function() {
[
'var foo = "\\u-111"',
'var bar = "\\u{-1}"',
'var baz = "\\ugggg"',
].forEach(function(test) {
assert.throws(function() {
UglifyJS.parse(test);
}, function(e) {
return e instanceof UglifyJS.JS_Parse_Error
&& e.message === "Invalid hex-character pattern in string";
});
});
});
it("Should reject invalid code points in Unicode escape sequence", function() {
[
// A bit over the valid range
'"\\u{110000}"',
// 32-bit overflow resulting in "a"
'"\\u{100000061}"',
].forEach(function(test) {
assert.throws(function() {
UglifyJS.parse(test);
}, function(e) {
return e instanceof UglifyJS.JS_Parse_Error
&& /^Invalid character code: /.test(e.message);
});
});
});
});