Extend unicode support
* Support \u{xxxx} syntax
* Add support for surrogate pairs
* Allow identifiers to contain unicode escape sequences
This commit is contained in:
committed by
Richard van Velzen
parent
07785d0003
commit
63c432f4fa
@@ -77,9 +77,12 @@ function OutputStream(options) {
|
||||
var OUTPUT = "";
|
||||
|
||||
function to_ascii(str, identifier) {
|
||||
return str.replace(/[\u0000-\u001f\u007f-\uffff]/g, function(ch) {
|
||||
var code = ch.charCodeAt(0).toString(16);
|
||||
if (code.length <= 2 && !identifier) {
|
||||
return str.replace(/[\ud800-\udbff][\udc00-\udfff]|[\u0000-\u001f\u007f-\uffff]/g, function(ch) {
|
||||
var code = get_full_char_code(ch, 0).toString(16);
|
||||
|
||||
if ((identifier && code.length === 1) || code.length > 4) {
|
||||
return "\\u{" + code + "}";
|
||||
} else if (code.length <= 2 && !identifier) {
|
||||
while (code.length < 2) code = "0" + code;
|
||||
return "\\x" + code;
|
||||
} else {
|
||||
@@ -107,7 +110,7 @@ function OutputStream(options) {
|
||||
case "\u2029": return "\\u2029";
|
||||
case "\ufeff": return "\\ufeff";
|
||||
case "\0":
|
||||
return /[0-7]/.test(str.charAt(i+1)) ? "\\x00" : "\\0";
|
||||
return /[0-7]/.test(get_full_char(str, i+1)) ? "\\x00" : "\\0";
|
||||
}
|
||||
return s;
|
||||
});
|
||||
@@ -158,7 +161,13 @@ function OutputStream(options) {
|
||||
var last = null;
|
||||
|
||||
// Returns the final character of `last`.
// If the final UTF-16 code unit is reported by is_surrogate_pair_tail() as the
// second half of a surrogate pair, the preceding code unit is included too, so
// that the pair is returned as one two-unit string instead of being split.
// NOTE(review): `last` is presumably the most recently printed string and must
// be non-null when this is called — confirm against the callers.
function last_char() {
    var char = last.charAt(last.length - 1);

    if (is_surrogate_pair_tail(char)) {
        // Prepend the other half of the pair.
        return last.charAt(last.length - 2) + char;
    }

    return char;
};
|
||||
|
||||
function maybe_newline() {
|
||||
@@ -170,7 +179,7 @@ function OutputStream(options) {
|
||||
|
||||
function print(str) {
|
||||
str = String(str);
|
||||
var ch = str.charAt(0);
|
||||
var ch = get_full_char(str, 0);
|
||||
if (might_need_semicolon) {
|
||||
might_need_semicolon = false;
|
||||
|
||||
|
||||
176
lib/parse.js
176
lib/parse.js
File diff suppressed because one or more lines are too long
@@ -15,3 +15,29 @@ unicode_parse_variables: {
|
||||
var l০ = 3;
|
||||
}
|
||||
}
|
||||
|
||||
// Identifiers written entirely as \u{...} escapes: the input declares two
// variables whose names are spelled with code point escapes, and expect_exact
// pins the exact text that must be produced for them.
unicode_escaped_identifier: {
|
||||
input: {
|
||||
var \u{61} = "foo";
|
||||
var \u{10000} = "bar";
|
||||
}
|
||||
expect_exact: 'var a="foo";var \u{10000}="bar";';
|
||||
}
|
||||
|
||||
// Same idea with the ascii_only output option enabled: escapes of ASCII
// characters (\u{0061}, \u{0065}) are expected to come out as the plain
// characters, while the larger code points are expected as \u{...} escapes,
// both in the identifier and inside the string literal.
unicode_identifier_ascii_only: {
|
||||
beautify = {ascii_only: true}
|
||||
input: {
|
||||
var \u{0061} = "hi";
|
||||
var bar = "h\u{0065}llo";
|
||||
var \u{10000} = "testing \u{101111}";
|
||||
}
|
||||
expect_exact: 'var a="hi";var bar="hello";var \\u{10000}="testing \\u{101111}";'
|
||||
}
|
||||
|
||||
// With ascii_only enabled, a code point written with six hex digits inside a
// string literal is expected to be emitted as a \u{...} escape.
unicode_string_literals: {
|
||||
beautify = {ascii_only: true}
|
||||
input: {
|
||||
var a = "6 length unicode character: \u{101111}";
|
||||
}
|
||||
expect_exact: 'var a="6 length unicode character: \\u{101111}";'
|
||||
}
|
||||
|
||||
133
test/mocha/unicode.js
Normal file
133
test/mocha/unicode.js
Normal file
@@ -0,0 +1,133 @@
|
||||
var assert = require("assert");
|
||||
var uglify = require("../../");
|
||||
|
||||
// Parser and output tests for unicode escapes (\uXXXX and \u{X...}) and
// surrogate pairs.
describe("Unicode", function() {
    // Asserts that parsing each snippet in `tests` throws a JS_Parse_Error
    // whose message is exactly `message`.
    // The expected messages are compared verbatim, so they must stay in sync
    // with the messages raised by the parser.
    function assert_parse_errors(tests, message) {
        var fail = function(e) {
            return e instanceof uglify.JS_Parse_Error
                && e.message === message;
        };

        tests.forEach(function(test) {
            assert.throws(function() {
                uglify.parse(test);
            }, fail);
        });
    }

    it("Should not accept invalid code ranges in unicode escape", function() {
        assert_parse_errors([
            "\\u{110000}", // A bit over the unicode range
            "\\u{100000061} = 'foo'", // 32-bit overflow resulting in "a"
            "\\u{fffffffffff}" // A bit too much over the unicode range
        ], "SyntaxError: Unicode reference out of bounce");
    });

    it("Should not accept invalid unicode sequences", function() {
        assert_parse_errors([
            "var foo = '\\u-111'",
            "var bar = '\\u{-1}'",
            "var baz = '\\ugggg'"
        ], "SyntaxError: Invalid hex-character pattern in string");
    });

    it("Should throw error if escaped first identifier char is not part of ID_Start", function() {
        assert_parse_errors([
            'var \\u{0} = "foo";',
            'var \\u{10ffff} = "bar";',
            'var \\u000a = "what\'s up";'
        ], "SyntaxError: First identifier char is an invalid identifier char");
    });

    it("Should throw error if escaped non-first identifier char is not part of ID_Continue", function() {
        assert_parse_errors([
            'var a\\u{0} = "foo";',
            'var a\\u{10ffff} = "bar";',
            'var z\\u000a = "what\'s up";'
        ], "SyntaxError: Invalid escaped identifier char");
    });

    it("Should throw error if identifier is a keyword written with escape sequences", function() {
        assert_parse_errors([
            'var \\u0069\\u006e = "foo"', // in
            'var \\u0076\\u0061\\u0072 = "bar"', // var
            'var \\u{66}\\u{6f}\\u{72} = "baz"', // for
            'var \\u0069\\u{66} = "foobar"' // if
        ], "SyntaxError: Escaped characters are not allowed in keywords");
    });

    it("Should read strings containing surrogates correctly", function() {
        // Each entry is [input source, expected ascii_only output]: a
        // surrogate pair in the input must be emitted as one \u{...} escape.
        var tests = [
            ['var a = "\ud800\udc00";', 'var a="\\u{10000}";'],
            ['var b = "\udbff\udfff";', 'var b="\\u{10ffff}";']
        ];

        for (var i = 0; i < tests.length; i++) {
            assert.strictEqual(uglify.minify(tests[i][0], {
                fromString: true, output: { ascii_only: true}
            }).code, tests[i][1]);
        }
    });
});
|
||||
Reference in New Issue
Block a user