diff --git a/lib/output.js b/lib/output.js index 9fb97c81..ccd614d3 100644 --- a/lib/output.js +++ b/lib/output.js @@ -115,19 +115,15 @@ function OutputStream(options) { var OUTPUT = ""; var to_utf8 = options.ascii_only ? function(str, identifier) { - return str.replace(/[\ud800-\udbff][\udc00-\udfff]|[\u0000-\u001f\u007f-\uffff]/g, function(ch) { - var code = get_full_char_code(ch, 0).toString(16); - - if ((identifier && code.length === 1 && options.ecma >= 6) || code.length > 4) { - if (options.ecma < 6) { - if (identifier) { - return ch; // no \u{} support - } - return "\\u" + ch.charCodeAt(0).toString(16) + "\\u" - + ch.charCodeAt(1).toString(16); - } + if (options.ecma >= 6) { + str = str.replace(/[\ud800-\udbff][\udc00-\udfff]/g, function(ch) { + var code = get_full_char_code(ch, 0).toString(16); return "\\u{" + code + "}"; - } else if (code.length <= 2 && !identifier) { + }); + } + return str.replace(/[\u0000-\u001f\u007f-\uffff]/g, function(ch) { + var code = ch.charCodeAt(0).toString(16); + if (code.length <= 2 && !identifier) { while (code.length < 2) code = "0" + code; return "\\x" + code; } else { diff --git a/test/compress/unicode.js b/test/compress/unicode.js index b2db1d68..d57165ea 100644 --- a/test/compress/unicode.js +++ b/test/compress/unicode.js @@ -43,15 +43,6 @@ unicode_string_literals: { expect_exact: 'var a="6 length unicode character: \\u{101111}";' } -// Don't escape identifiers below es6 (or in this case double escaped in expect_exact) -unicode_output_es5_surrogates: { - beautify = {ascii_only: true, ecma: 5} - input: { - var \u{10000} = "6 length unicode character: \u{10FFFF}"; - } - expect_exact: 'var \u{10000}="6 length unicode character: \\udbff\\udfff";' -} - check_escape_style: { beautify = {ascii_only: true, ecma: 6} input: { @@ -65,20 +56,6 @@ check_escape_style: { expect_exact: 'var a="\\x01";var \\ua0081="\\x10";var \\u0100="\\u0100";var \\u1000="\\u1000";var \\u{10000}="\\u{10000}";var \\u{2f800}="\\u{100000}";' } -// Don't escape identifiers below es6, no escaped identifiers support and no \u{} syntax -check_escape_style_es5: { - beautify = {ascii_only: true, ecma: 5} - input: { - var a = "\x01"; - var \ua0081 = "\x10"; // \u0081 only in ID_Continue - var \u0100 = "\u0100"; - var \u1000 = "\u1000"; - var \u{10000} = "\u{10000}"; // Identifier won't be escaped in es 5.1 - var \u{2f800} = "\u{100000}"; // Same - } - expect_exact: 'var a="\\x01";var \\ua0081="\\x10";var \\u0100="\\u0100";var \\u1000="\\u1000";var \ud800\udc00="\\ud800\\udc00";var \ud87e\udc00="\\udbc0\\udc00";' -} - ID_continue_with_surrogate_pair: { beautify = {ascii_only: true, ecma: 6} input: { diff --git a/test/mocha/unicode.js b/test/mocha/unicode.js index 37950e45..e5d94836 100644 --- a/test/mocha/unicode.js +++ b/test/mocha/unicode.js @@ -1,4 +1,5 @@ var assert = require("assert"); +var semver = require("semver"); var uglify = require("../node"); describe("Unicode", function() { @@ -138,8 +139,33 @@ describe("Unicode", function() { it("Should parse raw characters correctly", function() { var ast = uglify.parse('console.log("\\udbff");'); - assert.strictEqual(ast.print_to_string(), 'console.log("\udbff");'); + assert.strictEqual(ast.print_to_string(), 'console.log("\\udbff");'); ast = uglify.parse(ast.print_to_string()); - assert.strictEqual(ast.print_to_string(), 'console.log("\udbff");'); + assert.strictEqual(ast.print_to_string(), 'console.log("\\udbff");'); }); + + if (semver.satisfies(process.version, ">=4")) { + it("Should not unescape unpaired surrogates", function() { + var code = []; + for (var i = 0; i <= 0xFFFF; i++) { + code.push("\\u{" + i.toString(16) + "}"); + } + code = '"' + code.join() + '"'; + [true, false].forEach(function(ascii_only) { + [5, 6].forEach(function(ecma) { + var result = uglify.minify(code, { + compress: false, + mangle: false, + output: { + ascii_only: ascii_only + }, + ecma: ecma + }); + if (result.error) throw result.error; + assert.ok(code.length > result.code.length); + assert.strictEqual(eval(code), eval(result.code)); + }); + }); + }); + } });