Don't allow escaped surrogate pairs in identifiers + introduce ascii_identifiers

Don't use two \uXXXX escapes for a surrogate pair in identifiers: the \u{} syntax is the supported way to escape such characters in identifiers. Also catch EOF errors while reading identifier names. Introduce ascii_identifiers: when ascii_identifiers is left undefined (the default value), identifiers are printed using the same setting as ascii_only, within the limits of the ECMAScript 6 grammar. ascii_identifiers accepts true and false, allowing identifiers to be printed under a different setting than the one ascii_only applies to strings.
2016-07-22 17:33:24 +02:00
parent 110a1ac885
commit 27d3669800
5 changed files with 73 additions and 9 deletions
--- a/lib/output.js
+++ b/lib/output.js
@@ -53,6 +53,7 @@ function OutputStream(options) {
        quote_keys       : false,
        space_colon      : true,
        ascii_only       : false,
+        ascii_identifiers: undefined,
        unescape_regexps : false,
        inline_script    : false,
        width            : 80,
@@ -70,6 +71,8 @@ function OutputStream(options) {
        keep_quoted_props: false,
        ecma             : 5,
    }, true);
+    if (typeof options.ascii_identifiers === 'undefined')
+        options.ascii_identifiers = options.ascii_only;

    var indentation = 0;
    var current_col = 0;
@@ -81,8 +84,11 @@ function OutputStream(options) {
        return str.replace(/[\ud800-\udbff][\udc00-\udfff]|[\u0000-\u001f\u007f-\uffff]/g, function(ch) {
            var code = get_full_char_code(ch, 0).toString(16);

-            if ((identifier && code.length === 1 && !options.es5) || code.length > 4) {
+            if ((identifier && code.length === 1 && options.ecma >= 6) || code.length > 4) {
                if (options.ecma < 6) {
+                    if (identifier) {
+                        return ch; // no \u{} support
+                    }
                    return "\\u" + ch.charCodeAt(0).toString(16) + "\\u"
                        + ch.charCodeAt(1).toString(16);
                }
@@ -165,7 +171,7 @@ function OutputStream(options) {

    function make_name(name) {
        name = name.toString();
-        if (options.ascii_only)
+        if (options.ascii_identifiers)
            name = to_ascii(name, true);
        return name;
    };
--- a/lib/parse.js
+++ b/lib/parse.js
@@ -551,7 +551,7 @@ function tokenizer($TEXT, filename, html5_comments, shebang) {
        return next_token;
    });

-    function read_name() {
+    var read_name = with_eof_error("SyntaxError: Unterminated identifier name", function() {
        var name = "", ch, escaped = false, hex;
        var read_escaped_identifier_char = function() {
            escaped = true;
@@ -593,7 +593,7 @@ function tokenizer($TEXT, filename, html5_comments, shebang) {
            parse_error("SyntaxError: Escaped characters are not allowed in keywords");
        }
        return name;
-    };
+    });

    var read_regexp = with_eof_error("SyntaxError: Unterminated regular expression", function(regexp){
        var prev_backslash = false, ch, in_class = false;
--- a/test/compress/unicode.js
+++ b/test/compress/unicode.js
@@ -43,12 +43,13 @@ unicode_string_literals: {
    expect_exact: 'var a="6 length unicode character: \\u{101111}";'
 }

+// Below ES6, astral identifiers are output unescaped (the single-backslash \u{10000} in expect_exact is the raw character, not an escape sequence)
 unicode_output_es5_surrogates: {
    beautify = {ascii_only: true, ecma: 5}
    input: {
        var \u{10000} = "6 length unicode character: \u{10FFFF}";
    }
-    expect_exact: 'var \\ud800\\udc00="6 length unicode character: \\udbff\\udfff";'
+    expect_exact: 'var \u{10000}="6 length unicode character: \\udbff\\udfff";'
 }

 check_escape_style: {
@@ -64,6 +65,7 @@ check_escape_style: {
    expect_exact: 'var a="\\x01";var \\ua0081="\\x10";var \\u0100="\\u0100";var \\u1000="\\u1000";var \\u{10000}="\\u{10000}";var \\u{2f800}="\\u{100000}";'
 }

+// Below ES6, astral identifiers are output unescaped: there is no \u{} syntax and a \uXXXX surrogate pair is not accepted in identifiers
 check_escape_style_es5: {
    beautify = {ascii_only: true, ecma: 5}
    input: {
@@ -71,10 +73,10 @@ check_escape_style_es5: {
        var \ua0081 = "\x10"; // \u0081 only in ID_Continue
        var \u0100 = "\u0100";
        var \u1000 = "\u1000";
-        var \u{10000} = "\u{10000}";
-        var \u{2f800} = "\u{100000}";
+        var \u{10000} = "\u{10000}"; // Identifier won't be escaped in es 5.1
+        var \u{2f800} = "\u{100000}"; // Same
    }
-    expect_exact: 'var a="\\x01";var \\ua0081="\\x10";var \\u0100="\\u0100";var \\u1000="\\u1000";var \\ud800\\udc00="\\ud800\\udc00";var \\ud87e\\udc00="\\udbc0\\udc00";'
+    expect_exact: 'var a="\\x01";var \\ua0081="\\x10";var \\u0100="\\u0100";var \\u1000="\\u1000";var \ud800\udc00="\\ud800\\udc00";var \ud87e\udc00="\\udbc0\\udc00";'
 }

 ID_continue_with_surrogate_pair: {
@@ -99,4 +101,20 @@ non_escape_2_non_escape: {
        var µþ = "µþ";
    }
    expect_exact: 'var µþ="µþ";'
+}
+
+non_escape_2_half_escape1: {
+    beautify = {ascii_only: false, ascii_identifiers: true, ecma: 6}
+    input: {
+        var µþ = "µþ";
+    }
+    expect_exact: 'var \\u00b5\\u00fe="µþ";'
+}
+
+non_escape_2_half_escape2: {
+    beautify = {ascii_only: true, ascii_identifiers: false, ecma: 6}
+    input: {
+        var µþ = "µþ";
+    }
+    expect_exact: 'var µþ="\\xb5\\xfe";'
 }
--- a/test/mocha/eof.js
+++ b/test/mocha/eof.js
@@ -0,0 +1,36 @@
+var assert = require("assert");
+var uglify = require("../../");
+
+describe("EOF", function() {
+    it("Should test code for at least throwing syntax error when incomplete", function() {
+        var error = function(e) {
+            return e instanceof uglify.JS_Parse_Error &&
+                /^SyntaxError: /.test(e.message);
+        }
+        var parse = function(test) {
+            return function() {
+                uglify.parse(test);
+            }
+        }
+        // Chops off 1 char at a time until limit or start of string is reached
+        // The passed code must still be valid when unchopped
+        var test_eol = function(input, chopLimit) {
+            if (chopLimit === undefined) {
+                chopLimit = input.length - 1;
+            }
+
+            assert.doesNotThrow(parse(input), "Expected valid code for \n" + input);
+
+            for (var i = input.length - 1; chopLimit > 0; chopLimit--, i--) {
+                var code = input.substr(0, i);
+                assert.throws(parse(code), error, code);
+            }
+        }
+
+        test_eol("var \\u1234", 7); // Incomplete identifier
+        test_eol("'Incomplete string'");
+        test_eol("/Unterminated regex/");
+        test_eol("` Unterminated template string`");
+        test_eol("/* Unfinishing multiline comment */");
+    });
+});
--- a/test/mocha/unicode.js
+++ b/test/mocha/unicode.js
@@ -52,7 +52,11 @@ describe("Unicode", function() {
        var tests = [
            'var \\u{0} = "foo";',
            'var \\u{10ffff} = "bar";',
-            'var \\u000a = "what\'s up";'
+            'var \\u000a = "what\'s up";',
+             // Valid ID_Start, but using up 2 escaped characters and not fitting in IdentifierStart
+            'var \\ud800\\udc00 = "Hello";',
+            'var \\udbff\\udfff = "Unicode";', // Same as previous test
+            'var \\ud800\udc01 = "Weird unicode";', // Same as above, but mixed escaped with unicode chars
        ];

        var exec = function(test) {