From ff7f6139ba515aa289893955c5d1d7cb1d1ba7bc Mon Sep 17 00:00:00 2001 From: Anthony Van de Gejuchte Date: Mon, 4 Jul 2016 14:30:27 +0200 Subject: [PATCH] Improve multi-line comment parsing * Make sure comments are skipped correctly with surrogates * Fix regression in multiline comments with nlb --- lib/parse.js | 18 +++++++++++++++++- test/compress/html_comments.js | 9 +++++++++ test/mocha/comment.js | 5 +++-- 3 files changed, 29 insertions(+), 3 deletions(-) diff --git a/lib/parse.js b/lib/parse.js index f843bc4b..1c0482f3 100644 --- a/lib/parse.js +++ b/lib/parse.js @@ -148,6 +148,21 @@ function get_full_char_code(str, pos) { return str.charCodeAt(pos); } +function get_full_char_length(str) { + var surrogates = 0; + + for (var i = 0; i < str.length; i++) { + if (str.charCodeAt(i) >= 0xd800 && str.charCodeAt(i) <= 0xdbff) { + if (str.charCodeAt(i + 1) >= 0xdc00 && str.charCodeAt(i + 1) <= 0xdfff) { + surrogates++; + i++; + } + } + } + + return str.length - surrogates; +} + function from_char_code(code) { // Based on https://github.com/mathiasbynens/String.fromCodePoint/blob/master/fromcodepoint.js if (code > 0xFFFF) { @@ -493,8 +508,9 @@ function tokenizer($TEXT, filename, html5_comments, shebang) { var i = find("*/", true); var text = S.text.substring(S.pos, i).replace(/\r\n|\r|\u2028|\u2029/g, '\n'); // update stream position - forward(text.length /* doesn't count \r\n as 2 char while S.pos - i does */ + 2); + forward(get_full_char_length(text) /* text length doesn't count \r\n as 2 char while S.pos - i does */ + 2); S.comments_before.push(token("comment2", text, true)); + S.newline_before = S.newline_before || text.indexOf("\n") >= 0; S.regex_allowed = regex_allowed; return next_token; }); diff --git a/test/compress/html_comments.js b/test/compress/html_comments.js index 8495b433..39973c3d 100644 --- a/test/compress/html_comments.js +++ b/test/compress/html_comments.js @@ -69,3 +69,12 @@ html_comment_in_string_literal: { } expect_exact: 'function f(){return"\\x3c!--HTML--\\x3ecomment in\\x3c!--string literal--\\x3e"}'; } + +html_comment_after_multiline_comment: { + input: { + var foo; /* +*/--> var bar; + var foobar; + } + expect_exact: "var foo;var foobar;" +} diff --git a/test/mocha/comment.js b/test/mocha/comment.js index 69cdb3d5..d135bb0a 100644 --- a/test/mocha/comment.js +++ b/test/mocha/comment.js @@ -31,7 +31,8 @@ describe("Comment", function() { "/*Some comment 2\r\n\r\n\r\n*/\r\n>\n\n\n\n\n\n", "/*Some comment 3\r\r\r*/\r>\n\n\n\n\n\n", "/*Some comment 4\u2028\u2028\u2028*/\u2028>\n\n\n\n\n\n", - "/*Some comment 5\u2029\u2029\u2029*/\u2029>\n\n\n\n\n\n" + "/*Some comment 5\u2029\u2029\u2029*/\u2029>\n\n\n\n\n\n", + "/*Some comment 6\udbff\udfff\udbff\udfff\n\n\n*/\n>\n\n\n\n\n" ]; var fail = function(e) { @@ -43,7 +44,7 @@ describe("Comment", function() { for (var i = 0; i < tests.length; i++) { assert.throws(function() { - uglify.parse(tests[i], {fromString: true}) + uglify.parse(tests[i], {fromString: true}); }, fail, tests[i]); } });