Improve multi-line comment parsing
* Make sure comments are skipped correctly when they contain surrogate pairs
* Fix regression in multiline comments with nlb (the newline-before flag)
This commit is contained in:
18
lib/parse.js
18
lib/parse.js
@@ -148,6 +148,21 @@ function get_full_char_code(str, pos) {
|
|||||||
return str.charCodeAt(pos);
|
return str.charCodeAt(pos);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns the length of `str` counted in code points instead of UTF-16
// code units: every well-formed surrogate pair (a high surrogate in
// 0xD800-0xDBFF immediately followed by a low surrogate in 0xDC00-0xDFFF)
// counts as a single character. Unpaired surrogates count as one each.
function get_full_char_length(str) {
    var surrogates = 0;

    for (var i = 0; i < str.length; i++) {
        var code = str.charCodeAt(i);
        if (code >= 0xd800 && code <= 0xdbff) {
            // charCodeAt() past the end of the string yields NaN, which
            // fails both comparisons, so a trailing high surrogate is safe.
            var next = str.charCodeAt(i + 1);
            if (next >= 0xdc00 && next <= 0xdfff) {
                surrogates++;
                i++; // skip the low half of the pair just counted
            }
        }
    }

    return str.length - surrogates;
}
|
||||||
|
|
||||||
function from_char_code(code) {
|
function from_char_code(code) {
|
||||||
// Based on https://github.com/mathiasbynens/String.fromCodePoint/blob/master/fromcodepoint.js
|
// Based on https://github.com/mathiasbynens/String.fromCodePoint/blob/master/fromcodepoint.js
|
||||||
if (code > 0xFFFF) {
|
if (code > 0xFFFF) {
|
||||||
@@ -493,8 +508,9 @@ function tokenizer($TEXT, filename, html5_comments, shebang) {
|
|||||||
var i = find("*/", true);
|
var i = find("*/", true);
|
||||||
var text = S.text.substring(S.pos, i).replace(/\r\n|\r|\u2028|\u2029/g, '\n');
|
var text = S.text.substring(S.pos, i).replace(/\r\n|\r|\u2028|\u2029/g, '\n');
|
||||||
// update stream position
|
// update stream position
|
||||||
forward(text.length /* doesn't count \r\n as 2 char while S.pos - i does */ + 2);
|
forward(get_full_char_length(text) /* text length doesn't count \r\n as 2 char while S.pos - i does */ + 2);
|
||||||
S.comments_before.push(token("comment2", text, true));
|
S.comments_before.push(token("comment2", text, true));
|
||||||
|
S.newline_before = S.newline_before || text.indexOf("\n") >= 0;
|
||||||
S.regex_allowed = regex_allowed;
|
S.regex_allowed = regex_allowed;
|
||||||
return next_token;
|
return next_token;
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -69,3 +69,12 @@ html_comment_in_string_literal: {
|
|||||||
}
|
}
|
||||||
expect_exact: 'function f(){return"\\x3c!--HTML--\\x3ecomment in\\x3c!--string literal--\\x3e"}';
|
expect_exact: 'function f(){return"\\x3c!--HTML--\\x3ecomment in\\x3c!--string literal--\\x3e"}';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
html_comment_after_multiline_comment: {
|
||||||
|
input: {
|
||||||
|
var foo; /*
|
||||||
|
*/--> var bar;
|
||||||
|
var foobar;
|
||||||
|
}
|
||||||
|
expect_exact: "var foo;var foobar;"
|
||||||
|
}
|
||||||
|
|||||||
@@ -31,7 +31,8 @@ describe("Comment", function() {
|
|||||||
"/*Some comment 2\r\n\r\n\r\n*/\r\n>\n\n\n\n\n\n",
|
"/*Some comment 2\r\n\r\n\r\n*/\r\n>\n\n\n\n\n\n",
|
||||||
"/*Some comment 3\r\r\r*/\r>\n\n\n\n\n\n",
|
"/*Some comment 3\r\r\r*/\r>\n\n\n\n\n\n",
|
||||||
"/*Some comment 4\u2028\u2028\u2028*/\u2028>\n\n\n\n\n\n",
|
"/*Some comment 4\u2028\u2028\u2028*/\u2028>\n\n\n\n\n\n",
|
||||||
"/*Some comment 5\u2029\u2029\u2029*/\u2029>\n\n\n\n\n\n"
|
"/*Some comment 5\u2029\u2029\u2029*/\u2029>\n\n\n\n\n\n",
|
||||||
|
"/*Some comment 6\udbff\udfff\udbff\udfff\n\n\n*/\n>\n\n\n\n\n"
|
||||||
];
|
];
|
||||||
|
|
||||||
var fail = function(e) {
|
var fail = function(e) {
|
||||||
@@ -43,7 +44,7 @@ describe("Comment", function() {
|
|||||||
|
|
||||||
for (var i = 0; i < tests.length; i++) {
|
for (var i = 0; i < tests.length; i++) {
|
||||||
assert.throws(function() {
|
assert.throws(function() {
|
||||||
uglify.parse(tests[i], {fromString: true})
|
uglify.parse(tests[i], {fromString: true});
|
||||||
}, fail, tests[i]);
|
}, fail, tests[i]);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|||||||
Reference in New Issue
Block a user