Improve multi-line comment parsing

* Make sure comments containing surrogate pairs are skipped correctly
* Fix regression in multi-line comments with nlb (the newline-before flag)
2016-07-04 14:30:27 +02:00
parent 0db7caf13b
commit ff7f6139ba
3 changed files with 29 additions and 3 deletions
--- a/lib/parse.js
+++ b/lib/parse.js
@@ -148,6 +148,21 @@ function get_full_char_code(str, pos) {
    return str.charCodeAt(pos);
 }

+function get_full_char_length(str) { // Returns str.length minus one per high+low surrogate pair, so each pair counts as a single character.
+    var surrogates = 0; // number of complete surrogate pairs found so far
+
+    for (var i = 0; i < str.length; i++) {
+        if (str.charCodeAt(i) >= 0xd800 && str.charCodeAt(i) <= 0xdbff) { // current unit is a high (lead) surrogate
+            if (str.charCodeAt(i + 1) >= 0xdc00 && str.charCodeAt(i + 1) <= 0xdfff) { // next unit is a low (trail) surrogate; past the end charCodeAt yields NaN, so this is false
+                surrogates++;
+                i++; // step over the trail unit so it is not examined again
+            }
+        }
+    }
+
+    return str.length - surrogates; // unpaired surrogates are not subtracted and still count as one each
+}
+
 function from_char_code(code) {
    // Based on https://github.com/mathiasbynens/String.fromCodePoint/blob/master/fromcodepoint.js
    if (code > 0xFFFF) {
@@ -493,8 +508,9 @@ function tokenizer($TEXT, filename, html5_comments, shebang) {
        var i = find("*/", true);
        var text = S.text.substring(S.pos, i).replace(/\r\n|\r|\u2028|\u2029/g, '\n');
        // update stream position
-        forward(text.length /* doesn't count \r\n as 2 char while S.pos - i does */ + 2);
+        forward(get_full_char_length(text) /* text length doesn't count \r\n as 2 char while S.pos - i does */ + 2);
        S.comments_before.push(token("comment2", text, true));
+        S.newline_before = S.newline_before || text.indexOf("\n") >= 0;
        S.regex_allowed = regex_allowed;
        return next_token;
    });