diff --git a/bin/uglifyjs b/bin/uglifyjs index 77c1ae5e..57e33d9b 100755 --- a/bin/uglifyjs +++ b/bin/uglifyjs @@ -212,7 +212,7 @@ function run() { fatal("ERROR: " + ex.message); } if (program.output == "spidermonkey") { - console.log(JSON.stringify(UglifyJS.parse(new Buffer(result.code).toString()).to_mozilla_ast(), null, 2)); + console.log(JSON.stringify(UglifyJS.parse(result.code).to_mozilla_ast(), null, 2)); } else if (program.output) { fs.writeFileSync(program.output, result.code); if (result.map) { diff --git a/lib/output.js b/lib/output.js index 48bb1dc9..5d130055 100644 --- a/lib/output.js +++ b/lib/output.js @@ -215,16 +215,6 @@ function OutputStream(options) { var might_add_newline = 0; var last = ""; - function last_char() { - var char = last.charAt(last.length - 1); - - if (is_surrogate_pair_tail(char)) { - return last.charAt(last.length - 2) + char; - } - - return char; - }; - var ensure_line_len = options.max_line_len ? function() { if (current_col > options.max_line_len) { if (might_add_newline) { @@ -247,7 +237,7 @@ function OutputStream(options) { function print(str) { str = String(str); var ch = get_full_char(str, 0); - var prev = last_char(); + var prev = get_full_char(last, last.length - 1); if (might_need_semicolon) { might_need_semicolon = false; diff --git a/lib/parse.js b/lib/parse.js index d693af9b..492509de 100644 --- a/lib/parse.js +++ b/lib/parse.js @@ -134,8 +134,17 @@ var UNICODE = { function get_full_char(str, pos) { var char = str.charAt(pos); - if (char >= "\ud800" && char <= "\udbff") { - return char + str.charAt(pos + 1); + if (is_surrogate_pair_head(char)) { + var next = str.charAt(pos + 1); + if (is_surrogate_pair_tail(next)) { + return char + next; + } + } + if (is_surrogate_pair_tail(char)) { + var prev = str.charAt(pos - 1); + if (is_surrogate_pair_head(prev)) { + return prev + char; + } } return char; } @@ -152,8 +161,8 @@ function get_full_char_length(str) { var surrogates = 0; for (var i = 0; i < str.length; i++) { - if (str.charCodeAt(i) >= 0xd800 && str.charCodeAt(i) <= 0xdbff) { - if (str.charCodeAt(i + 1) >= 0xdc00 && str.charCodeAt(i + 1) <= 0xdfff) { + if (is_surrogate_pair_head(str.charCodeAt(i))) { + if (is_surrogate_pair_tail(str.charCodeAt(i + 1))) { surrogates++; i++; } @@ -291,7 +300,7 @@ function tokenizer($TEXT, filename, html5_comments, shebang) { ch = "\n"; } } else { - if (is_surrogate_pair_head(ch)) { + if (ch.length > 1) { ++S.pos; ++S.col; } diff --git a/test/mocha/unicode.js b/test/mocha/unicode.js index af110fbb..8abd3ee3 100644 --- a/test/mocha/unicode.js +++ b/test/mocha/unicode.js @@ -135,4 +135,11 @@ describe("Unicode", function() { }).code, tests[i][1]); } }); + + it("Should parse raw characters correctly", function() { + var ast = uglify.parse('console.log("\\udbff");'); + assert.strictEqual(ast.print_to_string(), 'console.log("\udbff");'); + ast = uglify.parse(ast.print_to_string()); + assert.strictEqual(ast.print_to_string(), 'console.log("\udbff");'); + }); });