improve unicode handling (#3648)
This commit is contained in:
@@ -119,15 +119,20 @@ function OutputStream(options) {
|
||||
});
|
||||
} : function(str) {
|
||||
var s = "";
|
||||
for (var i = 0; i < str.length; i++) {
|
||||
if (is_surrogate_pair_head(str[i]) && !is_surrogate_pair_tail(str[i + 1])
|
||||
|| is_surrogate_pair_tail(str[i]) && !is_surrogate_pair_head(str[i - 1])) {
|
||||
s += "\\u" + str.charCodeAt(i).toString(16);
|
||||
} else {
|
||||
s += str[i];
|
||||
for (var i = 0, j = 0; i < str.length; i++) {
|
||||
var code = str.charCodeAt(i);
|
||||
if (is_surrogate_pair_head(code)) {
|
||||
if (is_surrogate_pair_tail(str.charCodeAt(i + 1))) {
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
} else if (!is_surrogate_pair_tail(code)) {
|
||||
continue;
|
||||
}
|
||||
return s;
|
||||
s += str.slice(j, i) + "\\u" + code.toString(16);
|
||||
j = i + 1;
|
||||
}
|
||||
return j == 0 ? str : s + str.slice(j);
|
||||
};
|
||||
|
||||
function make_string(str, quote) {
|
||||
|
||||
@@ -133,14 +133,10 @@ function is_letter(code) {
|
||||
}
|
||||
|
||||
/**
 * Tests whether a UTF-16 code unit is the head (high half) of a surrogate pair.
 *
 * @param {number|string} code - A numeric code unit, or a string, in which
 *   case only its first code unit is examined.
 * @returns {boolean} true when the code unit lies in 0xD800..0xDBFF.
 */
function is_surrogate_pair_head(code) {
    // Accept a character such as `str[i]` as well as a numeric char code.
    if (typeof code == "string")
        code = code.charCodeAt(0);
    // `undefined` or NaN (e.g. an index past the end of a string) fails
    // both comparisons, so such inputs yield false.
    return code >= 0xd800 && code <= 0xdbff;
}
|
||||
|
||||
/**
 * Tests whether a UTF-16 code unit is the tail (low half) of a surrogate pair.
 *
 * @param {number|string} code - A numeric code unit, or a string, in which
 *   case only its first code unit is examined.
 * @returns {boolean} true when the code unit lies in 0xDC00..0xDFFF.
 */
function is_surrogate_pair_tail(code) {
    // Accept a character such as `str[i + 1]` as well as a numeric char code.
    if (typeof code == "string")
        code = code.charCodeAt(0);
    // `undefined` or NaN (e.g. an index past the end of a string) fails
    // both comparisons, so such inputs yield false.
    return code >= 0xdc00 && code <= 0xdfff;
}
|
||||
|
||||
|
||||
@@ -16,6 +16,81 @@ unicode_parse_variables: {
|
||||
}
|
||||
}
|
||||
|
||||
// Fixture: an identifier written with a \u escape (`\u0061`) is expected to be
// printed as plain `a`. No beautify options are set here, and expect_exact
// holds the surrogate pair as real characters rather than as a re-escaped
// `\\uXXXX` sequence.
unicode_escaped_identifier: {
|
||||
input: {
|
||||
var \u0061 = "\ud800\udc00";
|
||||
console.log(a);
|
||||
}
|
||||
expect_exact: 'var a="\ud800\udc00";console.log(a);'
|
||||
expect_stdout: "\ud800\udc00"
|
||||
}
|
||||
|
||||
// Fixture: with `ascii_only: true`, escaped identifiers that are pure ASCII
// (`\u0061`, `\u0062\u0061r`) are expected to be printed unescaped (`a`, `bar`)
// and "h\u0065llo" as "hello", while the surrogate pair in the string literal
// is expected to be emitted as two literal `\uXXXX` escapes.
unicode_identifier_ascii_only: {
|
||||
beautify = {
|
||||
ascii_only: true,
|
||||
}
|
||||
input: {
|
||||
var \u0061 = "testing \udbc4\udd11";
|
||||
var bar = "h\u0065llo";
|
||||
console.log(a, \u0062\u0061r);
|
||||
}
|
||||
expect_exact: 'var a="testing \\udbc4\\udd11";var bar="hello";console.log(a,bar);'
|
||||
expect_stdout: "testing \udbc4\udd11 hello"
|
||||
}
|
||||
|
||||
// Fixture: with `ascii_only: true`, a surrogate pair inside a string literal
// is expected to be emitted as two literal `\uXXXX` escapes, and the escaped
// reference `\u0061` is expected to be printed as `a`.
unicode_string_literals: {
|
||||
beautify = {
|
||||
ascii_only: true,
|
||||
}
|
||||
input: {
|
||||
var a = "6 length unicode character: \udbc4\udd11";
|
||||
console.log(\u0061);
|
||||
}
|
||||
expect_exact: 'var a="6 length unicode character: \\udbc4\\udd11";console.log(a);'
|
||||
expect_stdout: "6 length unicode character: \udbc4\udd11"
|
||||
}
|
||||
|
||||
// Fixture: pins the escape forms expected under `ascii_only: true`. In string
// literals, code units below 0x100 appear as `\xNN` and larger ones as
// `\uNNNN` (surrogate pairs as two `\uNNNN` escapes); non-ASCII identifiers
// appear as `\uNNNN`, while the ASCII identifier `a` stays unescaped.
// Note: `\u1000` is declared twice in the input, so the value shown for it in
// expect_stdout is the later assignment ("\ud800\udc00").
check_escape_style: {
|
||||
beautify = {
|
||||
ascii_only: true,
|
||||
}
|
||||
input: {
|
||||
var a = "\x01";
|
||||
var \ua0081 = "\x10"; // \u0081 only in ID_Continue
|
||||
var \u0100 = "\u0100";
|
||||
var \u1000 = "\u1000";
|
||||
var \u1000 = "\ud800\udc00";
|
||||
var \u3f80 = "\udbc0\udc00";
|
||||
console.log(\u0061, \ua0081, \u0100, \u1000, \u3f80);
|
||||
}
|
||||
expect_exact: 'var a="\\x01";var \\ua0081="\\x10";var \\u0100="\\u0100";var \\u1000="\\u1000";var \\u1000="\\ud800\\udc00";var \\u3f80="\\udbc0\\udc00";console.log(a,\\ua0081,\\u0100,\\u1000,\\u3f80);'
|
||||
expect_stdout: "\u0001 \u0010 \u0100 \ud800\udc00 \udbc0\udc00"
|
||||
}
|
||||
|
||||
// Fixture: with `ascii_only: true`, a non-ASCII identifier written literally
// (`µþ`) or partly escaped (`\u00b5þ`) is expected to be printed fully escaped
// as `\u00b5\u00fe`, and the same characters in a string literal as `\xb5\xfe`.
escape_non_escaped_identifier: {
|
||||
beautify = {
|
||||
ascii_only: true,
|
||||
}
|
||||
input: {
|
||||
var µþ = "µþ";
|
||||
console.log(\u00b5þ);
|
||||
}
|
||||
expect_exact: 'var \\u00b5\\u00fe="\\xb5\\xfe";console.log(\\u00b5\\u00fe);'
|
||||
expect_stdout: "µþ"
|
||||
}
|
||||
|
||||
// Fixture: with `ascii_only: false`, non-ASCII identifiers and string contents
// are expected to be printed as literal characters; the partly escaped
// reference `\u00b5þ` is expected to come out as `µþ`.
non_escape_2_non_escape: {
|
||||
beautify = {
|
||||
ascii_only: false,
|
||||
}
|
||||
input: {
|
||||
var µþ = "µþ";
|
||||
console.log(\u00b5þ);
|
||||
}
|
||||
expect_exact: 'var µþ="µþ";console.log(µþ);'
|
||||
expect_stdout: "µþ"
|
||||
}
|
||||
|
||||
issue_2242_1: {
|
||||
beautify = {
|
||||
ascii_only: false,
|
||||
@@ -24,6 +99,7 @@ issue_2242_1: {
|
||||
console.log("\ud83d", "\ude00", "\ud83d\ude00", "\ud83d@\ude00");
|
||||
}
|
||||
expect_exact: 'console.log("\\ud83d","\\ude00","\ud83d\ude00","\\ud83d@\\ude00");'
|
||||
expect_stdout: "\ud83d \ude00 \ud83d\ude00 \ud83d@\ude00"
|
||||
}
|
||||
|
||||
issue_2242_2: {
|
||||
@@ -34,6 +110,7 @@ issue_2242_2: {
|
||||
console.log("\ud83d", "\ude00", "\ud83d\ude00", "\ud83d@\ude00");
|
||||
}
|
||||
expect_exact: 'console.log("\\ud83d","\\ude00","\\ud83d\\ude00","\\ud83d@\\ude00");'
|
||||
expect_stdout: "\ud83d \ude00 \ud83d\ude00 \ud83d@\ude00"
|
||||
}
|
||||
|
||||
issue_2242_3: {
|
||||
@@ -44,6 +121,7 @@ issue_2242_3: {
|
||||
console.log("\ud83d" + "\ude00", "\ud83d" + "@" + "\ude00");
|
||||
}
|
||||
expect_exact: 'console.log("\\ud83d"+"\\ude00","\\ud83d"+"@"+"\\ude00");'
|
||||
expect_stdout: "\ud83d\ude00 \ud83d@\ude00"
|
||||
}
|
||||
|
||||
issue_2242_4: {
|
||||
@@ -54,6 +132,7 @@ issue_2242_4: {
|
||||
console.log("\ud83d" + "\ude00", "\ud83d" + "@" + "\ude00");
|
||||
}
|
||||
expect_exact: 'console.log("\ud83d\ude00","\\ud83d@\\ude00");'
|
||||
expect_stdout: "\ud83d\ude00 \ud83d@\ude00"
|
||||
}
|
||||
|
||||
issue_2569: {
|
||||
|
||||
Reference in New Issue
Block a user