improve unicode handling (#3648)

This commit is contained in:
Alex Lam S.L
2019-12-28 18:06:51 +00:00
committed by GitHub
parent 4cf612dc9f
commit 22b47cdd63
3 changed files with 91 additions and 11 deletions

View File

@@ -119,15 +119,20 @@ function OutputStream(options) {
});
} : function(str) {
var s = "";
for (var i = 0; i < str.length; i++) {
if (is_surrogate_pair_head(str[i]) && !is_surrogate_pair_tail(str[i + 1])
|| is_surrogate_pair_tail(str[i]) && !is_surrogate_pair_head(str[i - 1])) {
s += "\\u" + str.charCodeAt(i).toString(16);
} else {
s += str[i];
for (var i = 0, j = 0; i < str.length; i++) {
var code = str.charCodeAt(i);
if (is_surrogate_pair_head(code)) {
if (is_surrogate_pair_tail(str.charCodeAt(i + 1))) {
i++;
continue;
}
} else if (!is_surrogate_pair_tail(code)) {
continue;
}
s += str.slice(j, i) + "\\u" + code.toString(16);
j = i + 1;
}
return s;
return j == 0 ? str : s + str.slice(j);
};
function make_string(str, quote) {

View File

@@ -133,14 +133,10 @@ function is_letter(code) {
}
// Reports whether `code` falls in the UTF-16 leading ("high") surrogate range
// 0xD800..0xDBFF. `code` may be a numeric code unit or a string; for a string
// only its first code unit is examined.
function is_surrogate_pair_head(code) {
    var unit = typeof code == "string" ? code.charCodeAt(0) : code;
    // Written as two positive range checks so that NaN (e.g. from an empty
    // string) and undefined both fall through to `false`.
    return 0xD800 <= unit && unit <= 0xDBFF;
}
// Reports whether `code` falls in the UTF-16 trailing ("low") surrogate range
// 0xDC00..0xDFFF. `code` may be a numeric code unit or a string; for a string
// only its first code unit is examined.
function is_surrogate_pair_tail(code) {
    var unit = typeof code == "string" ? code.charCodeAt(0) : code;
    // Written as two positive range checks so that NaN (e.g. from an empty
    // string) and undefined both fall through to `false`.
    return 0xDC00 <= unit && unit <= 0xDFFF;
}