Extend Unicode support

* Support \u{xxxx} syntax
* Add support for surrogate pairs
* Allow identifiers to contain Unicode escape sequences
This commit is contained in:
Anthony Van de Gejuchte
2016-06-22 17:12:23 +02:00
committed by Richard van Velzen
parent 07785d0003
commit 63c432f4fa
4 changed files with 292 additions and 64 deletions

View File

@@ -77,9 +77,12 @@ function OutputStream(options) {
var OUTPUT = "";
function to_ascii(str, identifier) {
return str.replace(/[\u0000-\u001f\u007f-\uffff]/g, function(ch) {
var code = ch.charCodeAt(0).toString(16);
if (code.length <= 2 && !identifier) {
return str.replace(/[\ud800-\udbff][\udc00-\udfff]|[\u0000-\u001f\u007f-\uffff]/g, function(ch) {
var code = get_full_char_code(ch, 0).toString(16);
if ((identifier && code.length === 1) || code.length > 4) {
return "\\u{" + code + "}";
} else if (code.length <= 2 && !identifier) {
while (code.length < 2) code = "0" + code;
return "\\x" + code;
} else {
@@ -107,7 +110,7 @@ function OutputStream(options) {
case "\u2029": return "\\u2029";
case "\ufeff": return "\\ufeff";
case "\0":
return /[0-7]/.test(str.charAt(i+1)) ? "\\x00" : "\\0";
return /[0-7]/.test(get_full_char(str, i+1)) ? "\\x00" : "\\0";
}
return s;
});
@@ -158,7 +161,13 @@ function OutputStream(options) {
var last = null;
function last_char() {
return last.charAt(last.length - 1);
var char = last.charAt(last.length - 1);
if (is_surrogate_pair_tail(char)) {
return last.charAt(last.length - 2) + char;
}
return char;
};
function maybe_newline() {
@@ -170,7 +179,7 @@ function OutputStream(options) {
function print(str) {
str = String(str);
var ch = str.charAt(0);
var ch = get_full_char(str, 0);
if (might_need_semicolon) {
might_need_semicolon = false;