improve RegExp handling (#1959)

- remove `options.output.unescape_regexps`
- preserve original pattern whenever possible

fixes #54
fixes #1929
2017-05-17 20:10:50 +08:00
parent 87c3a2c0ce
commit 569c21e952
3 changed files with 23 additions and 44 deletions
@@ -70,7 +70,6 @@ function OutputStream(options) {
        semicolons       : true,
        shebang          : true,
        source_map       : null,
-        unescape_regexps : false,
        width            : 80,
        wrap_iife        : false,
    }, true);
@@ -1260,45 +1259,14 @@ function OutputStream(options) {
        }
    });

-    function regexp_safe_literal(code) {
-        return [
-            0x5c   , // \
-            0x2f   , // /
-            0x2e   , // .
-            0x2b   , // +
-            0x2a   , // *
-            0x3f   , // ?
-            0x28   , // (
-            0x29   , // )
-            0x5b   , // [
-            0x5d   , // ]
-            0x7b   , // {
-            0x7d   , // }
-            0x24   , // $
-            0x5e   , // ^
-            0x3a   , // :
-            0x7c   , // |
-            0x21   , // !
-            0x0a   , // \n
-            0x0d   , // \r
-            0x00   , // \0
-            0xfeff , // Unicode BOM
-            0x2028 , // unicode "line separator"
-            0x2029 , // unicode "paragraph separator"
-        ].indexOf(code) < 0;
-    };
-
    DEFPRINT(AST_RegExp, function(self, output){
-        var str = self.getValue().toString();
+        var regexp = self.getValue();
+        var str = regexp.toString();
+        if (regexp.raw_source) {
+            str = "/" + regexp.raw_source + str.slice(str.lastIndexOf("/"));
+        }
        if (output.option("ascii_only")) {
            str = output.to_ascii(str);
-        } else if (output.option("unescape_regexps")) {
-            str = str.split("\\\\").map(function(str){
-                return str.replace(/\\u[0-9a-fA-F]{4}|\\x[0-9a-fA-F]{2}/g, function(s){
-                    var code = parseInt(s.substr(2), 16);
-                    return regexp_safe_literal(code) ? String.fromCharCode(code) : s;
-                });
-            }).join("\\\\");
        }
        output.print(str);
        var p = output.parent();
@@ -477,29 +477,31 @@ function tokenizer($TEXT, filename, html5_comments, shebang) {
        return name;
    };

-    var read_regexp = with_eof_error("Unterminated regular expression", function(regexp){
+    var read_regexp = with_eof_error("Unterminated regular expression", function(source) {
        var prev_backslash = false, ch, in_class = false;
        while ((ch = next(true))) if (NEWLINE_CHARS(ch)) {
            parse_error("Unexpected line terminator");
        } else if (prev_backslash) {
-            regexp += "\\" + ch;
+            source += "\\" + ch;
            prev_backslash = false;
        } else if (ch == "[") {
            in_class = true;
-            regexp += ch;
+            source += ch;
        } else if (ch == "]" && in_class) {
            in_class = false;
-            regexp += ch;
+            source += ch;
        } else if (ch == "/" && !in_class) {
            break;
        } else if (ch == "\\") {
            prev_backslash = true;
        } else {
-            regexp += ch;
+            source += ch;
        }
        var mods = read_name();
        try {
-          return token("regexp", new RegExp(regexp, mods));
+            var regexp = new RegExp(source, mods);
+            regexp.raw_source = source;
+            return token("regexp", regexp);
        } catch(e) {
            parse_error(e.message);
        }
@@ -8,3 +8,12 @@ octal_escape_sequence: {
        var border_check = "\x20\x30\x38\x30\x00\x30\xc0\x30";
    }
 }
+
+issue_1929: {
+    input: {
+        function f(s) {
+            return s.split(/[\\/]/);
+        }
+    }
+    expect_exact: "function f(s){return s.split(/[\\\\/]/)}"
+}