Various LineTerminator changes
* Escaped newlines should also produce SyntaxError * Fix multiline comment parsing and add tests * Adapt makePredicate to handle \u2028 and \u2029 * Move up nlb check in regex so it's checked before any escape handling * Change error messages to conform to the ECMAScript standard * Fix find_eol not recognizing \u2028 and \u2029 as line terminators * Remove \u180e, as it was removed from the Zs category in Unicode 6.3.0
This commit is contained in:
46
lib/parse.js
46
lib/parse.js
@@ -107,7 +107,9 @@ var OPERATORS = makePredicate([
|
|||||||
"||"
|
"||"
|
||||||
]);
|
]);
|
||||||
|
|
||||||
var WHITESPACE_CHARS = makePredicate(characters(" \u00a0\n\r\t\f\u000b\u200b\u180e\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u202f\u205f\u3000\uFEFF"));
|
var WHITESPACE_CHARS = makePredicate(characters(" \u00a0\n\r\t\f\u000b\u200b\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u2028\u2029\u202f\u205f\u3000\uFEFF"));
|
||||||
|
|
||||||
|
var NEWLINE_CHARS = makePredicate(characters("\n\r\u2028\u2029"));
|
||||||
|
|
||||||
var PUNC_BEFORE_EXPRESSION = makePredicate(characters("[{(,.;:"));
|
var PUNC_BEFORE_EXPRESSION = makePredicate(characters("[{(,.;:"));
|
||||||
|
|
||||||
@@ -234,7 +236,7 @@ function tokenizer($TEXT, filename, html5_comments, shebang) {
|
|||||||
var ch = S.text.charAt(S.pos++);
|
var ch = S.text.charAt(S.pos++);
|
||||||
if (signal_eof && !ch)
|
if (signal_eof && !ch)
|
||||||
throw EX_EOF;
|
throw EX_EOF;
|
||||||
if ("\r\n\u2028\u2029".indexOf(ch) >= 0) {
|
if (NEWLINE_CHARS(ch)) {
|
||||||
S.newline_before = S.newline_before || !in_string;
|
S.newline_before = S.newline_before || !in_string;
|
||||||
++S.line;
|
++S.line;
|
||||||
S.col = 0;
|
S.col = 0;
|
||||||
@@ -261,7 +263,7 @@ function tokenizer($TEXT, filename, html5_comments, shebang) {
|
|||||||
var text = S.text;
|
var text = S.text;
|
||||||
for (var i = S.pos, n = S.text.length; i < n; ++i) {
|
for (var i = S.pos, n = S.text.length; i < n; ++i) {
|
||||||
var ch = text[i];
|
var ch = text[i];
|
||||||
if (ch == '\n' || ch == '\r')
|
if (NEWLINE_CHARS(ch))
|
||||||
return i;
|
return i;
|
||||||
}
|
}
|
||||||
return -1;
|
return -1;
|
||||||
@@ -313,8 +315,7 @@ function tokenizer($TEXT, filename, html5_comments, shebang) {
|
|||||||
};
|
};
|
||||||
|
|
||||||
function skip_whitespace() {
|
function skip_whitespace() {
|
||||||
var ch;
|
while (WHITESPACE_CHARS(peek()))
|
||||||
while (WHITESPACE_CHARS(ch = peek()) || ch == "\u2028" || ch == "\u2029")
|
|
||||||
next();
|
next();
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -352,7 +353,7 @@ function tokenizer($TEXT, filename, html5_comments, shebang) {
|
|||||||
if (!isNaN(valid)) {
|
if (!isNaN(valid)) {
|
||||||
return token("num", valid);
|
return token("num", valid);
|
||||||
} else {
|
} else {
|
||||||
parse_error("Invalid syntax: " + num);
|
parse_error("SyntaxError: Invalid syntax: " + num);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -400,18 +401,18 @@ function tokenizer($TEXT, filename, html5_comments, shebang) {
|
|||||||
for (; n > 0; --n) {
|
for (; n > 0; --n) {
|
||||||
var digit = parseInt(next(true), 16);
|
var digit = parseInt(next(true), 16);
|
||||||
if (isNaN(digit))
|
if (isNaN(digit))
|
||||||
parse_error("Invalid hex-character pattern in string");
|
parse_error("SyntaxError: Invalid hex-character pattern in string");
|
||||||
num = (num << 4) | digit;
|
num = (num << 4) | digit;
|
||||||
}
|
}
|
||||||
return num;
|
return num;
|
||||||
};
|
};
|
||||||
|
|
||||||
var read_string = with_eof_error("Unterminated string constant", function(quote_char){
|
var read_string = with_eof_error("SyntaxError: Unterminated string constant", function(quote_char){
|
||||||
var quote = next(), ret = "";
|
var quote = next(), ret = "";
|
||||||
for (;;) {
|
for (;;) {
|
||||||
var ch = next(true, true);
|
var ch = next(true, true);
|
||||||
if (ch == "\\") ch = read_escaped_char(true);
|
if (ch == "\\") ch = read_escaped_char(true);
|
||||||
else if ("\r\n\u2028\u2029".indexOf(ch) >= 0) parse_error("Unterminated string constant");
|
else if (NEWLINE_CHARS(ch)) parse_error("SyntaxError: Unterminated string constant");
|
||||||
else if (ch == quote) break;
|
else if (ch == quote) break;
|
||||||
ret += ch;
|
ret += ch;
|
||||||
}
|
}
|
||||||
@@ -436,21 +437,14 @@ function tokenizer($TEXT, filename, html5_comments, shebang) {
|
|||||||
return next_token;
|
return next_token;
|
||||||
};
|
};
|
||||||
|
|
||||||
var skip_multiline_comment = with_eof_error("Unterminated multiline comment", function(){
|
var skip_multiline_comment = with_eof_error("SyntaxError: Unterminated multiline comment", function(){
|
||||||
var regex_allowed = S.regex_allowed;
|
var regex_allowed = S.regex_allowed;
|
||||||
var i = find("*/", true);
|
var i = find("*/", true);
|
||||||
var text = S.text.substring(S.pos, i).replace(/\r\n|\r/g, '\n');
|
var text = S.text.substring(S.pos, i).replace(/\r\n|\r|\u2028|\u2029/g, '\n');
|
||||||
var a = text.split("\n"), n = a.length;
|
|
||||||
// update stream position
|
// update stream position
|
||||||
S.pos = i + 2;
|
forward(text.length /* doesn't count \r\n as 2 char while S.pos - i does */ + 2);
|
||||||
S.line += n - 1;
|
|
||||||
if (n > 1) S.col = a[n - 1].length;
|
|
||||||
else S.col += a[n - 1].length;
|
|
||||||
S.col += 2;
|
|
||||||
var nlb = S.newline_before = S.newline_before || text.indexOf("\n") >= 0;
|
|
||||||
S.comments_before.push(token("comment2", text, true));
|
S.comments_before.push(token("comment2", text, true));
|
||||||
S.regex_allowed = regex_allowed;
|
S.regex_allowed = regex_allowed;
|
||||||
S.newline_before = nlb;
|
|
||||||
return next_token;
|
return next_token;
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -463,9 +457,9 @@ function tokenizer($TEXT, filename, html5_comments, shebang) {
|
|||||||
else break;
|
else break;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if (ch != "u") parse_error("Expecting UnicodeEscapeSequence -- uXXXX");
|
if (ch != "u") parse_error("SyntaxError: Expecting UnicodeEscapeSequence -- uXXXX");
|
||||||
ch = read_escaped_char();
|
ch = read_escaped_char();
|
||||||
if (!is_identifier_char(ch)) parse_error("Unicode char: " + ch.charCodeAt(0) + " is not valid in identifier");
|
if (!is_identifier_char(ch)) parse_error("SyntaxError: Unicode char: " + ch.charCodeAt(0) + " is not valid in identifier");
|
||||||
name += ch;
|
name += ch;
|
||||||
backslash = false;
|
backslash = false;
|
||||||
}
|
}
|
||||||
@@ -477,9 +471,11 @@ function tokenizer($TEXT, filename, html5_comments, shebang) {
|
|||||||
return name;
|
return name;
|
||||||
};
|
};
|
||||||
|
|
||||||
var read_regexp = with_eof_error("Unterminated regular expression", function(regexp){
|
var read_regexp = with_eof_error("SyntaxError: Unterminated regular expression", function(regexp){
|
||||||
var prev_backslash = false, ch, in_class = false;
|
var prev_backslash = false, ch, in_class = false;
|
||||||
while ((ch = next(true))) if (prev_backslash) {
|
while ((ch = next(true))) if (NEWLINE_CHARS(ch)) {
|
||||||
|
parse_error("SyntaxError: Unexpected line terminator");
|
||||||
|
} else if (prev_backslash) {
|
||||||
regexp += "\\" + ch;
|
regexp += "\\" + ch;
|
||||||
prev_backslash = false;
|
prev_backslash = false;
|
||||||
} else if (ch == "[") {
|
} else if (ch == "[") {
|
||||||
@@ -492,8 +488,6 @@ function tokenizer($TEXT, filename, html5_comments, shebang) {
|
|||||||
break;
|
break;
|
||||||
} else if (ch == "\\") {
|
} else if (ch == "\\") {
|
||||||
prev_backslash = true;
|
prev_backslash = true;
|
||||||
} else if ("\r\n\u2028\u2029".indexOf(ch) >= 0) {
|
|
||||||
parse_error("Unexpected line terminator");
|
|
||||||
} else {
|
} else {
|
||||||
regexp += ch;
|
regexp += ch;
|
||||||
}
|
}
|
||||||
@@ -602,7 +596,7 @@ function tokenizer($TEXT, filename, html5_comments, shebang) {
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
parse_error("Unexpected character '" + ch + "'");
|
parse_error("SyntaxError: Unexpected character '" + ch + "'");
|
||||||
};
|
};
|
||||||
|
|
||||||
next_token.context = function(nc) {
|
next_token.context = function(nc) {
|
||||||
|
|||||||
13
lib/utils.js
13
lib/utils.js
@@ -227,10 +227,19 @@ function makePredicate(words) {
|
|||||||
}
|
}
|
||||||
cats.push([words[i]]);
|
cats.push([words[i]]);
|
||||||
}
|
}
|
||||||
|
function quote(word) {
|
||||||
|
return JSON.stringify(word).replace(/[\u2028\u2029]/g, function(s) {
|
||||||
|
switch (s) {
|
||||||
|
case "\u2028": return "\\u2028";
|
||||||
|
case "\u2029": return "\\u2029";
|
||||||
|
}
|
||||||
|
return s;
|
||||||
|
});
|
||||||
|
}
|
||||||
function compareTo(arr) {
|
function compareTo(arr) {
|
||||||
if (arr.length == 1) return f += "return str === " + JSON.stringify(arr[0]) + ";";
|
if (arr.length == 1) return f += "return str === " + quote(arr[0]) + ";";
|
||||||
f += "switch(str){";
|
f += "switch(str){";
|
||||||
for (var i = 0; i < arr.length; ++i) f += "case " + JSON.stringify(arr[i]) + ":";
|
for (var i = 0; i < arr.length; ++i) f += "case " + quote(arr[i]) + ":";
|
||||||
f += "return true}return false;";
|
f += "return true}return false;";
|
||||||
}
|
}
|
||||||
// When there are more than three length categories, an outer
|
// When there are more than three length categories, an outer
|
||||||
|
|||||||
50
test/mocha/comment.js
Normal file
50
test/mocha/comment.js
Normal file
@@ -0,0 +1,50 @@
|
|||||||
|
var assert = require("assert");
|
||||||
|
var uglify = require("../../");
|
||||||
|
|
||||||
|
describe("Comment", function() {
|
||||||
|
it("Should recognize eol of single line comments", function() {
|
||||||
|
var tests = [
|
||||||
|
"//Some comment 1\n>",
|
||||||
|
"//Some comment 2\r>",
|
||||||
|
"//Some comment 3\r\n>",
|
||||||
|
"//Some comment 4\u2028>",
|
||||||
|
"//Some comment 5\u2029>"
|
||||||
|
];
|
||||||
|
|
||||||
|
var fail = function(e) {
|
||||||
|
return e instanceof uglify.JS_Parse_Error &&
|
||||||
|
e.message === "SyntaxError: Unexpected token: operator (>)" &&
|
||||||
|
e.line === 2 &&
|
||||||
|
e.col === 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (var i = 0; i < tests.length; i++) {
|
||||||
|
assert.throws(function() {
|
||||||
|
uglify.parse(tests[i], {fromString: true})
|
||||||
|
}, fail, tests[i]);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
it("Should update the position of a multiline comment correctly", function() {
|
||||||
|
var tests = [
|
||||||
|
"/*Some comment 1\n\n\n*/\n>\n\n\n\n\n\n",
|
||||||
|
"/*Some comment 2\r\n\r\n\r\n*/\r\n>\n\n\n\n\n\n",
|
||||||
|
"/*Some comment 3\r\r\r*/\r>\n\n\n\n\n\n",
|
||||||
|
"/*Some comment 4\u2028\u2028\u2028*/\u2028>\n\n\n\n\n\n",
|
||||||
|
"/*Some comment 5\u2029\u2029\u2029*/\u2029>\n\n\n\n\n\n"
|
||||||
|
];
|
||||||
|
|
||||||
|
var fail = function(e) {
|
||||||
|
return e instanceof uglify.JS_Parse_Error &&
|
||||||
|
e.message === "SyntaxError: Unexpected token: operator (>)" &&
|
||||||
|
e.line === 5 &&
|
||||||
|
e.col === 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (var i = 0; i < tests.length; i++) {
|
||||||
|
assert.throws(function() {
|
||||||
|
uglify.parse(tests[i], {fromString: true})
|
||||||
|
}, fail, tests[i]);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
@@ -37,6 +37,10 @@ describe("line-endings", function() {
|
|||||||
"/\r/",
|
"/\r/",
|
||||||
"/\u2028/",
|
"/\u2028/",
|
||||||
"/\u2029/",
|
"/\u2029/",
|
||||||
|
"/\\\n/",
|
||||||
|
"/\\\r/",
|
||||||
|
"/\\\u2028/",
|
||||||
|
"/\\\u2029/",
|
||||||
"/someRandomTextLike[]()*AndThen\n/"
|
"/someRandomTextLike[]()*AndThen\n/"
|
||||||
]
|
]
|
||||||
var test = function(input) {
|
var test = function(input) {
|
||||||
@@ -46,7 +50,7 @@ describe("line-endings", function() {
|
|||||||
}
|
}
|
||||||
var fail = function(e) {
|
var fail = function(e) {
|
||||||
return e instanceof Uglify.JS_Parse_Error &&
|
return e instanceof Uglify.JS_Parse_Error &&
|
||||||
e.message === "Unexpected line terminator";
|
e.message === "SyntaxError: Unexpected line terminator";
|
||||||
}
|
}
|
||||||
for (var i = 0; i < inputs.length; i++) {
|
for (var i = 0; i < inputs.length; i++) {
|
||||||
assert.throws(test(inputs[i]), fail);
|
assert.throws(test(inputs[i]), fail);
|
||||||
|
|||||||
@@ -19,7 +19,7 @@ describe("String literals", function() {
|
|||||||
|
|
||||||
var error = function(e) {
|
var error = function(e) {
|
||||||
return e instanceof UglifyJS.JS_Parse_Error &&
|
return e instanceof UglifyJS.JS_Parse_Error &&
|
||||||
e.message === "Unterminated string constant";
|
e.message === "SyntaxError: Unterminated string constant";
|
||||||
};
|
};
|
||||||
|
|
||||||
for (var input in inputs) {
|
for (var input in inputs) {
|
||||||
|
|||||||
Reference in New Issue
Block a user