381 lines
11 KiB
JavaScript
Executable File
381 lines
11 KiB
JavaScript
Executable File
'use strict';
|
|
|
|
var TokenType = require('./const.js').TokenType;
|
|
|
|
// Character codes the scanner compares against directly.
var TAB = 9;                  // '\t'
var N = 10;                   // '\n'
var F = 12;                   // '\f'
var R = 13;                   // '\r'
var SPACE = 32;               // ' '
var DOUBLE_QUOTE = 34;        // '"'
var QUOTE = 39;               // '\''
var RIGHT_PARENTHESIS = 41;   // ')'
var STAR = 42;                // '*'
var SLASH = 47;               // '/'
var BACK_SLASH = 92;          // '\\'
var UNDERSCORE = 95;          // '_'
var LEFT_CURLY_BRACE = 123;   // '{'
var RIGHT_CURLY_BRACE = 125;  // '}'

// Character categories kept in the per-char-code lookup tables.
// Category 0 (a table slot that was never assigned) means "anything else".
var WHITESPACE = 1;
var PUNCTUATOR = 2;
var DIGIT = 3;
var STRING = 4;
|
|
|
|
// Maps the char code of every single-character token to its token type.
// The keys are also what the category tables are built from.
var PUNCTUATION = {
    9: TokenType.Tab,                   // '\t'
    10: TokenType.Newline,              // '\n'
    13: TokenType.Newline,              // '\r'
    32: TokenType.Space,                // ' '
    33: TokenType.ExclamationMark,      // '!'
    34: TokenType.QuotationMark,        // '"'
    35: TokenType.NumberSign,           // '#'
    36: TokenType.DollarSign,           // '$'
    37: TokenType.PercentSign,          // '%'
    38: TokenType.Ampersand,            // '&'
    39: TokenType.Apostrophe,           // '\''
    40: TokenType.LeftParenthesis,      // '('
    41: TokenType.RightParenthesis,     // ')'
    42: TokenType.Asterisk,             // '*'
    43: TokenType.PlusSign,             // '+'
    44: TokenType.Comma,                // ','
    45: TokenType.HyphenMinus,          // '-'
    46: TokenType.FullStop,             // '.'
    47: TokenType.Solidus,              // '/'
    58: TokenType.Colon,                // ':'
    59: TokenType.Semicolon,            // ';'
    60: TokenType.LessThanSign,         // '<'
    61: TokenType.EqualsSign,           // '='
    62: TokenType.GreaterThanSign,      // '>'
    63: TokenType.QuestionMark,         // '?'
    64: TokenType.CommercialAt,         // '@'
    91: TokenType.LeftSquareBracket,    // '['
    93: TokenType.RightSquareBracket,   // ']'
    94: TokenType.CircumflexAccent,     // '^'
    95: TokenType.LowLine,              // '_'
    123: TokenType.LeftCurlyBracket,    // '{'
    124: TokenType.VerticalLine,        // '|'
    125: TokenType.RightCurlyBracket,   // '}'
    126: TokenType.Tilde                // '~'
};
|
|
// Both tables are indexed by char code and sized to hold the highest
// punctuation code; larger codes are range-checked by callers before lookup.
var SYMBOL_CATEGORY_LENGTH = Math.max.apply(null, Object.keys(PUNCTUATION)) + 1;
var SYMBOL_CATEGORY = new Uint32Array(SYMBOL_CATEGORY_LENGTH);
var IS_PUNCTUATOR = new Uint32Array(SYMBOL_CATEGORY_LENGTH);

// fill categories
Object.keys(PUNCTUATION).forEach(function(key) {
    var code = Number(key);

    SYMBOL_CATEGORY[code] = PUNCTUATOR;
    IS_PUNCTUATOR[code] = PUNCTUATOR;
});

// don't treat as punctuator
IS_PUNCTUATOR[UNDERSCORE] = 0;

// '0' .. '9'
for (var i = 48; i <= 57; i++) {
    SYMBOL_CATEGORY[i] = DIGIT;
}

// The assignments below override the PUNCTUATOR category set above in
// SYMBOL_CATEGORY only; IS_PUNCTUATOR keeps these codes marked.
SYMBOL_CATEGORY[SPACE] = WHITESPACE;
SYMBOL_CATEGORY[TAB] = WHITESPACE;
SYMBOL_CATEGORY[N] = WHITESPACE;
SYMBOL_CATEGORY[R] = WHITESPACE;
SYMBOL_CATEGORY[F] = WHITESPACE;

SYMBOL_CATEGORY[QUOTE] = STRING;
SYMBOL_CATEGORY[DOUBLE_QUOTE] = STRING;
|
|
|
|
//
|
|
// scanner
|
|
//
|
|
|
|
/**
 * Creates a scanner positioned at the start of `source`.
 *
 * @param {string} source Text to scan; a leading U+FEFF (byte order mark) is skipped.
 * @param {boolean} [initBlockMode] Truthy to start with a block depth of 1; the depth
 *     then never drops below 1.
 * @param {number} [initLine] Line number reported for the first line (1 when omitted).
 * @param {number} [initColumn] Column reported for offset 0 (1 when omitted).
 */
var Scanner = function(source, initBlockMode, initLine, initColumn) {
    this.source = source;

    // skip the byte order mark, if any
    this.pos = source.charCodeAt(0) === 0xFEFF ? 1 : 0;
    this.eof = this.pos === this.source.length;
    this.line = typeof initLine === 'undefined' ? 1 : initLine;
    // a token's column is computed as `pos - lineStartPos`
    this.lineStartPos = typeof initColumn === 'undefined' ? -1 : -initColumn;

    this.minBlockMode = initBlockMode ? 1 : 0;
    this.blockMode = this.minBlockMode;
    this.urlMode = false;

    this.prevToken = null;
    this.token = null;
    this.buffer = [];
};
|
|
|
|
Scanner.prototype = {
    /**
     * Peeks at a token without consuming it.
     *
     * @param {number} offset 0 for the current token, 1 for the next one, and so on.
     * @returns {Object|null} The token, or null when the source ends before
     *     `offset` tokens could be read (for offset 0: whatever `this.token` holds).
     */
    lookup: function(offset) {
        if (offset === 0) {
            return this.token;
        }

        // read ahead into the buffer until it holds `offset` tokens or the source ends
        for (var i = this.buffer.length; !this.eof && i < offset; i++) {
            this.buffer.push(this.getToken());
        }

        return offset <= this.buffer.length ? this.buffer[offset - 1] : null;
    },

    /**
     * Tells whether the token at `offset` exists and has the given type.
     *
     * @param {number} offset Same meaning as in `lookup`.
     * @param {*} type Token type to compare with (strict equality).
     * @returns {boolean}
     */
    lookupType: function(offset, type) {
        var token = this.lookup(offset);

        return token !== null && token.type === type;
    },

    /**
     * Advances to the next token: takes it from the look-ahead buffer when one is
     * waiting there, otherwise reads it from the source.
     *
     * @returns {Object|null} The new current token, or null at the end of the source.
     */
    next: function() {
        var newToken = null;

        if (this.buffer.length !== 0) {
            newToken = this.buffer.shift();
        } else if (!this.eof) {
            newToken = this.getToken();
        }

        this.prevToken = this.token;
        this.token = newToken;

        return newToken;
    },

    /**
     * Reads every remaining token from the current position.
     *
     * @returns {Object[]} Tokens in source order.
     */
    tokenize: function() {
        var tokens = [];

        // getToken() already moves `pos` past the token it returns, so the loop
        // must not advance `pos` itself (doing so drops a character per token).
        while (this.pos < this.source.length) {
            tokens.push(this.getToken());
        }

        return tokens;
    },

    /**
     * Reads one token starting at `this.pos` and moves `this.pos` past it.
     * Also maintains `blockMode` (depth of open curly braces), `urlMode`
     * (set by an identifier `url`, cleared by `)`) and `eof`.
     *
     * @returns {{type: *, value: string, offset: number, line: number, column: number}}
     *     `offset`, `line` and `column` describe where the token starts.
     */
    getToken: function() {
        var code = this.source.charCodeAt(this.pos);
        var line = this.line;
        var column = this.pos - this.lineStartPos;
        var offset = this.pos;
        var next;
        var skip;
        var type;
        var value;

        switch (code < SYMBOL_CATEGORY_LENGTH ? SYMBOL_CATEGORY[code] : 0) {
            case DIGIT:
                type = TokenType.DecimalNumber;
                value = this.readDecimalNumber();
                break;

            case STRING:
                type = TokenType.String;
                value = this.readString(code);
                break;

            case WHITESPACE:
                type = TokenType.Space;
                value = this.readSpaces();
                break;

            case PUNCTUATOR:
                if (code === SLASH) {
                    next = this.pos + 1 < this.source.length ? this.source.charCodeAt(this.pos + 1) : 0;

                    if (next === STAR) { // /*
                        type = TokenType.Comment;
                        value = this.readComment();
                        break;
                    } else if (next === SLASH && !this.urlMode) { // //
                        if (this.blockMode > 0) {
                            // inside a block a run of slashes becomes the start of an identifier;
                            // count the whole run (testing a fixed position here never terminates
                            // once a third slash is present)
                            skip = 2;

                            while (this.source.charCodeAt(this.pos + skip) === SLASH) {
                                skip++;
                            }

                            type = TokenType.Identifier;
                            // the value starts with slashes, so it can never switch url mode on
                            value = this.readIdentifier(skip);
                        } else {
                            type = TokenType.Unknown;
                            value = this.readUnknown();
                        }

                        break;
                    }
                }

                type = PUNCTUATION[code];
                value = String.fromCharCode(code);
                this.pos++;

                if (code === RIGHT_PARENTHESIS) {
                    this.urlMode = false;
                } else if (code === LEFT_CURLY_BRACE) {
                    this.blockMode++;
                } else if (code === RIGHT_CURLY_BRACE) {
                    if (this.blockMode > this.minBlockMode) {
                        this.blockMode--;
                    }
                }

                break;

            default:
                type = TokenType.Identifier;
                value = this.readIdentifier(0);

                this.urlMode = this.urlMode || value === 'url';
        }

        // A backslash as the very last character makes readString/readIdentifier
        // step one position past the end; clamp so that `eof` is still detected.
        if (this.pos > this.source.length) {
            this.pos = this.source.length;
        }

        this.eof = this.pos === this.source.length;

        return {
            type: type,
            value: value,

            offset: offset,
            line: line,
            column: column
        };
    },

    /**
     * Tests whether `code` is a line break ('\n', '\f' or '\r') and, if so, does
     * the line accounting: for '\r' directly followed by '\n' it moves `pos` onto
     * the '\n', then increments `line` and records `pos` as the line start.
     * Call it only for a line break located at `this.pos` that is being consumed.
     *
     * @param {number} code Char code found at `this.pos`.
     * @returns {boolean} True when `code` is a line break.
     */
    isNewline: function(code) {
        if (code === N || code === F || code === R) {
            if (code === R && this.pos + 1 < this.source.length && this.source.charCodeAt(this.pos + 1) === N) {
                this.pos++;
            }

            this.line++;
            this.lineStartPos = this.pos;
            return true;
        }

        return false;
    },

    /**
     * Reads a run of spaces, tabs and line breaks.
     *
     * @returns {string} The whitespace text.
     */
    readSpaces: function() {
        var start = this.pos;

        for (; this.pos < this.source.length; this.pos++) {
            var code = this.source.charCodeAt(this.pos);

            if (!this.isNewline(code) && code !== SPACE && code !== TAB) {
                break;
            }
        }

        return this.source.substring(start, this.pos);
    },

    /**
     * Reads a comment that starts with slash-star at `this.pos`, up to and
     * including the closing star-slash, or to the end of the source when it is
     * never closed.
     *
     * @returns {string} The comment text including its delimiters.
     */
    readComment: function() {
        var start = this.pos;

        for (this.pos += 2; this.pos < this.source.length; this.pos++) {
            var code = this.source.charCodeAt(this.pos);

            if (code === STAR) { // */
                if (this.source.charCodeAt(this.pos + 1) === SLASH) {
                    this.pos += 2;
                    break;
                }
            } else {
                // keep line/column tracking correct across multi-line comments
                this.isNewline(code);
            }
        }

        return this.source.substring(start, this.pos);
    },

    /**
     * Reads from a `//` at `this.pos` up to, but not including, the next line
     * break (or to the end of the source).
     *
     * @returns {string} The text read.
     */
    readUnknown: function() {
        var start = this.pos;
        var code;

        for (this.pos += 2; this.pos < this.source.length; this.pos++) {
            code = this.source.charCodeAt(this.pos);

            // The line break is not consumed here, so isNewline() must not be called:
            // the whitespace token that follows does the line accounting, and calling
            // it in both places would count the same break twice.
            if (code === N || code === F || code === R) {
                break;
            }
        }

        return this.source.substring(start, this.pos);
    },

    /**
     * Reads a quoted string that starts at `this.pos`, up to and including the
     * matching unescaped quote, or to the end of the source when it is never closed.
     *
     * @param {number} quote Char code of the opening quote.
     * @returns {string} The string text including quotes, with every backslash +
     *     line break pair removed.
     */
    readString: function(quote) {
        var start = this.pos;
        var res = '';

        for (this.pos++; this.pos < this.source.length; this.pos++) {
            var code = this.source.charCodeAt(this.pos);

            if (code === BACK_SLASH) {
                // step onto the escaped character; the loop increment then skips it
                var end = this.pos++;

                if (this.isNewline(this.source.charCodeAt(this.pos))) {
                    // escaped line break: keep the text before the backslash and
                    // resume collecting after the break
                    res += this.source.substring(start, end);
                    start = this.pos + 1;
                }
            } else if (code === quote) {
                this.pos++;
                break;
            }
        }

        return res + this.source.substring(start, this.pos);
    },

    /**
     * Reads a run of decimal digits starting at `this.pos`.
     *
     * @returns {string} The digits read.
     */
    readDecimalNumber: function() {
        var start = this.pos;
        var code;

        for (this.pos++; this.pos < this.source.length; this.pos++) {
            code = this.source.charCodeAt(this.pos);

            if (code < 48 || code > 57) { // 0 .. 9
                break;
            }
        }

        return this.source.substring(start, this.pos);
    },

    /**
     * Reads an identifier starting at `this.pos`: everything up to the next
     * character marked in IS_PUNCTUATOR, with backslash escapes taken as part of
     * the identifier.
     *
     * @param {number} skip Number of leading characters to accept unconditionally.
     * @returns {string} The identifier text.
     */
    readIdentifier: function(skip) {
        var start = this.pos;

        for (this.pos += skip; this.pos < this.source.length; this.pos++) {
            var code = this.source.charCodeAt(this.pos);

            if (code === BACK_SLASH) {
                this.pos++;

                // skip escaped unicode sequence that can end with a whitespace
                // [0-9a-f]{1,6}(\r\n|[ \n\r\t\f])?
                for (var i = 0; i < 7 && this.pos + i < this.source.length; i++) {
                    code = this.source.charCodeAt(this.pos + i);

                    // up to six hex digits; the seventh position is only checked for whitespace
                    if (i !== 6) {
                        if ((code >= 48 && code <= 57) ||   // 0 .. 9
                            (code >= 65 && code <= 70) ||   // A .. F
                            (code >= 97 && code <= 102)) {  // a .. f
                            continue;
                        }
                    }

                    if (i > 0) {
                        // move onto the last hex digit, then also take one trailing whitespace
                        this.pos += i - 1;
                        if (code === SPACE || code === TAB || this.isNewline(code)) {
                            this.pos++;
                        }
                    }

                    break;
                }
            } else if (code < SYMBOL_CATEGORY_LENGTH &&
                       IS_PUNCTUATOR[code] === PUNCTUATOR) {
                break;
            }
        }

        return this.source.substring(start, this.pos);
    }
};
|
|
|
|
// warm up tokenizer to eliminate code branches that never execute
|
|
// fix soft deoptimizations (insufficient type feedback)
|
|
// Throwaway scan of a short sample: lookup(1e3) asks for far more tokens than
// the sample holds, so it keeps reading until eof; the result is discarded.
new Scanner('\n\r\r\n\f//""\'\'/**/1a;.{url(a)}').lookup(1e3);
|
|
|
|
module.exports = Scanner;
|