vnfmarket/src/main/webapp/vnfmarket/node_modules/zeparser/Tokenizer.js

   1 if (typeof exports !== 'undefined') {\r
   2         var window = {Unicode: require('./unicodecategories').Unicode};\r
   3         exports.Tokenizer = Tokenizer;\r
   4 }\r
   5 \r
   6 /*!\r
   7  * Tokenizer for JavaScript / ECMAScript 5\r
   8  * (c) Peter van der Zee, qfox.nl\r
   9  */\r
  10 \r
  11 /**\r
  12  * @param {Object} inp\r
  13  */\r
  14 function Tokenizer(inp){\r
  15         this.inp = inp||'';\r
  16         // replace all other line terminators with \n (leave \r\n in tact though). we should probably remove the shadowInp when finished...\r
  17         // only replace \r if it is not followed by a \n else \r\n would become \n\n causing a double newline where it is just a single\r
  18         this.shadowInp = (inp||'').replace(Tokenizer.regexNormalizeNewlines, '\n');\r
  19         this.pos = 0;\r
  20         this.line = 0;\r
  21         this.column = 0;\r
  22         this.cache = {};\r
  23         \r
  24         this.errorStack = [];\r
  25         \r
  26         this.wtree = [];\r
  27         this.btree = [];\r
  28         \r
  29 //      this.regexWhiteSpace = Tokenizer.regexWhiteSpace;\r
  30         this.regexLineTerminator = Tokenizer.regexLineTerminator; // used in fallback\r
  31         this.regexAsciiIdentifier = Tokenizer.regexAsciiIdentifier;\r
  32         this.hashAsciiIdentifier = Tokenizer.hashAsciiIdentifier;\r
  33 //      this.regexHex = Tokenizer.regexHex;\r
  34         this.hashHex = Tokenizer.hashHex\r
  35         this.regexUnicodeEscape = Tokenizer.regexUnicodeEscape;\r
  36         this.regexIdentifierStop = Tokenizer.regexIdentifierStop;\r
  37         this.hashIdentifierStop = Tokenizer.hashIdentifierStop;\r
  38 //      this.regexPunctuators = Tokenizer.regexPunctuators;\r
  39         this.regexNumber = Tokenizer.regexNumber;\r
  40         this.regexNewline = Tokenizer.regexNewline;\r
  41         \r
  42         this.regexBig = Tokenizer.regexBig;\r
  43         this.regexBigAlt = Tokenizer.regexBigAlt;\r
  44         \r
  45         this.tokenCount = 0;\r
  46         this.tokenCountNoWhite = 0;\r
  47         \r
  48         this.Unicode = window.Unicode;\r
  49         \r
  50         // if the Parser throws an error. it will set this property to the next match\r
  51         // at the time of the error (which was not what it was expecting at that point) \r
  52         // and pass on an "error" match. the error should be scooped on the stack and \r
  53         // this property should be returned, without looking at the input...\r
  54         this.errorEscape = null;\r
  55 };\r
  56 \r
  57 Tokenizer.prototype = {\r
  58         inp:null,\r
  59         shadowInp:null,\r
  60         pos:null,\r
  61         line:null,\r
  62         column:null,\r
  63         cache:null,\r
  64         errorStack:null,\r
  65         \r
  66         wtree: null, // contains whitespace (spaces, comments, newlines)\r
  67         btree: null, // does not contain any whitespace tokens.\r
  68         \r
  69         regexLineTerminator:null,\r
  70         regexAsciiIdentifier:null,\r
  71         hashAsciiIdentifier:null,\r
  72         hashHex:null,\r
  73         regexUnicodeEscape:null,\r
  74         regexIdentifierStop:null,\r
  75         hashIdentifierStop:null,\r
  76         regexNumber:null,\r
  77         regexNewline:null,\r
  78         regexBig:null,\r
  79         regexBigAlt:null,\r
  80         tokenCount:null,\r
  81         tokenCountNoWhite:null,\r
  82         \r
  83         Unicode:null,\r
  84         \r
  85         // storeCurrentAndFetchNextToken(bool, false, false true) to get just one token\r
  86         storeCurrentAndFetchNextToken: function(noRegex, returnValue, stack, _dontStore){\r
  87                 var regex = !noRegex; // TOFIX :)\r
  88                 var pos = this.pos;\r
  89                 var inp = this.inp;\r
  90                 var shadowInp = this.shadowInp;\r
  91                 var matchedNewline = false;\r
  92                 do {\r
  93                         if (!_dontStore) {\r
  94                                 ++this.tokenCount;\r
  95                                 stack.push(returnValue);\r
  96                                 // did the parent Parser throw up?\r
  97                                 if (this.errorEscape) {\r
  98                                         returnValue = this.errorEscape;\r
  99                                         this.errorEscape = null;\r
 100                                         return returnValue;\r
 101                                 }\r
 102                         }\r
 103                         _dontStore = false;\r
 104                 \r
 105                         if (pos >= inp.length) {\r
 106                                 returnValue = {start:inp.length,stop:inp.length,name:12/*EOF*/};\r
 107                                 break; \r
 108                         }\r
 109                         var returnValue = null;\r
 110                 \r
 111                         var start = pos;\r
 112                         var chr = inp[pos];\r
 113         \r
 114                         //                                                      1 ws                                                    2 lt                               3 scmt 4 mcmt 5/6 str 7 nr     8 rx  9 punc\r
 115                         //if (true) {\r
 116                                 // substring method (I think this is faster..)\r
 117                                 var part2 = inp.substring(pos,pos+4);\r
 118                                 var part = this.regexBig.exec(part2);\r
 119                         //} else {\r
 120                         //      // non-substring method (lastIndex)\r
 121                         //      // this method does not need a substring to apply it\r
 122                         //      this.regexBigAlt.lastIndex = pos;\r
 123                         //      var part = this.regexBigAlt.exec(inp);\r
 124                         //}\r
 125                         \r
 126                         if (part[1]) { //this.regexWhiteSpace.test(chr)) { // SP, TAB, VT, FF, NBSP, BOM (, TOFIX: USP)\r
 127                                 ++pos;\r
 128                                 returnValue = {start:start,stop:pos,name:9/*WHITE_SPACE*/,line:this.line,col:this.column,isWhite:true};\r
 129                                 ++this.column;\r
 130                         } else if (part[2]) { //this.regexLineTerminator.test(chr)) { // LF, CR, LS, PS\r
 131                                 var end = pos+1;\r
 132                                 if (chr=='\r' && inp[pos+1] == '\n') ++end; // support crlf=>lf\r
 133                                 returnValue = {start:pos,stop:end,name:10/*LINETERMINATOR*/,line:this.line,col:this.column,isWhite:true};\r
 134                                 pos = end;\r
 135                                 // mark newlines for ASI\r
 136                                 matchedNewline = true;\r
 137                                 ++this.line;\r
 138                                 this.column = 0;\r
 139                                 returnValue.hasNewline = 1;\r
 140                         } else if (part[3]) { //chr == '/' && inp[pos+1] == '/') {\r
 141                                 pos = shadowInp.indexOf('\n',pos);\r
 142                                 if (pos == -1) pos = inp.length;\r
 143                                 returnValue = {start:start,stop:pos,name:7/*COMMENT_SINGLE*/,line:this.line,col:this.column,isComment:true,isWhite:true};\r
 144                                 this.column = returnValue.stop;\r
 145                         } else if (part[4]) { //chr == '/' && inp[pos+1] == '*') {\r
 146                                 var newpos = inp.indexOf('*/',pos);\r
 147                                 if (newpos == -1) {\r
 148                                         newpos = shadowInp.indexOf('\n', pos);\r
 149                                         if (newpos < 0) pos += 2;\r
 150                                         else pos = newpos;\r
 151                                         returnValue = {start:start,stop:pos,name:14/*error*/,value:inp.substring(start, pos),line:this.line,col:this.column,isComment:true,isWhite:true,tokenError:true,error:Tokenizer.Error.UnterminatedMultiLineComment};\r
 152                                         this.errorStack.push(returnValue);\r
 153                                 } else {\r
 154                                         pos = newpos+2;\r
 155                                         returnValue = {start:start,stop:pos,name:8/*COMMENT_MULTI*/,value:inp.substring(start, pos),line:this.line,col:this.column,isComment:true,isWhite:true};\r
 156         \r
 157                                         // multi line comments are also reason for asi, but only if they contain at least one newline (use shadow input, because all line terminators would be valid...)\r
 158                                         var shadowValue = shadowInp.substring(start, pos);\r
 159                                         var i = 0, hasNewline = 0;\r
 160                                         while (i < (i = shadowValue.indexOf('\n', i+1))) {\r
 161                                                 ++hasNewline;\r
 162                                         }\r
 163                                         if (hasNewline) {\r
 164                                                 matchedNewline = true;\r
 165                                                 returnValue.hasNewline = hasNewline;\r
 166                                                 this.line += hasNewline;\r
 167                                                 this.column = 0;\r
 168                                         } else {\r
 169                                                 this.column = returnValue.stop;\r
 170                                         }\r
 171                                 }\r
 172                         } else if (part[5]) { //chr == "'") {\r
 173                                 // old method\r
 174                                 //console.log("old method");\r
 175                                 \r
 176                                 var hasNewline = 0;\r
 177                                 do {\r
 178                                         // process escaped characters\r
 179                                         while (pos < inp.length && inp[++pos] == '\\') {\r
 180                                                 if (shadowInp[pos+1] == '\n') ++hasNewline;\r
 181                                                 ++pos;\r
 182                                         }\r
 183                                         if (this.regexLineTerminator.test(inp[pos])) {\r
 184                                                 returnValue = {start:start,stop:pos,name:14/*error*/,value:inp.substring(start, pos),isString:true,tokenError:true,error:Tokenizer.Error.UnterminatedDoubleStringNewline};\r
 185                                                 this.errorStack.push(returnValue);\r
 186                                                 break;\r
 187                                         }\r
 188                                 } while (pos < inp.length && inp[pos] != "'");\r
 189                                 if (returnValue) {} // error\r
 190                                 else if (inp[pos] != "'") {\r
 191                                         returnValue = {start:start,stop:pos,name:14/*error*/,value:inp.substring(start, pos),isString:true,tokenError:true,error:Tokenizer.Error.UnterminatedDoubleStringOther};\r
 192                                         this.errorStack.push(returnValue);\r
 193                                 } else {\r
 194                                         ++pos;\r
 195                                         returnValue = {start:start,stop:pos,name:5/*STRING_SINGLE*/,isPrimitive:true,isString:true};\r
 196                                         if (hasNewline) {\r
 197                                                 returnValue.hasNewline = hasNewline;\r
 198                                                 this.line += hasNewline;\r
 199                                                 this.column = 0;\r
 200                                         } else {\r
 201                                                 this.column += (pos-start);\r
 202                                         }\r
 203                                 }                               \r
 204                         } else if (part[6]) { //chr == '"') {\r
 205                                 var hasNewline = 0;\r
 206                                 // TODO: something like this: var regexmatch = /([^\']|$)+/.match();\r
 207                                 do {\r
 208                                         // process escaped chars\r
 209                                         while (pos < inp.length && inp[++pos] == '\\') {\r
 210                                                 if (shadowInp[pos+1] == '\n') ++hasNewline;\r
 211                                                 ++pos;\r
 212                                         }\r
 213                                         if (this.regexLineTerminator.test(inp[pos])) {\r
 214                                                 returnValue = {start:start,stop:pos,name:14/*error*/,value:inp.substring(start, pos),isString:true,tokenError:true,error:Tokenizer.Error.UnterminatedSingleStringNewline};\r
 215                                                 this.errorStack.push(returnValue);\r
 216                                                 break;\r
 217                                         }\r
 218                                 } while (pos < inp.length && inp[pos] != '"');\r
 219                                 if (returnValue) {}\r
 220                                 else if (inp[pos] != '"') {\r
 221                                         returnValue = {start:start,stop:pos,name:14/*error*/,value:inp.substring(start, pos),isString:true,tokenError:true,error:Tokenizer.Error.UnterminatedSingleStringOther};\r
 222                                         this.errorStack.push(returnValue);\r
 223                                 } else {\r
 224                                         ++pos;\r
 225                                         returnValue = {start:start,stop:pos,name:6/*STRING_DOUBLE*/,isPrimitive:true,isString:true};\r
 226                                         if (hasNewline) {\r
 227                                                 returnValue.hasNewline = hasNewline;\r
 228                                                 this.line += hasNewline;\r
 229                                                 this.column = 0;\r
 230                                         } else {\r
 231                                                 this.column += (pos-start);\r
 232                                         }\r
 233                                 }\r
 234                         } else if (part[7]) { //(chr >= '0' && chr <= '9') || (chr == '.' && inp[pos+1] >= '0' && inp[pos+1] <= '9')) {\r
 235                                 var nextPart = inp.substring(pos, pos+30);\r
 236                                 var match = nextPart.match(this.regexNumber);\r
 237                                 if (match[2]) { // decimal\r
 238                                         var value = match[2];\r
 239                                         var parsingOctal = value[0] == '0' && value[1] && value[1] != 'e' && value[1] != 'E' && value[1] != '.';\r
 240                                         if (parsingOctal) {\r
 241                                                 returnValue = {start:start,stop:pos,name:14/*error*/,isNumber:true,isOctal:true,tokenError:true,error:Tokenizer.Error.IllegalOctalEscape,value:value};\r
 242                                                 this.errorStack.push(returnValue);\r
 243                                         } else {\r
 244                                                 returnValue = {start:start,stop:start+value.length,name:4/*NUMERIC_DEC*/,isPrimitive:true,isNumber:true,value:value};\r
 245                                         }\r
 246                                 } else if (match[1]) { // hex\r
 247                                         var value = match[1];\r
 248                                         returnValue = {start:start,stop:start+value.length,name:3/*NUMERIC_HEX*/,isPrimitive:true,isNumber:true,value:value};\r
 249                                 } else {\r
 250                                         throw 'unexpected parser errror... regex fail :(';\r
 251                                 }\r
 252                                 \r
 253                                 if (value.length < 300) {\r
 254                                         pos += value.length;\r
 255                                 } else {\r
 256                                         // old method of parsing numbers. only used for extremely long number literals (300+ chars).\r
 257                                         // this method does not require substringing... just memory :)\r
 258                                         var tmpReturnValue = this.oldNumberParser(pos, chr, inp, returnValue, start, Tokenizer);\r
 259                                         pos = tmpReturnValue[0];\r
 260                                         returnValue = tmpReturnValue[1];\r
 261                                 }\r
 262                         } else if (regex && part[8]) { //chr == '/') { // regex cannot start with /* (would be multiline comment, and not make sense anyways). but if it was /* then an earlier if would have eated it. so we only check for /\r
 263                                 var twinfo = []; // matching {[( info\r
 264                                 var found = false;\r
 265                                 var parens = [];\r
 266                                 var nonLethalError = null;\r
 267                                 while (++pos < inp.length) {\r
 268                                         chr = shadowInp[pos];\r
 269                                         // parse RegularExpressionChar\r
 270                                         if (chr == '\n') {\r
 271                                                 returnValue = {start:start,stop:pos,name:14/*error*/,tokenError:true,errorHasContent:true,error:Tokenizer.Error.UnterminatedRegularExpressionNewline};\r
 272                                                 this.errorStack.push(returnValue);\r
 273                                                 break; // fail\r
 274                                         } else if (chr == '/') {\r
 275                                                 found = true;\r
 276                                                 break;\r
 277                                         } else if (chr == '?' || chr == '*' || chr == '+') {\r
 278                                                 nonLethalError = Tokenizer.Error.NothingToRepeat;\r
 279                                         } else if (chr == '^') {\r
 280                                                 if (\r
 281                                                         inp[pos-1] != '/' && \r
 282                                                         inp[pos-1] != '|' && \r
 283                                                         inp[pos-1] != '(' &&\r
 284                                                         !(inp[pos-3] == '(' && inp[pos-2] == '?' && (inp[pos-1] == ':' || inp[pos-1] == '!' || inp[pos-1] == '='))\r
 285                                                 ) {\r
 286                                                         nonLethalError = Tokenizer.Error.StartOfMatchShouldBeAtStart;\r
 287                                                 }\r
 288                                         } else if (chr == '$') {\r
 289                                                 if (inp[pos+1] != '/' && inp[pos+1] != '|' && inp[pos+1] != ')') nonLethalError = Tokenizer.Error.DollarShouldBeEnd;\r
 290                                         } else if (chr == '}') {\r
 291                                                 nonLethalError = Tokenizer.Error.MissingOpeningCurly;\r
 292                                         } else { // it's a "character" (can be group or class), something to match\r
 293                                                 // match parenthesis\r
 294                                                 if (chr == '(') {\r
 295                                                         parens.push(pos-start);\r
 296                                                 } else if (chr == ')') {\r
 297                                                         if (parens.length == 0) {\r
 298                                                                 nonLethalError = {start:start,stop:pos,name:14/*error*/,tokenError:true,error:Tokenizer.Error.RegexNoOpenGroups};\r
 299                                                         } else {\r
 300                                                                 var twin = parens.pop();\r
 301                                                                 var now = pos-start;\r
 302                                                                 twinfo[twin] = now;\r
 303                                                                 twinfo[now] = twin;\r
 304                                                         }\r
 305                                                 }\r
 306                                                 // first process character class\r
 307                                                 if (chr == '[') {\r
 308                                                         var before = pos-start;\r
 309                                                         while (++pos < inp.length && shadowInp[pos] != '\n' && inp[pos] != ']') {\r
 310                                                                 // only newline is not allowed in class range\r
 311                                                                 // anything else can be escaped, most of it does not have to be escaped...\r
 312                                                                 if (inp[pos] == '\\') {\r
 313                                                                         if (shadowInp[pos+1] == '\n') break;\r
 314                                                                         else ++pos; // skip next char. (mainly prohibits ] to be picked up as closing the group...)\r
 315                                                                 }\r
 316                                                         } \r
 317                                                         if (inp[pos] != ']') {\r
 318                                                                 returnValue = {start:start,stop:pos,name:14/*error*/,tokenError:true,error:Tokenizer.Error.ClosingClassRangeNotFound};\r
 319                                                                 this.errorStack.push(returnValue);\r
 320                                                                 break;\r
 321                                                         } else {\r
 322                                                                 var after = pos-start;\r
 323                                                                 twinfo[before] = after;\r
 324                                                                 twinfo[after] = before;\r
 325                                                         }\r
 326                                                 } else if (chr == '\\' && shadowInp[pos+1] != '\n') {\r
 327                                                         // is ok anywhere in the regex (match next char literally, regardless of its otherwise special meaning)\r
 328                                                         ++pos;\r
 329                                                 }\r
 330                                                 \r
 331                                                 // now process repeaters (+, ? and *)\r
 332                                                 \r
 333                                                 // non-collecting group (?:...) and positive (?=...) or negative (?!...) lookahead\r
 334                                                 if (chr == '(') {\r
 335                                                         if (inp[pos+1] == '?' && (inp[pos+2] == ':' || inp[pos+2] == '=' || inp[pos+2] == '!')) {\r
 336                                                                 pos += 2;\r
 337                                                         }\r
 338                                                 }\r
 339                                                 // matching "char"\r
 340                                                 else if (inp[pos+1] == '?') ++pos;\r
 341                                                 else if (inp[pos+1] == '*' || inp[pos+1] == '+') {\r
 342                                                         ++pos;\r
 343                                                         if (inp[pos+1] == '?') ++pos; // non-greedy match\r
 344                                                 } else if (inp[pos+1] == '{') {\r
 345                                                         pos += 1;\r
 346                                                         var before = pos-start;\r
 347                                                         // quantifier:\r
 348                                                         // - {n}\r
 349                                                         // - {n,}\r
 350                                                         // - {n,m}\r
 351                                                         if (!/[0-9]/.test(inp[pos+1])) {\r
 352                                                                 nonLethalError = Tokenizer.Error.QuantifierRequiresNumber;\r
 353                                                         }\r
 354                                                         while (++pos < inp.length && /[0-9]/.test(inp[pos+1]));\r
 355                                                         if (inp[pos+1] == ',') {\r
 356                                                                 ++pos;\r
 357                                                                 while (pos < inp.length && /[0-9]/.test(inp[pos+1])) ++pos;\r
 358                                                         }\r
 359                                                         if (inp[pos+1] != '}') {\r
 360                                                                 nonLethalError = Tokenizer.Error.QuantifierRequiresClosingCurly;\r
 361                                                         } else {\r
 362                                                                 ++pos;\r
 363                                                                 var after = pos-start;\r
 364                                                                 twinfo[before] = after;\r
 365                                                                 twinfo[after] = before;\r
 366                                                                 if (inp[pos+1] == '?') ++pos; // non-greedy match\r
 367                                                         }\r
 368                                                 }\r
 369                                         }\r
 370                                 }\r
 371                                 // if found=false, fail right now. otherwise try to parse an identifiername (that's all RegularExpressionFlags is..., but it's constructed in a stupid fashion)\r
 372                                 if (!found || returnValue) {\r
 373                                         if (!returnValue) {\r
 374                                                 returnValue = {start:start,stop:pos,name:14/*error*/,tokenError:true,error:Tokenizer.Error.UnterminatedRegularExpressionOther};\r
 375                                                 this.errorStack.push(returnValue);\r
 376                                         }\r
 377                                 } else {\r
 378                                         // this is the identifier scanner, for now\r
 379                                         do ++pos;\r
 380                                         while (pos < inp.length && this.hashAsciiIdentifier[inp[pos]]); /*this.regexAsciiIdentifier.test(inp[pos])*/ \r
 381         \r
 382                                         if (parens.length) {\r
 383                                                 // nope, this is still an error, there was at least one paren that did not have a matching twin\r
 384                                                 if (parens.length > 0) returnValue = {start:start,stop:pos,name:14/*error*/,tokenError:true,error:Tokenizer.Error.RegexOpenGroup};\r
 385                                                 this.errorStack.push(returnValue);\r
 386                                         } else if (nonLethalError) {\r
 387                                                 returnValue = {start:start,stop:pos,name:14/*error*/,errorHasContent:true,tokenError:true,error:nonLethalError};\r
 388                                                 this.errorStack.push(returnValue);\r
 389                                         } else {\r
 390                                                 returnValue = {start:start,stop:pos,name:1/*REG_EX*/,isPrimitive:true};\r
 391                                         }                               \r
 392                                 }\r
 393                                 returnValue.twinfo = twinfo;\r
 394                         } else {\r
 395                                 // note: operators need to be ordered from longest to smallest. regex will take care of the rest.\r
 396                                 // no need to worry about div vs regex. if looking for regex, earlier if will have eaten it\r
 397                                 //var result = this.regexPunctuators.exec(inp.substring(pos,pos+4));\r
 398                                 \r
 399                                 // note: due to the regex, the single forward slash might be caught by an earlier part of the regex. so check for that.\r
 400                                 var result = part[8] || part[9];\r
 401                                 if (result) {\r
 402                                         //result = result[1];\r
 403                                         returnValue = {start:pos,stop:pos+=result.length,name:11/*PUNCTUATOR*/,value:result};\r
 404                                 } else {\r
 405                                         var found = false;\r
 406                                         // identifiers cannot start with a number. but if the leading string would be a number, another if would have eaten it already for numeric literal :)\r
 407                                         while (pos < inp.length) {\r
 408                                                 var c = inp[pos];\r
 409         \r
 410                                                 if (this.hashAsciiIdentifier[c]) ++pos; //if (this.regexAsciiIdentifier.test(c)) ++pos;\r
 411                                                 else if (c == '\\' && this.regexUnicodeEscape.test(inp.substring(pos,pos+6))) pos += 6; // this is like a \uxxxx\r
 412                                                 // ok, now test unicode ranges...\r
 413                                                 // basically this hardly ever happens so there's little risk of this hitting performance\r
 414                                                 // however, if you do happen to have used them, it's not a problem. the parser will support it :)\r
 415                                                 else if (this.Unicode) { // the unicode is optional.\r
 416                                                         // these chars may not be part of identifier. i want to try to prevent running the unicode regexes here...\r
 417                                                         if (this.hashIdentifierStop[c] /*this.regexIdentifierStop.test(c)*/) break;\r
 418                                                         // for most scripts, the code wont reach here. which is good, because this is going to be relatively slow :)\r
 419                                                         var Unicode = this.Unicode; // cache\r
 420                                                         if (!(\r
 421                                                                         // these may all occur in an identifier... (pure a specification compliance thing :)\r
 422                                                                         Unicode.Lu.test(c) || Unicode.Ll.test(c) || Unicode.Lt.test(c) || Unicode.Lm.test(c) || \r
 423                                                                         Unicode.Lo.test(c) || Unicode.Nl.test(c) || Unicode.Mn.test(c) || Unicode.Mc.test(c) ||\r
 424                                                                         Unicode.Nd.test(c) || Unicode.Pc.test(c) || Unicode.sp.test(c)\r
 425                                                         )) break; // end of match.\r
 426                                                         // passed, next char\r
 427                                                         ++pos;\r
 428                                                 } else break; // end of match.\r
 429                         \r
 430                                                 found = true;\r
 431                                         }\r
 432                 \r
 433                                         if (found) {\r
 434                                                 returnValue = {start:start,stop:pos,name:2/*IDENTIFIER*/,value:inp.substring(start,pos)};\r
 435                                                 if (returnValue.value == 'undefined' || returnValue.value == 'null' || returnValue.value == 'true' || returnValue.value == 'false') returnValue.isPrimitive = true;\r
 436                                         } else {\r
 437                                                 if (inp[pos] == '`') {\r
 438                                                         returnValue = {start:start,stop:pos+1,name:14/*error*/,tokenError:true,error:Tokenizer.Error.BacktickNotSupported};\r
 439                                                         this.errorStack.push(returnValue);\r
 440                                                 } else if (inp[pos] == '\\') {\r
 441                                                         if (inp[pos+1] == 'u') {\r
 442                                                                 returnValue = {start:start,stop:pos+1,name:14/*error*/,tokenError:true,error:Tokenizer.Error.InvalidUnicodeEscape};\r
 443                                                                 this.errorStack.push(returnValue);\r
 444                                                         } else {\r
 445                                                                 returnValue = {start:start,stop:pos+1,name:14/*error*/,tokenError:true,error:Tokenizer.Error.InvalidBackslash};\r
 446                                                                 this.errorStack.push(returnValue);\r
 447                                                         }\r
 448                                                 } else {\r
 449                                                         returnValue = {start:start,stop:pos+1,name:14/*error*/,tokenError:true,error:Tokenizer.Error.Unknown,value:c};\r
 450                                                         this.errorStack.push(returnValue);\r
 451                                                         // try to skip this char. it's not going anywhere.\r
 452                                                 }\r
 453                                                 ++pos;\r
 454                                         }\r
 455                                 }\r
 456                         }\r
 457                         \r
 458                         if (returnValue) {\r
 459                                 // note that ASI's are slipstreamed in here from the parser since the tokenizer cant determine that\r
 460                                 // if this part ever changes, make sure you change that too :)\r
 461                                 returnValue.tokposw = this.wtree.length;\r
 462                                 this.wtree.push(returnValue);\r
 463                                 if (!returnValue.isWhite) {\r
 464                                         returnValue.tokposb = this.btree.length;\r
 465                                         this.btree.push(returnValue);\r
 466                                 } \r
 467                         }\r
 468                         \r
 469                         \r
 470                 } while (stack && returnValue && returnValue.isWhite); // WHITE_SPACE LINETERMINATOR COMMENT_SINGLE COMMENT_MULTI\r
 471                 ++this.tokenCountNoWhite;\r
 472                 \r
 473                 this.pos = pos;\r
 474         \r
 475                 if (matchedNewline) returnValue.newline = true;\r
 476                 return returnValue;\r
 477         },\r
 478         addTokenToStreamBefore: function(token, match){\r
 479                 var wtree = this.wtree;\r
 480                 var btree = this.btree;\r
 481                 if (match.name == 12/*asi*/) {\r
 482                         token.tokposw = wtree.length;\r
 483                         wtree.push(token);\r
 484                         token.tokposb = btree.length;\r
 485                         btree.push(token);\r
 486                 } else {\r
 487                         token.tokposw = match.tokposw;\r
 488                         wtree[token.tokposw] = token;\r
 489                         match.tokposw += 1;\r
 490                         wtree[match.tokposw] = match;\r
 491 \r
 492                         if (match.tokposb) {\r
 493                                 token.tokposb = match.tokposb;\r
 494                                 btree[token.tokposb] = token;\r
 495                                 match.tokposb += 1;\r
 496                                 btree[match.tokposb] = match;\r
 497                         }\r
 498                 }\r
 499         },\r
 500         oldNumberParser: function(pos, chr, inp, returnValue, start, Tokenizer){\r
 501                 ++pos;\r
 502                 // either: 0x 0X 0 .3\r
 503                 if (chr == '0' && (inp[pos] == 'x' || inp[pos] == 'X')) {\r
 504                         // parsing hex\r
 505                         while (++pos < inp.length && this.hashHex[inp[pos]]); // this.regexHex.test(inp[pos]));\r
 506                         returnValue = {start:start,stop:pos,name:3/*NUMERIC_HEX*/,isPrimitive:true,isNumber:true};\r
 507                 } else {\r
 508                         var parsingOctal = chr == '0' && inp[pos] >= '0' && inp[pos] <= '9';\r
 509                         // parsing dec\r
 510                         if (chr != '.') { // integer part\r
 511                                 while (pos < inp.length && inp[pos] >= '0' && inp[pos] <= '9') ++pos;\r
 512                                 if (inp[pos] == '.') ++pos;\r
 513                         }\r
 514                         // decimal part\r
 515                         while (pos < inp.length && inp[pos] >= '0' && inp[pos] <= '9') ++pos;\r
 516                         // exponent part\r
 517                         if (inp[pos] == 'e' || inp[pos] == 'E') {\r
 518                                 if (inp[++pos] == '+' || inp[pos] == '-') ++pos;\r
 519                                 var expPosBak = pos;\r
 520                                 while (pos < inp.length && inp[pos] >= '0' && inp[pos] <= '9') ++pos;\r
 521                                 if (expPosBak == pos) {\r
 522                                         returnValue = {start:start,stop:pos,name:14/*error*/,tokenError:true,error:Tokenizer.Error.NumberExponentRequiresDigits};\r
 523                                         this.errorStack.push(returnValue);\r
 524                                 }\r
 525                         }\r
 526                         if (returnValue.name != 14/*error*/) {\r
 527                                 if (parsingOctal) {\r
 528                                         returnValue = {start:start,stop:pos,name:14/*error*/,isNumber:true,isOctal:true,tokenError:true,error:Tokenizer.Error.IllegalOctalEscape};\r
 529                                         this.errorStack.push(returnValue);\r
 530                                         console.log("foo")\r
 531                                 } else {\r
 532                                         returnValue = {start:start,stop:pos,name:4/*NUMERIC_DEC*/,isPrimitive:true,isNumber:true};\r
 533                                 }\r
 534                         }\r
 535                 }\r
 536                 return [pos, returnValue];\r
 537         },\r
 538         tokens: function(arrx){\r
 539                 arrx = arrx || [];\r
 540                 var n = 0;\r
 541                 var last;\r
 542                 var stack = [];\r
 543                 while ((last = this.storeCurrentAndFetchNextToken(!arrx[n++], false, false, true)) && last.name != 12/*EOF*/) stack.push(last);\r
 544                 return stack;\r
 545         },\r
 546         fixValues: function(){\r
 547                 this.wtree.forEach(function(t){\r
 548                         if (!t.value) t.value = this.inp.substring(t.start, t.stop);\r
 549                 },this);\r
 550         }\r
 551 };\r
 552 \r
 553 //#ifdef TEST_SUITE\r
 554 Tokenizer.escape = function(s){\r
 555         return s.replace(/\n/g,'\\n').replace(/\t/g,'\\t').replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;').replace(/\uFFFF/g, '\\uFFFF').replace(/\s/g, function(s){\r
 556                 // replace whitespace as is...\r
 557                 var ord = s.charCodeAt(0).toString(16);\r
 558                 switch (ord.length) {\r
 559                         case 1: ord = '000'+ord; break;\r
 560                         case 2: ord = '00'+ord; break;\r
 561                         case 3: ord = '0'+ord; break;\r
 562                 }\r
 563                 return '\\u'+ord;\r
 564         });\r
 565 };\r
 566 Tokenizer.testSuite = function(arr){\r
 567         var out = document.createElement('pre');\r
 568         document.body.appendChild(out);\r
 569         var debug = function(){\r
 570                 var f = document.createElement('div');\r
 571                 f.innerHTML = Array.prototype.slice.call(arguments).join(' ');\r
 572                 out.appendChild(f);\r
 573                 return arguments[0];\r
 574         };\r
 575 \r
 576         debug("Running test suite...",arr.length,"tests");\r
 577         debug(' ');\r
 578         var start = +new Date;\r
 579         var ok = 0;\r
 580         var fail = 0;\r
 581         for (var i=0; i<arr.length; ++i) {\r
 582                 var test = arr[i], result;\r
 583                 var input = test[1];\r
 584                 var outputLen = test[2];\r
 585                 var regexHints = test[4] ? test[3] : null; // if flags, then len=4\r
 586                 var desc = test[4] || test[3];\r
 587                 \r
 588                 var result = new Tokenizer(input).tokens(regexHints); // regexHints can be null, that's ok\r
 589                 if (result.length == outputLen) {\r
 590                         debug('<span class="green">Test '+i+' ok:</span>',desc);\r
 591                         ++ok;\r
 592                 } else {\r
 593                         debug('<b class="red">Test failed:</span>',desc,'(found',result.length,'expected',outputLen+')'),console.log(desc, result);\r
 594                         ++fail;\r
 595                 }\r
 596                 debug('<b>'+Tokenizer.escape(input)+'</b>');\r
 597                 debug('<br/>');\r
 598         }\r
 599         debug("Tokenizer test suite finished ("+(+new Date - start)+' ms). ok:'+ok+', fail:'+fail);\r
 600 };\r
 601 //#endif\r
 602 \r
 603 Tokenizer.regexWhiteSpace = /[ \t\u000B\u000C\u00A0\uFFFF]/;\r
 604 Tokenizer.regexLineTerminator = /[\u000A\u000D\u2028\u2029]/;\r
 605 Tokenizer.regexAsciiIdentifier = /[a-zA-Z0-9\$_]/;\r
 606 Tokenizer.hashAsciiIdentifier = {_:1,$:1,a:1,b:1,c:1,d:1,e:1,f:1,g:1,h:1,i:1,j:1,k:1,l:1,m:1,n:1,o:1,p:1,q:1,r:1,s:1,t:1,u:1,v:1,w:1,x:1,y:1,z:1,A:1,B:1,C:1,D:1,E:1,F:1,G:1,H:1,I:1,J:1,K:1,L:1,M:1,N:1,O:1,P:1,Q:1,R:1,S:1,T:1,U:1,V:1,W:1,X:1,Y:1,Z:1,0:1,1:1,2:1,3:1,4:1,5:1,6:1,7:1,8:1,9:1};\r
 607 Tokenizer.regexHex = /[0-9A-Fa-f]/;\r
 608 Tokenizer.hashHex = {0:1,1:1,2:1,3:1,4:1,5:1,6:1,7:1,8:1,9:1,a:1,b:1,c:1,d:1,e:1,f:1,A:1,B:1,C:1,D:1,E:1,F:1};\r
 609 Tokenizer.regexUnicodeEscape = /u[0-9A-Fa-f]{4}/; // the \ is already checked at usage...\r
 610 Tokenizer.regexIdentifierStop = /[\>\=\!\|\<\+\-\&\*\%\^\/\{\}\(\)\[\]\.\;\,\~\?\:\ \t\n\\\'\"]/; \r
 611 Tokenizer.hashIdentifierStop = {'>':1,'=':1,'!':1,'|':1,'<':1,'+':1,'-':1,'&':1,'*':1,'%':1,'^':1,'/':1,'{':1,'}':1,'(':1,')':1,'[':1,']':1,'.':1,';':1,',':1,'~':1,'?':1,':':1,'\\':1,'\'':1,'"':1,' ':1,'\t':1,'\n':1};\r
 612 Tokenizer.regexNewline = /\n/g;\r
 613 //Tokenizer.regexPunctuators = /^(>>>=|===|!==|>>>|<<=|>>=|<=|>=|==|!=|\+\+|--|<<|>>|\&\&|\|\||\+=|-=|\*=|%=|\&=|\|=|\^=|\/=|\{|\}|\(|\)|\[|\]|\.|;|,|<|>|\+|-|\*|%|\||\&|\||\^|!|~|\?|:|=|\/)/;\r
 614 Tokenizer.Unidocde = window.Unicode;\r
 615 Tokenizer.regexNumber = /^(?:(0[xX][0-9A-Fa-f]+)|((?:(?:(?:(?:[0-9]+)(?:\.[0-9]*)?))|(?:\.[0-9]+))(?:[eE][-+]?[0-9]{1,})?))/;\r
 616 Tokenizer.regexNormalizeNewlines = /(\u000D[^\u000A])|[\u2028\u2029]/;\r
 617 \r
 618 //                                                      1 ws                                                    2 lt                               3 scmt 4 mcmt 5/6 str 7 nr     8 rx  9 punc\r
 619 Tokenizer.regexBig = /^([ \t\u000B\u000C\u00A0\uFFFF])?([\u000A\u000D\u2028\u2029])?(\/\/)?(\/\*)?(')?(")?(\.?[0-9])?(?:(\/)[^=])?(>>>=|===|!==|>>>|<<=|>>=|<=|>=|==|!=|\+\+|--|<<|>>|\&\&|\|\||\+=|-=|\*=|%=|\&=|\|=|\^=|\/=|\{|\}|\(|\)|\[|\]|\.|;|,|<|>|\+|-|\*|%|\||\&|\||\^|!|~|\?|:|=|\/)?/;\r
 620 Tokenizer.regexBigAlt = /([ \t\u000B\u000C\u00A0\uFFFF])?([\u000A\u000D\u2028\u2029])?(\/\/)?(\/\*)?(')?(")?(\.?[0-9])?(?:(\/)[^=])?(>>>=|===|!==|>>>|<<=|>>=|<=|>=|==|!=|\+\+|--|<<|>>|\&\&|\|\||\+=|-=|\*=|%=|\&=|\|=|\^=|\/=|\{|\}|\(|\)|\[|\]|\.|;|,|<|>|\+|-|\*|%|\||\&|\||\^|!|~|\?|:|=|\/)?/g;\r
 621 \r
 622 Tokenizer.Error = {\r
 623         UnterminatedSingleStringNewline: {msg:'Newlines are not allowed in string literals'},\r
 624         UnterminatedSingleStringOther: {msg:'Unterminated single string'},\r
 625         UnterminatedDoubleStringNewline: {msg:'Newlines are not allowed in string literals'},\r
 626         UnterminatedDoubleStringOther: {msg:'Unterminated double string'},\r
 627         UnterminatedRegularExpressionNewline: {msg:'Newlines are not allowed in regular expressions'},\r
 628         NothingToRepeat: {msg:'Used a repeat character (*?+) in a regex without something prior to it to match'},\r
 629         ClosingClassRangeNotFound: {msg: 'Unable to find ] for class range'},\r
 630         RegexOpenGroup: {msg: 'Open group did not find closing parenthesis'},\r
 631         RegexNoOpenGroups: {msg: 'Closing parenthesis found but no group open'},\r
 632         UnterminatedRegularExpressionOther: {msg:'Unterminated regular expression'},\r
 633         UnterminatedMultiLineComment: {msg:'Unterminated multi line comment'},\r
 634         UnexpectedIdentifier: {msg:'Unexpected identifier'},\r
 635         IllegalOctalEscape: {msg:'Octal escapes are not valid'},\r
 636         Unknown: {msg:'Unknown input'}, // if this happens, my parser is bad :(\r
 637         NumberExponentRequiresDigits: {msg:'Numbers with exponents require at least one digit after the `e`'},\r
 638         BacktickNotSupported: {msg:'The backtick is not used in js, maybe you copy/pasted from a fancy site/doc?'},\r
 639         InvalidUnicodeEscape: {msg:'Encountered an invalid unicode escape, must be followed by exactly four hex numbers'},\r
 640         InvalidBackslash: {msg:'Encountered a backslash where it not allowed'},\r
 641         StartOfMatchShouldBeAtStart: {msg: 'The ^ signifies the start of match but was not found at a start'},\r
 642         DollarShouldBeEnd: {msg: 'The $ signifies the stop of match but was not found at a stop'},\r
 643         QuantifierRequiresNumber: {msg:'Quantifier curly requires at least one digit before the comma'},\r
 644         QuantifierRequiresClosingCurly: {msg:'Quantifier curly requires to be closed'},\r
 645         MissingOpeningCurly: {msg:'Encountered closing quantifier curly without seeing an opening curly'}\r
 646 };\r