1 /* 2 * Copyright (c) 1999, 2011, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package com.sun.tools.javac.parser; 27 28 import java.nio; 29 30 import com.sun.tools.javac.code.Source; 31 import com.sun.tools.javac.file.JavacFileManager; 32 import com.sun.tools.javac.util; 33 34 35 import static com.sun.tools.javac.parser.Token.*; 36 import static com.sun.tools.javac.util.LayoutCharacters.*; 37 38 /** The lexical analyzer maps an input stream consisting of 39 * ASCII characters and Unicode escapes into a token sequence. 40 * 41 * <p><b>This is NOT part of any supported API. 42 * If you write code that depends on this, you do so at your own risk. 43 * This code and its internal interfaces are subject to change or 44 * deletion without notice.</b> 45 */ 46 public class Scanner implements Lexer { 47 48 private static boolean scannerDebug = false; 49 50 /* Output variables; set by nextToken(): 51 */ 52 53 /** The token, set by nextToken(). 54 */ 55 private Token token; 56 57 /** Allow hex floating-point literals. 58 */ 59 private boolean allowHexFloats; 60 61 /** Allow binary literals. 62 */ 63 private boolean allowBinaryLiterals; 64 65 /** Allow underscores in literals. 66 */ 67 private boolean allowUnderscoresInLiterals; 68 69 /** The source language setting. 70 */ 71 private Source source; 72 73 /** The token's position, 0-based offset from beginning of text. 74 */ 75 private int pos; 76 77 /** Character position just after the last character of the token. 78 */ 79 private int endPos; 80 81 /** The last character position of the previous token. 82 */ 83 private int prevEndPos; 84 85 /** The position where a lexical error occurred; 86 */ 87 private int errPos = Position.NOPOS; 88 89 /** The name of an identifier or token: 90 */ 91 private Name name; 92 93 /** The radix of a numeric literal token. 94 */ 95 private int radix; 96 97 /** Has a @deprecated been encountered in last doc comment? 98 * this needs to be reset by client. 99 */ 100 protected boolean deprecatedFlag = false; 101 102 /** A character buffer for literals. 103 */ 104 private char[] sbuf = new char[128]; 105 private int sp; 106 107 /** The input buffer, index of next chacter to be read, 108 * index of one past last character in buffer. 109 */ 110 private char[] buf; 111 private int bp; 112 private int buflen; 113 private int eofPos; 114 115 /** The current character. 116 */ 117 private char ch; 118 119 /** The buffer index of the last converted unicode character 120 */ 121 private int unicodeConversionBp = -1; 122 123 /** The log to be used for error reporting. 124 */ 125 private final Log log; 126 127 /** The name table. */ 128 private final Names names; 129 130 /** The keyword table. */ 131 private final Keywords keywords; 132 133 /** Common code for constructors. */ 134 private Scanner(ScannerFactory fac) { 135 log = fac.log; 136 names = fac.names; 137 keywords = fac.keywords; 138 source = fac.source; 139 allowBinaryLiterals = source.allowBinaryLiterals(); 140 allowHexFloats = source.allowHexFloats(); 141 allowUnderscoresInLiterals = source.allowUnderscoresInLiterals(); 142 } 143 144 private static final boolean hexFloatsWork = hexFloatsWork(); 145 private static boolean hexFloatsWork() { 146 try { 147 Float.valueOf("0x1.0p1"); 148 return true; 149 } catch (NumberFormatException ex) { 150 return false; 151 } 152 } 153 154 /** Create a scanner from the input buffer. buffer must implement 155 * array() and compact(), and remaining() must be less than limit(). 156 */ 157 protected Scanner(ScannerFactory fac, CharBuffer buffer) { 158 this(fac, JavacFileManager.toArray(buffer), buffer.limit()); 159 } 160 161 /** 162 * Create a scanner from the input array. This method might 163 * modify the array. To avoid copying the input array, ensure 164 * that {@code inputLength < input.length} or 165 * {@code input[input.length -1]} is a white space character. 166 * 167 * @param fac the factory which created this Scanner 168 * @param input the input, might be modified 169 * @param inputLength the size of the input. 170 * Must be positive and less than or equal to input.length. 171 */ 172 protected Scanner(ScannerFactory fac, char[] input, int inputLength) { 173 this(fac); 174 eofPos = inputLength; 175 if (inputLength == input.length) { 176 if (input.length > 0 && Character.isWhitespace(input[input.length - 1])) { 177 inputLength--; 178 } else { 179 char[] newInput = new char[inputLength + 1]; 180 System.arraycopy(input, 0, newInput, 0, input.length); 181 input = newInput; 182 } 183 } 184 buf = input; 185 buflen = inputLength; 186 buf[buflen] = EOI; 187 bp = -1; 188 scanChar(); 189 } 190 191 /** Report an error at the given position using the provided arguments. 192 */ 193 private void lexError(int pos, String key, Object... args) { 194 log.error(pos, key, args); 195 token = ERROR; 196 errPos = pos; 197 } 198 199 /** Report an error at the current token position using the provided 200 * arguments. 201 */ 202 private void lexError(String key, Object... args) { 203 lexError(pos, key, args); 204 } 205 206 /** Convert an ASCII digit from its base (8, 10, or 16) 207 * to its value. 208 */ 209 private int digit(int base) { 210 char c = ch; 211 int result = Character.digit(c, base); 212 if (result >= 0 && c > 0x7f) { 213 lexError(pos+1, "illegal.nonascii.digit"); 214 ch = "0123456789abcdef".charAt(result); 215 } 216 return result; 217 } 218 219 /** Convert unicode escape; bp points to initial '\' character 220 * (Spec 3.3). 221 */ 222 private void convertUnicode() { 223 if (ch == '\\' && unicodeConversionBp != bp) { 224 bp++; ch = buf[bp]; 225 if (ch == 'u') { 226 do { 227 bp++; ch = buf[bp]; 228 } while (ch == 'u'); 229 int limit = bp + 3; 230 if (limit < buflen) { 231 int d = digit(16); 232 int code = d; 233 while (bp < limit && d >= 0) { 234 bp++; ch = buf[bp]; 235 d = digit(16); 236 code = (code << 4) + d; 237 } 238 if (d >= 0) { 239 ch = (char)code; 240 unicodeConversionBp = bp; 241 return; 242 } 243 } 244 lexError(bp, "illegal.unicode.esc"); 245 } else { 246 bp--; 247 ch = '\\'; 248 } 249 } 250 } 251 252 /** Read next character. 253 */ 254 private void scanChar() { 255 ch = buf[++bp]; 256 if (ch == '\\') { 257 convertUnicode(); 258 } 259 } 260 261 /** Read next character in comment, skipping over double '\' characters. 262 */ 263 private void scanCommentChar() { 264 scanChar(); 265 if (ch == '\\') { 266 if (buf[bp+1] == '\\' && unicodeConversionBp != bp) { 267 bp++; 268 } else { 269 convertUnicode(); 270 } 271 } 272 } 273 274 /** Append a character to sbuf. 275 */ 276 private void putChar(char ch) { 277 if (sp == sbuf.length) { 278 char[] newsbuf = new char[sbuf.length * 2]; 279 System.arraycopy(sbuf, 0, newsbuf, 0, sbuf.length); 280 sbuf = newsbuf; 281 } 282 sbuf[sp++] = ch; 283 } 284 285 /** Read next character in character or string literal and copy into sbuf. 286 */ 287 private void scanLitChar() { 288 if (ch == '\\') { 289 if (buf[bp+1] == '\\' && unicodeConversionBp != bp) { 290 bp++; 291 putChar('\\'); 292 scanChar(); 293 } else { 294 scanChar(); 295 switch (ch) { 296 case '0': case '1': case '2': case '3': 297 case '4': case '5': case '6': case '7': 298 char leadch = ch; 299 int oct = digit(8); 300 scanChar(); 301 if ('0' <= ch && ch <= '7') { 302 oct = oct * 8 + digit(8); 303 scanChar(); 304 if (leadch <= '3' && '0' <= ch && ch <= '7') { 305 oct = oct * 8 + digit(8); 306 scanChar(); 307 } 308 } 309 putChar((char)oct); 310 break; 311 case 'b': 312 putChar('\b'); scanChar(); break; 313 case 't': 314 putChar('\t'); scanChar(); break; 315 case 'n': 316 putChar('\n'); scanChar(); break; 317 case 'f': 318 putChar('\f'); scanChar(); break; 319 case 'r': 320 putChar('\r'); scanChar(); break; 321 case '\'': 322 putChar('\''); scanChar(); break; 323 case '\"': 324 putChar('\"'); scanChar(); break; 325 case '\\': 326 putChar('\\'); scanChar(); break; 327 default: 328 lexError(bp, "illegal.esc.char"); 329 } 330 } 331 } else if (bp != buflen) { 332 putChar(ch); scanChar(); 333 } 334 } 335 336 private void scanDigits(int digitRadix) { 337 char saveCh; 338 int savePos; 339 do { 340 if (ch != '_') { 341 putChar(ch); 342 } else { 343 if (!allowUnderscoresInLiterals) { 344 lexError("unsupported.underscore.lit", source.name); 345 allowUnderscoresInLiterals = true; 346 } 347 } 348 saveCh = ch; 349 savePos = bp; 350 scanChar(); 351 } while (digit(digitRadix) >= 0 || ch == '_'); 352 if (saveCh == '_') 353 lexError(savePos, "illegal.underscore"); 354 } 355 356 /** Read fractional part of hexadecimal floating point number. 357 */ 358 private void scanHexExponentAndSuffix() { 359 if (ch == 'p' || ch == 'P') { 360 putChar(ch); 361 scanChar(); 362 skipIllegalUnderscores(); 363 if (ch == '+' || ch == '-') { 364 putChar(ch); 365 scanChar(); 366 } 367 skipIllegalUnderscores(); 368 if ('0' <= ch && ch <= '9') { 369 scanDigits(10); 370 if (!allowHexFloats) { 371 lexError("unsupported.fp.lit", source.name); 372 allowHexFloats = true; 373 } 374 else if (!hexFloatsWork) 375 lexError("unsupported.cross.fp.lit"); 376 } else 377 lexError("malformed.fp.lit"); 378 } else { 379 lexError("malformed.fp.lit"); 380 } 381 if (ch == 'f' || ch == 'F') { 382 putChar(ch); 383 scanChar(); 384 token = FLOATLITERAL; 385 } else { 386 if (ch == 'd' || ch == 'D') { 387 putChar(ch); 388 scanChar(); 389 } 390 token = DOUBLELITERAL; 391 } 392 } 393 394 /** Read fractional part of floating point number. 395 */ 396 private void scanFraction() { 397 skipIllegalUnderscores(); 398 if ('0' <= ch && ch <= '9') { 399 scanDigits(10); 400 } 401 int sp1 = sp; 402 if (ch == 'e' || ch == 'E') { 403 putChar(ch); 404 scanChar(); 405 skipIllegalUnderscores(); 406 if (ch == '+' || ch == '-') { 407 putChar(ch); 408 scanChar(); 409 } 410 skipIllegalUnderscores(); 411 if ('0' <= ch && ch <= '9') { 412 scanDigits(10); 413 return; 414 } 415 lexError("malformed.fp.lit"); 416 sp = sp1; 417 } 418 } 419 420 /** Read fractional part and 'd' or 'f' suffix of floating point number. 421 */ 422 private void scanFractionAndSuffix() { 423 this.radix = 10; 424 scanFraction(); 425 if (ch == 'f' || ch == 'F') { 426 putChar(ch); 427 scanChar(); 428 token = FLOATLITERAL; 429 } else { 430 if (ch == 'd' || ch == 'D') { 431 putChar(ch); 432 scanChar(); 433 } 434 token = DOUBLELITERAL; 435 } 436 } 437 438 /** Read fractional part and 'd' or 'f' suffix of floating point number. 439 */ 440 private void scanHexFractionAndSuffix(boolean seendigit) { 441 this.radix = 16; 442 Assert.check(ch == '.'); 443 putChar(ch); 444 scanChar(); 445 skipIllegalUnderscores(); 446 if (digit(16) >= 0) { 447 seendigit = true; 448 scanDigits(16); 449 } 450 if (!seendigit) 451 lexError("invalid.hex.number"); 452 else 453 scanHexExponentAndSuffix(); 454 } 455 456 private void skipIllegalUnderscores() { 457 if (ch == '_') { 458 lexError(bp, "illegal.underscore"); 459 while (ch == '_') 460 scanChar(); 461 } 462 } 463 464 /** Read a number. 465 * @param radix The radix of the number; one of 2, j8, 10, 16. 466 */ 467 private void scanNumber(int radix) { 468 this.radix = radix; 469 // for octal, allow base-10 digit in case it's a float literal 470 int digitRadix = (radix == 8 ? 10 : radix); 471 boolean seendigit = false; 472 if (digit(digitRadix) >= 0) { 473 seendigit = true; 474 scanDigits(digitRadix); 475 } 476 if (radix == 16 && ch == '.') { 477 scanHexFractionAndSuffix(seendigit); 478 } else if (seendigit && radix == 16 && (ch == 'p' || ch == 'P')) { 479 scanHexExponentAndSuffix(); 480 } else if (digitRadix == 10 && ch == '.') { 481 putChar(ch); 482 scanChar(); 483 scanFractionAndSuffix(); 484 } else if (digitRadix == 10 && 485 (ch == 'e' || ch == 'E' || 486 ch == 'f' || ch == 'F' || 487 ch == 'd' || ch == 'D')) { 488 scanFractionAndSuffix(); 489 } else { 490 if (ch == 'l' || ch == 'L') { 491 scanChar(); 492 token = LONGLITERAL; 493 } else { 494 token = INTLITERAL; 495 } 496 } 497 } 498 499 /** Read an identifier. 500 */ 501 private void scanIdent() { 502 boolean isJavaIdentifierPart; 503 char high; 504 do { 505 if (sp == sbuf.length) putChar(ch); else sbuf[sp++] = ch; 506 // optimization, was: putChar(ch); 507 508 scanChar(); 509 switch (ch) { 510 case 'A': case 'B': case 'C': case 'D': case 'E': 511 case 'F': case 'G': case 'H': case 'I': case 'J': 512 case 'K': case 'L': case 'M': case 'N': case 'O': 513 case 'P': case 'Q': case 'R': case 'S': case 'T': 514 case 'U': case 'V': case 'W': case 'X': case 'Y': 515 case 'Z': 516 case 'a': case 'b': case 'c': case 'd': case 'e': 517 case 'f': case 'g': case 'h': case 'i': case 'j': 518 case 'k': case 'l': case 'm': case 'n': case 'o': 519 case 'p': case 'q': case 'r': case 's': case 't': 520 case 'u': case 'v': case 'w': case 'x': case 'y': 521 case 'z': 522 case '$': case '_': 523 case '0': case '1': case '2': case '3': case '4': 524 case '5': case '6': case '7': case '8': case '9': 525 case '\u0000': case '\u0001': case '\u0002': case '\u0003': 526 case '\u0004': case '\u0005': case '\u0006': case '\u0007': 527 case '\u0008': case '\u000E': case '\u000F': case '\u0010': 528 case '\u0011': case '\u0012': case '\u0013': case '\u0014': 529 case '\u0015': case '\u0016': case '\u0017': 530 case '\u0018': case '\u0019': case '\u001B': 531 case '\u007F': 532 break; 533 case '\u001A': // EOI is also a legal identifier part 534 if (bp >= buflen) { 535 name = names.fromChars(sbuf, 0, sp); 536 token = keywords.key(name); 537 return; 538 } 539 break; 540 default: 541 if (ch < '\u0080') { 542 // all ASCII range chars already handled, above 543 isJavaIdentifierPart = false; 544 } else { 545 high = scanSurrogates(); 546 if (high != 0) { 547 if (sp == sbuf.length) { 548 putChar(high); 549 } else { 550 sbuf[sp++] = high; 551 } 552 isJavaIdentifierPart = Character.isJavaIdentifierPart( 553 Character.toCodePoint(high, ch)); 554 } else { 555 isJavaIdentifierPart = Character.isJavaIdentifierPart(ch); 556 } 557 } 558 if (!isJavaIdentifierPart) { 559 name = names.fromChars(sbuf, 0, sp); 560 token = keywords.key(name); 561 return; 562 } 563 } 564 } while (true); 565 } 566 567 /** Are surrogates supported? 568 */ 569 final static boolean surrogatesSupported = surrogatesSupported(); 570 private static boolean surrogatesSupported() { 571 try { 572 Character.isHighSurrogate('a'); 573 return true; 574 } catch (NoSuchMethodError ex) { 575 return false; 576 } 577 } 578 579 /** Scan surrogate pairs. If 'ch' is a high surrogate and 580 * the next character is a low surrogate, then put the low 581 * surrogate in 'ch', and return the high surrogate. 582 * otherwise, just return 0. 583 */ 584 private char scanSurrogates() { 585 if (surrogatesSupported && Character.isHighSurrogate(ch)) { 586 char high = ch; 587 588 scanChar(); 589 590 if (Character.isLowSurrogate(ch)) { 591 return high; 592 } 593 594 ch = high; 595 } 596 597 return 0; 598 } 599 600 /** Return true if ch can be part of an operator. 601 */ 602 private boolean isSpecial(char ch) { 603 switch (ch) { 604 case '!': case '%': case '&': case '*': case '?': 605 case '+': case '-': case ':': case '<': case '=': 606 case '>': case '^': case '|': case '~': 607 case '@': 608 return true; 609 default: 610 return false; 611 } 612 } 613 614 /** Read longest possible sequence of special characters and convert 615 * to token. 616 */ 617 private void scanOperator() { 618 while (true) { 619 putChar(ch); 620 Name newname = names.fromChars(sbuf, 0, sp); 621 if (keywords.key(newname) == IDENTIFIER) { 622 sp--; 623 break; 624 } 625 name = newname; 626 token = keywords.key(newname); 627 scanChar(); 628 if (!isSpecial(ch)) break; 629 } 630 } 631 632 /** 633 * Scan a documention comment; determine if a deprecated tag is present. 634 * Called once the initial /, * have been skipped, positioned at the second * 635 * (which is treated as the beginning of the first line). 636 * Stops positioned at the closing '/'. 637 */ 638 @SuppressWarnings("fallthrough") 639 private void scanDocComment() { 640 boolean deprecatedPrefix = false; 641 642 forEachLine: 643 while (bp < buflen) { 644 645 // Skip optional WhiteSpace at beginning of line 646 while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) { 647 scanCommentChar(); 648 } 649 650 // Skip optional consecutive Stars 651 while (bp < buflen && ch == '*') { 652 scanCommentChar(); 653 if (ch == '/') { 654 return; 655 } 656 } 657 658 // Skip optional WhiteSpace after Stars 659 while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) { 660 scanCommentChar(); 661 } 662 663 deprecatedPrefix = false; 664 // At beginning of line in the JavaDoc sense. 665 if (bp < buflen && ch == '@' && !deprecatedFlag) { 666 scanCommentChar(); 667 if (bp < buflen && ch == 'd') { 668 scanCommentChar(); 669 if (bp < buflen && ch == 'e') { 670 scanCommentChar(); 671 if (bp < buflen && ch == 'p') { 672 scanCommentChar(); 673 if (bp < buflen && ch == 'r') { 674 scanCommentChar(); 675 if (bp < buflen && ch == 'e') { 676 scanCommentChar(); 677 if (bp < buflen && ch == 'c') { 678 scanCommentChar(); 679 if (bp < buflen && ch == 'a') { 680 scanCommentChar(); 681 if (bp < buflen && ch == 't') { 682 scanCommentChar(); 683 if (bp < buflen && ch == 'e') { 684 scanCommentChar(); 685 if (bp < buflen && ch == 'd') { 686 deprecatedPrefix = true; 687 scanCommentChar(); 688 }}}}}}}}}}} 689 if (deprecatedPrefix && bp < buflen) { 690 if (Character.isWhitespace(ch)) { 691 deprecatedFlag = true; 692 } else if (ch == '*') { 693 scanCommentChar(); 694 if (ch == '/') { 695 deprecatedFlag = true; 696 return; 697 } 698 } 699 } 700 701 // Skip rest of line 702 while (bp < buflen) { 703 switch (ch) { 704 case '*': 705 scanCommentChar(); 706 if (ch == '/') { 707 return; 708 } 709 break; 710 case CR: // (Spec 3.4) 711 scanCommentChar(); 712 if (ch != LF) { 713 continue forEachLine; 714 } 715 /* fall through to LF case */ 716 case LF: // (Spec 3.4) 717 scanCommentChar(); 718 continue forEachLine; 719 default: 720 scanCommentChar(); 721 } 722 } // rest of line 723 } // forEachLine 724 return; 725 } 726 727 /** The value of a literal token, recorded as a string. 728 * For integers, leading 0x and 'l' suffixes are suppressed. 729 */ 730 public String stringVal() { 731 return new String(sbuf, 0, sp); 732 } 733 734 /** Read token. 735 */ 736 public void nextToken() { 737 738 try { 739 prevEndPos = endPos; 740 sp = 0; 741 742 while (true) { 743 pos = bp; 744 switch (ch) { 745 case ' ': // (Spec 3.6) 746 case '\t': // (Spec 3.6) 747 case FF: // (Spec 3.6) 748 do { 749 scanChar(); 750 } while (ch == ' ' || ch == '\t' || ch == FF); 751 endPos = bp; 752 processWhiteSpace(); 753 break; 754 case LF: // (Spec 3.4) 755 scanChar(); 756 endPos = bp; 757 processLineTerminator(); 758 break; 759 case CR: // (Spec 3.4) 760 scanChar(); 761 if (ch == LF) { 762 scanChar(); 763 } 764 endPos = bp; 765 processLineTerminator(); 766 break; 767 case 'A': case 'B': case 'C': case 'D': case 'E': 768 case 'F': case 'G': case 'H': case 'I': case 'J': 769 case 'K': case 'L': case 'M': case 'N': case 'O': 770 case 'P': case 'Q': case 'R': case 'S': case 'T': 771 case 'U': case 'V': case 'W': case 'X': case 'Y': 772 case 'Z': 773 case 'a': case 'b': case 'c': case 'd': case 'e': 774 case 'f': case 'g': case 'h': case 'i': case 'j': 775 case 'k': case 'l': case 'm': case 'n': case 'o': 776 case 'p': case 'q': case 'r': case 's': case 't': 777 case 'u': case 'v': case 'w': case 'x': case 'y': 778 case 'z': 779 case '$': case '_': 780 scanIdent(); 781 return; 782 case '0': 783 scanChar(); 784 if (ch == 'x' || ch == 'X') { 785 scanChar(); 786 skipIllegalUnderscores(); 787 if (ch == '.') { 788 scanHexFractionAndSuffix(false); 789 } else if (digit(16) < 0) { 790 lexError("invalid.hex.number"); 791 } else { 792 scanNumber(16); 793 } 794 } else if (ch == 'b' || ch == 'B') { 795 if (!allowBinaryLiterals) { 796 lexError("unsupported.binary.lit", source.name); 797 allowBinaryLiterals = true; 798 } 799 scanChar(); 800 skipIllegalUnderscores(); 801 if (digit(2) < 0) { 802 lexError("invalid.binary.number"); 803 } else { 804 scanNumber(2); 805 } 806 } else { 807 putChar('0'); 808 if (ch == '_') { 809 int savePos = bp; 810 do { 811 scanChar(); 812 } while (ch == '_'); 813 if (digit(10) < 0) { 814 lexError(savePos, "illegal.underscore"); 815 } 816 } 817 scanNumber(8); 818 } 819 return; 820 case '1': case '2': case '3': case '4': 821 case '5': case '6': case '7': case '8': case '9': 822 scanNumber(10); 823 return; 824 case '.': 825 scanChar(); 826 if ('0' <= ch && ch <= '9') { 827 putChar('.'); 828 scanFractionAndSuffix(); 829 } else if (ch == '.') { 830 putChar('.'); putChar('.'); 831 scanChar(); 832 if (ch == '.') { 833 scanChar(); 834 putChar('.'); 835 token = ELLIPSIS; 836 } else { 837 lexError("malformed.fp.lit"); 838 } 839 } else { 840 token = DOT; 841 } 842 return; 843 case ',': 844 scanChar(); token = COMMA; return; 845 case ';': 846 scanChar(); token = SEMI; return; 847 case '(': 848 scanChar(); token = LPAREN; return; 849 case ')': 850 scanChar(); token = RPAREN; return; 851 case '[': 852 scanChar(); token = LBRACKET; return; 853 case ']': 854 scanChar(); token = RBRACKET; return; 855 case '{': 856 scanChar(); token = LBRACE; return; 857 case '}': 858 scanChar(); token = RBRACE; return; 859 case '/': 860 scanChar(); 861 if (ch == '/') { 862 do { 863 scanCommentChar(); 864 } while (ch != CR && ch != LF && bp < buflen); 865 if (bp < buflen) { 866 endPos = bp; 867 processComment(CommentStyle.LINE); 868 } 869 break; 870 } else if (ch == '*') { 871 scanChar(); 872 CommentStyle style; 873 if (ch == '*') { 874 style = CommentStyle.JAVADOC; 875 scanDocComment(); 876 } else { 877 style = CommentStyle.BLOCK; 878 while (bp < buflen) { 879 if (ch == '*') { 880 scanChar(); 881 if (ch == '/') break; 882 } else { 883 scanCommentChar(); 884 } 885 } 886 } 887 if (ch == '/') { 888 scanChar(); 889 endPos = bp; 890 processComment(style); 891 break; 892 } else { 893 lexError("unclosed.comment"); 894 return; 895 } 896 } else if (ch == '=') { 897 name = names.slashequals; 898 token = SLASHEQ; 899 scanChar(); 900 } else { 901 name = names.slash; 902 token = SLASH; 903 } 904 return; 905 case '\'': 906 scanChar(); 907 if (ch == '\'') { 908 lexError("empty.char.lit"); 909 } else { 910 if (ch == CR || ch == LF) 911 lexError(pos, "illegal.line.end.in.char.lit"); 912 scanLitChar(); 913 if (ch == '\'') { 914 scanChar(); 915 token = CHARLITERAL; 916 } else { 917 lexError(pos, "unclosed.char.lit"); 918 } 919 } 920 return; 921 case '\"': 922 scanChar(); 923 while (ch != '\"' && ch != CR && ch != LF && bp < buflen) 924 scanLitChar(); 925 if (ch == '\"') { 926 token = STRINGLITERAL; 927 scanChar(); 928 } else { 929 lexError(pos, "unclosed.str.lit"); 930 } 931 return; 932 default: 933 if (isSpecial(ch)) { 934 scanOperator(); 935 } else { 936 boolean isJavaIdentifierStart; 937 if (ch < '\u0080') { 938 // all ASCII range chars already handled, above 939 isJavaIdentifierStart = false; 940 } else { 941 char high = scanSurrogates(); 942 if (high != 0) { 943 if (sp == sbuf.length) { 944 putChar(high); 945 } else { 946 sbuf[sp++] = high; 947 } 948 949 isJavaIdentifierStart = Character.isJavaIdentifierStart( 950 Character.toCodePoint(high, ch)); 951 } else { 952 isJavaIdentifierStart = Character.isJavaIdentifierStart(ch); 953 } 954 } 955 if (isJavaIdentifierStart) { 956 scanIdent(); 957 } else if (bp == buflen || ch == EOI && bp+1 == buflen) { // JLS 3.5 958 token = EOF; 959 pos = bp = eofPos; 960 } else { 961 lexError("illegal.char", String.valueOf((int)ch)); 962 scanChar(); 963 } 964 } 965 return; 966 } 967 } 968 } finally { 969 endPos = bp; 970 if (scannerDebug) 971 System.out.println("nextToken(" + pos 972 + "," + endPos + ")=|" + 973 new String(getRawCharacters(pos, endPos)) 974 + "|"); 975 } 976 } 977 978 /** Return the current token, set by nextToken(). 979 */ 980 public Token token() { 981 return token; 982 } 983 984 /** Sets the current token. 985 */ 986 public void token(Token token) { 987 this.token = token; 988 } 989 990 /** Return the current token's position: a 0-based 991 * offset from beginning of the raw input stream 992 * (before unicode translation) 993 */ 994 public int pos() { 995 return pos; 996 } 997 998 /** Return the last character position of the current token. 999 */ 1000 public int endPos() { 1001 return endPos; 1002 } 1003 1004 /** Return the last character position of the previous token. 1005 */ 1006 public int prevEndPos() { 1007 return prevEndPos; 1008 } 1009 1010 /** Return the position where a lexical error occurred; 1011 */ 1012 public int errPos() { 1013 return errPos; 1014 } 1015 1016 /** Set the position where a lexical error occurred; 1017 */ 1018 public void errPos(int pos) { 1019 errPos = pos; 1020 } 1021 1022 /** Return the name of an identifier or token for the current token. 1023 */ 1024 public Name name() { 1025 return name; 1026 } 1027 1028 /** Return the radix of a numeric literal token. 1029 */ 1030 public int radix() { 1031 return radix; 1032 } 1033 1034 /** Has a @deprecated been encountered in last doc comment? 1035 * This needs to be reset by client with resetDeprecatedFlag. 1036 */ 1037 public boolean deprecatedFlag() { 1038 return deprecatedFlag; 1039 } 1040 1041 public void resetDeprecatedFlag() { 1042 deprecatedFlag = false; 1043 } 1044 1045 /** 1046 * Returns the documentation string of the current token. 1047 */ 1048 public String docComment() { 1049 return null; 1050 } 1051 1052 /** 1053 * Returns a copy of the input buffer, up to its inputLength. 1054 * Unicode escape sequences are not translated. 1055 */ 1056 public char[] getRawCharacters() { 1057 char[] chars = new char[buflen]; 1058 System.arraycopy(buf, 0, chars, 0, buflen); 1059 return chars; 1060 } 1061 1062 /** 1063 * Returns a copy of a character array subset of the input buffer. 1064 * The returned array begins at the <code>beginIndex</code> and 1065 * extends to the character at index <code>endIndex - 1</code>. 1066 * Thus the length of the substring is <code>endIndex-beginIndex</code>. 1067 * This behavior is like 1068 * <code>String.substring(beginIndex, endIndex)</code>. 1069 * Unicode escape sequences are not translated. 1070 * 1071 * @param beginIndex the beginning index, inclusive. 1072 * @param endIndex the ending index, exclusive. 1073 * @throws IndexOutOfBounds if either offset is outside of the 1074 * array bounds 1075 */ 1076 public char[] getRawCharacters(int beginIndex, int endIndex) { 1077 int length = endIndex - beginIndex; 1078 char[] chars = new char[length]; 1079 System.arraycopy(buf, beginIndex, chars, 0, length); 1080 return chars; 1081 } 1082 1083 public enum CommentStyle { 1084 LINE, 1085 BLOCK, 1086 JAVADOC, 1087 } 1088 1089 /** 1090 * Called when a complete comment has been scanned. pos and endPos 1091 * will mark the comment boundary. 1092 */ 1093 protected void processComment(CommentStyle style) { 1094 if (scannerDebug) 1095 System.out.println("processComment(" + pos 1096 + "," + endPos + "," + style + ")=|" 1097 + new String(getRawCharacters(pos, endPos)) 1098 + "|"); 1099 } 1100 1101 /** 1102 * Called when a complete whitespace run has been scanned. pos and endPos 1103 * will mark the whitespace boundary. 1104 */ 1105 protected void processWhiteSpace() { 1106 if (scannerDebug) 1107 System.out.println("processWhitespace(" + pos 1108 + "," + endPos + ")=|" + 1109 new String(getRawCharacters(pos, endPos)) 1110 + "|"); 1111 } 1112 1113 /** 1114 * Called when a line terminator has been processed. 1115 */ 1116 protected void processLineTerminator() { 1117 if (scannerDebug) 1118 System.out.println("processTerminator(" + pos 1119 + "," + endPos + ")=|" + 1120 new String(getRawCharacters(pos, endPos)) 1121 + "|"); 1122 } 1123 1124 /** Build a map for translating between line numbers and 1125 * positions in the input. 1126 * 1127 * @return a LineMap */ 1128 public Position.LineMap getLineMap() { 1129 return Position.makeLineMap(buf, buflen, false); 1130 } 1131 1132 }