Home » openjdk-7 » com.sun.tools » javac » parser » [javadoc | source]

    1   /*
    2    * Copyright (c) 1999, 2011, Oracle and/or its affiliates. All rights reserved.
    3    * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    4    *
    5    * This code is free software; you can redistribute it and/or modify it
    6    * under the terms of the GNU General Public License version 2 only, as
    7    * published by the Free Software Foundation.  Oracle designates this
    8    * particular file as subject to the "Classpath" exception as provided
    9    * by Oracle in the LICENSE file that accompanied this code.
   10    *
   11    * This code is distributed in the hope that it will be useful, but WITHOUT
   12    * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   13    * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   14    * version 2 for more details (a copy is included in the LICENSE file that
   15    * accompanied this code).
   16    *
   17    * You should have received a copy of the GNU General Public License version
   18    * 2 along with this work; if not, write to the Free Software Foundation,
   19    * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   20    *
   21    * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   22    * or visit www.oracle.com if you need additional information or have any
   23    * questions.
   24    */
   25   
   26   package com.sun.tools.javac.parser;
   27   
import java.nio.*;

import com.sun.tools.javac.code.Source;
import com.sun.tools.javac.file.JavacFileManager;
import com.sun.tools.javac.util.*;


import static com.sun.tools.javac.parser.Token.*;
import static com.sun.tools.javac.util.LayoutCharacters.*;
   37   
   38   /** The lexical analyzer maps an input stream consisting of
   39    *  ASCII characters and Unicode escapes into a token sequence.
   40    *
   41    *  <p><b>This is NOT part of any supported API.
   42    *  If you write code that depends on this, you do so at your own risk.
   43    *  This code and its internal interfaces are subject to change or
   44    *  deletion without notice.</b>
   45    */
   46   public class Scanner implements Lexer {
   47   
   48       private static boolean scannerDebug = false;
   49   
   50       /* Output variables; set by nextToken():
   51        */
   52   
   53       /** The token, set by nextToken().
   54        */
   55       private Token token;
   56   
   57       /** Allow hex floating-point literals.
   58        */
   59       private boolean allowHexFloats;
   60   
   61       /** Allow binary literals.
   62        */
   63       private boolean allowBinaryLiterals;
   64   
   65       /** Allow underscores in literals.
   66        */
   67       private boolean allowUnderscoresInLiterals;
   68   
   69       /** The source language setting.
   70        */
   71       private Source source;
   72   
   73       /** The token's position, 0-based offset from beginning of text.
   74        */
   75       private int pos;
   76   
   77       /** Character position just after the last character of the token.
   78        */
   79       private int endPos;
   80   
   81       /** The last character position of the previous token.
   82        */
   83       private int prevEndPos;
   84   
   85       /** The position where a lexical error occurred;
   86        */
   87       private int errPos = Position.NOPOS;
   88   
   89       /** The name of an identifier or token:
   90        */
   91       private Name name;
   92   
   93       /** The radix of a numeric literal token.
   94        */
   95       private int radix;
   96   
   97       /** Has a @deprecated been encountered in last doc comment?
   98        *  this needs to be reset by client.
   99        */
  100       protected boolean deprecatedFlag = false;
  101   
  102       /** A character buffer for literals.
  103        */
  104       private char[] sbuf = new char[128];
  105       private int sp;
  106   
  107       /** The input buffer, index of next chacter to be read,
  108        *  index of one past last character in buffer.
  109        */
  110       private char[] buf;
  111       private int bp;
  112       private int buflen;
  113       private int eofPos;
  114   
  115       /** The current character.
  116        */
  117       private char ch;
  118   
  119       /** The buffer index of the last converted unicode character
  120        */
  121       private int unicodeConversionBp = -1;
  122   
  123       /** The log to be used for error reporting.
  124        */
  125       private final Log log;
  126   
  127       /** The name table. */
  128       private final Names names;
  129   
  130       /** The keyword table. */
  131       private final Keywords keywords;
  132   
  133       /** Common code for constructors. */
  134       private Scanner(ScannerFactory fac) {
  135           log = fac.log;
  136           names = fac.names;
  137           keywords = fac.keywords;
  138           source = fac.source;
  139           allowBinaryLiterals = source.allowBinaryLiterals();
  140           allowHexFloats = source.allowHexFloats();
  141           allowUnderscoresInLiterals = source.allowUnderscoresInLiterals();
  142       }
  143   
  144       private static final boolean hexFloatsWork = hexFloatsWork();
  145       private static boolean hexFloatsWork() {
  146           try {
  147               Float.valueOf("0x1.0p1");
  148               return true;
  149           } catch (NumberFormatException ex) {
  150               return false;
  151           }
  152       }
  153   
  154       /** Create a scanner from the input buffer.  buffer must implement
  155        *  array() and compact(), and remaining() must be less than limit().
  156        */
  157       protected Scanner(ScannerFactory fac, CharBuffer buffer) {
  158           this(fac, JavacFileManager.toArray(buffer), buffer.limit());
  159       }
  160   
  161       /**
  162        * Create a scanner from the input array.  This method might
  163        * modify the array.  To avoid copying the input array, ensure
  164        * that {@code inputLength < input.length} or
  165        * {@code input[input.length -1]} is a white space character.
  166        *
  167        * @param fac the factory which created this Scanner
  168        * @param input the input, might be modified
  169        * @param inputLength the size of the input.
  170        * Must be positive and less than or equal to input.length.
  171        */
  172       protected Scanner(ScannerFactory fac, char[] input, int inputLength) {
  173           this(fac);
  174           eofPos = inputLength;
  175           if (inputLength == input.length) {
  176               if (input.length > 0 && Character.isWhitespace(input[input.length - 1])) {
  177                   inputLength--;
  178               } else {
  179                   char[] newInput = new char[inputLength + 1];
  180                   System.arraycopy(input, 0, newInput, 0, input.length);
  181                   input = newInput;
  182               }
  183           }
  184           buf = input;
  185           buflen = inputLength;
  186           buf[buflen] = EOI;
  187           bp = -1;
  188           scanChar();
  189       }
  190   
  191       /** Report an error at the given position using the provided arguments.
  192        */
  193       private void lexError(int pos, String key, Object... args) {
  194           log.error(pos, key, args);
  195           token = ERROR;
  196           errPos = pos;
  197       }
  198   
  199       /** Report an error at the current token position using the provided
  200        *  arguments.
  201        */
  202       private void lexError(String key, Object... args) {
  203           lexError(pos, key, args);
  204       }
  205   
  206       /** Convert an ASCII digit from its base (8, 10, or 16)
  207        *  to its value.
  208        */
  209       private int digit(int base) {
  210           char c = ch;
  211           int result = Character.digit(c, base);
  212           if (result >= 0 && c > 0x7f) {
  213               lexError(pos+1, "illegal.nonascii.digit");
  214               ch = "0123456789abcdef".charAt(result);
  215           }
  216           return result;
  217       }
  218   
  219       /** Convert unicode escape; bp points to initial '\' character
  220        *  (Spec 3.3).
  221        */
  222       private void convertUnicode() {
  223           if (ch == '\\' && unicodeConversionBp != bp) {
  224               bp++; ch = buf[bp];
  225               if (ch == 'u') {
  226                   do {
  227                       bp++; ch = buf[bp];
  228                   } while (ch == 'u');
  229                   int limit = bp + 3;
  230                   if (limit < buflen) {
  231                       int d = digit(16);
  232                       int code = d;
  233                       while (bp < limit && d >= 0) {
  234                           bp++; ch = buf[bp];
  235                           d = digit(16);
  236                           code = (code << 4) + d;
  237                       }
  238                       if (d >= 0) {
  239                           ch = (char)code;
  240                           unicodeConversionBp = bp;
  241                           return;
  242                       }
  243                   }
  244                   lexError(bp, "illegal.unicode.esc");
  245               } else {
  246                   bp--;
  247                   ch = '\\';
  248               }
  249           }
  250       }
  251   
  252       /** Read next character.
  253        */
  254       private void scanChar() {
  255           ch = buf[++bp];
  256           if (ch == '\\') {
  257               convertUnicode();
  258           }
  259       }
  260   
  261       /** Read next character in comment, skipping over double '\' characters.
  262        */
  263       private void scanCommentChar() {
  264           scanChar();
  265           if (ch == '\\') {
  266               if (buf[bp+1] == '\\' && unicodeConversionBp != bp) {
  267                   bp++;
  268               } else {
  269                   convertUnicode();
  270               }
  271           }
  272       }
  273   
  274       /** Append a character to sbuf.
  275        */
  276       private void putChar(char ch) {
  277           if (sp == sbuf.length) {
  278               char[] newsbuf = new char[sbuf.length * 2];
  279               System.arraycopy(sbuf, 0, newsbuf, 0, sbuf.length);
  280               sbuf = newsbuf;
  281           }
  282           sbuf[sp++] = ch;
  283       }
  284   
  285       /** Read next character in character or string literal and copy into sbuf.
  286        */
  287       private void scanLitChar() {
  288           if (ch == '\\') {
  289               if (buf[bp+1] == '\\' && unicodeConversionBp != bp) {
  290                   bp++;
  291                   putChar('\\');
  292                   scanChar();
  293               } else {
  294                   scanChar();
  295                   switch (ch) {
  296                   case '0': case '1': case '2': case '3':
  297                   case '4': case '5': case '6': case '7':
  298                       char leadch = ch;
  299                       int oct = digit(8);
  300                       scanChar();
  301                       if ('0' <= ch && ch <= '7') {
  302                           oct = oct * 8 + digit(8);
  303                           scanChar();
  304                           if (leadch <= '3' && '0' <= ch && ch <= '7') {
  305                               oct = oct * 8 + digit(8);
  306                               scanChar();
  307                           }
  308                       }
  309                       putChar((char)oct);
  310                       break;
  311                   case 'b':
  312                       putChar('\b'); scanChar(); break;
  313                   case 't':
  314                       putChar('\t'); scanChar(); break;
  315                   case 'n':
  316                       putChar('\n'); scanChar(); break;
  317                   case 'f':
  318                       putChar('\f'); scanChar(); break;
  319                   case 'r':
  320                       putChar('\r'); scanChar(); break;
  321                   case '\'':
  322                       putChar('\''); scanChar(); break;
  323                   case '\"':
  324                       putChar('\"'); scanChar(); break;
  325                   case '\\':
  326                       putChar('\\'); scanChar(); break;
  327                   default:
  328                       lexError(bp, "illegal.esc.char");
  329                   }
  330               }
  331           } else if (bp != buflen) {
  332               putChar(ch); scanChar();
  333           }
  334       }
  335   
  336       private void scanDigits(int digitRadix) {
  337           char saveCh;
  338           int savePos;
  339           do {
  340               if (ch != '_') {
  341                   putChar(ch);
  342               } else {
  343                   if (!allowUnderscoresInLiterals) {
  344                       lexError("unsupported.underscore.lit", source.name);
  345                       allowUnderscoresInLiterals = true;
  346                   }
  347               }
  348               saveCh = ch;
  349               savePos = bp;
  350               scanChar();
  351           } while (digit(digitRadix) >= 0 || ch == '_');
  352           if (saveCh == '_')
  353               lexError(savePos, "illegal.underscore");
  354       }
  355   
  356       /** Read fractional part of hexadecimal floating point number.
  357        */
  358       private void scanHexExponentAndSuffix() {
  359           if (ch == 'p' || ch == 'P') {
  360               putChar(ch);
  361               scanChar();
  362               skipIllegalUnderscores();
  363               if (ch == '+' || ch == '-') {
  364                   putChar(ch);
  365                   scanChar();
  366               }
  367               skipIllegalUnderscores();
  368               if ('0' <= ch && ch <= '9') {
  369                   scanDigits(10);
  370                   if (!allowHexFloats) {
  371                       lexError("unsupported.fp.lit", source.name);
  372                       allowHexFloats = true;
  373                   }
  374                   else if (!hexFloatsWork)
  375                       lexError("unsupported.cross.fp.lit");
  376               } else
  377                   lexError("malformed.fp.lit");
  378           } else {
  379               lexError("malformed.fp.lit");
  380           }
  381           if (ch == 'f' || ch == 'F') {
  382               putChar(ch);
  383               scanChar();
  384               token = FLOATLITERAL;
  385           } else {
  386               if (ch == 'd' || ch == 'D') {
  387                   putChar(ch);
  388                   scanChar();
  389               }
  390               token = DOUBLELITERAL;
  391           }
  392       }
  393   
  394       /** Read fractional part of floating point number.
  395        */
  396       private void scanFraction() {
  397           skipIllegalUnderscores();
  398           if ('0' <= ch && ch <= '9') {
  399               scanDigits(10);
  400           }
  401           int sp1 = sp;
  402           if (ch == 'e' || ch == 'E') {
  403               putChar(ch);
  404               scanChar();
  405               skipIllegalUnderscores();
  406               if (ch == '+' || ch == '-') {
  407                   putChar(ch);
  408                   scanChar();
  409               }
  410               skipIllegalUnderscores();
  411               if ('0' <= ch && ch <= '9') {
  412                   scanDigits(10);
  413                   return;
  414               }
  415               lexError("malformed.fp.lit");
  416               sp = sp1;
  417           }
  418       }
  419   
  420       /** Read fractional part and 'd' or 'f' suffix of floating point number.
  421        */
  422       private void scanFractionAndSuffix() {
  423           this.radix = 10;
  424           scanFraction();
  425           if (ch == 'f' || ch == 'F') {
  426               putChar(ch);
  427               scanChar();
  428               token = FLOATLITERAL;
  429           } else {
  430               if (ch == 'd' || ch == 'D') {
  431                   putChar(ch);
  432                   scanChar();
  433               }
  434               token = DOUBLELITERAL;
  435           }
  436       }
  437   
  438       /** Read fractional part and 'd' or 'f' suffix of floating point number.
  439        */
  440       private void scanHexFractionAndSuffix(boolean seendigit) {
  441           this.radix = 16;
  442           Assert.check(ch == '.');
  443           putChar(ch);
  444           scanChar();
  445           skipIllegalUnderscores();
  446           if (digit(16) >= 0) {
  447               seendigit = true;
  448               scanDigits(16);
  449           }
  450           if (!seendigit)
  451               lexError("invalid.hex.number");
  452           else
  453               scanHexExponentAndSuffix();
  454       }
  455   
  456       private void skipIllegalUnderscores() {
  457           if (ch == '_') {
  458               lexError(bp, "illegal.underscore");
  459               while (ch == '_')
  460                   scanChar();
  461           }
  462       }
  463   
  464       /** Read a number.
  465        *  @param radix  The radix of the number; one of 2, j8, 10, 16.
  466        */
  467       private void scanNumber(int radix) {
  468           this.radix = radix;
  469           // for octal, allow base-10 digit in case it's a float literal
  470           int digitRadix = (radix == 8 ? 10 : radix);
  471           boolean seendigit = false;
  472           if (digit(digitRadix) >= 0) {
  473               seendigit = true;
  474               scanDigits(digitRadix);
  475           }
  476           if (radix == 16 && ch == '.') {
  477               scanHexFractionAndSuffix(seendigit);
  478           } else if (seendigit && radix == 16 && (ch == 'p' || ch == 'P')) {
  479               scanHexExponentAndSuffix();
  480           } else if (digitRadix == 10 && ch == '.') {
  481               putChar(ch);
  482               scanChar();
  483               scanFractionAndSuffix();
  484           } else if (digitRadix == 10 &&
  485                      (ch == 'e' || ch == 'E' ||
  486                       ch == 'f' || ch == 'F' ||
  487                       ch == 'd' || ch == 'D')) {
  488               scanFractionAndSuffix();
  489           } else {
  490               if (ch == 'l' || ch == 'L') {
  491                   scanChar();
  492                   token = LONGLITERAL;
  493               } else {
  494                   token = INTLITERAL;
  495               }
  496           }
  497       }
  498   
  499       /** Read an identifier.
  500        */
  501       private void scanIdent() {
  502           boolean isJavaIdentifierPart;
  503           char high;
  504           do {
  505               if (sp == sbuf.length) putChar(ch); else sbuf[sp++] = ch;
  506               // optimization, was: putChar(ch);
  507   
  508               scanChar();
  509               switch (ch) {
  510               case 'A': case 'B': case 'C': case 'D': case 'E':
  511               case 'F': case 'G': case 'H': case 'I': case 'J':
  512               case 'K': case 'L': case 'M': case 'N': case 'O':
  513               case 'P': case 'Q': case 'R': case 'S': case 'T':
  514               case 'U': case 'V': case 'W': case 'X': case 'Y':
  515               case 'Z':
  516               case 'a': case 'b': case 'c': case 'd': case 'e':
  517               case 'f': case 'g': case 'h': case 'i': case 'j':
  518               case 'k': case 'l': case 'm': case 'n': case 'o':
  519               case 'p': case 'q': case 'r': case 's': case 't':
  520               case 'u': case 'v': case 'w': case 'x': case 'y':
  521               case 'z':
  522               case '$': case '_':
  523               case '0': case '1': case '2': case '3': case '4':
  524               case '5': case '6': case '7': case '8': case '9':
  525               case '\u0000': case '\u0001': case '\u0002': case '\u0003':
  526               case '\u0004': case '\u0005': case '\u0006': case '\u0007':
  527               case '\u0008': case '\u000E': case '\u000F': case '\u0010':
  528               case '\u0011': case '\u0012': case '\u0013': case '\u0014':
  529               case '\u0015': case '\u0016': case '\u0017':
  530               case '\u0018': case '\u0019': case '\u001B':
  531               case '\u007F':
  532                   break;
  533               case '\u001A': // EOI is also a legal identifier part
  534                   if (bp >= buflen) {
  535                       name = names.fromChars(sbuf, 0, sp);
  536                       token = keywords.key(name);
  537                       return;
  538                   }
  539                   break;
  540               default:
  541                   if (ch < '\u0080') {
  542                       // all ASCII range chars already handled, above
  543                       isJavaIdentifierPart = false;
  544                   } else {
  545                       high = scanSurrogates();
  546                       if (high != 0) {
  547                           if (sp == sbuf.length) {
  548                               putChar(high);
  549                           } else {
  550                               sbuf[sp++] = high;
  551                           }
  552                           isJavaIdentifierPart = Character.isJavaIdentifierPart(
  553                               Character.toCodePoint(high, ch));
  554                       } else {
  555                           isJavaIdentifierPart = Character.isJavaIdentifierPart(ch);
  556                       }
  557                   }
  558                   if (!isJavaIdentifierPart) {
  559                       name = names.fromChars(sbuf, 0, sp);
  560                       token = keywords.key(name);
  561                       return;
  562                   }
  563               }
  564           } while (true);
  565       }
  566   
  567       /** Are surrogates supported?
  568        */
  569       final static boolean surrogatesSupported = surrogatesSupported();
  570       private static boolean surrogatesSupported() {
  571           try {
  572               Character.isHighSurrogate('a');
  573               return true;
  574           } catch (NoSuchMethodError ex) {
  575               return false;
  576           }
  577       }
  578   
  579       /** Scan surrogate pairs.  If 'ch' is a high surrogate and
  580        *  the next character is a low surrogate, then put the low
  581        *  surrogate in 'ch', and return the high surrogate.
  582        *  otherwise, just return 0.
  583        */
  584       private char scanSurrogates() {
  585           if (surrogatesSupported && Character.isHighSurrogate(ch)) {
  586               char high = ch;
  587   
  588               scanChar();
  589   
  590               if (Character.isLowSurrogate(ch)) {
  591                   return high;
  592               }
  593   
  594               ch = high;
  595           }
  596   
  597           return 0;
  598       }
  599   
  600       /** Return true if ch can be part of an operator.
  601        */
  602       private boolean isSpecial(char ch) {
  603           switch (ch) {
  604           case '!': case '%': case '&': case '*': case '?':
  605           case '+': case '-': case ':': case '<': case '=':
  606           case '>': case '^': case '|': case '~':
  607           case '@':
  608               return true;
  609           default:
  610               return false;
  611           }
  612       }
  613   
  614       /** Read longest possible sequence of special characters and convert
  615        *  to token.
  616        */
  617       private void scanOperator() {
  618           while (true) {
  619               putChar(ch);
  620               Name newname = names.fromChars(sbuf, 0, sp);
  621               if (keywords.key(newname) == IDENTIFIER) {
  622                   sp--;
  623                   break;
  624               }
  625               name = newname;
  626               token = keywords.key(newname);
  627               scanChar();
  628               if (!isSpecial(ch)) break;
  629           }
  630       }
  631   
  632       /**
  633        * Scan a documention comment; determine if a deprecated tag is present.
  634        * Called once the initial /, * have been skipped, positioned at the second *
  635        * (which is treated as the beginning of the first line).
  636        * Stops positioned at the closing '/'.
  637        */
  638       @SuppressWarnings("fallthrough")
  639       private void scanDocComment() {
  640           boolean deprecatedPrefix = false;
  641   
  642           forEachLine:
  643           while (bp < buflen) {
  644   
  645               // Skip optional WhiteSpace at beginning of line
  646               while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) {
  647                   scanCommentChar();
  648               }
  649   
  650               // Skip optional consecutive Stars
  651               while (bp < buflen && ch == '*') {
  652                   scanCommentChar();
  653                   if (ch == '/') {
  654                       return;
  655                   }
  656               }
  657   
  658               // Skip optional WhiteSpace after Stars
  659               while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) {
  660                   scanCommentChar();
  661               }
  662   
  663               deprecatedPrefix = false;
  664               // At beginning of line in the JavaDoc sense.
  665               if (bp < buflen && ch == '@' && !deprecatedFlag) {
  666                   scanCommentChar();
  667                   if (bp < buflen && ch == 'd') {
  668                       scanCommentChar();
  669                       if (bp < buflen && ch == 'e') {
  670                           scanCommentChar();
  671                           if (bp < buflen && ch == 'p') {
  672                               scanCommentChar();
  673                               if (bp < buflen && ch == 'r') {
  674                                   scanCommentChar();
  675                                   if (bp < buflen && ch == 'e') {
  676                                       scanCommentChar();
  677                                       if (bp < buflen && ch == 'c') {
  678                                           scanCommentChar();
  679                                           if (bp < buflen && ch == 'a') {
  680                                               scanCommentChar();
  681                                               if (bp < buflen && ch == 't') {
  682                                                   scanCommentChar();
  683                                                   if (bp < buflen && ch == 'e') {
  684                                                       scanCommentChar();
  685                                                       if (bp < buflen && ch == 'd') {
  686                                                           deprecatedPrefix = true;
  687                                                           scanCommentChar();
  688                                                       }}}}}}}}}}}
  689               if (deprecatedPrefix && bp < buflen) {
  690                   if (Character.isWhitespace(ch)) {
  691                       deprecatedFlag = true;
  692                   } else if (ch == '*') {
  693                       scanCommentChar();
  694                       if (ch == '/') {
  695                           deprecatedFlag = true;
  696                           return;
  697                       }
  698                   }
  699               }
  700   
  701               // Skip rest of line
  702               while (bp < buflen) {
  703                   switch (ch) {
  704                   case '*':
  705                       scanCommentChar();
  706                       if (ch == '/') {
  707                           return;
  708                       }
  709                       break;
  710                   case CR: // (Spec 3.4)
  711                       scanCommentChar();
  712                       if (ch != LF) {
  713                           continue forEachLine;
  714                       }
  715                       /* fall through to LF case */
  716                   case LF: // (Spec 3.4)
  717                       scanCommentChar();
  718                       continue forEachLine;
  719                   default:
  720                       scanCommentChar();
  721                   }
  722               } // rest of line
  723           } // forEachLine
  724           return;
  725       }
  726   
  727       /** The value of a literal token, recorded as a string.
  728        *  For integers, leading 0x and 'l' suffixes are suppressed.
  729        */
  730       public String stringVal() {
  731           return new String(sbuf, 0, sp);
  732       }
  733   
  734       /** Read token.
  735        */
  736       public void nextToken() {
  737   
  738           try {
  739               prevEndPos = endPos;
  740               sp = 0;
  741   
  742               while (true) {
  743                   pos = bp;
  744                   switch (ch) {
  745                   case ' ': // (Spec 3.6)
  746                   case '\t': // (Spec 3.6)
  747                   case FF: // (Spec 3.6)
  748                       do {
  749                           scanChar();
  750                       } while (ch == ' ' || ch == '\t' || ch == FF);
  751                       endPos = bp;
  752                       processWhiteSpace();
  753                       break;
  754                   case LF: // (Spec 3.4)
  755                       scanChar();
  756                       endPos = bp;
  757                       processLineTerminator();
  758                       break;
  759                   case CR: // (Spec 3.4)
  760                       scanChar();
  761                       if (ch == LF) {
  762                           scanChar();
  763                       }
  764                       endPos = bp;
  765                       processLineTerminator();
  766                       break;
  767                   case 'A': case 'B': case 'C': case 'D': case 'E':
  768                   case 'F': case 'G': case 'H': case 'I': case 'J':
  769                   case 'K': case 'L': case 'M': case 'N': case 'O':
  770                   case 'P': case 'Q': case 'R': case 'S': case 'T':
  771                   case 'U': case 'V': case 'W': case 'X': case 'Y':
  772                   case 'Z':
  773                   case 'a': case 'b': case 'c': case 'd': case 'e':
  774                   case 'f': case 'g': case 'h': case 'i': case 'j':
  775                   case 'k': case 'l': case 'm': case 'n': case 'o':
  776                   case 'p': case 'q': case 'r': case 's': case 't':
  777                   case 'u': case 'v': case 'w': case 'x': case 'y':
  778                   case 'z':
  779                   case '$': case '_':
  780                       scanIdent();
  781                       return;
  782                   case '0':
  783                       scanChar();
  784                       if (ch == 'x' || ch == 'X') {
  785                           scanChar();
  786                           skipIllegalUnderscores();
  787                           if (ch == '.') {
  788                               scanHexFractionAndSuffix(false);
  789                           } else if (digit(16) < 0) {
  790                               lexError("invalid.hex.number");
  791                           } else {
  792                               scanNumber(16);
  793                           }
  794                       } else if (ch == 'b' || ch == 'B') {
  795                           if (!allowBinaryLiterals) {
  796                               lexError("unsupported.binary.lit", source.name);
  797                               allowBinaryLiterals = true;
  798                           }
  799                           scanChar();
  800                           skipIllegalUnderscores();
  801                           if (digit(2) < 0) {
  802                               lexError("invalid.binary.number");
  803                           } else {
  804                               scanNumber(2);
  805                           }
  806                       } else {
  807                           putChar('0');
  808                           if (ch == '_') {
  809                               int savePos = bp;
  810                               do {
  811                                   scanChar();
  812                               } while (ch == '_');
  813                               if (digit(10) < 0) {
  814                                   lexError(savePos, "illegal.underscore");
  815                               }
  816                           }
  817                           scanNumber(8);
  818                       }
  819                       return;
  820                   case '1': case '2': case '3': case '4':
  821                   case '5': case '6': case '7': case '8': case '9':
  822                       scanNumber(10);
  823                       return;
  824                   case '.':
  825                       scanChar();
  826                       if ('0' <= ch && ch <= '9') {
  827                           putChar('.');
  828                           scanFractionAndSuffix();
  829                       } else if (ch == '.') {
  830                           putChar('.'); putChar('.');
  831                           scanChar();
  832                           if (ch == '.') {
  833                               scanChar();
  834                               putChar('.');
  835                               token = ELLIPSIS;
  836                           } else {
  837                               lexError("malformed.fp.lit");
  838                           }
  839                       } else {
  840                           token = DOT;
  841                       }
  842                       return;
  843                   case ',':
  844                       scanChar(); token = COMMA; return;
  845                   case ';':
  846                       scanChar(); token = SEMI; return;
  847                   case '(':
  848                       scanChar(); token = LPAREN; return;
  849                   case ')':
  850                       scanChar(); token = RPAREN; return;
  851                   case '[':
  852                       scanChar(); token = LBRACKET; return;
  853                   case ']':
  854                       scanChar(); token = RBRACKET; return;
  855                   case '{':
  856                       scanChar(); token = LBRACE; return;
  857                   case '}':
  858                       scanChar(); token = RBRACE; return;
  859                   case '/':
  860                       scanChar();
  861                       if (ch == '/') {
  862                           do {
  863                               scanCommentChar();
  864                           } while (ch != CR && ch != LF && bp < buflen);
  865                           if (bp < buflen) {
  866                               endPos = bp;
  867                               processComment(CommentStyle.LINE);
  868                           }
  869                           break;
  870                       } else if (ch == '*') {
  871                           scanChar();
  872                           CommentStyle style;
  873                           if (ch == '*') {
  874                               style = CommentStyle.JAVADOC;
  875                               scanDocComment();
  876                           } else {
  877                               style = CommentStyle.BLOCK;
  878                               while (bp < buflen) {
  879                                   if (ch == '*') {
  880                                       scanChar();
  881                                       if (ch == '/') break;
  882                                   } else {
  883                                       scanCommentChar();
  884                                   }
  885                               }
  886                           }
  887                           if (ch == '/') {
  888                               scanChar();
  889                               endPos = bp;
  890                               processComment(style);
  891                               break;
  892                           } else {
  893                               lexError("unclosed.comment");
  894                               return;
  895                           }
  896                       } else if (ch == '=') {
  897                           name = names.slashequals;
  898                           token = SLASHEQ;
  899                           scanChar();
  900                       } else {
  901                           name = names.slash;
  902                           token = SLASH;
  903                       }
  904                       return;
  905                   case '\'':
  906                       scanChar();
  907                       if (ch == '\'') {
  908                           lexError("empty.char.lit");
  909                       } else {
  910                           if (ch == CR || ch == LF)
  911                               lexError(pos, "illegal.line.end.in.char.lit");
  912                           scanLitChar();
  913                           if (ch == '\'') {
  914                               scanChar();
  915                               token = CHARLITERAL;
  916                           } else {
  917                               lexError(pos, "unclosed.char.lit");
  918                           }
  919                       }
  920                       return;
  921                   case '\"':
  922                       scanChar();
  923                       while (ch != '\"' && ch != CR && ch != LF && bp < buflen)
  924                           scanLitChar();
  925                       if (ch == '\"') {
  926                           token = STRINGLITERAL;
  927                           scanChar();
  928                       } else {
  929                           lexError(pos, "unclosed.str.lit");
  930                       }
  931                       return;
  932                   default:
  933                       if (isSpecial(ch)) {
  934                           scanOperator();
  935                       } else {
  936                           boolean isJavaIdentifierStart;
  937                           if (ch < '\u0080') {
  938                               // all ASCII range chars already handled, above
  939                               isJavaIdentifierStart = false;
  940                           } else {
  941                               char high = scanSurrogates();
  942                               if (high != 0) {
  943                                   if (sp == sbuf.length) {
  944                                       putChar(high);
  945                                   } else {
  946                                       sbuf[sp++] = high;
  947                                   }
  948   
  949                                   isJavaIdentifierStart = Character.isJavaIdentifierStart(
  950                                       Character.toCodePoint(high, ch));
  951                               } else {
  952                                   isJavaIdentifierStart = Character.isJavaIdentifierStart(ch);
  953                               }
  954                           }
  955                           if (isJavaIdentifierStart) {
  956                               scanIdent();
  957                           } else if (bp == buflen || ch == EOI && bp+1 == buflen) { // JLS 3.5
  958                               token = EOF;
  959                               pos = bp = eofPos;
  960                           } else {
  961                               lexError("illegal.char", String.valueOf((int)ch));
  962                               scanChar();
  963                           }
  964                       }
  965                       return;
  966                   }
  967               }
  968           } finally {
  969               endPos = bp;
  970               if (scannerDebug)
  971                   System.out.println("nextToken(" + pos
  972                                      + "," + endPos + ")=|" +
  973                                      new String(getRawCharacters(pos, endPos))
  974                                      + "|");
  975           }
  976       }
  977   
  978       /** Return the current token, set by nextToken().
  979        */
  980       public Token token() {
  981           return token;
  982       }
  983   
  984       /** Sets the current token.
  985        */
  986       public void token(Token token) {
  987           this.token = token;
  988       }
  989   
  990       /** Return the current token's position: a 0-based
  991        *  offset from beginning of the raw input stream
  992        *  (before unicode translation)
  993        */
  994       public int pos() {
  995           return pos;
  996       }
  997   
  998       /** Return the last character position of the current token.
  999        */
 1000       public int endPos() {
 1001           return endPos;
 1002       }
 1003   
 1004       /** Return the last character position of the previous token.
 1005        */
 1006       public int prevEndPos() {
 1007           return prevEndPos;
 1008       }
 1009   
 1010       /** Return the position where a lexical error occurred;
 1011        */
 1012       public int errPos() {
 1013           return errPos;
 1014       }
 1015   
 1016       /** Set the position where a lexical error occurred;
 1017        */
 1018       public void errPos(int pos) {
 1019           errPos = pos;
 1020       }
 1021   
 1022       /** Return the name of an identifier or token for the current token.
 1023        */
 1024       public Name name() {
 1025           return name;
 1026       }
 1027   
 1028       /** Return the radix of a numeric literal token.
 1029        */
 1030       public int radix() {
 1031           return radix;
 1032       }
 1033   
 1034       /** Has a @deprecated been encountered in last doc comment?
 1035        *  This needs to be reset by client with resetDeprecatedFlag.
 1036        */
 1037       public boolean deprecatedFlag() {
 1038           return deprecatedFlag;
 1039       }
 1040   
 1041       public void resetDeprecatedFlag() {
 1042           deprecatedFlag = false;
 1043       }
 1044   
 1045       /**
 1046        * Returns the documentation string of the current token.
 1047        */
 1048       public String docComment() {
 1049           return null;
 1050       }
 1051   
 1052       /**
 1053        * Returns a copy of the input buffer, up to its inputLength.
 1054        * Unicode escape sequences are not translated.
 1055        */
 1056       public char[] getRawCharacters() {
 1057           char[] chars = new char[buflen];
 1058           System.arraycopy(buf, 0, chars, 0, buflen);
 1059           return chars;
 1060       }
 1061   
 1062       /**
 1063        * Returns a copy of a character array subset of the input buffer.
 1064        * The returned array begins at the <code>beginIndex</code> and
 1065        * extends to the character at index <code>endIndex - 1</code>.
 1066        * Thus the length of the substring is <code>endIndex-beginIndex</code>.
 1067        * This behavior is like
 1068        * <code>String.substring(beginIndex, endIndex)</code>.
 1069        * Unicode escape sequences are not translated.
 1070        *
 1071        * @param beginIndex the beginning index, inclusive.
 1072        * @param endIndex the ending index, exclusive.
 1073        * @throws IndexOutOfBounds if either offset is outside of the
 1074        *         array bounds
 1075        */
 1076       public char[] getRawCharacters(int beginIndex, int endIndex) {
 1077           int length = endIndex - beginIndex;
 1078           char[] chars = new char[length];
 1079           System.arraycopy(buf, beginIndex, chars, 0, length);
 1080           return chars;
 1081       }
 1082   
 1083       public enum CommentStyle {
 1084           LINE,
 1085           BLOCK,
 1086           JAVADOC,
 1087       }
 1088   
 1089       /**
 1090        * Called when a complete comment has been scanned. pos and endPos
 1091        * will mark the comment boundary.
 1092        */
 1093       protected void processComment(CommentStyle style) {
 1094           if (scannerDebug)
 1095               System.out.println("processComment(" + pos
 1096                                  + "," + endPos + "," + style + ")=|"
 1097                                  + new String(getRawCharacters(pos, endPos))
 1098                                  + "|");
 1099       }
 1100   
 1101       /**
 1102        * Called when a complete whitespace run has been scanned. pos and endPos
 1103        * will mark the whitespace boundary.
 1104        */
 1105       protected void processWhiteSpace() {
 1106           if (scannerDebug)
 1107               System.out.println("processWhitespace(" + pos
 1108                                  + "," + endPos + ")=|" +
 1109                                  new String(getRawCharacters(pos, endPos))
 1110                                  + "|");
 1111       }
 1112   
 1113       /**
 1114        * Called when a line terminator has been processed.
 1115        */
 1116       protected void processLineTerminator() {
 1117           if (scannerDebug)
 1118               System.out.println("processTerminator(" + pos
 1119                                  + "," + endPos + ")=|" +
 1120                                  new String(getRawCharacters(pos, endPos))
 1121                                  + "|");
 1122       }
 1123   
 1124       /** Build a map for translating between line numbers and
 1125        * positions in the input.
 1126        *
 1127        * @return a LineMap */
 1128       public Position.LineMap getLineMap() {
 1129           return Position.makeLineMap(buf, buflen, false);
 1130       }
 1131   
 1132   }

Home » openjdk-7 » com.sun.tools » javac » parser » [javadoc | source]