Home » openjdk-7 » java » io » [javadoc | source]

    1   /*
    2    * Copyright (c) 1995, 2005, Oracle and/or its affiliates. All rights reserved.
    3    * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    4    *
    5    * This code is free software; you can redistribute it and/or modify it
    6    * under the terms of the GNU General Public License version 2 only, as
    7    * published by the Free Software Foundation.  Oracle designates this
    8    * particular file as subject to the "Classpath" exception as provided
    9    * by Oracle in the LICENSE file that accompanied this code.
   10    *
   11    * This code is distributed in the hope that it will be useful, but WITHOUT
   12    * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   13    * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   14    * version 2 for more details (a copy is included in the LICENSE file that
   15    * accompanied this code).
   16    *
   17    * You should have received a copy of the GNU General Public License version
   18    * 2 along with this work; if not, write to the Free Software Foundation,
   19    * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   20    *
   21    * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   22    * or visit www.oracle.com if you need additional information or have any
   23    * questions.
   24    */
   25   
   26   package java.io;
   27   
   28   import java.util.Arrays;
   29   
   30   /**
   31    * The <code>StreamTokenizer</code> class takes an input stream and
   32    * parses it into "tokens", allowing the tokens to be
   33    * read one at a time. The parsing process is controlled by a table
   34    * and a number of flags that can be set to various states. The
   35    * stream tokenizer can recognize identifiers, numbers, quoted
   36    * strings, and various comment styles.
   37    * <p>
   38    * Each byte read from the input stream is regarded as a character
   39    * in the range <code>'&#92;u0000'</code> through <code>'&#92;u00FF'</code>.
   40    * The character value is used to look up five possible attributes of
   41    * the character: <i>white space</i>, <i>alphabetic</i>,
   42    * <i>numeric</i>, <i>string quote</i>, and <i>comment character</i>.
   43    * Each character can have zero or more of these attributes.
   44    * <p>
   45    * In addition, an instance has four flags. These flags indicate:
   46    * <ul>
   47    * <li>Whether line terminators are to be returned as tokens or treated
   48    *     as white space that merely separates tokens.
   49    * <li>Whether C-style comments are to be recognized and skipped.
   50    * <li>Whether C++-style comments are to be recognized and skipped.
   51    * <li>Whether the characters of identifiers are converted to lowercase.
   52    * </ul>
   53    * <p>
   54    * A typical application first constructs an instance of this class,
   55    * sets up the syntax tables, and then repeatedly loops calling the
   56    * <code>nextToken</code> method in each iteration of the loop until
   57    * it returns the value <code>TT_EOF</code>.
   58    *
   59    * @author  James Gosling
   60    * @see     java.io.StreamTokenizer#nextToken()
   61    * @see     java.io.StreamTokenizer#TT_EOF
   62    * @since   JDK1.0
   63    */
   64   
   65   public class StreamTokenizer {
   66   
    67       /* Only one of these will be non-null */
    68       private Reader reader = null;
    69       private InputStream input = null;
    70   
             /* Scratch buffer in which nextToken accumulates the characters of a
              * word or quoted-string token; nextToken doubles it when it fills up.
              */
    71       private char buf[] = new char[20];
    72   
    73       /**
    74        * The next character to be considered by the nextToken method.  May also
    75        * be NEED_CHAR to indicate that a new character should be read, or SKIP_LF
    76        * to indicate that a new character should be read and, if it is a '\n'
    77        * character, it should be discarded and a second new character should be
    78        * read.
    79        */
    80       private int peekc = NEED_CHAR;
    81   
             /* Sentinel values stored in peekc in place of a real character. */
    82       private static final int NEED_CHAR = Integer.MAX_VALUE;
    83       private static final int SKIP_LF = Integer.MAX_VALUE - 1;
    84   
             /* When set, the next nextToken call clears the flag and returns the
              * current ttype without reading any input.
              */
    85       private boolean pushedBack;
             /* When set, nextToken lowercases sval for word tokens
              * (controlled by lowerCaseMode).
              */
    86       private boolean forceLower;
    87       /** The line number of the last token read */
    88       private int LINENO = 1;
    89   
             /* Flags set by eolIsSignificant, slashSlashComments and
              * slashStarComments respectively, and consulted by nextToken.
              */
    90       private boolean eolIsSignificantP = false;
    91       private boolean slashSlashCommentsP = false;
    92       private boolean slashStarCommentsP = false;
    93   
             /* Syntax table indexed by character value (0 to 255). Each entry is a
              * bit set of the CT_* attribute flags below; an entry of 0 means the
              * character is "ordinary". nextToken treats characters of 256 and
              * above as CT_ALPHA.
              */
    94       private byte ctype[] = new byte[256];
    95       private static final byte CT_WHITESPACE = 1;
    96       private static final byte CT_DIGIT = 2;
    97       private static final byte CT_ALPHA = 4;
    98       private static final byte CT_QUOTE = 8;
    99       private static final byte CT_COMMENT = 16;
  100   
  101       /**
  102        * After a call to the <code>nextToken</code> method, this field
  103        * contains the type of the token just read. For a single character
  104        * token, its value is the single character, converted to an integer.
  105        * For a quoted string token, its value is the quote character.
  106        * Otherwise, its value is one of the following:
  107        * <ul>
  108        * <li><code>TT_WORD</code> indicates that the token is a word.
  109        * <li><code>TT_NUMBER</code> indicates that the token is a number.
  110        * <li><code>TT_EOL</code> indicates that the end of line has been read.
  111        *     The field can only have this value if the
  112        *     <code>eolIsSignificant</code> method has been called with the
  113        *     argument <code>true</code>.
  114        * <li><code>TT_EOF</code> indicates that the end of the input stream
  115        *     has been reached.
  116        * </ul>
  117        * <p>
  118        * The initial value of this field is -4.
  119        *
  120        * @see     java.io.StreamTokenizer#eolIsSignificant(boolean)
  121        * @see     java.io.StreamTokenizer#nextToken()
  122        * @see     java.io.StreamTokenizer#quoteChar(int)
  123        * @see     java.io.StreamTokenizer#TT_EOF
  124        * @see     java.io.StreamTokenizer#TT_EOL
  125        * @see     java.io.StreamTokenizer#TT_NUMBER
  126        * @see     java.io.StreamTokenizer#TT_WORD
  127        */
  128       public int ttype = TT_NOTHING;
  129   
  130       /**
  131        * A constant indicating that the end of the stream has been read.
  132        */
  133       public static final int TT_EOF = -1;
  134   
  135       /**
  136        * A constant indicating that the end of the line has been read.
  137        */
  138       public static final int TT_EOL = '\n';
  139   
  140       /**
  141        * A constant indicating that a number token has been read.
  142        */
  143       public static final int TT_NUMBER = -2;
  144   
  145       /**
  146        * A constant indicating that a word token has been read.
  147        */
  148       public static final int TT_WORD = -3;
  149   
  150       /* A constant indicating that no token has been read, used for
  151        * initializing ttype.  FIXME This could be made public and
  152        * made available as the part of the API in a future release.
  153        */
  154       private static final int TT_NOTHING = -4;
  155   
  156       /**
  157        * If the current token is a word token, this field contains a
  158        * string giving the characters of the word token. When the current
  159        * token is a quoted string token, this field contains the body of
  160        * the string.
  161        * <p>
  162        * The current token is a word when the value of the
  163        * <code>ttype</code> field is <code>TT_WORD</code>. The current token is
  164        * a quoted string token when the value of the <code>ttype</code> field is
  165        * a quote character.
  166        * <p>
  167        * The initial value of this field is null.
  168        *
  169        * @see     java.io.StreamTokenizer#quoteChar(int)
  170        * @see     java.io.StreamTokenizer#TT_WORD
  171        * @see     java.io.StreamTokenizer#ttype
  172        */
  173       public String sval;
  174   
  175       /**
  176        * If the current token is a number, this field contains the value
  177        * of that number. The current token is a number when the value of
  178        * the <code>ttype</code> field is <code>TT_NUMBER</code>.
  179        * <p>
  180        * The initial value of this field is 0.0.
  181        *
  182        * @see     java.io.StreamTokenizer#TT_NUMBER
  183        * @see     java.io.StreamTokenizer#ttype
  184        */
  185       public double nval;
  186   
  187       /** Private constructor that initializes everything except the streams. */
   188       private StreamTokenizer() {
                 // Installs the default syntax table. The calls below go to public,
                 // overridable methods, so their order is deliberately left as is.

                 // ASCII letters are word constituents.
   189           wordChars('a', 'z');
   190           wordChars('A', 'Z');
                 // 128 + 32 is 0xA0: the range 0xA0..0xFF is alphabetic as well.
   191           wordChars(128 + 32, 255);
                 // Everything from 0 up to and including the space separates tokens.
   192           whitespaceChars(0, ' ');
                 // '/' starts a comment that runs to the end of the line.
   193           commentChar('/');
                 // Both double and single quotes delimit string constants.
   194           quoteChar('"');
   195           quoteChar('\'');
                 // Digits, '.' and '-' additionally get the numeric attribute.
   196           parseNumbers();
   197       }
  198   
  199       /**
  200        * Creates a stream tokenizer that parses the specified input
  201        * stream. The stream tokenizer is initialized to the following
  202        * default state:
  203        * <ul>
  204        * <li>All byte values <code>'A'</code> through <code>'Z'</code>,
  205        *     <code>'a'</code> through <code>'z'</code>, and
  206        *     <code>'&#92;u00A0'</code> through <code>'&#92;u00FF'</code> are
  207        *     considered to be alphabetic.
  208        * <li>All byte values <code>'&#92;u0000'</code> through
  209        *     <code>'&#92;u0020'</code> are considered to be white space.
  210        * <li><code>'/'</code> is a comment character.
  211        * <li>Single quote <code>'&#92;''</code> and double quote <code>'"'</code>
  212        *     are string quote characters.
  213        * <li>Numbers are parsed.
  214        * <li>Ends of lines are treated as white space, not as separate tokens.
  215        * <li>C-style and C++-style comments are not recognized.
  216        * </ul>
  217        *
  218        * @deprecated As of JDK version 1.1, the preferred way to tokenize an
  219        * input stream is to convert it into a character stream, for example:
  220        * <blockquote><pre>
  221        *   Reader r = new BufferedReader(new InputStreamReader(is));
  222        *   StreamTokenizer st = new StreamTokenizer(r);
  223        * </pre></blockquote>
  224        *
  225        * @param      is        an input stream.
  226        * @see        java.io.BufferedReader
  227        * @see        java.io.InputStreamReader
  228        * @see        java.io.StreamTokenizer#StreamTokenizer(java.io.Reader)
  229        */
  230       @Deprecated
  231       public StreamTokenizer(InputStream is) {
  232           this();
  233           if (is == null) {
  234               throw new NullPointerException();
  235           }
  236           input = is;
  237       }
  238   
  239       /**
  240        * Create a tokenizer that parses the given character stream.
  241        *
  242        * @param r  a Reader object providing the input stream.
  243        * @since   JDK1.1
  244        */
  245       public StreamTokenizer(Reader r) {
  246           this();
  247           if (r == null) {
  248               throw new NullPointerException();
  249           }
  250           reader = r;
  251       }
  252   
  253       /**
  254        * Resets this tokenizer's syntax table so that all characters are
  255        * "ordinary." See the <code>ordinaryChar</code> method
  256        * for more information on a character being ordinary.
  257        *
  258        * @see     java.io.StreamTokenizer#ordinaryChar(int)
  259        */
  260       public void resetSyntax() {
  261           for (int i = ctype.length; --i >= 0;)
  262               ctype[i] = 0;
  263       }
  264   
  265       /**
  266        * Specifies that all characters <i>c</i> in the range
  267        * <code>low&nbsp;&lt;=&nbsp;<i>c</i>&nbsp;&lt;=&nbsp;high</code>
  268        * are word constituents. A word token consists of a word constituent
  269        * followed by zero or more word constituents or number constituents.
  270        *
  271        * @param   low   the low end of the range.
  272        * @param   hi    the high end of the range.
  273        */
  274       public void wordChars(int low, int hi) {
  275           if (low < 0)
  276               low = 0;
  277           if (hi >= ctype.length)
  278               hi = ctype.length - 1;
  279           while (low <= hi)
  280               ctype[low++] |= CT_ALPHA;
  281       }
  282   
  283       /**
  284        * Specifies that all characters <i>c</i> in the range
  285        * <code>low&nbsp;&lt;=&nbsp;<i>c</i>&nbsp;&lt;=&nbsp;high</code>
  286        * are white space characters. White space characters serve only to
  287        * separate tokens in the input stream.
  288        *
  289        * <p>Any other attribute settings for the characters in the specified
  290        * range are cleared.
  291        *
  292        * @param   low   the low end of the range.
  293        * @param   hi    the high end of the range.
  294        */
  295       public void whitespaceChars(int low, int hi) {
  296           if (low < 0)
  297               low = 0;
  298           if (hi >= ctype.length)
  299               hi = ctype.length - 1;
  300           while (low <= hi)
  301               ctype[low++] = CT_WHITESPACE;
  302       }
  303   
  304       /**
  305        * Specifies that all characters <i>c</i> in the range
  306        * <code>low&nbsp;&lt;=&nbsp;<i>c</i>&nbsp;&lt;=&nbsp;high</code>
  307        * are "ordinary" in this tokenizer. See the
  308        * <code>ordinaryChar</code> method for more information on a
  309        * character being ordinary.
  310        *
  311        * @param   low   the low end of the range.
  312        * @param   hi    the high end of the range.
  313        * @see     java.io.StreamTokenizer#ordinaryChar(int)
  314        */
  315       public void ordinaryChars(int low, int hi) {
  316           if (low < 0)
  317               low = 0;
  318           if (hi >= ctype.length)
  319               hi = ctype.length - 1;
  320           while (low <= hi)
  321               ctype[low++] = 0;
  322       }
  323   
  324       /**
  325        * Specifies that the character argument is "ordinary"
  326        * in this tokenizer. It removes any special significance the
  327        * character has as a comment character, word component, string
  328        * delimiter, white space, or number character. When such a character
  329        * is encountered by the parser, the parser treats it as a
  330        * single-character token and sets <code>ttype</code> field to the
  331        * character value.
  332        *
  333        * <p>Making a line terminator character "ordinary" may interfere
  334        * with the ability of a <code>StreamTokenizer</code> to count
  335        * lines. The <code>lineno</code> method may no longer reflect
  336        * the presence of such terminator characters in its line count.
  337        *
  338        * @param   ch   the character.
  339        * @see     java.io.StreamTokenizer#ttype
  340        */
  341       public void ordinaryChar(int ch) {
  342           if (ch >= 0 && ch < ctype.length)
  343               ctype[ch] = 0;
  344       }
  345   
  346       /**
   347        * Specifies that the character argument starts a single-line
  348        * comment. All characters from the comment character to the end of
  349        * the line are ignored by this stream tokenizer.
  350        *
  351        * <p>Any other attribute settings for the specified character are cleared.
  352        *
  353        * @param   ch   the character.
  354        */
  355       public void commentChar(int ch) {
  356           if (ch >= 0 && ch < ctype.length)
  357               ctype[ch] = CT_COMMENT;
  358       }
  359   
  360       /**
  361        * Specifies that matching pairs of this character delimit string
  362        * constants in this tokenizer.
  363        * <p>
  364        * When the <code>nextToken</code> method encounters a string
  365        * constant, the <code>ttype</code> field is set to the string
  366        * delimiter and the <code>sval</code> field is set to the body of
  367        * the string.
  368        * <p>
  369        * If a string quote character is encountered, then a string is
  370        * recognized, consisting of all characters after (but not including)
  371        * the string quote character, up to (but not including) the next
  372        * occurrence of that same string quote character, or a line
  373        * terminator, or end of file. The usual escape sequences such as
  374        * <code>"&#92;n"</code> and <code>"&#92;t"</code> are recognized and
  375        * converted to single characters as the string is parsed.
  376        *
  377        * <p>Any other attribute settings for the specified character are cleared.
  378        *
  379        * @param   ch   the character.
  380        * @see     java.io.StreamTokenizer#nextToken()
  381        * @see     java.io.StreamTokenizer#sval
  382        * @see     java.io.StreamTokenizer#ttype
  383        */
  384       public void quoteChar(int ch) {
  385           if (ch >= 0 && ch < ctype.length)
  386               ctype[ch] = CT_QUOTE;
  387       }
  388   
  389       /**
  390        * Specifies that numbers should be parsed by this tokenizer. The
  391        * syntax table of this tokenizer is modified so that each of the twelve
  392        * characters:
  393        * <blockquote><pre>
  394        *      0 1 2 3 4 5 6 7 8 9 . -
  395        * </pre></blockquote>
  396        * <p>
  397        * has the "numeric" attribute.
  398        * <p>
  399        * When the parser encounters a word token that has the format of a
  400        * double precision floating-point number, it treats the token as a
  401        * number rather than a word, by setting the <code>ttype</code>
  402        * field to the value <code>TT_NUMBER</code> and putting the numeric
  403        * value of the token into the <code>nval</code> field.
  404        *
  405        * @see     java.io.StreamTokenizer#nval
  406        * @see     java.io.StreamTokenizer#TT_NUMBER
  407        * @see     java.io.StreamTokenizer#ttype
  408        */
  409       public void parseNumbers() {
  410           for (int i = '0'; i <= '9'; i++)
  411               ctype[i] |= CT_DIGIT;
  412           ctype['.'] |= CT_DIGIT;
  413           ctype['-'] |= CT_DIGIT;
  414       }
  415   
  416       /**
  417        * Determines whether or not ends of line are treated as tokens.
  418        * If the flag argument is true, this tokenizer treats end of lines
  419        * as tokens; the <code>nextToken</code> method returns
  420        * <code>TT_EOL</code> and also sets the <code>ttype</code> field to
  421        * this value when an end of line is read.
  422        * <p>
  423        * A line is a sequence of characters ending with either a
  424        * carriage-return character (<code>'&#92;r'</code>) or a newline
  425        * character (<code>'&#92;n'</code>). In addition, a carriage-return
  426        * character followed immediately by a newline character is treated
  427        * as a single end-of-line token.
  428        * <p>
  429        * If the <code>flag</code> is false, end-of-line characters are
  430        * treated as white space and serve only to separate tokens.
  431        *
  432        * @param   flag   <code>true</code> indicates that end-of-line characters
  433        *                 are separate tokens; <code>false</code> indicates that
  434        *                 end-of-line characters are white space.
  435        * @see     java.io.StreamTokenizer#nextToken()
  436        * @see     java.io.StreamTokenizer#ttype
  437        * @see     java.io.StreamTokenizer#TT_EOL
  438        */
  439       public void eolIsSignificant(boolean flag) {
  440           eolIsSignificantP = flag;
  441       }
  442   
  443       /**
  444        * Determines whether or not the tokenizer recognizes C-style comments.
  445        * If the flag argument is <code>true</code>, this stream tokenizer
  446        * recognizes C-style comments. All text between successive
   447        * occurrences of <code>/*</code> and <code>*&#47;</code> is discarded.
  448        * <p>
  449        * If the flag argument is <code>false</code>, then C-style comments
  450        * are not treated specially.
  451        *
  452        * @param   flag   <code>true</code> indicates to recognize and ignore
  453        *                 C-style comments.
  454        */
  455       public void slashStarComments(boolean flag) {
  456           slashStarCommentsP = flag;
  457       }
  458   
  459       /**
  460        * Determines whether or not the tokenizer recognizes C++-style comments.
  461        * If the flag argument is <code>true</code>, this stream tokenizer
  462        * recognizes C++-style comments. Any occurrence of two consecutive
  463        * slash characters (<code>'/'</code>) is treated as the beginning of
  464        * a comment that extends to the end of the line.
  465        * <p>
  466        * If the flag argument is <code>false</code>, then C++-style
  467        * comments are not treated specially.
  468        *
  469        * @param   flag   <code>true</code> indicates to recognize and ignore
  470        *                 C++-style comments.
  471        */
  472       public void slashSlashComments(boolean flag) {
  473           slashSlashCommentsP = flag;
  474       }
  475   
  476       /**
   477        * Determines whether or not word tokens are automatically lowercased.
   478        * If the flag argument is <code>true</code>, then the value in the
   479        * <code>sval</code> field is lowercased whenever a word token is
   480        * returned (the <code>ttype</code> field has the
   481        * value <code>TT_WORD</code>) by the <code>nextToken</code> method
   482        * of this tokenizer.
  483        * <p>
  484        * If the flag argument is <code>false</code>, then the
  485        * <code>sval</code> field is not modified.
  486        *
  487        * @param   fl   <code>true</code> indicates that all word tokens should
  488        *               be lowercased.
  489        * @see     java.io.StreamTokenizer#nextToken()
  490        * @see     java.io.StreamTokenizer#ttype
  491        * @see     java.io.StreamTokenizer#TT_WORD
  492        */
  493       public void lowerCaseMode(boolean fl) {
  494           forceLower = fl;
  495       }
  496   
  497       /** Read the next character */
  498       private int read() throws IOException {
  499           if (reader != null)
  500               return reader.read();
  501           else if (input != null)
  502               return input.read();
  503           else
  504               throw new IllegalStateException();
  505       }
  506   
  507       /**
  508        * Parses the next token from the input stream of this tokenizer.
  509        * The type of the next token is returned in the <code>ttype</code>
  510        * field. Additional information about the token may be in the
  511        * <code>nval</code> field or the <code>sval</code> field of this
  512        * tokenizer.
  513        * <p>
  514        * Typical clients of this
  515        * class first set up the syntax tables and then sit in a loop
  516        * calling nextToken to parse successive tokens until TT_EOF
  517        * is returned.
  518        *
  519        * @return     the value of the <code>ttype</code> field.
  520        * @exception  IOException  if an I/O error occurs.
  521        * @see        java.io.StreamTokenizer#nval
  522        * @see        java.io.StreamTokenizer#sval
  523        * @see        java.io.StreamTokenizer#ttype
  524        */
  525       public int nextToken() throws IOException {
  526           if (pushedBack) {
  527               pushedBack = false;
  528               return ttype;
  529           }
  530           byte ct[] = ctype;
  531           sval = null;
  532   
  533           int c = peekc;
  534           if (c < 0)
  535               c = NEED_CHAR;
  536           if (c == SKIP_LF) {
  537               c = read();
  538               if (c < 0)
  539                   return ttype = TT_EOF;
  540               if (c == '\n')
  541                   c = NEED_CHAR;
  542           }
  543           if (c == NEED_CHAR) {
  544               c = read();
  545               if (c < 0)
  546                   return ttype = TT_EOF;
  547           }
  548           ttype = c;              /* Just to be safe */
  549   
  550           /* Set peekc so that the next invocation of nextToken will read
  551            * another character unless peekc is reset in this invocation
  552            */
  553           peekc = NEED_CHAR;
  554   
  555           int ctype = c < 256 ? ct[c] : CT_ALPHA;
  556           while ((ctype & CT_WHITESPACE) != 0) {
  557               if (c == '\r') {
  558                   LINENO++;
  559                   if (eolIsSignificantP) {
  560                       peekc = SKIP_LF;
  561                       return ttype = TT_EOL;
  562                   }
  563                   c = read();
  564                   if (c == '\n')
  565                       c = read();
  566               } else {
  567                   if (c == '\n') {
  568                       LINENO++;
  569                       if (eolIsSignificantP) {
  570                           return ttype = TT_EOL;
  571                       }
  572                   }
  573                   c = read();
  574               }
  575               if (c < 0)
  576                   return ttype = TT_EOF;
  577               ctype = c < 256 ? ct[c] : CT_ALPHA;
  578           }
  579   
  580           if ((ctype & CT_DIGIT) != 0) {
  581               boolean neg = false;
  582               if (c == '-') {
  583                   c = read();
  584                   if (c != '.' && (c < '0' || c > '9')) {
  585                       peekc = c;
  586                       return ttype = '-';
  587                   }
  588                   neg = true;
  589               }
  590               double v = 0;
  591               int decexp = 0;
  592               int seendot = 0;
  593               while (true) {
  594                   if (c == '.' && seendot == 0)
  595                       seendot = 1;
  596                   else if ('0' <= c && c <= '9') {
  597                       v = v * 10 + (c - '0');
  598                       decexp += seendot;
  599                   } else
  600                       break;
  601                   c = read();
  602               }
  603               peekc = c;
  604               if (decexp != 0) {
  605                   double denom = 10;
  606                   decexp--;
  607                   while (decexp > 0) {
  608                       denom *= 10;
  609                       decexp--;
  610                   }
  611                   /* Do one division of a likely-to-be-more-accurate number */
  612                   v = v / denom;
  613               }
  614               nval = neg ? -v : v;
  615               return ttype = TT_NUMBER;
  616           }
  617   
  618           if ((ctype & CT_ALPHA) != 0) {
  619               int i = 0;
  620               do {
  621                   if (i >= buf.length) {
  622                       buf = Arrays.copyOf(buf, buf.length * 2);
  623                   }
  624                   buf[i++] = (char) c;
  625                   c = read();
  626                   ctype = c < 0 ? CT_WHITESPACE : c < 256 ? ct[c] : CT_ALPHA;
  627               } while ((ctype & (CT_ALPHA | CT_DIGIT)) != 0);
  628               peekc = c;
  629               sval = String.copyValueOf(buf, 0, i);
  630               if (forceLower)
  631                   sval = sval.toLowerCase();
  632               return ttype = TT_WORD;
  633           }
  634   
  635           if ((ctype & CT_QUOTE) != 0) {
  636               ttype = c;
  637               int i = 0;
  638               /* Invariants (because \Octal needs a lookahead):
  639                *   (i)  c contains char value
  640                *   (ii) d contains the lookahead
  641                */
  642               int d = read();
  643               while (d >= 0 && d != ttype && d != '\n' && d != '\r') {
  644                   if (d == '\\') {
  645                       c = read();
  646                       int first = c;   /* To allow \377, but not \477 */
  647                       if (c >= '0' && c <= '7') {
  648                           c = c - '0';
  649                           int c2 = read();
  650                           if ('0' <= c2 && c2 <= '7') {
  651                               c = (c << 3) + (c2 - '0');
  652                               c2 = read();
  653                               if ('0' <= c2 && c2 <= '7' && first <= '3') {
  654                                   c = (c << 3) + (c2 - '0');
  655                                   d = read();
  656                               } else
  657                                   d = c2;
  658                           } else
  659                             d = c2;
  660                       } else {
  661                           switch (c) {
  662                           case 'a':
  663                               c = 0x7;
  664                               break;
  665                           case 'b':
  666                               c = '\b';
  667                               break;
  668                           case 'f':
  669                               c = 0xC;
  670                               break;
  671                           case 'n':
  672                               c = '\n';
  673                               break;
  674                           case 'r':
  675                               c = '\r';
  676                               break;
  677                           case 't':
  678                               c = '\t';
  679                               break;
  680                           case 'v':
  681                               c = 0xB;
  682                               break;
  683                           }
  684                           d = read();
  685                       }
  686                   } else {
  687                       c = d;
  688                       d = read();
  689                   }
  690                   if (i >= buf.length) {
  691                       buf = Arrays.copyOf(buf, buf.length * 2);
  692                   }
  693                   buf[i++] = (char)c;
  694               }
  695   
  696               /* If we broke out of the loop because we found a matching quote
  697                * character then arrange to read a new character next time
  698                * around; otherwise, save the character.
  699                */
  700               peekc = (d == ttype) ? NEED_CHAR : d;
  701   
  702               sval = String.copyValueOf(buf, 0, i);
  703               return ttype;
  704           }
  705   
  706           if (c == '/' && (slashSlashCommentsP || slashStarCommentsP)) {
  707               c = read();
  708               if (c == '*' && slashStarCommentsP) {
  709                   int prevc = 0;
  710                   while ((c = read()) != '/' || prevc != '*') {
  711                       if (c == '\r') {
  712                           LINENO++;
  713                           c = read();
  714                           if (c == '\n') {
  715                               c = read();
  716                           }
  717                       } else {
  718                           if (c == '\n') {
  719                               LINENO++;
  720                               c = read();
  721                           }
  722                       }
  723                       if (c < 0)
  724                           return ttype = TT_EOF;
  725                       prevc = c;
  726                   }
  727                   return nextToken();
  728               } else if (c == '/' && slashSlashCommentsP) {
  729                   while ((c = read()) != '\n' && c != '\r' && c >= 0);
  730                   peekc = c;
  731                   return nextToken();
  732               } else {
  733                   /* Now see if it is still a single line comment */
  734                   if ((ct['/'] & CT_COMMENT) != 0) {
  735                       while ((c = read()) != '\n' && c != '\r' && c >= 0);
  736                       peekc = c;
  737                       return nextToken();
  738                   } else {
  739                       peekc = c;
  740                       return ttype = '/';
  741                   }
  742               }
  743           }
  744   
  745           if ((ctype & CT_COMMENT) != 0) {
  746               while ((c = read()) != '\n' && c != '\r' && c >= 0);
  747               peekc = c;
  748               return nextToken();
  749           }
  750   
  751           return ttype = c;
  752       }
  753   
  754       /**
  755        * Arranges for the next invocation of <code>nextToken</code> on this
  756        * tokenizer to hand back the value currently held in the
  757        * <code>ttype</code> field once more, leaving the <code>nval</code>
  758        * and <code>sval</code> fields as they are.
  759        *
  760        * @see     java.io.StreamTokenizer#nextToken()
  761        * @see     java.io.StreamTokenizer#nval
  762        * @see     java.io.StreamTokenizer#sval
  763        * @see     java.io.StreamTokenizer#ttype
  764        */
  765       public void pushBack() {
  766           /* While ttype is still TT_NOTHING there is no token to push back. */
  767           pushedBack |= (ttype != TT_NOTHING);
  768       }
  769   
  770       /**
  771        * Returns the line number this tokenizer is currently positioned on.
  772        *
  773        * @return  the value of this stream tokenizer's line counter.
  774        */
  775       public int lineno() {
  776           return this.LINENO;
  777       }
  779       /**
  780        * Returns the string representation of the current stream token and
  781        * the line number it occurs on. A <code>ttype</code> value that is not
  782        * one of the reserved token types is rendered as a quoted character.
  783        * <p>The precise string returned is unspecified, although the following
  784        * example can be considered typical:
  785        *
  786        * <blockquote><pre>Token['a'], line 10</pre></blockquote>
  787        *
  788        * @return  a string representation of the token
  789        * @see     java.io.StreamTokenizer#nval
  790        * @see     java.io.StreamTokenizer#sval
  791        * @see     java.io.StreamTokenizer#ttype
  792        */
  793       public String toString() {
  794           String ret;
  795           switch (ttype) {
  796             case TT_EOF:
  797               ret = "EOF";
  798               break;
  799             case TT_EOL:
  800               ret = "EOL";
  801               break;
  802             case TT_WORD:
  803               ret = sval;
  804               break;
  805             case TT_NUMBER:
  806               ret = "n=" + nval;
  807               break;
  808             case TT_NOTHING:
  809               ret = "NOTHING";
  810               break;
  811             default: {
  812                   /*
  813                    * ttype is normally a quote character or an ordinary character
  814                    * here. A negative value other than the TT_* constants handled
  815                    * above would index outside the ctype table, so the lower
  816                    * bound is checked as well as the upper one before the lookup.
  817                    */
  818                   if (ttype >= 0 && ttype < 256 &&
  819                       ((ctype[ttype] & CT_QUOTE) != 0)) {
  820                       ret = sval;
  821                       break;
  822                   }
  823   
  824                   char s[] = new char[3];
  825                   s[0] = s[2] = '\'';
  826                   s[1] = (char) ttype;
  827                   ret = new String(s);
  828                   break;
  829               }
  830           }
  831           return "Token[" + ret + "], line " + LINENO;
  832       }
  833   
  834   }

Home » openjdk-7 » java » io » [javadoc | source]