Docjar: A Java Source and Document Engine    com.*    java.*    javax.*    org.*    all    new    plug-in

Quick Search    Search Deep

Source code: com/port80/eclipse/util/JavaCodeScanner.java


1   /**
2    * Modified from org.eclipse.jdt.internal.compiler.parser.Scanner.java
3    * . Added token for '#'.
4    */
5   package com.port80.eclipse.util;
6   
7   import java.util.ArrayList;
8   import java.util.Iterator;
9   import java.util.List;
10  
11  import org.eclipse.jdt.core.compiler.IScanner;
12  import org.eclipse.jdt.core.compiler.InvalidInputException;
13  import org.eclipse.jdt.internal.compiler.ast.StringLiteral;
14  import org.eclipse.jdt.internal.compiler.parser.NLSLine;
15  
16  public class JavaCodeScanner implements IScanner, IJavaCodeSymbol {
17  
18    /* APIs are
19     - getNextToken() which returns the current type of the token
20       (this value is not memorized by the scanner)
21     - getCurrentTokenSource() which provides the token "REAL" source
22       (aka all unicode escapes have been transformed into the correct char)
23     - sourceStart gives the position into the stream
24     - currentPosition-1 gives the sourceEnd position into the stream 
25    */
26  
27    // 1.4 feature 
28    private boolean assertMode;
29    public boolean useAssertAsAnIndentifier = false;
30    //flag indicating if processed source contains occurrences of keyword assert 
31    public boolean containsAssertKeyword = false;
32  
33    public boolean recordLineSeparator;
34    public char currentCharacter;
35    public int startPosition;
36    public int currentPosition;
37    public int initialPosition, eofPosition;
38    // after this position eof are generated instead of real token from the source
39  
40    public boolean tokenizeComments;
41    public boolean tokenizeWhiteSpace;
42  
43    //source should be viewed as a window (aka a part)
44    //of a entire very large stream
45    public char source[];
46  
47    //unicode support
48    public char[] withoutUnicodeBuffer;
49    public int withoutUnicodePtr; //when == 0 ==> no unicode in the current token
50    public boolean unicodeAsBackSlash = false;
51  
52    public boolean scanningFloatLiteral = false;
53  
54    //support for /** comments
55    //public char[][] comments = new char[10][];
56    public int[] commentStops = new int[10];
57    public int[] commentStarts = new int[10];
58    public int commentPtr = -1; // no comment test with commentPtr value -1
59  
60    //diet parsing support - jump over some method body when requested
61    public boolean diet = false;
62  
63    //support for the  poor-line-debuggers ....
64    //remember the position of the cr/lf
65    public int[] lineEnds = new int[250];
66    public int linePtr = -1;
67    public boolean wasAcr = false;
68  
69    public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
70  
71    public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
72    public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
73    public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
74    public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
75    public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
76    public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
77    public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
78  
79    public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
80    public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
81    public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
82    public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
83  
84    //----------------optimized identifier managment------------------
85    static final char[] charArray_a = new char[] { 'a' },
86      charArray_b = new char[] { 'b' },
87      charArray_c = new char[] { 'c' },
88      charArray_d = new char[] { 'd' },
89      charArray_e = new char[] { 'e' },
90      charArray_f = new char[] { 'f' },
91      charArray_g = new char[] { 'g' },
92      charArray_h = new char[] { 'h' },
93      charArray_i = new char[] { 'i' },
94      charArray_j = new char[] { 'j' },
95      charArray_k = new char[] { 'k' },
96      charArray_l = new char[] { 'l' },
97      charArray_m = new char[] { 'm' },
98      charArray_n = new char[] { 'n' },
99      charArray_o = new char[] { 'o' },
100     charArray_p = new char[] { 'p' },
101     charArray_q = new char[] { 'q' },
102     charArray_r = new char[] { 'r' },
103     charArray_s = new char[] { 's' },
104     charArray_t = new char[] { 't' },
105     charArray_u = new char[] { 'u' },
106     charArray_v = new char[] { 'v' },
107     charArray_w = new char[] { 'w' },
108     charArray_x = new char[] { 'x' },
109     charArray_y = new char[] { 'y' },
110     charArray_z = new char[] { 'z' };
111 
112   static final char[] initCharArray = new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' };
113   static final int TableSize = 30, InternalTableSize = 6; //30*6 = 180 entries
114   public static final int OptimizedLength = 6;
115   public /*static*/
116   final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][];
117   // support for detecting non-externalized string literals
118   int currentLineNr = -1;
119   int previousLineNr = -1;
120   NLSLine currentLine = null;
121   List lines = new ArrayList();
122   public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
123   public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
124   public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
125   public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
126   public StringLiteral[] nonNLSStrings = null;
127   public boolean checkNonExternalizedStringLiterals = true;
128   public boolean wasNonExternalizedStringLiteral = false;
129 
// Instance initializer (the static modifier is deliberately commented out):
// points every slot of charArray_length at the shared initCharArray placeholder.
/*static*/ {
  for (int lengthIndex = 0; lengthIndex < charArray_length.length; lengthIndex++) {
    char[][][] rows = charArray_length[lengthIndex];
    for (int rowIndex = 0; rowIndex < rows.length; rowIndex++) {
      char[][] slots = rows[rowIndex];
      for (int slotIndex = 0; slotIndex < slots.length; slotIndex++) {
        slots[slotIndex] = initCharArray;
      }
    }
  }
}
139   static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0, newEntry6 = 0;
140 
141   public static final int RoundBracket = 0;
142   public static final int SquareBracket = 1;
143   public static final int CurlyBracket = 2;
144   public static final int BracketKinds = 3;
/**
 * Creates a scanner with both comment tokenizing and white space
 * tokenizing switched off.
 */
public JavaCodeScanner() {
  this(false, false);
}
/**
 * Creates a scanner with the given tokenizing options. Delegates to the
 * three-argument constructor, passing false as its third argument.
 *
 * @param tokenizeComments forwarded unchanged to the three-argument constructor
 * @param tokenizeWhiteSpace forwarded unchanged to the three-argument constructor
 */
public JavaCodeScanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
  this(tokenizeComments, tokenizeWhiteSpace, false);
}
151   public final boolean atEnd() {
152     // This code is not relevant if source is 
153     // Only a part of the real stream input
154 
155     return source.length == currentPosition;
156   }
157   public char[] getCurrentIdentifierSource() {
158     //return the token REAL source (aka unicodes are precomputed)
159 
160     char[] result;
161     if (withoutUnicodePtr != 0)
162       //0 is used as a fast test flag so the real first char is in position 1
163       System.arraycopy(
164         withoutUnicodeBuffer,
165         1,
166         result = new char[withoutUnicodePtr],
167         0,
168         withoutUnicodePtr);
169     else {
170       int length = currentPosition - startPosition;
171       switch (length) { // see OptimizedLength
172         case 1 :
173           return optimizedCurrentTokenSource1();
174         case 2 :
175           return optimizedCurrentTokenSource2();
176         case 3 :
177           return optimizedCurrentTokenSource3();
178         case 4 :
179           return optimizedCurrentTokenSource4();
180         case 5 :
181           return optimizedCurrentTokenSource5();
182         case 6 :
183           return optimizedCurrentTokenSource6();
184       }
185       //no optimization
186       System.arraycopy(source, startPosition, result = new char[length], 0, length);
187     }
188     return result;
189   }
190   public int getCurrentTokenEndPosition() {
191     return this.currentPosition - 1;
192   }
193   public final char[] getCurrentTokenSource() {
194     // Return the token REAL source (aka unicodes are precomputed)
195 
196     char[] result;
197     if (withoutUnicodePtr != 0)
198       // 0 is used as a fast test flag so the real first char is in position 1
199       System.arraycopy(
200         withoutUnicodeBuffer,
201         1,
202         result = new char[withoutUnicodePtr],
203         0,
204         withoutUnicodePtr);
205     else {
206       int length;
207       System.arraycopy(
208         source,
209         startPosition,
210         result = new char[length = currentPosition - startPosition],
211         0,
212         length);
213     }
214     return result;
215   }
216   public final char[] getCurrentTokenSourceString() {
217     //return the token REAL source (aka unicodes are precomputed).
218     //REMOVE the two " that are at the beginning and the end.
219 
220     char[] result;
221     if (withoutUnicodePtr != 0)
222       //0 is used as a fast test flag so the real first char is in position 1
223       System.arraycopy(withoutUnicodeBuffer, 2,
224       //2 is 1 (real start) + 1 (to jump over the ")
225       result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
226     else {
227       int length;
228       System.arraycopy(
229         source,
230         startPosition + 1,
231         result = new char[length = currentPosition - startPosition - 2],
232         0,
233         length);
234     }
235     return result;
236   }
237   public int getCurrentTokenStartPosition() {
238     return this.startPosition;
239   }
240   /*
241    * Search the source position corresponding to the end of a given line number
242    *
243    * Line numbers are 1-based, and relative to the scanner initialPosition. 
244    * Character positions are 0-based.
245    *
246    * In case the given line number is inconsistent, answers -1.
247    */
248   public final int getLineEnd(int lineNumber) {
249 
250     if (lineEnds == null)
251       return -1;
252     if (lineNumber >= lineEnds.length)
253       return -1;
254     if (lineNumber <= 0)
255       return -1;
256 
257     if (lineNumber == lineEnds.length - 1)
258       return eofPosition;
259     return lineEnds[lineNumber - 1];
260     // next line start one character behind the lineEnd of the previous line
261   }
262   /**
263    * Search the source position corresponding to the beginning of a given line number
264    *
265    * Line numbers are 1-based, and relative to the scanner initialPosition. 
266    * Character positions are 0-based.
267    *
268    * e.g.  getLineStart(1) --> 0  i.e. first line starts at character 0.
269    *
270    * In case the given line number is inconsistent, answers -1.
271    */
272   public final int getLineStart(int lineNumber) {
273 
274     if (lineEnds == null)
275       return -1;
276     if (lineNumber >= lineEnds.length)
277       return -1;
278     if (lineNumber <= 0)
279       return -1;
280 
281     if (lineNumber == 1)
282       return initialPosition;
283     return lineEnds[lineNumber - 2] + 1;
284     // next line start one character behind the lineEnd of the previous line
285   }
286   public final boolean getNextChar(char testedChar) {
287     //BOOLEAN
288     //handle the case of unicode.
289     //when a unicode appears then we must use a buffer that holds char internal values
290     //At the end of this method currentCharacter holds the new visited char
291     //and currentPosition points right next after it
292     //Both previous lines are true if the currentCharacter is == to the testedChar
293     //On false, no side effect has occured.
294 
295     //ALL getNextChar.... ARE OPTIMIZED COPIES 
296 
297     int temp = currentPosition;
298     try {
299       if (((currentCharacter = source[currentPosition++]) == '\\')
300         && (source[currentPosition] == 'u')) {
301         //-------------unicode traitement ------------
302         int c1, c2, c3, c4;
303         int unicodeSize = 6;
304         currentPosition++;
305         while (source[currentPosition] == 'u') {
306           currentPosition++;
307           unicodeSize++;
308         }
309 
310         if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15 || c1 < 0)
311           || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15 || c2 < 0)
312           || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15 || c3 < 0)
313           || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15 || c4 < 0)) {
314           currentPosition = temp;
315           return false;
316         }
317 
318         currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
319         if (currentCharacter != testedChar) {
320           currentPosition = temp;
321           return false;
322         }
323         unicodeAsBackSlash = currentCharacter == '\\';
324 
325         //need the unicode buffer
326         if (withoutUnicodePtr == 0) {
327           //buffer all the entries that have been left aside....
328           withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
329           System.arraycopy(
330             source,
331             startPosition,
332             withoutUnicodeBuffer,
333             1,
334             withoutUnicodePtr);
335         }
336         //fill the buffer with the char
337         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
338         return true;
339 
340       } //-------------end unicode traitement--------------
341       else {
342         if (currentCharacter != testedChar) {
343           currentPosition = temp;
344           return false;
345         }
346         unicodeAsBackSlash = false;
347         if (withoutUnicodePtr != 0)
348           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
349         return true;
350       }
351     } catch (IndexOutOfBoundsException e) {
352       unicodeAsBackSlash = false;
353       currentPosition = temp;
354       return false;
355     }
356   }
357   public final int getNextChar(char testedChar1, char testedChar2) {
358     //INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
359     //test can be done with (x==0) for the first and (x>0) for the second
360     //handle the case of unicode.
361     //when a unicode appears then we must use a buffer that holds char internal values
362     //At the end of this method currentCharacter holds the new visited char
363     //and currentPosition points right next after it
364     //Both previous lines are true if the currentCharacter is == to the testedChar1/2
365     //On false, no side effect has occured.
366 
367     //ALL getNextChar.... ARE OPTIMIZED COPIES 
368 
369     int temp = currentPosition;
370     try {
371       int result;
372       if (((currentCharacter = source[currentPosition++]) == '\\')
373         && (source[currentPosition] == 'u')) {
374         //-------------unicode traitement ------------
375         int c1, c2, c3, c4;
376         int unicodeSize = 6;
377         currentPosition++;
378         while (source[currentPosition] == 'u') {
379           currentPosition++;
380           unicodeSize++;
381         }
382 
383         if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15 || c1 < 0)
384           || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15 || c2 < 0)
385           || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15 || c3 < 0)
386           || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15 || c4 < 0)) {
387           currentPosition = temp;
388           return 2;
389         }
390 
391         currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
392         if (currentCharacter == testedChar1)
393           result = 0;
394         else if (currentCharacter == testedChar2)
395           result = 1;
396         else {
397           currentPosition = temp;
398           return -1;
399         }
400 
401         //need the unicode buffer
402         if (withoutUnicodePtr == 0) {
403           //buffer all the entries that have been left aside....
404           withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
405           System.arraycopy(
406             source,
407             startPosition,
408             withoutUnicodeBuffer,
409             1,
410             withoutUnicodePtr);
411         }
412         //fill the buffer with the char
413         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
414         return result;
415       } //-------------end unicode traitement--------------
416       else {
417         if (currentCharacter == testedChar1)
418           result = 0;
419         else if (currentCharacter == testedChar2)
420           result = 1;
421         else {
422           currentPosition = temp;
423           return -1;
424         }
425 
426         if (withoutUnicodePtr != 0)
427           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
428         return result;
429       }
430     } catch (IndexOutOfBoundsException e) {
431       currentPosition = temp;
432       return -1;
433     }
434   }
435   public final boolean getNextCharAsDigit() {
436     //BOOLEAN
437     //handle the case of unicode.
438     //when a unicode appears then we must use a buffer that holds char internal values
439     //At the end of this method currentCharacter holds the new visited char
440     //and currentPosition points right next after it
441     //Both previous lines are true if the currentCharacter is a digit
442     //On false, no side effect has occured.
443 
444     //ALL getNextChar.... ARE OPTIMIZED COPIES 
445 
446     int temp = currentPosition;
447     try {
448       if (((currentCharacter = source[currentPosition++]) == '\\')
449         && (source[currentPosition] == 'u')) {
450         //-------------unicode traitement ------------
451         int c1, c2, c3, c4;
452         int unicodeSize = 6;
453         currentPosition++;
454         while (source[currentPosition] == 'u') {
455           currentPosition++;
456           unicodeSize++;
457         }
458 
459         if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15 || c1 < 0)
460           || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15 || c2 < 0)
461           || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15 || c3 < 0)
462           || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15 || c4 < 0)) {
463           currentPosition = temp;
464           return false;
465         }
466 
467         currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
468         if (!Character.isDigit(currentCharacter)) {
469           currentPosition = temp;
470           return false;
471         }
472 
473         //need the unicode buffer
474         if (withoutUnicodePtr == 0) {
475           //buffer all the entries that have been left aside....
476           withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
477           System.arraycopy(
478             source,
479             startPosition,
480             withoutUnicodeBuffer,
481             1,
482             withoutUnicodePtr);
483         }
484         //fill the buffer with the char
485         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
486         return true;
487       } //-------------end unicode traitement--------------
488       else {
489         if (!Character.isDigit(currentCharacter)) {
490           currentPosition = temp;
491           return false;
492         }
493         if (withoutUnicodePtr != 0)
494           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
495         return true;
496       }
497     } catch (IndexOutOfBoundsException e) {
498       currentPosition = temp;
499       return false;
500     }
501   }
502   public final boolean getNextCharAsDigit(int radix) {
503     //BOOLEAN
504     //handle the case of unicode.
505     //when a unicode appears then we must use a buffer that holds char internal values
506     //At the end of this method currentCharacter holds the new visited char
507     //and currentPosition points right next after it
508     //Both previous lines are true if the currentCharacter is a digit base on radix
509     //On false, no side effect has occured.
510 
511     //ALL getNextChar.... ARE OPTIMIZED COPIES 
512 
513     int temp = currentPosition;
514     try {
515       if (((currentCharacter = source[currentPosition++]) == '\\')
516         && (source[currentPosition] == 'u')) {
517         //-------------unicode traitement ------------
518         int c1, c2, c3, c4;
519         int unicodeSize = 6;
520         currentPosition++;
521         while (source[currentPosition] == 'u') {
522           currentPosition++;
523           unicodeSize++;
524         }
525 
526         if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15 || c1 < 0)
527           || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15 || c2 < 0)
528           || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15 || c3 < 0)
529           || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15 || c4 < 0)) {
530           currentPosition = temp;
531           return false;
532         }
533 
534         currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
535         if (Character.digit(currentCharacter, radix) == -1) {
536           currentPosition = temp;
537           return false;
538         }
539 
540         //need the unicode buffer
541         if (withoutUnicodePtr == 0) {
542           //buffer all the entries that have been left aside....
543           withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
544           System.arraycopy(
545             source,
546             startPosition,
547             withoutUnicodeBuffer,
548             1,
549             withoutUnicodePtr);
550         }
551         //fill the buffer with the char
552         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
553         return true;
554       } //-------------end unicode traitement--------------
555       else {
556         if (Character.digit(currentCharacter, radix) == -1) {
557           currentPosition = temp;
558           return false;
559         }
560         if (withoutUnicodePtr != 0)
561           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
562         return true;
563       }
564     } catch (IndexOutOfBoundsException e) {
565       currentPosition = temp;
566       return false;
567     }
568   }
569   public boolean getNextCharAsJavaIdentifierPart() {
570     //BOOLEAN
571     //handle the case of unicode.
572     //when a unicode appears then we must use a buffer that holds char internal values
573     //At the end of this method currentCharacter holds the new visited char
574     //and currentPosition points right next after it
575     //Both previous lines are true if the currentCharacter is a JavaIdentifierPart
576     //On false, no side effect has occured.
577 
578     //ALL getNextChar.... ARE OPTIMIZED COPIES 
579 
580     int temp = currentPosition;
581     try {
582       if (((currentCharacter = source[currentPosition++]) == '\\')
583         && (source[currentPosition] == 'u')) {
584         //-------------unicode traitement ------------
585         int c1, c2, c3, c4;
586         int unicodeSize = 6;
587         currentPosition++;
588         while (source[currentPosition] == 'u') {
589           currentPosition++;
590           unicodeSize++;
591         }
592 
593         if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15 || c1 < 0)
594           || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15 || c2 < 0)
595           || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15 || c3 < 0)
596           || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15 || c4 < 0)) {
597           currentPosition = temp;
598           return false;
599         }
600 
601         currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
602         if (!Character.isJavaIdentifierPart(currentCharacter)) {
603           currentPosition = temp;
604           return false;
605         }
606 
607         //need the unicode buffer
608         if (withoutUnicodePtr == 0) {
609           //buffer all the entries that have been left aside....
610           withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
611           System.arraycopy(
612             source,
613             startPosition,
614             withoutUnicodeBuffer,
615             1,
616             withoutUnicodePtr);
617         }
618         //fill the buffer with the char
619         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
620         return true;
621       } //-------------end unicode traitement--------------
622       else {
623         if (!Character.isJavaIdentifierPart(currentCharacter)) {
624           currentPosition = temp;
625           return false;
626         }
627 
628         if (withoutUnicodePtr != 0)
629           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
630         return true;
631       }
632     } catch (IndexOutOfBoundsException e) {
633       currentPosition = temp;
634       return false;
635     }
636   }
637   public int getNextToken() throws InvalidInputException {
638 
639     this.wasAcr = false;
640     if (diet) {
641       jumpOverMethodBody();
642       diet = false;
643       return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
644     }
645     try {
646       while (true) { //loop for jumping over comments
647         withoutUnicodePtr = 0;
648         //start with a new token (even comment written with unicode )
649 
650         // ---------Consume white space and handles startPosition---------
651         int whiteStart = currentPosition;
652         boolean isWhiteSpace;
653         do {
654           startPosition = currentPosition;
655           if (((currentCharacter = source[currentPosition++]) == '\\')
656             && (source[currentPosition] == 'u')) {
657             isWhiteSpace = jumpOverUnicodeWhiteSpace();
658           } else {
659             if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
660               checkNonExternalizeString();
661               if (recordLineSeparator) {
662                 pushLineSeparator();
663               } else {
664                 currentLine = null;
665               }
666             }
667             isWhiteSpace =
668               (currentCharacter == ' ')
669                 || Character.isWhitespace(currentCharacter);
670           }
671         } while (isWhiteSpace);
672         if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
673           // reposition scanner in case we are interested by spaces as tokens
674           currentPosition--;
675           startPosition = whiteStart;
676           return TokenNameWHITESPACE;
677         }
678         //little trick to get out in the middle of a source compuation
679         if (currentPosition > eofPosition)
680           return TokenNameEOF;
681 
682         // ---------Identify the next token-------------
683 
684         switch (currentCharacter) {
685           case '(' :
686             return TokenNameLPAREN;
687           case ')' :
688             return TokenNameRPAREN;
689           case '{' :
690             return TokenNameLBRACE;
691           case '}' :
692             return TokenNameRBRACE;
693           case '[' :
694             return TokenNameLBRACKET;
695           case ']' :
696             return TokenNameRBRACKET;
697           case ';' :
698             return TokenNameSEMICOLON;
699           case ',' :
700             return TokenNameCOMMA;
701           case '.' :
702             if (getNextCharAsDigit())
703               return scanNumber(true);
704             return TokenNameDOT;
705           case '+' :
706             {
707               int test;
708               if ((test = getNextChar('+', '=')) == 0)
709                 return TokenNamePLUS_PLUS;
710               if (test > 0)
711                 return TokenNamePLUS_EQUAL;
712               return TokenNamePLUS;
713             }
714           case '-' :
715             {
716               int test;
717               if ((test = getNextChar('-', '=')) == 0)
718                 return TokenNameMINUS_MINUS;
719               if (test > 0)
720                 return TokenNameMINUS_EQUAL;
721               return TokenNameMINUS;
722             }
723           case '~' :
724             return TokenNameTWIDDLE;
725           case '!' :
726             if (getNextChar('='))
727               return TokenNameNOT_EQUAL;
728             return TokenNameNOT;
729           case '*' :
730             if (getNextChar('='))
731               return TokenNameMULTIPLY_EQUAL;
732             return TokenNameMULTIPLY;
733           case '%' :
734             if (getNextChar('='))
735               return TokenNameREMAINDER_EQUAL;
736             return TokenNameREMAINDER;
737           case '<' :
738             {
739               int test;
740               if ((test = getNextChar('=', '<')) == 0)
741                 return TokenNameLESS_EQUAL;
742               if (test > 0) {
743                 if (getNextChar('='))
744                   return TokenNameLEFT_SHIFT_EQUAL;
745                 return TokenNameLEFT_SHIFT;
746               }
747               return TokenNameLESS;
748             }
749           case '>' :
750             {
751               int test;
752               if ((test = getNextChar('=', '>')) == 0)
753                 return TokenNameGREATER_EQUAL;
754               if (test > 0) {
755                 if ((test = getNextChar('=', '>')) == 0)
756                   return TokenNameRIGHT_SHIFT_EQUAL;
757                 if (test > 0) {
758                   if (getNextChar('='))
759                     return TokenNameUNSIGNED_RIGHT_SHIFT_EQUAL;
760                   return TokenNameUNSIGNED_RIGHT_SHIFT;
761                 }
762                 return TokenNameRIGHT_SHIFT;
763               }
764               return TokenNameGREATER;
765             }
766           case '=' :
767             if (getNextChar('='))
768               return TokenNameEQUAL_EQUAL;
769             return TokenNameEQUAL;
770           case '&' :
771             {
772               int test;
773               if ((test = getNextChar('&', '=')) == 0)
774                 return TokenNameAND_AND;
775               if (test > 0)
776                 return TokenNameAND_EQUAL;
777               return TokenNameAND;
778             }
779           case '|' :
780             {
781               int test;
782               if ((test = getNextChar('|', '=')) == 0)
783                 return TokenNameOR_OR;
784               if (test > 0)
785                 return TokenNameOR_EQUAL;
786               return TokenNameOR;
787             }
788           case '^' :
789             if (getNextChar('='))
790               return TokenNameXOR_EQUAL;
791             return TokenNameXOR;
792           case '?' :
793             return TokenNameQUESTION;
794           case ':' :
795             return TokenNameCOLON;
796           case '\'' :
797             {
798               int test;
799               if ((test = getNextChar('\n', '\r')) == 0) {
800                 throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
801               }
802               if (test > 0) {
803                 // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
804                 for (int lookAhead = 0; lookAhead < 3; lookAhead++) {
805                   if (currentPosition + lookAhead
806                     == source.length)
807                     break;
808                   if (source[currentPosition + lookAhead]
809                     == '\n')
810                     break;
811                   if (source[currentPosition + lookAhead]
812                     == '\'') {
813                     currentPosition += lookAhead + 1;
814                     break;
815                   }
816                 }
817                 throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
818               }
819             }
820             if (getNextChar('\'')) {
821               // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
822               for (int lookAhead = 0; lookAhead < 3; lookAhead++) {
823                 if (currentPosition + lookAhead == source.length)
824                   break;
825                 if (source[currentPosition + lookAhead] == '\n')
826                   break;
827                 if (source[currentPosition + lookAhead] == '\'') {
828                   currentPosition += lookAhead + 1;
829                   break;
830                 }
831               }
832               throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
833             }
834             if (getNextChar('\\'))
835               scanEscapeCharacter();
836             else { // consume next character
837               unicodeAsBackSlash = false;
838               if (((currentCharacter = source[currentPosition++]) == '\\')
839                 && (source[currentPosition] == 'u')) {
840                 getNextUnicodeChar();
841               } else {
842                 if (withoutUnicodePtr != 0) {
843                   withoutUnicodeBuffer[++withoutUnicodePtr] =
844                     currentCharacter;
845                 }
846               }
847             }
848             if (getNextChar('\''))
849               return TokenNameCharacterLiteral;
850             // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
851             for (int lookAhead = 0; lookAhead < 20; lookAhead++) {
852               if (currentPosition + lookAhead == source.length)
853                 break;
854               if (source[currentPosition + lookAhead] == '\n')
855                 break;
856               if (source[currentPosition + lookAhead] == '\'') {
857                 currentPosition += lookAhead + 1;
858                 break;
859               }
860             }
861             throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
862           case '"' :
863             try {
864               // consume next character
865               unicodeAsBackSlash = false;
866               if (((currentCharacter = source[currentPosition++]) == '\\')
867                 && (source[currentPosition] == 'u')) {
868                 getNextUnicodeChar();
869               } else {
870                 if (withoutUnicodePtr != 0) {
871                   withoutUnicodeBuffer[++withoutUnicodePtr] =
872                     currentCharacter;
873                 }
874               }
875 
876               while (currentCharacter != '"') {
877                 /**** \r and \n are not valid in string literals ****/
878                 if ((currentCharacter == '\n')
879                   || (currentCharacter == '\r')) {
880                   // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
881                   for (int lookAhead = 0;
882                     lookAhead < 50;
883                     lookAhead++) {
884                     if (currentPosition + lookAhead
885                       == source.length)
886                       break;
887                     if (source[currentPosition + lookAhead]
888                       == '\n')
889                       break;
890                     if (source[currentPosition + lookAhead]
891                       == '\"') {
892                       currentPosition += lookAhead
893                         + 1;
894                       break;
895                     }
896                   }
897                   throw new InvalidInputException(INVALID_CHAR_IN_STRING);
898                 }
899                 if (currentCharacter == '\\') {
900                   int escapeSize = currentPosition;
901                   boolean backSlashAsUnicodeInString =
902                     unicodeAsBackSlash;
903                   //scanEscapeCharacter make a side effect on this value and we need the previous value few lines down this one
904                   scanEscapeCharacter();
905                   escapeSize = currentPosition - escapeSize;
906                   if (withoutUnicodePtr == 0) {
907                     //buffer all the entries that have been left aside....
908                     withoutUnicodePtr =
909                       currentPosition
910                         - escapeSize
911                         - 1
912                         - startPosition;
913                     System.arraycopy(
914                       source,
915                       startPosition,
916                       withoutUnicodeBuffer,
917                       1,
918                       withoutUnicodePtr);
919                     withoutUnicodeBuffer[++withoutUnicodePtr] =
920                       currentCharacter;
921                   } else { //overwrite the / in the buffer
922                     withoutUnicodeBuffer[withoutUnicodePtr] =
923                       currentCharacter;
924                     if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
925                       withoutUnicodePtr--;
926                     }
927                   }
928                 }
929                 // consume next character
930                 unicodeAsBackSlash = false;
931                 if (((currentCharacter = source[currentPosition++])
932                   == '\\')
933                   && (source[currentPosition] == 'u')) {
934                   getNextUnicodeChar();
935                 } else {
936                   if (withoutUnicodePtr != 0) {
937                     withoutUnicodeBuffer[++withoutUnicodePtr] =
938                       currentCharacter;
939                   }
940                 }
941 
942               }
943             } catch (IndexOutOfBoundsException e) {
944               throw new InvalidInputException(UNTERMINATED_STRING);
945             } catch (InvalidInputException e) {
946               if (e.getMessage().equals(INVALID_ESCAPE)) {
947                 // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
948                 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
949                   if (currentPosition + lookAhead
950                     == source.length)
951                     break;
952                   if (source[currentPosition + lookAhead]
953                     == '\n')
954                     break;
955                   if (source[currentPosition + lookAhead]
956                     == '\"') {
957                     currentPosition += lookAhead + 1;
958                     break;
959                   }
960                 }
961 
962               }
963               throw e; // rethrow
964             }
965             if (checkNonExternalizedStringLiterals) { // check for presence of  NLS tags //$NON-NLS-?$ where ? is an int.
966               if (currentLine == null) {
967                 currentLine = new NLSLine();
968                 lines.add(currentLine);
969               }
970               currentLine.add(
971                 new StringLiteral(
972                   getCurrentTokenSourceString(),
973                   startPosition,
974                   currentPosition - 1));
975             }
976             return TokenNameStringLiteral;
977           case '/' :
978             {
979               int test;
980               if ((test = getNextChar('/', '*')) == 0) { //line comment 
981                 int endPositionForLineComment = 0;
982                 try { //get the next char 
983                   if (((currentCharacter =
984                     source[currentPosition++])
985                     == '\\')
986                     && (source[currentPosition] == 'u')) {
987                     //-------------unicode traitement ------------
988                     int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
989                     currentPosition++;
990                     while (source[currentPosition]
991                       == 'u') {
992                       currentPosition++;
993                     }
994                     if ((c1 =
995                       Character.getNumericValue(
996                         source[currentPosition++]))
997                       > 15
998                       || c1 < 0
999                       || (c2 =
1000                        Character
1001                          .getNumericValue(
1002                          source[currentPosition++]))
1003                        > 15
1004                      || c2 < 0
1005                      || (c3 =
1006                        Character
1007                          .getNumericValue(
1008                          source[currentPosition++]))
1009                        > 15
1010                      || c3 < 0
1011                      || (c4 =
1012                        Character
1013                          .getNumericValue(
1014                          source[currentPosition++]))
1015                        > 15
1016                      || c4 < 0) {
1017                      throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1018                    } else {
1019                      currentCharacter =
1020                        (char) (((c1 * 16 + c2)
1021                          * 16
1022                          + c3)
1023                          * 16
1024                          + c4);
1025                    }
1026                  }
1027
1028                  //handle the \\u case manually into comment
1029                  if (currentCharacter == '\\') {
1030                    if (source[currentPosition] == '\\')
1031                      currentPosition++;
1032                  } //jump over the \\
1033                  boolean isUnicode = false;
1034                  while (currentCharacter != '\r'
1035                    && currentCharacter != '\n') {
1036                    //get the next char
1037                    isUnicode = false;
1038                    if (((currentCharacter =
1039                      source[currentPosition++])
1040                      == '\\')
1041                      && (source[currentPosition]
1042                        == 'u')) {
1043                      isUnicode = true;
1044                      //-------------unicode traitement ------------
1045                      int c1 = 0,
1046                        c2 = 0,
1047                        c3 = 0,
1048                        c4 = 0;
1049                      currentPosition++;
1050                      while (source[currentPosition]
1051                        == 'u') {
1052                        currentPosition++;
1053                      }
1054                      if ((c1 =
1055                        Character
1056                          .getNumericValue(
1057                          source[currentPosition++]))
1058                        > 15
1059                        || c1 < 0
1060                        || (c2 =
1061                          Character
1062                            .getNumericValue(
1063                            source[currentPosition++]))
1064                          > 15
1065                        || c2 < 0
1066                        || (c3 =
1067                          Character
1068                            .getNumericValue(
1069                            source[currentPosition++]))
1070                          > 15
1071                        || c3 < 0
1072                        || (c4 =
1073                          Character
1074                            .getNumericValue(
1075                            source[currentPosition++]))
1076                          > 15
1077                        || c4 < 0) {
1078                        throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1079                      } else {
1080                        currentCharacter =
1081                          (char) (((c1
1082                            * 16
1083                            + c2)
1084                            * 16
1085                            + c3)
1086                            * 16
1087                            + c4);
1088                      }
1089                    }
1090                    //handle the \\u case manually into comment
1091                    if (currentCharacter == '\\') {
1092                      if (source[currentPosition]
1093                        == '\\')
1094                        currentPosition++;
1095                    } //jump over the \\
1096                  }
1097                  if (isUnicode) {
1098                    endPositionForLineComment =
1099                      currentPosition - 6;
1100                  } else {
1101                    endPositionForLineComment =
1102                      currentPosition - 1;
1103                  }
1104                  recordComment(false);
1105                  if ((currentCharacter == '\r')
1106                    || (currentCharacter == '\n')) {
1107                    checkNonExternalizeString();
1108                    if (recordLineSeparator) {
1109                      if (isUnicode) {
1110                        pushUnicodeLineSeparator();
1111                      } else {
1112                        pushLineSeparator();
1113                      }
1114                    } else {
1115                      currentLine = null;
1116                    }
1117                  }
1118                  if (tokenizeComments) {
1119                    if (!isUnicode) {
1120                      currentPosition =
1121                        endPositionForLineComment;
1122                      // reset one character behind
1123                    }
1124                    return TokenNameCOMMENT_LINE;
1125                  }
1126                } catch (IndexOutOfBoundsException e) { //an eof will them be generated
1127                  if (tokenizeComments) {
1128                    currentPosition--;
1129                    // reset one character behind
1130                    return TokenNameCOMMENT_LINE;
1131                  }
1132                }
1133                break;
1134              }
1135              if (test > 0) { //traditional and annotation comment
1136                boolean isJavadoc = false, star = false;
1137                // consume next character
1138                unicodeAsBackSlash = false;
1139                if (((currentCharacter = source[currentPosition++])
1140                  == '\\')
1141                  && (source[currentPosition] == 'u')) {
1142                  getNextUnicodeChar();
1143                } else {
1144                  if (withoutUnicodePtr != 0) {
1145                    withoutUnicodeBuffer[++withoutUnicodePtr] =
1146                      currentCharacter;
1147                  }
1148                }
1149
1150                if (currentCharacter == '*') {
1151                  isJavadoc = true;
1152                  star = true;
1153                }
1154                if ((currentCharacter == '\r')
1155                  || (currentCharacter == '\n')) {
1156                  checkNonExternalizeString();
1157                  if (recordLineSeparator) {
1158                    pushLineSeparator();
1159                  } else {
1160                    currentLine = null;
1161                  }
1162                }
1163                try { //get the next char 
1164                  if (((currentCharacter =
1165                    source[currentPosition++])
1166                    == '\\')
1167                    && (source[currentPosition] == 'u')) {
1168                    //-------------unicode traitement ------------
1169                    getNextUnicodeChar();
1170                  }
1171                  //handle the \\u case manually into comment
1172                  if (currentCharacter == '\\') {
1173                    if (source[currentPosition] == '\\')
1174                      currentPosition++;
1175                    //jump over the \\
1176                  }
1177                  // empty comment is not a javadoc /**/
1178                  if (currentCharacter == '/') {
1179                    isJavadoc = false;
1180                  }
1181                  //loop until end of comment */
1182                  while ((currentCharacter != '/') || (!star)) {
1183                    if ((currentCharacter == '\r')
1184                      || (currentCharacter == '\n')) {
1185                      checkNonExternalizeString();
1186                      if (recordLineSeparator) {
1187                        pushLineSeparator();
1188                      } else {
1189                        currentLine = null;
1190                      }
1191                    }
1192                    star = currentCharacter == '*';
1193                    //get next char
1194                    if (((currentCharacter =
1195                      source[currentPosition++])
1196                      == '\\')
1197                      && (source[currentPosition]
1198                        == 'u')) {
1199                      //-------------unicode traitement ------------
1200                      getNextUnicodeChar();
1201                    }
1202                    //handle the \\u case manually into comment
1203                    if (currentCharacter == '\\') {
1204                      if (source[currentPosition]
1205                        == '\\')
1206                        currentPosition++;
1207                    } //jump over the \\
1208                  }
1209                  recordComment(isJavadoc);
1210                  if (tokenizeComments) {
1211                    if (isJavadoc)
1212                      return TokenNameCOMMENT_JAVADOC;
1213                    return TokenNameCOMMENT_BLOCK;
1214                  }
1215                } catch (IndexOutOfBoundsException e) {
1216                  throw new InvalidInputException(UNTERMINATED_COMMENT);
1217                }
1218                break;
1219              }
1220              if (getNextChar('='))
1221                return TokenNameDIVIDE_EQUAL;
1222              return TokenNameDIVIDE;
1223            }
1224          case '\u001a' :
1225            if (atEnd())
1226              return TokenNameEOF;
1227            //the atEnd may not be <currentPosition == source.length> if source is only some part of a real (external) stream
1228            throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
1229
1230          default :
1231            if (Character.isJavaIdentifierStart(currentCharacter))
1232              return scanIdentifierOrKeyword();
1233          if (currentCharacter == '#')
1234            return TokenNameHASH;
1235          if (currentCharacter == '@')
1236            return TokenNameEACH;
1237            if (Character.isDigit(currentCharacter))
1238              return scanNumber(false);
1239            return TokenNameERROR;
1240        }
1241      }
1242    } //-----------------end switch while try--------------------
1243    catch (IndexOutOfBoundsException e) {
1244    }
1245    return TokenNameEOF;
1246  }
1247  public final void getNextUnicodeChar() throws IndexOutOfBoundsException, InvalidInputException {
1248    //VOID
1249    //handle the case of unicode.
1250    //when a unicode appears then we must use a buffer that holds char internal values
1251    //At the end of this method currentCharacter holds the new visited char
1252    //and currentPosition points right next after it
1253
1254    //ALL getNextChar.... ARE OPTIMIZED COPIES 
1255
1256    int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
1257    currentPosition++;
1258    while (source[currentPosition] == 'u') {
1259      currentPosition++;
1260      unicodeSize++;
1261    }
1262
1263    if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1264      || c1 < 0
1265      || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
1266      || c2 < 0
1267      || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
1268      || c3 < 0
1269      || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
1270      || c4 < 0) {
1271      throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1272    } else {
1273      currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1274      //need the unicode buffer
1275      if (withoutUnicodePtr == 0) {
1276        //buffer all the entries that have been left aside....
1277        withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
1278        System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1279      }
1280      //fill the buffer with the char
1281      withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1282    }
1283    unicodeAsBackSlash = currentCharacter == '\\';
1284  }
1285  /* Tokenize a method body, assuming that curly brackets are properly balanced.
1286   */
1287  public final void jumpOverMethodBody() {
1288
1289    this.wasAcr = false;
1290    int found = 1;
1291    try {
1292      while (true) { //loop for jumping over comments
1293        // ---------Consume white space and handles startPosition---------
1294        boolean isWhiteSpace;
1295        do {
1296          startPosition = currentPosition;
1297          if (((currentCharacter = source[currentPosition++]) == '\\')
1298            && (source[currentPosition] == 'u')) {
1299            isWhiteSpace = jumpOverUnicodeWhiteSpace();
1300          } else {
1301            if (recordLineSeparator
1302              && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1303              pushLineSeparator();
1304            isWhiteSpace = Character.isWhitespace(currentCharacter);
1305          }
1306        } while (isWhiteSpace);
1307
1308        // -------consume token until } is found---------
1309        switch (currentCharacter) {
1310          case '{' :
1311            found++;
1312            break;
1313          case '}' :
1314            found--;
1315            if (found == 0)
1316              return;
1317            break;
1318          case '\'' :
1319            {
1320              boolean test;
1321              test = getNextChar('\\');
1322              if (test) {
1323                try {
1324                  scanEscapeCharacter();
1325                } catch (InvalidInputException ex) {
1326                };
1327              } else {
1328                try { // consume next character
1329                  unicodeAsBackSlash = false;
1330                  if (((currentCharacter =
1331                    source[currentPosition++])
1332                    == '\\')
1333                    && (source[currentPosition] == 'u')) {
1334                    getNextUnicodeChar();
1335                  } else {
1336                    if (withoutUnicodePtr != 0) {
1337                      withoutUnicodeBuffer[++withoutUnicodePtr] =
1338                        currentCharacter;
1339                    }
1340                  }
1341                } catch (InvalidInputException ex) {
1342                };
1343              }
1344              getNextChar('\'');
1345              break;
1346            }
1347          case '"' :
1348            try {
1349              try { // consume next character
1350                unicodeAsBackSlash = false;
1351                if (((currentCharacter = source[currentPosition++])
1352                  == '\\')
1353                  && (source[currentPosition] == 'u')) {
1354                  getNextUnicodeChar();
1355                } else {
1356                  if (withoutUnicodePtr != 0) {
1357                    withoutUnicodeBuffer[++withoutUnicodePtr] =
1358                      currentCharacter;
1359                  }
1360                }
1361              } catch (InvalidInputException ex) {
1362              };
1363              while (currentCharacter != '"') {
1364                if (currentCharacter == '\r') {
1365                  if (source[currentPosition] == '\n')
1366                    currentPosition++;
1367                  break;
1368                  // the string cannot go further that the line
1369                }
1370                if (currentCharacter == '\n') {
1371                  break;
1372                  // the string cannot go further that the line
1373                }
1374                if (currentCharacter == '\\') {
1375                  try {
1376                    scanEscapeCharacter();
1377                  } catch (InvalidInputException ex) {
1378                  };
1379                }
1380                try { // consume next character
1381                  unicodeAsBackSlash = false;
1382                  if (((currentCharacter =
1383                    source[currentPosition++]