Source code: com/port80/eclipse/util/JavaCodeScanner.java
1 /**
2 * Modified from org.eclipse.jdt.internal.compiler.parser.Scanner.java
3 * . Added token for '#'.
4 */
5 package com.port80.eclipse.util;
6
7 import java.util.ArrayList;
8 import java.util.Iterator;
9 import java.util.List;
10
11 import org.eclipse.jdt.core.compiler.IScanner;
12 import org.eclipse.jdt.core.compiler.InvalidInputException;
13 import org.eclipse.jdt.internal.compiler.ast.StringLiteral;
14 import org.eclipse.jdt.internal.compiler.parser.NLSLine;
15
16 public class JavaCodeScanner implements IScanner, IJavaCodeSymbol {
17
18 /* APIs are
19 - getNextToken() which returns the type of the current token
20 (this value is not memorized by the scanner)
21 - getCurrentTokenSource() which provides the token's "REAL" source
22 (i.e. all unicode escapes have been translated into the corresponding char)
23 - startPosition gives the token's start position in the stream
24 - currentPosition-1 gives the token's end position in the stream
25 */
26
27 // 1.4 feature: support for the 'assert' keyword
28 private boolean assertMode;
29 public boolean useAssertAsAnIndentifier = false;
30 // flag indicating whether the processed source contains occurrences of the keyword assert
31 public boolean containsAssertKeyword = false;
32
33 public boolean recordLineSeparator; // when true, getNextToken() calls pushLineSeparator() on '\r' and '\n'
34 public char currentCharacter; // character read last; a unicode escape is stored as the char it denotes
35 public int startPosition; // index in source at which the current token starts
36 public int currentPosition; // index in source of the next character to read
37 public int initialPosition, eofPosition;
38 // once currentPosition is past eofPosition, EOF is answered instead of real tokens from the source
39
40 public boolean tokenizeComments;
41 public boolean tokenizeWhiteSpace; // when true, getNextToken() answers TokenNameWHITESPACE for skipped white space
42
43 // source should be viewed as a window (i.e. a part)
44 // of an entire, very large stream
45 public char source[];
46
47 // unicode support: the decoded characters of the current token are buffered starting at index 1
48 public char[] withoutUnicodeBuffer;
49 public int withoutUnicodePtr; // index of the last buffered char; 0 means no unicode escape in the current token
50 public boolean unicodeAsBackSlash = false; // set by getNextChar(char) when a unicode escape decodes to a backslash
51
52 public boolean scanningFloatLiteral = false;
53
54 // support for /** comments
55 //public char[][] comments = new char[10][];
56 public int[] commentStops = new int[10];
57 public int[] commentStarts = new int[10];
58 public int commentPtr = -1; // -1 means there is no comment to test
59
60 // diet parsing support - jump over some method bodies when requested
61 public boolean diet = false;
62
63 // line-number support (for debuggers):
64 // remembers the positions of the line separators (cr/lf)
65 public int[] lineEnds = new int[250];
66 public int linePtr = -1;
67 public boolean wasAcr = false; // reset to false at the start of every getNextToken() call
68
// Message keys. Several of them (e.g. INVALID_CHARACTER_CONSTANT, UNTERMINATED_STRING,
// INVALID_CHAR_IN_STRING, INVALID_UNICODE_ESCAPE) are passed as the message of the
// InvalidInputException thrown by getNextToken().
69 public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
70
71 public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
72 public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
73 public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
74 public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
75 public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
76 public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
77 public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
78
79 public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
80 public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
81 public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
82 public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
83
84 //----------------optimized identifier management------------------
// One shared single-character array per lower-case letter.
// NOTE(review): presumably answered by optimizedCurrentTokenSource1() for one-letter
// tokens instead of allocating a new array - confirm against that method.
85 static final char[] charArray_a = new char[] { 'a' },
86 charArray_b = new char[] { 'b' },
87 charArray_c = new char[] { 'c' },
88 charArray_d = new char[] { 'd' },
89 charArray_e = new char[] { 'e' },
90 charArray_f = new char[] { 'f' },
91 charArray_g = new char[] { 'g' },
92 charArray_h = new char[] { 'h' },
93 charArray_i = new char[] { 'i' },
94 charArray_j = new char[] { 'j' },
95 charArray_k = new char[] { 'k' },
96 charArray_l = new char[] { 'l' },
97 charArray_m = new char[] { 'm' },
98 charArray_n = new char[] { 'n' },
99 charArray_o = new char[] { 'o' },
100 charArray_p = new char[] { 'p' },
101 charArray_q = new char[] { 'q' },
102 charArray_r = new char[] { 'r' },
103 charArray_s = new char[] { 's' },
104 charArray_t = new char[] { 't' },
105 charArray_u = new char[] { 'u' },
106 charArray_v = new char[] { 'v' },
107 charArray_w = new char[] { 'w' },
108 charArray_x = new char[] { 'x' },
109 charArray_y = new char[] { 'y' },
110 charArray_z = new char[] { 'z' };
111
// Placeholder that the instance initializer stores in every slot of charArray_length.
112 static final char[] initCharArray = new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' };
113 static final int TableSize = 30, InternalTableSize = 6; //30*6 = 180 entries
// Tokens of length 1 to 6 take the optimized path in getCurrentIdentifierSource().
114 public static final int OptimizedLength = 6;
// Per-instance table (the 'static' modifier is commented out), dimensioned
// [OptimizedLength][TableSize][InternalTableSize].
115 public /*static*/
116 final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][];
117 // support for detecting non-externalized string literals
118 int currentLineNr = -1;
119 int previousLineNr = -1;
// Collects the string literals of the line being scanned; getNextToken() creates it lazily
// and drops it at a line separator when recordLineSeparator is false.
120 NLSLine currentLine = null;
// Every NLSLine created by getNextToken() is appended here.
121 List lines = new ArrayList();
122 public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
123 public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
124 public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
125 public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
126 public StringLiteral[] nonNLSStrings = null;
// When true, getNextToken() records each scanned string literal in currentLine.
127 public boolean checkNonExternalizedStringLiterals = true;
128 public boolean wasNonExternalizedStringLiteral = false;
129
// Instance initializer (the 'static' modifier is commented out): every slot of the
// identifier cache starts out referring to the shared placeholder array.
/*static*/ {
    for (int lengthIndex = 0; lengthIndex < OptimizedLength; lengthIndex++) {
        char[][][] tablesForLength = charArray_length[lengthIndex];
        for (int row = 0; row < TableSize; row++) {
            char[][] bucket = tablesForLength[row];
            for (int slot = 0; slot < InternalTableSize; slot++) {
                bucket[slot] = initCharArray;
            }
        }
    }
}
// NOTE(review): presumably the next write positions of the per-length identifier caches -
// confirm in the optimizedCurrentTokenSourceN() methods.
139 static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0, newEntry6 = 0;
140
// Codes for the kinds of brackets; BracketKinds is the number of kinds.
141 public static final int RoundBracket = 0;
142 public static final int SquareBracket = 1;
143 public static final int CurlyBracket = 2;
144 public static final int BracketKinds = 3;
/**
 * Creates a scanner with both tokenizeComments and tokenizeWhiteSpace
 * switched off, by delegating to the two-argument constructor.
 */
public JavaCodeScanner() {
    this(false, false);
}
/**
 * Creates a scanner with the given tokenizing options, delegating to the
 * three-argument constructor with false as the third argument.
 *
 * @param tokenizeComments forwarded unchanged as the first argument
 * @param tokenizeWhiteSpace forwarded unchanged as the second argument
 */
public JavaCodeScanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
    this(tokenizeComments, tokenizeWhiteSpace, false);
}
151 public final boolean atEnd() {
152 // This code is not relevant if source is
153 // Only a part of the real stream input
154
155 return source.length == currentPosition;
156 }
157 public char[] getCurrentIdentifierSource() {
158 //return the token REAL source (aka unicodes are precomputed)
159
160 char[] result;
161 if (withoutUnicodePtr != 0)
162 //0 is used as a fast test flag so the real first char is in position 1
163 System.arraycopy(
164 withoutUnicodeBuffer,
165 1,
166 result = new char[withoutUnicodePtr],
167 0,
168 withoutUnicodePtr);
169 else {
170 int length = currentPosition - startPosition;
171 switch (length) { // see OptimizedLength
172 case 1 :
173 return optimizedCurrentTokenSource1();
174 case 2 :
175 return optimizedCurrentTokenSource2();
176 case 3 :
177 return optimizedCurrentTokenSource3();
178 case 4 :
179 return optimizedCurrentTokenSource4();
180 case 5 :
181 return optimizedCurrentTokenSource5();
182 case 6 :
183 return optimizedCurrentTokenSource6();
184 }
185 //no optimization
186 System.arraycopy(source, startPosition, result = new char[length], 0, length);
187 }
188 return result;
189 }
190 public int getCurrentTokenEndPosition() {
191 return this.currentPosition - 1;
192 }
193 public final char[] getCurrentTokenSource() {
194 // Return the token REAL source (aka unicodes are precomputed)
195
196 char[] result;
197 if (withoutUnicodePtr != 0)
198 // 0 is used as a fast test flag so the real first char is in position 1
199 System.arraycopy(
200 withoutUnicodeBuffer,
201 1,
202 result = new char[withoutUnicodePtr],
203 0,
204 withoutUnicodePtr);
205 else {
206 int length;
207 System.arraycopy(
208 source,
209 startPosition,
210 result = new char[length = currentPosition - startPosition],
211 0,
212 length);
213 }
214 return result;
215 }
216 public final char[] getCurrentTokenSourceString() {
217 //return the token REAL source (aka unicodes are precomputed).
218 //REMOVE the two " that are at the beginning and the end.
219
220 char[] result;
221 if (withoutUnicodePtr != 0)
222 //0 is used as a fast test flag so the real first char is in position 1
223 System.arraycopy(withoutUnicodeBuffer, 2,
224 //2 is 1 (real start) + 1 (to jump over the ")
225 result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
226 else {
227 int length;
228 System.arraycopy(
229 source,
230 startPosition + 1,
231 result = new char[length = currentPosition - startPosition - 2],
232 0,
233 length);
234 }
235 return result;
236 }
237 public int getCurrentTokenStartPosition() {
238 return this.startPosition;
239 }
240 /*
241 * Search the source position corresponding to the end of a given line number
242 *
243 * Line numbers are 1-based, and relative to the scanner initialPosition.
244 * Character positions are 0-based.
245 *
246 * In case the given line number is inconsistent, answers -1.
247 */
248 public final int getLineEnd(int lineNumber) {
249
250 if (lineEnds == null)
251 return -1;
252 if (lineNumber >= lineEnds.length)
253 return -1;
254 if (lineNumber <= 0)
255 return -1;
256
257 if (lineNumber == lineEnds.length - 1)
258 return eofPosition;
259 return lineEnds[lineNumber - 1];
260 // next line start one character behind the lineEnd of the previous line
261 }
262 /**
263 * Search the source position corresponding to the beginning of a given line number
264 *
265 * Line numbers are 1-based, and relative to the scanner initialPosition.
266 * Character positions are 0-based.
267 *
268 * e.g. getLineStart(1) --> 0 i.e. first line starts at character 0.
269 *
270 * In case the given line number is inconsistent, answers -1.
271 */
272 public final int getLineStart(int lineNumber) {
273
274 if (lineEnds == null)
275 return -1;
276 if (lineNumber >= lineEnds.length)
277 return -1;
278 if (lineNumber <= 0)
279 return -1;
280
281 if (lineNumber == 1)
282 return initialPosition;
283 return lineEnds[lineNumber - 2] + 1;
284 // next line start one character behind the lineEnd of the previous line
285 }
286 public final boolean getNextChar(char testedChar) {
287 //BOOLEAN
288 //handle the case of unicode.
289 //when a unicode appears then we must use a buffer that holds char internal values
290 //At the end of this method currentCharacter holds the new visited char
291 //and currentPosition points right next after it
292 //Both previous lines are true if the currentCharacter is == to the testedChar
293 //On false, no side effect has occured.
294
295 //ALL getNextChar.... ARE OPTIMIZED COPIES
296
297 int temp = currentPosition;
298 try {
299 if (((currentCharacter = source[currentPosition++]) == '\\')
300 && (source[currentPosition] == 'u')) {
301 //-------------unicode traitement ------------
302 int c1, c2, c3, c4;
303 int unicodeSize = 6;
304 currentPosition++;
305 while (source[currentPosition] == 'u') {
306 currentPosition++;
307 unicodeSize++;
308 }
309
310 if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15 || c1 < 0)
311 || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15 || c2 < 0)
312 || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15 || c3 < 0)
313 || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15 || c4 < 0)) {
314 currentPosition = temp;
315 return false;
316 }
317
318 currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
319 if (currentCharacter != testedChar) {
320 currentPosition = temp;
321 return false;
322 }
323 unicodeAsBackSlash = currentCharacter == '\\';
324
325 //need the unicode buffer
326 if (withoutUnicodePtr == 0) {
327 //buffer all the entries that have been left aside....
328 withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
329 System.arraycopy(
330 source,
331 startPosition,
332 withoutUnicodeBuffer,
333 1,
334 withoutUnicodePtr);
335 }
336 //fill the buffer with the char
337 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
338 return true;
339
340 } //-------------end unicode traitement--------------
341 else {
342 if (currentCharacter != testedChar) {
343 currentPosition = temp;
344 return false;
345 }
346 unicodeAsBackSlash = false;
347 if (withoutUnicodePtr != 0)
348 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
349 return true;
350 }
351 } catch (IndexOutOfBoundsException e) {
352 unicodeAsBackSlash = false;
353 currentPosition = temp;
354 return false;
355 }
356 }
357 public final int getNextChar(char testedChar1, char testedChar2) {
358 //INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
359 //test can be done with (x==0) for the first and (x>0) for the second
360 //handle the case of unicode.
361 //when a unicode appears then we must use a buffer that holds char internal values
362 //At the end of this method currentCharacter holds the new visited char
363 //and currentPosition points right next after it
364 //Both previous lines are true if the currentCharacter is == to the testedChar1/2
365 //On false, no side effect has occured.
366
367 //ALL getNextChar.... ARE OPTIMIZED COPIES
368
369 int temp = currentPosition;
370 try {
371 int result;
372 if (((currentCharacter = source[currentPosition++]) == '\\')
373 && (source[currentPosition] == 'u')) {
374 //-------------unicode traitement ------------
375 int c1, c2, c3, c4;
376 int unicodeSize = 6;
377 currentPosition++;
378 while (source[currentPosition] == 'u') {
379 currentPosition++;
380 unicodeSize++;
381 }
382
383 if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15 || c1 < 0)
384 || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15 || c2 < 0)
385 || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15 || c3 < 0)
386 || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15 || c4 < 0)) {
387 currentPosition = temp;
388 return 2;
389 }
390
391 currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
392 if (currentCharacter == testedChar1)
393 result = 0;
394 else if (currentCharacter == testedChar2)
395 result = 1;
396 else {
397 currentPosition = temp;
398 return -1;
399 }
400
401 //need the unicode buffer
402 if (withoutUnicodePtr == 0) {
403 //buffer all the entries that have been left aside....
404 withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
405 System.arraycopy(
406 source,
407 startPosition,
408 withoutUnicodeBuffer,
409 1,
410 withoutUnicodePtr);
411 }
412 //fill the buffer with the char
413 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
414 return result;
415 } //-------------end unicode traitement--------------
416 else {
417 if (currentCharacter == testedChar1)
418 result = 0;
419 else if (currentCharacter == testedChar2)
420 result = 1;
421 else {
422 currentPosition = temp;
423 return -1;
424 }
425
426 if (withoutUnicodePtr != 0)
427 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
428 return result;
429 }
430 } catch (IndexOutOfBoundsException e) {
431 currentPosition = temp;
432 return -1;
433 }
434 }
435 public final boolean getNextCharAsDigit() {
436 //BOOLEAN
437 //handle the case of unicode.
438 //when a unicode appears then we must use a buffer that holds char internal values
439 //At the end of this method currentCharacter holds the new visited char
440 //and currentPosition points right next after it
441 //Both previous lines are true if the currentCharacter is a digit
442 //On false, no side effect has occured.
443
444 //ALL getNextChar.... ARE OPTIMIZED COPIES
445
446 int temp = currentPosition;
447 try {
448 if (((currentCharacter = source[currentPosition++]) == '\\')
449 && (source[currentPosition] == 'u')) {
450 //-------------unicode traitement ------------
451 int c1, c2, c3, c4;
452 int unicodeSize = 6;
453 currentPosition++;
454 while (source[currentPosition] == 'u') {
455 currentPosition++;
456 unicodeSize++;
457 }
458
459 if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15 || c1 < 0)
460 || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15 || c2 < 0)
461 || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15 || c3 < 0)
462 || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15 || c4 < 0)) {
463 currentPosition = temp;
464 return false;
465 }
466
467 currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
468 if (!Character.isDigit(currentCharacter)) {
469 currentPosition = temp;
470 return false;
471 }
472
473 //need the unicode buffer
474 if (withoutUnicodePtr == 0) {
475 //buffer all the entries that have been left aside....
476 withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
477 System.arraycopy(
478 source,
479 startPosition,
480 withoutUnicodeBuffer,
481 1,
482 withoutUnicodePtr);
483 }
484 //fill the buffer with the char
485 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
486 return true;
487 } //-------------end unicode traitement--------------
488 else {
489 if (!Character.isDigit(currentCharacter)) {
490 currentPosition = temp;
491 return false;
492 }
493 if (withoutUnicodePtr != 0)
494 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
495 return true;
496 }
497 } catch (IndexOutOfBoundsException e) {
498 currentPosition = temp;
499 return false;
500 }
501 }
502 public final boolean getNextCharAsDigit(int radix) {
503 //BOOLEAN
504 //handle the case of unicode.
505 //when a unicode appears then we must use a buffer that holds char internal values
506 //At the end of this method currentCharacter holds the new visited char
507 //and currentPosition points right next after it
508 //Both previous lines are true if the currentCharacter is a digit base on radix
509 //On false, no side effect has occured.
510
511 //ALL getNextChar.... ARE OPTIMIZED COPIES
512
513 int temp = currentPosition;
514 try {
515 if (((currentCharacter = source[currentPosition++]) == '\\')
516 && (source[currentPosition] == 'u')) {
517 //-------------unicode traitement ------------
518 int c1, c2, c3, c4;
519 int unicodeSize = 6;
520 currentPosition++;
521 while (source[currentPosition] == 'u') {
522 currentPosition++;
523 unicodeSize++;
524 }
525
526 if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15 || c1 < 0)
527 || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15 || c2 < 0)
528 || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15 || c3 < 0)
529 || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15 || c4 < 0)) {
530 currentPosition = temp;
531 return false;
532 }
533
534 currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
535 if (Character.digit(currentCharacter, radix) == -1) {
536 currentPosition = temp;
537 return false;
538 }
539
540 //need the unicode buffer
541 if (withoutUnicodePtr == 0) {
542 //buffer all the entries that have been left aside....
543 withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
544 System.arraycopy(
545 source,
546 startPosition,
547 withoutUnicodeBuffer,
548 1,
549 withoutUnicodePtr);
550 }
551 //fill the buffer with the char
552 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
553 return true;
554 } //-------------end unicode traitement--------------
555 else {
556 if (Character.digit(currentCharacter, radix) == -1) {
557 currentPosition = temp;
558 return false;
559 }
560 if (withoutUnicodePtr != 0)
561 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
562 return true;
563 }
564 } catch (IndexOutOfBoundsException e) {
565 currentPosition = temp;
566 return false;
567 }
568 }
569 public boolean getNextCharAsJavaIdentifierPart() {
570 //BOOLEAN
571 //handle the case of unicode.
572 //when a unicode appears then we must use a buffer that holds char internal values
573 //At the end of this method currentCharacter holds the new visited char
574 //and currentPosition points right next after it
575 //Both previous lines are true if the currentCharacter is a JavaIdentifierPart
576 //On false, no side effect has occured.
577
578 //ALL getNextChar.... ARE OPTIMIZED COPIES
579
580 int temp = currentPosition;
581 try {
582 if (((currentCharacter = source[currentPosition++]) == '\\')
583 && (source[currentPosition] == 'u')) {
584 //-------------unicode traitement ------------
585 int c1, c2, c3, c4;
586 int unicodeSize = 6;
587 currentPosition++;
588 while (source[currentPosition] == 'u') {
589 currentPosition++;
590 unicodeSize++;
591 }
592
593 if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15 || c1 < 0)
594 || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15 || c2 < 0)
595 || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15 || c3 < 0)
596 || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15 || c4 < 0)) {
597 currentPosition = temp;
598 return false;
599 }
600
601 currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
602 if (!Character.isJavaIdentifierPart(currentCharacter)) {
603 currentPosition = temp;
604 return false;
605 }
606
607 //need the unicode buffer
608 if (withoutUnicodePtr == 0) {
609 //buffer all the entries that have been left aside....
610 withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
611 System.arraycopy(
612 source,
613 startPosition,
614 withoutUnicodeBuffer,
615 1,
616 withoutUnicodePtr);
617 }
618 //fill the buffer with the char
619 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
620 return true;
621 } //-------------end unicode traitement--------------
622 else {
623 if (!Character.isJavaIdentifierPart(currentCharacter)) {
624 currentPosition = temp;
625 return false;
626 }
627
628 if (withoutUnicodePtr != 0)
629 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
630 return true;
631 }
632 } catch (IndexOutOfBoundsException e) {
633 currentPosition = temp;
634 return false;
635 }
636 }
637 public int getNextToken() throws InvalidInputException {
638
639 this.wasAcr = false;
640 if (diet) {
641 jumpOverMethodBody();
642 diet = false;
643 return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
644 }
645 try {
646 while (true) { //loop for jumping over comments
647 withoutUnicodePtr = 0;
648 //start with a new token (even comment written with unicode )
649
650 // ---------Consume white space and handles startPosition---------
651 int whiteStart = currentPosition;
652 boolean isWhiteSpace;
653 do {
654 startPosition = currentPosition;
655 if (((currentCharacter = source[currentPosition++]) == '\\')
656 && (source[currentPosition] == 'u')) {
657 isWhiteSpace = jumpOverUnicodeWhiteSpace();
658 } else {
659 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
660 checkNonExternalizeString();
661 if (recordLineSeparator) {
662 pushLineSeparator();
663 } else {
664 currentLine = null;
665 }
666 }
667 isWhiteSpace =
668 (currentCharacter == ' ')
669 || Character.isWhitespace(currentCharacter);
670 }
671 } while (isWhiteSpace);
672 if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
673 // reposition scanner in case we are interested by spaces as tokens
674 currentPosition--;
675 startPosition = whiteStart;
676 return TokenNameWHITESPACE;
677 }
678 //little trick to get out in the middle of a source compuation
679 if (currentPosition > eofPosition)
680 return TokenNameEOF;
681
682 // ---------Identify the next token-------------
683
684 switch (currentCharacter) {
685 case '(' :
686 return TokenNameLPAREN;
687 case ')' :
688 return TokenNameRPAREN;
689 case '{' :
690 return TokenNameLBRACE;
691 case '}' :
692 return TokenNameRBRACE;
693 case '[' :
694 return TokenNameLBRACKET;
695 case ']' :
696 return TokenNameRBRACKET;
697 case ';' :
698 return TokenNameSEMICOLON;
699 case ',' :
700 return TokenNameCOMMA;
701 case '.' :
702 if (getNextCharAsDigit())
703 return scanNumber(true);
704 return TokenNameDOT;
705 case '+' :
706 {
707 int test;
708 if ((test = getNextChar('+', '=')) == 0)
709 return TokenNamePLUS_PLUS;
710 if (test > 0)
711 return TokenNamePLUS_EQUAL;
712 return TokenNamePLUS;
713 }
714 case '-' :
715 {
716 int test;
717 if ((test = getNextChar('-', '=')) == 0)
718 return TokenNameMINUS_MINUS;
719 if (test > 0)
720 return TokenNameMINUS_EQUAL;
721 return TokenNameMINUS;
722 }
723 case '~' :
724 return TokenNameTWIDDLE;
725 case '!' :
726 if (getNextChar('='))
727 return TokenNameNOT_EQUAL;
728 return TokenNameNOT;
729 case '*' :
730 if (getNextChar('='))
731 return TokenNameMULTIPLY_EQUAL;
732 return TokenNameMULTIPLY;
733 case '%' :
734 if (getNextChar('='))
735 return TokenNameREMAINDER_EQUAL;
736 return TokenNameREMAINDER;
737 case '<' :
738 {
739 int test;
740 if ((test = getNextChar('=', '<')) == 0)
741 return TokenNameLESS_EQUAL;
742 if (test > 0) {
743 if (getNextChar('='))
744 return TokenNameLEFT_SHIFT_EQUAL;
745 return TokenNameLEFT_SHIFT;
746 }
747 return TokenNameLESS;
748 }
749 case '>' :
750 {
751 int test;
752 if ((test = getNextChar('=', '>')) == 0)
753 return TokenNameGREATER_EQUAL;
754 if (test > 0) {
755 if ((test = getNextChar('=', '>')) == 0)
756 return TokenNameRIGHT_SHIFT_EQUAL;
757 if (test > 0) {
758 if (getNextChar('='))
759 return TokenNameUNSIGNED_RIGHT_SHIFT_EQUAL;
760 return TokenNameUNSIGNED_RIGHT_SHIFT;
761 }
762 return TokenNameRIGHT_SHIFT;
763 }
764 return TokenNameGREATER;
765 }
766 case '=' :
767 if (getNextChar('='))
768 return TokenNameEQUAL_EQUAL;
769 return TokenNameEQUAL;
770 case '&' :
771 {
772 int test;
773 if ((test = getNextChar('&', '=')) == 0)
774 return TokenNameAND_AND;
775 if (test > 0)
776 return TokenNameAND_EQUAL;
777 return TokenNameAND;
778 }
779 case '|' :
780 {
781 int test;
782 if ((test = getNextChar('|', '=')) == 0)
783 return TokenNameOR_OR;
784 if (test > 0)
785 return TokenNameOR_EQUAL;
786 return TokenNameOR;
787 }
788 case '^' :
789 if (getNextChar('='))
790 return TokenNameXOR_EQUAL;
791 return TokenNameXOR;
792 case '?' :
793 return TokenNameQUESTION;
794 case ':' :
795 return TokenNameCOLON;
796 case '\'' :
797 {
798 int test;
799 if ((test = getNextChar('\n', '\r')) == 0) {
800 throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
801 }
802 if (test > 0) {
803 // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
804 for (int lookAhead = 0; lookAhead < 3; lookAhead++) {
805 if (currentPosition + lookAhead
806 == source.length)
807 break;
808 if (source[currentPosition + lookAhead]
809 == '\n')
810 break;
811 if (source[currentPosition + lookAhead]
812 == '\'') {
813 currentPosition += lookAhead + 1;
814 break;
815 }
816 }
817 throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
818 }
819 }
820 if (getNextChar('\'')) {
821 // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
822 for (int lookAhead = 0; lookAhead < 3; lookAhead++) {
823 if (currentPosition + lookAhead == source.length)
824 break;
825 if (source[currentPosition + lookAhead] == '\n')
826 break;
827 if (source[currentPosition + lookAhead] == '\'') {
828 currentPosition += lookAhead + 1;
829 break;
830 }
831 }
832 throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
833 }
834 if (getNextChar('\\'))
835 scanEscapeCharacter();
836 else { // consume next character
837 unicodeAsBackSlash = false;
838 if (((currentCharacter = source[currentPosition++]) == '\\')
839 && (source[currentPosition] == 'u')) {
840 getNextUnicodeChar();
841 } else {
842 if (withoutUnicodePtr != 0) {
843 withoutUnicodeBuffer[++withoutUnicodePtr] =
844 currentCharacter;
845 }
846 }
847 }
848 if (getNextChar('\''))
849 return TokenNameCharacterLiteral;
850 // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
851 for (int lookAhead = 0; lookAhead < 20; lookAhead++) {
852 if (currentPosition + lookAhead == source.length)
853 break;
854 if (source[currentPosition + lookAhead] == '\n')
855 break;
856 if (source[currentPosition + lookAhead] == '\'') {
857 currentPosition += lookAhead + 1;
858 break;
859 }
860 }
861 throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
862 case '"' :
863 try {
864 // consume next character
865 unicodeAsBackSlash = false;
866 if (((currentCharacter = source[currentPosition++]) == '\\')
867 && (source[currentPosition] == 'u')) {
868 getNextUnicodeChar();
869 } else {
870 if (withoutUnicodePtr != 0) {
871 withoutUnicodeBuffer[++withoutUnicodePtr] =
872 currentCharacter;
873 }
874 }
875
876 while (currentCharacter != '"') {
877 /**** \r and \n are not valid in string literals ****/
878 if ((currentCharacter == '\n')
879 || (currentCharacter == '\r')) {
880 // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
881 for (int lookAhead = 0;
882 lookAhead < 50;
883 lookAhead++) {
884 if (currentPosition + lookAhead
885 == source.length)
886 break;
887 if (source[currentPosition + lookAhead]
888 == '\n')
889 break;
890 if (source[currentPosition + lookAhead]
891 == '\"') {
892 currentPosition += lookAhead
893 + 1;
894 break;
895 }
896 }
897 throw new InvalidInputException(INVALID_CHAR_IN_STRING);
898 }
899 if (currentCharacter == '\\') {
900 int escapeSize = currentPosition;
901 boolean backSlashAsUnicodeInString =
902 unicodeAsBackSlash;
903 //scanEscapeCharacter make a side effect on this value and we need the previous value few lines down this one
904 scanEscapeCharacter();
905 escapeSize = currentPosition - escapeSize;
906 if (withoutUnicodePtr == 0) {
907 //buffer all the entries that have been left aside....
908 withoutUnicodePtr =
909 currentPosition
910 - escapeSize
911 - 1
912 - startPosition;
913 System.arraycopy(
914 source,
915 startPosition,
916 withoutUnicodeBuffer,
917 1,
918 withoutUnicodePtr);
919 withoutUnicodeBuffer[++withoutUnicodePtr] =
920 currentCharacter;
921 } else { //overwrite the / in the buffer
922 withoutUnicodeBuffer[withoutUnicodePtr] =
923 currentCharacter;
924 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
925 withoutUnicodePtr--;
926 }
927 }
928 }
929 // consume next character
930 unicodeAsBackSlash = false;
931 if (((currentCharacter = source[currentPosition++])
932 == '\\')
933 && (source[currentPosition] == 'u')) {
934 getNextUnicodeChar();
935 } else {
936 if (withoutUnicodePtr != 0) {
937 withoutUnicodeBuffer[++withoutUnicodePtr] =
938 currentCharacter;
939 }
940 }
941
942 }
943 } catch (IndexOutOfBoundsException e) {
944 throw new InvalidInputException(UNTERMINATED_STRING);
945 } catch (InvalidInputException e) {
946 if (e.getMessage().equals(INVALID_ESCAPE)) {
947 // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
948 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
949 if (currentPosition + lookAhead
950 == source.length)
951 break;
952 if (source[currentPosition + lookAhead]
953 == '\n')
954 break;
955 if (source[currentPosition + lookAhead]
956 == '\"') {
957 currentPosition += lookAhead + 1;
958 break;
959 }
960 }
961
962 }
963 throw e; // rethrow
964 }
965 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags //$NON-NLS-?$ where ? is an int.
966 if (currentLine == null) {
967 currentLine = new NLSLine();
968 lines.add(currentLine);
969 }
970 currentLine.add(
971 new StringLiteral(
972 getCurrentTokenSourceString(),
973 startPosition,
974 currentPosition - 1));
975 }
976 return TokenNameStringLiteral;
977 case '/' :
978 {
979 int test;
980 if ((test = getNextChar('/', '*')) == 0) { //line comment
981 int endPositionForLineComment = 0;
982 try { //get the next char
983 if (((currentCharacter =
984 source[currentPosition++])
985 == '\\')
986 && (source[currentPosition] == 'u')) {
987 //-------------unicode traitement ------------
988 int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
989 currentPosition++;
990 while (source[currentPosition]
991 == 'u') {
992 currentPosition++;
993 }
994 if ((c1 =
995 Character.getNumericValue(
996 source[currentPosition++]))
997 > 15
998 || c1 < 0
999 || (c2 =
1000 Character
1001 .getNumericValue(
1002 source[currentPosition++]))
1003 > 15
1004 || c2 < 0
1005 || (c3 =
1006 Character
1007 .getNumericValue(
1008 source[currentPosition++]))
1009 > 15
1010 || c3 < 0
1011 || (c4 =
1012 Character
1013 .getNumericValue(
1014 source[currentPosition++]))
1015 > 15
1016 || c4 < 0) {
1017 throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1018 } else {
1019 currentCharacter =
1020 (char) (((c1 * 16 + c2)
1021 * 16
1022 + c3)
1023 * 16
1024 + c4);
1025 }
1026 }
1027
1028 //handle the \\u case manually into comment
1029 if (currentCharacter == '\\') {
1030 if (source[currentPosition] == '\\')
1031 currentPosition++;
1032 } //jump over the \\
1033 boolean isUnicode = false;
1034 while (currentCharacter != '\r'
1035 && currentCharacter != '\n') {
1036 //get the next char
1037 isUnicode = false;
1038 if (((currentCharacter =
1039 source[currentPosition++])
1040 == '\\')
1041 && (source[currentPosition]
1042 == 'u')) {
1043 isUnicode = true;
1044 //-------------unicode traitement ------------
1045 int c1 = 0,
1046 c2 = 0,
1047 c3 = 0,
1048 c4 = 0;
1049 currentPosition++;
1050 while (source[currentPosition]
1051 == 'u') {
1052 currentPosition++;
1053 }
1054 if ((c1 =
1055 Character
1056 .getNumericValue(
1057 source[currentPosition++]))
1058 > 15
1059 || c1 < 0
1060 || (c2 =
1061 Character
1062 .getNumericValue(
1063 source[currentPosition++]))
1064 > 15
1065 || c2 < 0
1066 || (c3 =
1067 Character
1068 .getNumericValue(
1069 source[currentPosition++]))
1070 > 15
1071 || c3 < 0
1072 || (c4 =
1073 Character
1074 .getNumericValue(
1075 source[currentPosition++]))
1076 > 15
1077 || c4 < 0) {
1078 throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1079 } else {
1080 currentCharacter =
1081 (char) (((c1
1082 * 16
1083 + c2)
1084 * 16
1085 + c3)
1086 * 16
1087 + c4);
1088 }
1089 }
1090 //handle the \\u case manually into comment
1091 if (currentCharacter == '\\') {
1092 if (source[currentPosition]
1093 == '\\')
1094 currentPosition++;
1095 } //jump over the \\
1096 }
1097 if (isUnicode) {
1098 endPositionForLineComment =
1099 currentPosition - 6;
1100 } else {
1101 endPositionForLineComment =
1102 currentPosition - 1;
1103 }
1104 recordComment(false);
1105 if ((currentCharacter == '\r')
1106 || (currentCharacter == '\n')) {
1107 checkNonExternalizeString();
1108 if (recordLineSeparator) {
1109 if (isUnicode) {
1110 pushUnicodeLineSeparator();
1111 } else {
1112 pushLineSeparator();
1113 }
1114 } else {
1115 currentLine = null;
1116 }
1117 }
1118 if (tokenizeComments) {
1119 if (!isUnicode) {
1120 currentPosition =
1121 endPositionForLineComment;
1122 // reset one character behind
1123 }
1124 return TokenNameCOMMENT_LINE;
1125 }
1126 } catch (IndexOutOfBoundsException e) { //an eof will them be generated
1127 if (tokenizeComments) {
1128 currentPosition--;
1129 // reset one character behind
1130 return TokenNameCOMMENT_LINE;
1131 }
1132 }
1133 break;
1134 }
1135 if (test > 0) { //traditional and annotation comment
1136 boolean isJavadoc = false, star = false;
1137 // consume next character
1138 unicodeAsBackSlash = false;
1139 if (((currentCharacter = source[currentPosition++])
1140 == '\\')
1141 && (source[currentPosition] == 'u')) {
1142 getNextUnicodeChar();
1143 } else {
1144 if (withoutUnicodePtr != 0) {
1145 withoutUnicodeBuffer[++withoutUnicodePtr] =
1146 currentCharacter;
1147 }
1148 }
1149
1150 if (currentCharacter == '*') {
1151 isJavadoc = true;
1152 star = true;
1153 }
1154 if ((currentCharacter == '\r')
1155 || (currentCharacter == '\n')) {
1156 checkNonExternalizeString();
1157 if (recordLineSeparator) {
1158 pushLineSeparator();
1159 } else {
1160 currentLine = null;
1161 }
1162 }
1163 try { //get the next char
1164 if (((currentCharacter =
1165 source[currentPosition++])
1166 == '\\')
1167 && (source[currentPosition] == 'u')) {
1168 //-------------unicode traitement ------------
1169 getNextUnicodeChar();
1170 }
1171 //handle the \\u case manually into comment
1172 if (currentCharacter == '\\') {
1173 if (source[currentPosition] == '\\')
1174 currentPosition++;
1175 //jump over the \\
1176 }
1177 // empty comment is not a javadoc /**/
1178 if (currentCharacter == '/') {
1179 isJavadoc = false;
1180 }
1181 //loop until end of comment */
1182 while ((currentCharacter != '/') || (!star)) {
1183 if ((currentCharacter == '\r')
1184 || (currentCharacter == '\n')) {
1185 checkNonExternalizeString();
1186 if (recordLineSeparator) {
1187 pushLineSeparator();
1188 } else {
1189 currentLine = null;
1190 }
1191 }
1192 star = currentCharacter == '*';
1193 //get next char
1194 if (((currentCharacter =
1195 source[currentPosition++])
1196 == '\\')
1197 && (source[currentPosition]
1198 == 'u')) {
1199 //-------------unicode traitement ------------
1200 getNextUnicodeChar();
1201 }
1202 //handle the \\u case manually into comment
1203 if (currentCharacter == '\\') {
1204 if (source[currentPosition]
1205 == '\\')
1206 currentPosition++;
1207 } //jump over the \\
1208 }
1209 recordComment(isJavadoc);
1210 if (tokenizeComments) {
1211 if (isJavadoc)
1212 return TokenNameCOMMENT_JAVADOC;
1213 return TokenNameCOMMENT_BLOCK;
1214 }
1215 } catch (IndexOutOfBoundsException e) {
1216 throw new InvalidInputException(UNTERMINATED_COMMENT);
1217 }
1218 break;
1219 }
1220 if (getNextChar('='))
1221 return TokenNameDIVIDE_EQUAL;
1222 return TokenNameDIVIDE;
1223 }
1224 case '\u001a' :
1225 if (atEnd())
1226 return TokenNameEOF;
1227 //the atEnd may not be <currentPosition == source.length> if source is only some part of a real (external) stream
1228 throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
1229
1230 default :
1231 if (Character.isJavaIdentifierStart(currentCharacter))
1232 return scanIdentifierOrKeyword();
1233 if (currentCharacter == '#')
1234 return TokenNameHASH;
1235 if (currentCharacter == '@')
1236 return TokenNameEACH;
1237 if (Character.isDigit(currentCharacter))
1238 return scanNumber(false);
1239 return TokenNameERROR;
1240 }
1241 }
1242 } //-----------------end switch while try--------------------
1243 catch (IndexOutOfBoundsException e) {
1244 }
1245 return TokenNameEOF;
1246 }
1247 public final void getNextUnicodeChar() throws IndexOutOfBoundsException, InvalidInputException {
1248 //VOID
1249 //handle the case of unicode.
1250 //when a unicode appears then we must use a buffer that holds char internal values
1251 //At the end of this method currentCharacter holds the new visited char
1252 //and currentPosition points right next after it
1253
1254 //ALL getNextChar.... ARE OPTIMIZED COPIES
1255
1256 int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
1257 currentPosition++;
1258 while (source[currentPosition] == 'u') {
1259 currentPosition++;
1260 unicodeSize++;
1261 }
1262
1263 if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1264 || c1 < 0
1265 || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
1266 || c2 < 0
1267 || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
1268 || c3 < 0
1269 || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
1270 || c4 < 0) {
1271 throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1272 } else {
1273 currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1274 //need the unicode buffer
1275 if (withoutUnicodePtr == 0) {
1276 //buffer all the entries that have been left aside....
1277 withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
1278 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1279 }
1280 //fill the buffer with the char
1281 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1282 }
1283 unicodeAsBackSlash = currentCharacter == '\\';
1284 }
1285 /* Tokenize a method body, assuming that curly brackets are properly balanced.
1286 */
1287 public final void jumpOverMethodBody() {
1288
1289 this.wasAcr = false;
1290 int found = 1;
1291 try {
1292 while (true) { //loop for jumping over comments
1293 // ---------Consume white space and handles startPosition---------
1294 boolean isWhiteSpace;
1295 do {
1296 startPosition = currentPosition;
1297 if (((currentCharacter = source[currentPosition++]) == '\\')
1298 && (source[currentPosition] == 'u')) {
1299 isWhiteSpace = jumpOverUnicodeWhiteSpace();
1300 } else {
1301 if (recordLineSeparator
1302 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1303 pushLineSeparator();
1304 isWhiteSpace = Character.isWhitespace(currentCharacter);
1305 }
1306 } while (isWhiteSpace);
1307
1308 // -------consume token until } is found---------
1309 switch (currentCharacter) {
1310 case '{' :
1311 found++;
1312 break;
1313 case '}' :
1314 found--;
1315 if (found == 0)
1316 return;
1317 break;
1318 case '\'' :
1319 {
1320 boolean test;
1321 test = getNextChar('\\');
1322 if (test) {
1323 try {
1324 scanEscapeCharacter();
1325 } catch (InvalidInputException ex) {
1326 };
1327 } else {
1328 try { // consume next character
1329 unicodeAsBackSlash = false;
1330 if (((currentCharacter =
1331 source[currentPosition++])
1332 == '\\')
1333 && (source[currentPosition] == 'u')) {
1334 getNextUnicodeChar();
1335 } else {
1336 if (withoutUnicodePtr != 0) {
1337 withoutUnicodeBuffer[++withoutUnicodePtr] =
1338 currentCharacter;
1339 }
1340 }
1341 } catch (InvalidInputException ex) {
1342 };
1343 }
1344 getNextChar('\'');
1345 break;
1346 }
1347 case '"' :
1348 try {
1349 try { // consume next character
1350 unicodeAsBackSlash = false;
1351 if (((currentCharacter = source[currentPosition++])
1352 == '\\')
1353 && (source[currentPosition] == 'u')) {
1354 getNextUnicodeChar();
1355 } else {
1356 if (withoutUnicodePtr != 0) {
1357 withoutUnicodeBuffer[++withoutUnicodePtr] =
1358 currentCharacter;
1359 }
1360 }
1361 } catch (InvalidInputException ex) {
1362 };
1363 while (currentCharacter != '"') {
1364 if (currentCharacter == '\r') {
1365 if (source[currentPosition] == '\n')
1366 currentPosition++;
1367 break;
1368 // the string cannot go further that the line
1369 }
1370 if (currentCharacter == '\n') {
1371 break;
1372 // the string cannot go further that the line
1373 }
1374 if (currentCharacter == '\\') {
1375 try {
1376 scanEscapeCharacter();
1377 } catch (InvalidInputException ex) {
1378 };
1379 }
1380 try { // consume next character
1381 unicodeAsBackSlash = false;
1382 if (((currentCharacter =
1383 source[currentPosition++]