Source code: novaworx/syntax/TokenMarker.java
1 /*
2 Novaworx Development Environment
3 Copyright (C) 2000-2003 Mark Soderquist
4 Portions Copyright (C) 1998-2001 Slava Pestov
5 Portions Copyright (C) 1999-2000 Mike Dillon
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to:
19
20 Free Software Foundation, Inc.
21 59 Temple Place, Suite 330
22 Boston, MA 02111-1307 USA
23 */
24
25 package novaworx.syntax;
26
27 import java.util.*;
28 import javax.swing.text.*;
29 import cosmoworx.log.*;
30
31 /**
32 A token marker splits lines of text into tokens. Each token carries
33 a length field and an identification tag that can be mapped to a color
34 or font style for painting that token.
35
36 @author Mike Dillon
37 @author Slava Pestov
38 @author Mark Soderquist
39 @see Token
40 */
41 public class TokenMarker {
42
43 public static final int MAJOR_ACTIONS = 0x000000FF;
44 public static final int WHITESPACE = 1 << 0;
45 public static final int SPAN = 1 << 1;
46 public static final int MARK_PREVIOUS = 1 << 2;
47 public static final int MARK_FOLLOWING = 1 << 3;
48 public static final int EOL_SPAN = 1 << 4;
49 // public static final int MAJOR_ACTION_5 = 1 << 5;
50 // public static final int MAJOR_ACTION_6 = 1 << 6;
51 // public static final int MAJOR_ACTION_7 = 1 << 7;
52
53 public static final int ACTION_HINTS = 0x0000FF00;
54 public static final int EXCLUDE_MATCH = 1 << 8;
55 public static final int AT_LINE_START = 1 << 9;
56 public static final int NO_LINE_BREAK = 1 << 10;
57 public static final int NO_WORD_BREAK = 1 << 11;
58 public static final int IS_ESCAPE = 1 << 12;
59 public static final int DELEGATE = 1 << 13;
60 // public static final int ACTION_HINT_14 = 1 << 14;
61 // public static final int ACTION_HINT_15 = 1 << 15;
62
63 private static final int SOFT_SPAN = MARK_FOLLOWING | NO_WORD_BREAK;
64
65 private String msName;
66 private String msRulePrefix;
67 private Hashtable mhRuleSets;
68 private ParserRuleSet moMainRuleSet;
69
70 private SyntaxLineContext moOriginalContext;
71 private SyntaxLineContext moContext;
72 private Segment moPattern;
73 private int miLastOffset;
74 private int miLastKeyword;
75 private int miLineLength;
76 private int miPosition;
77 private boolean mbEscaped;
78
79 public TokenMarker() {
80 mhRuleSets = new Hashtable(64);
81 moPattern = new Segment();
82 }
83
84 public void addRuleSet(String asName, ParserRuleSet aoRules ) {
85 if( aoRules == null) return;
86
87 if( asName == null ) asName = "MAIN";
88
89 mhRuleSets.put( msRulePrefix.concat( asName ), aoRules );
90
91 if( "MAIN".equals( asName ) ) moMainRuleSet = aoRules;
92 }
93
94 public ParserRuleSet getMainRuleSet() {
95 return moMainRuleSet;
96 }
97
98 public ParserRuleSet getRuleSet( String asName ) {
99 ParserRuleSet oRules;
100
101 oRules = (ParserRuleSet)mhRuleSets.get( asName );
102
103 if( oRules == null && !asName.startsWith( msRulePrefix ) ) {
104 int iDelimiter = asName.indexOf( "::" );
105
106 String sSyntaxName = asName.substring( 0, iDelimiter );
107
108 Syntax oSyntax = SyntaxFactory.getSyntax( sSyntaxName );
109 if( oSyntax == null) {
110 Log.write( Log.ERROR, "Unknown syntax: " + sSyntaxName );
111 oRules = null;
112 } else {
113 TokenMarker oMarker = oSyntax.getTokenMarker();
114 oRules = oMarker.getRuleSet( asName );
115 mhRuleSets.put( asName, oRules );
116 }
117
118 }
119
120 if( oRules == null ) Log.write( Log.ERROR, "Unresolved delegate target: " + asName );
121
122 return oRules;
123 }
124
125 public String getName() {
126 return msName;
127 }
128
129 public void setName( String asName ) {
130 if( asName == null) throw new NullPointerException();
131
132 msName = asName;
133 msRulePrefix = asName.concat( "::" );
134 }
135
136 /**
137 Do not call this method directly. This class is not thread safe.
138 Call SyntaxDocument.markTokens() instead.
139 @param aoContext The new <code>SyntaxLineContext</code> of the line to mark.
140 */
141 public SyntaxLineContext markTokens( Segment aoSegment, SyntaxLineContext aoContext ) {
142 moContext = aoContext;
143 SyntaxTokenList aoTokenList = new SyntaxTokenList();
144
145 miLastOffset = miLastKeyword = aoSegment.offset;
146 miLineLength = aoSegment.count + aoSegment.offset;
147
148 int iTerminateChar = moContext.moRules.getTerminateChar();
149 int iSearchLimit = ( iTerminateChar >= 0 && iTerminateChar < aoSegment.count)
150 ? aoSegment.offset + iTerminateChar : miLineLength;
151
152 mbEscaped = false;
153
154 boolean bKeepGoing;
155 boolean bTempEscaped;
156 Segment oTempPattern;
157 ParserRule oRule;
158 SyntaxLineContext oTempContext;
159
160 for( miPosition = aoSegment.offset; miPosition < iSearchLimit; miPosition++ ) {
161 // If we are not in the top level context, we are delegated
162 if( moContext.moParent != null ) {
163 oTempContext = moContext;
164
165 moContext = moContext.moParent;
166
167 moPattern.array = moContext.moInRule.maSearchChars;
168 moPattern.count = moContext.moInRule.maSequenceLengths[1];
169 moPattern.offset = moContext.moInRule.maSequenceLengths[0];
170
171 bKeepGoing = handleRule( aoTokenList, aoSegment, moContext.moInRule );
172
173 moContext = oTempContext;
174
175 if( !bKeepGoing ) {
176 if( mbEscaped ) {
177 mbEscaped = false;
178 } else {
179 if( miPosition != miLastOffset ) {
180 if( moContext.moInRule == null ) {
181 markKeyword( aoTokenList, aoSegment, miLastKeyword, miPosition );
182 aoTokenList.addToken(
183 miPosition - miLastOffset,
184 moContext.moRules.getDefault(),
185 moContext.moRules
186 );
187 } else if( ( moContext.moInRule.miAction & ( NO_LINE_BREAK | NO_WORD_BREAK ) ) == 0 ) {
188 aoTokenList.addToken(
189 miPosition - miLastOffset,
190 moContext.moInRule.myToken,
191 moContext.moRules
192 );
193 } else {
194 aoTokenList.addToken(
195 miPosition - miLastOffset,
196 Token.INVALID,
197 moContext.moRules
198 );
199 }
200 }
201
202 moContext = (SyntaxLineContext)moContext.moParent.clone();
203
204 if( ( moContext.moInRule.miAction & EXCLUDE_MATCH ) == EXCLUDE_MATCH ) {
205 aoTokenList.addToken(
206 moPattern.count,
207 moContext.moRules.getDefault(),
208 moContext.moRules
209 );
210 } else {
211 aoTokenList.addToken(
212 moPattern.count,
213 moContext.moInRule.myToken,
214 moContext.moRules
215 );
216 }
217
218 moContext.moInRule = null;
219
220 miLastKeyword = miLastOffset = miPosition + moPattern.count;
221 }
222
223 // Move postion to last character of match sequence.
224 miPosition += ( moPattern.count - 1);
225
226 continue;
227 }
228 }
229
230 // Check the escape rule for the current context.
231 if( ( oRule = moContext.moRules.getEscapeRule() ) != null) {
232 // Assign tempPattern to mutable "buffer" moPattern.
233 oTempPattern = moPattern;
234
235 // Swap in the escape moPattern.
236 moPattern = moContext.moRules.getEscapePattern();
237
238 bTempEscaped = mbEscaped;
239
240 bKeepGoing = handleRule( aoTokenList, aoSegment, oRule);
241
242 // Swap back the buffer moPattern.
243 moPattern = oTempPattern;
244
245 if( !bKeepGoing ) {
246 if( bTempEscaped ) mbEscaped = false;
247 continue;
248 }
249 }
250
251 // If we are inside a span, check for its end sequence.
252 oRule = moContext.moInRule;
253 if( oRule != null && ( oRule.miAction & SPAN ) == SPAN ) {
254 moPattern.array = oRule.maSearchChars;
255 moPattern.count = oRule.maSequenceLengths[1];
256 moPattern.offset = oRule.maSequenceLengths[0];
257
258 // If we match the end of the span, or if this is a "hard" span,
259 // continue to the next character; otherwise, check all
260 // applicable rules below.
261 if( !handleRule( aoTokenList, aoSegment, oRule ) || ( oRule.miAction & SOFT_SPAN ) == 0 ) {
262 mbEscaped = false;
263 continue;
264 }
265 }
266
267 // Now check every rule.
268 oRule = moContext.moRules.getRules( aoSegment.array[ miPosition ] );
269 while( oRule != null) {
270 moPattern.array = oRule.maSearchChars;
271
272 if( moContext.moInRule == oRule && ( oRule.miAction & SPAN ) == SPAN ) {
273 moPattern.count = oRule.maSequenceLengths[1];
274 moPattern.offset = oRule.maSequenceLengths[0];
275 } else {
276 moPattern.count = oRule.maSequenceLengths[0];
277 moPattern.offset = 0;
278 }
279
280 // Stop checking rules if there was a match and go to next position.
281 if( !handleRule( aoTokenList, aoSegment, oRule ) ) break;
282
283 oRule = oRule.moNext;
284 }
285
286 mbEscaped = false;
287 }
288 // Done scanning line for tokens.
289
290 // Check for keywords at the end of the line.
291 if( moContext.moInRule == null ) markKeyword( aoTokenList, aoSegment, miLastKeyword, miLineLength);
292
293 // Mark all remaining characters.
294 if( miLastOffset != miLineLength) {
295 if( moContext.moInRule == null ) {
296 aoTokenList.addToken(
297 miLineLength - miLastOffset,
298 moContext.moRules.getDefault(),
299 moContext.moRules
300 );
301 } else if(
302 ( moContext.moInRule.miAction & SPAN ) == SPAN &&
303 ( moContext.moInRule.miAction & ( NO_LINE_BREAK | NO_WORD_BREAK ) ) != 0
304 ) {
305 aoTokenList.addToken(
306 miLineLength - miLastOffset,
307 Token.INVALID,
308 moContext.moRules
309 );
310 moContext.moInRule = null;
311 } else {
312 aoTokenList.addToken(
313 miLineLength - miLastOffset,
314 moContext.moInRule.myToken,
315 moContext.moRules
316 );
317
318 if( ( moContext.moInRule.miAction & MARK_FOLLOWING ) == MARK_FOLLOWING ) {
319 moContext.moInRule = null;
320 }
321 }
322 }
323
324 aoTokenList.addToken( 0, Token.END, moContext.moRules );
325
326 moContext.moTokenList = aoTokenList;
327
328 return moContext;
329 }
330
331 /**
332 Checks if the rule matches the line at the current position
333 and handles the rule if so.
334 @param aoTokenList List of tokens in line.
335 @param aoSegment Segment to check rule against
336 @param aoCheckRule ParserRule to check against line
337 @return true: Keep checking other rules. <br>false: Stop checking other rules.
338 */
339 private boolean handleRule(
340 SyntaxTokenList aoTokenList,
341 Segment aoSegment,
342 ParserRule aoCheckRule
343 ) {
344 if( moPattern.count == 0 ) return true;
345
346 if( miLineLength - miPosition < moPattern.count ) return true;
347
348 char cA, cB;
349 for( int iIndex = 0; iIndex < moPattern.count; iIndex++) {
350 cA = moPattern.array[ moPattern.offset + iIndex ];
351 cB = aoSegment.array[ miPosition + iIndex ];
352
353 // Break out and check the next rule if there is a mismatch.
354 if(
355 !(
356 cA == cB || moContext.moRules.getIgnoreCase() && (
357 Character.toLowerCase( cA ) == cB || cA == Character.toLowerCase( cB )
358 )
359 )
360 ) return true;
361 }
362
363 if( mbEscaped ) {
364 miPosition += moPattern.count - 1;
365 return false;
366 } else if( ( aoCheckRule.miAction & IS_ESCAPE ) == IS_ESCAPE ) {
367 mbEscaped = true;
368 miPosition += moPattern.count - 1;
369 return false;
370 }
371
372 //{{{ handle soft spans
373 if(
374 moContext.moInRule != null &&
375 moContext.moInRule != aoCheckRule &&
376 ( moContext.moInRule.miAction & SOFT_SPAN ) != 0
377 ) {
378 if( ( moContext.moInRule.miAction & NO_WORD_BREAK ) == NO_WORD_BREAK ) {
379 aoTokenList.addToken(
380 miPosition - miLastOffset,
381 Token.INVALID,
382 moContext.moRules
383 );
384 } else {
385 aoTokenList.addToken(
386 miPosition - miLastOffset,
387 moContext.moInRule.myToken,
388 moContext.moRules
389 );
390 }
391 miLastOffset = miLastKeyword = miPosition;
392 moContext.moInRule = null;
393 }
394
395 // Not inside a rule.
396 if( moContext.moInRule == null ) {
397 //Log.write( Log.CONFIG, "Not inside a rule..." );
398 if( ( aoCheckRule.miAction & AT_LINE_START ) == AT_LINE_START ) {
399 if( ( ( ( aoCheckRule.miAction & MARK_PREVIOUS) != 0) ? miLastKeyword : miPosition ) != aoSegment.offset ) {
400 return true;
401 }
402 }
403
404 markKeyword( aoTokenList, aoSegment, miLastKeyword, miPosition);
405
406 if( ( aoCheckRule.miAction & MARK_PREVIOUS ) != MARK_PREVIOUS ) {
407 miLastKeyword = miPosition + moPattern.count;
408
409 if( ( aoCheckRule.miAction & WHITESPACE ) == WHITESPACE ) {
410 return false;
411 }
412
413 // Mark previous sequence as NULL (plain text).
414 if( miLastOffset < miPosition ) {
415 aoTokenList.addToken(
416 miPosition - miLastOffset,
417 moContext.moRules.getDefault(),
418 moContext.moRules
419 );
420 }
421 }
422
423 switch( aoCheckRule.miAction & MAJOR_ACTIONS ) {
424 case 0: {
425 // This is a plain sequence rule.
426 aoTokenList.addToken(
427 moPattern.count,
428 aoCheckRule.myToken,
429 moContext.moRules
430 );
431
432 miLastOffset = miPosition + moPattern.count;
433
434 break;
435 }
436 case SPAN: {
437 moContext.moInRule = aoCheckRule;
438
439 // Non-delegated.
440 if( ( aoCheckRule.miAction & DELEGATE ) != DELEGATE ) {
441 if( ( aoCheckRule.miAction & EXCLUDE_MATCH ) == EXCLUDE_MATCH ) {
442 aoTokenList.addToken(
443 moPattern.count,
444 moContext.moRules.getDefault(),
445 moContext.moRules
446 );
447 miLastOffset = miPosition + moPattern.count;
448 } else {
449 miLastOffset = miPosition;
450 }
451 } else {
452 String sName = new String(
453 aoCheckRule.maSearchChars,
454 aoCheckRule.maSequenceLengths[0] + aoCheckRule.maSequenceLengths[1],
455 aoCheckRule.maSequenceLengths[2]
456 );
457
458 ParserRuleSet oDelegateSet = getRuleSet( sName );
459
460 if( oDelegateSet != null ) {
461 if( ( aoCheckRule.miAction & EXCLUDE_MATCH) == EXCLUDE_MATCH ) {
462 aoTokenList.addToken(
463 moPattern.count,
464 moContext.moRules.getDefault(),
465 moContext.moRules
466 );
467 } else {
468 aoTokenList.addToken(
469 moPattern.count,
470 aoCheckRule.myToken,
471 moContext.moRules
472 );
473 }
474 miLastOffset = miPosition + moPattern.count;
475
476 moContext = new SyntaxLineContext( oDelegateSet, moContext );
477 }
478 }
479
480 break;
481 }
482 case EOL_SPAN: {
483 if( ( aoCheckRule.miAction & EXCLUDE_MATCH ) == EXCLUDE_MATCH ) {
484 aoTokenList.addToken(
485 moPattern.count,
486 moContext.moRules.getDefault(),
487 moContext.moRules
488 );
489 aoTokenList.addToken(
490 miLineLength - (miPosition + moPattern.count),
491 aoCheckRule.myToken,
492 moContext.moRules
493 );
494 } else {
495 aoTokenList.addToken(
496 miLineLength - miPosition,
497 aoCheckRule.myToken,
498 moContext.moRules
499 );
500 }
501 miLastOffset = miLineLength;
502 miLastKeyword = miLineLength;
503 miPosition = miLineLength;
504
505 return false;
506 }
507 case MARK_PREVIOUS: {
508 if( miLastKeyword > miLastOffset ) {
509 aoTokenList.addToken(
510 miLastKeyword - miLastOffset,
511 moContext.moRules.getDefault(),
512 moContext.moRules
513 );
514 miLastOffset = miLastKeyword;
515 }
516
517 if( ( aoCheckRule.miAction & EXCLUDE_MATCH ) == EXCLUDE_MATCH ) {
518 aoTokenList.addToken(
519 miPosition - miLastOffset,
520 aoCheckRule.myToken,
521 moContext.moRules
522 );
523 aoTokenList.addToken(
524 moPattern.count,
525 moContext.moRules.getDefault(),
526 moContext.moRules
527 );
528 } else {
529 aoTokenList.addToken(
530 miPosition - miLastOffset + moPattern.count,
531 aoCheckRule.myToken,
532 moContext.moRules
533 );
534 }
535
536 miLastOffset = miPosition + moPattern.count;
537 break;
538 }
539 case MARK_FOLLOWING: {
540 moContext.moInRule = aoCheckRule;
541 if( ( aoCheckRule.miAction & EXCLUDE_MATCH ) == EXCLUDE_MATCH ) {
542 aoTokenList.addToken(
543 moPattern.count,
544 moContext.moRules.getDefault(),
545 moContext.moRules
546 );
547 miLastOffset = miPosition + moPattern.count;
548 } else {
549 miLastOffset = miPosition;
550 }
551 break;
552 }
553 default: {
554 throw new InternalError( "Unhandled major action" );
555 }
556 }
557
558 miLastKeyword = miLastOffset;
559
560 // Move position to last character of match sequence.
561 miPosition += ( moPattern.count - 1 );
562
563 // Break out of inner for loop to check next char.
564 return false;
565 } else if( ( aoCheckRule.miAction & SPAN ) == SPAN ) {
566 if( ( aoCheckRule.miAction & DELEGATE ) != DELEGATE ) {
567 moContext.moInRule = null;
568 //Log.write( Log.CONFIG, "Using delegate: " + new String( aoCheckRule.maSearchChars ) );
569 if( ( aoCheckRule.miAction & EXCLUDE_MATCH ) == EXCLUDE_MATCH ) {
570 aoTokenList.addToken(
571 miPosition - miLastOffset,
572 aoCheckRule.myToken,
573 moContext.moRules
574 );
575 aoTokenList.addToken(
576 moPattern.count,
577 moContext.moRules.getDefault(),
578 moContext.moRules
579 );
580 } else {
581 //Log.write( Log.CONFIG, "Adding token: " + aoCheckRule.myToken );
582 aoTokenList.addToken(
583 (miPosition + moPattern.count) - miLastOffset,
584 aoCheckRule.myToken,
585 moContext.moRules
586 );
587 }
588
589 miLastKeyword = miLastOffset = miPosition + moPattern.count;
590
591 // Move position to last character of match sequence.
592 miPosition += (moPattern.count - 1);
593 }
594
595 // Break out of inner for loop to check next char.
596 return false;
597 }
598
599 return true;
600 }
601
602 private void markKeyword(
603 SyntaxTokenList aoTokenList,
604 Segment aoSegment,
605 int aiStart,
606 int aiEnd
607 ) {
608 KeywordMap oKeywords = moContext.moRules.getKeywords();
609
610 int iLength = aiEnd - aiStart;
611
612 // Do digits.
613
614 // Right now, this is hardcoded to handle these cases:
615 // 1234
616 // 0x1234abcf
617 // 1234l
618 // 12.34f
619 // 12.34d
620 // In the future, we need some sort of regexp mechanism.
621
622 if( moContext.moRules.getHighlightDigits() ) {
623 char[] aCharacters = aoSegment.array;
624 boolean bDigit = true;
625 boolean bOctal = false;
626 boolean bHex = false;
627 boolean bSeenSomeDigits = false;
628
629 loop:
630 for(int iIndex = 0; iIndex < iLength; iIndex++) {
631 char cChar = aCharacters[ aiStart+ iIndex ];
632 switch( cChar ) {
633 case '0': {
634 if( iIndex == 0) bOctal = true;
635 bSeenSomeDigits = true;
636 continue loop;
637 }
638 case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': {
639 bSeenSomeDigits = true;
640 continue loop;
641 }
642 case 'x': case 'X': {
643 if( bOctal && iIndex == 1) {
644 bHex = true;
645 continue loop;
646 } else {
647 break;
648 }
649 }
650 case 'd': case 'D': case 'f': case 'F': {
651 if( bHex ) {
652 continue loop;
653 } else if( iIndex == iLength - 1 && bSeenSomeDigits ) {
654 continue loop;
655 } else {
656 break;
657 }
658 }
659 case 'l': case 'L': {
660 if( iIndex == iLength - 1 && bSeenSomeDigits ) {
661 continue loop;
662 } else {
663 break;
664 }
665 }
666 case 'e': case 'E': {
667 if( bSeenSomeDigits ) {
668 continue loop;
669 } else {
670 break;
671 }
672 }
673 case 'a': case 'A': case 'b': case 'B': case 'c': case 'C': {
674 if( bHex ) {
675 continue loop;
676 } else {
677 break;
678 }
679 }
680 case '.': case '-': {
681 continue loop;
682 // Normally, this shouldn't be necessary, because most modes
683 // define '.' and '-' SEQs. However, in props mode, we can't
684 // define such a SEQ because it would break the AT_LINE_START
685 // MARK_PREVIOUS rule.
686 }
687 default: {
688 break;
689 }
690 }
691
692 // if we ended up here, then we have found a
693 // non-bDigit character.
694 bDigit = false;
695 break loop;
696 }
697
698 // If we got this far with bDigit = true, then the keyword
699 // consists of all digits. Add it as such.
700 if( bDigit && bSeenSomeDigits ) {
701 if( aiStart != miLastOffset ) {
702 aoTokenList.addToken(
703 aiStart - miLastOffset,
704 moContext.moRules.getDefault(),
705 moContext.moRules
706 );
707 }
708 aoTokenList.addToken( iLength, Token.DIGIT, moContext.moRules );
709 miLastKeyword = miLastOffset = aiEnd;
710
711 return;
712 }
713 }
714
715 if( oKeywords != null) {
716 byte yID = oKeywords.lookup( aoSegment, aiStart, iLength );
717
718 if( yID != Token.NULL ) {
719 if( aiStart != miLastOffset ) {
720 aoTokenList.addToken(
721 aiStart - miLastOffset,
722 moContext.moRules.getDefault(),
723 moContext.moRules
724 );
725 }
726 aoTokenList.addToken( iLength, yID, moContext.moRules );
727 miLastKeyword = miLastOffset = aiEnd;
728 }
729 }
730 }
731
732 }