Docjar: A Java Source and Document Engine    com.*    java.*    javax.*    org.*    all    new    plug-in

Quick Search    Search Deep

Source code: novaworx/syntax/TokenMarker.java


1   /*
2   Novaworx Development Environment
3   Copyright (C) 2000-2003 Mark Soderquist
4   Portions Copyright (C) 1998-2001 Slava Pestov
5   Portions Copyright (C) 1999-2000 Mike Dillon
6   
7   This program is free software; you can redistribute it and/or modify
8   it under the terms of the GNU General Public License as published by
9   the Free Software Foundation; either version 2 of the License, or
10  (at your option) any later version.
11  
12  This program is distributed in the hope that it will be useful,
13  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  GNU General Public License for more details.
16  
17  You should have received a copy of the GNU General Public License
18  along with this program; if not, write to:
19  
20  Free Software Foundation, Inc.
21  59 Temple Place, Suite 330
22  Boston, MA 02111-1307 USA
23  */
24  
25  package novaworx.syntax;
26  
27  import java.util.*;
28  import javax.swing.text.*;
29  import cosmoworx.log.*;
30  
31  /**
32  A token marker splits lines of text into tokens. Each token carries
33  a length field and an identification tag that can be mapped to a color
34  or font style for painting that token.
35  
36  @author Mike Dillon
37  @author Slava Pestov
38  @author Mark Soderquist
39  @see Token
40  */
41  public class TokenMarker {
42  
      // ---- Major actions -------------------------------------------------
      // The low byte of a rule's action word selects what kind of rule it is;
      // handleRule() switches on (miAction & MAJOR_ACTIONS).
43    public static final int MAJOR_ACTIONS = 0x000000FF;
      // A match only ends the pending keyword; no token is added for the
      // matched sequence at match time.
44    public static final int WHITESPACE = 1 << 0;
      // A match opens a span that stays the current in-rule until its end
      // sequence is matched.
45    public static final int SPAN = 1 << 1;
      // On a match, the text since the last keyword boundary receives the
      // rule's token.
46    public static final int MARK_PREVIOUS = 1 << 2;
      // On a match, the rule becomes the current in-rule, so the text that
      // follows receives the rule's token.
47    public static final int MARK_FOLLOWING = 1 << 3;
      // On a match, the remainder of the line receives the rule's token.
48    public static final int EOL_SPAN = 1 << 4;
49  //  public static final int MAJOR_ACTION_5 = 1 << 5;
50  //  public static final int MAJOR_ACTION_6 = 1 << 6;
51  //  public static final int MAJOR_ACTION_7 = 1 << 7;
52  
      // ---- Action hints --------------------------------------------------
      // Mask covering bits 8 to 15, the range the hint flags below live in.
      // The mask itself is not used by any method of this class.
53    public static final int ACTION_HINTS = 0x0000FF00;
      // The matched sequence itself gets the rule set's default token instead
      // of the rule's token.
54    public static final int EXCLUDE_MATCH = 1 << 8;
      // The rule is only applied when it starts at the segment offset (for
      // MARK_PREVIOUS rules: when the pending keyword starts there).
55    public static final int AT_LINE_START = 1 << 9;
      // With this hint (or NO_WORD_BREAK), the text of a span that is still
      // open at the end of the line is marked Token.INVALID.
56    public static final int NO_LINE_BREAK = 1 << 10;
      // Like NO_LINE_BREAK at end of line; additionally part of SOFT_SPAN, in
      // which case the text collected so far is marked Token.INVALID when
      // another rule interrupts.
57    public static final int NO_WORD_BREAK = 1 << 11;
      // The rule is an escape sequence: a match sets the escape flag so that
      // the next match is consumed without effect.
58    public static final int IS_ESCAPE = 1 << 12;
      // The span's contents are tokenized with another rule set, whose name is
      // stored in the rule's search characters after the two sequences.
59    public static final int DELEGATE = 1 << 13;
60  //  public static final int ACTION_HINT_14 = 1 << 14;
61  //  public static final int ACTION_HINT_15 = 1 << 15;
62  
      // An in-rule carrying any of these bits is ended by handleRule() as soon
      // as a different rule matches.
63    private static final int SOFT_SPAN = MARK_FOLLOWING | NO_WORD_BREAK;
64  
      // Name assigned through setName(); null until then.
65    private String msName;
      // msName followed by "::"; prepended to rule-set names in addRuleSet().
66    private String msRulePrefix;
      // Rule sets keyed by qualified name ("<marker name>::<rule set name>").
67    private Hashtable mhRuleSets;
      // The rule set that was registered under the name "MAIN".
68    private ParserRuleSet moMainRuleSet;
69  
      // Not read or written by any method of this class.
70    private SyntaxLineContext moOriginalContext;
      // Context currently used for matching; replaced while scanning when a
      // delegate rule set is entered or left.
71    private SyntaxLineContext moContext;
      // Reusable description (array, offset, count) of the character sequence
      // that handleRule() compares against the line.
72    private Segment moPattern;
      // Index of the first character that is not yet covered by a token.
73    private int miLastOffset;
      // Index at which the pending keyword candidate starts.
74    private int miLastKeyword;
      // Index just past the last character of the line in the segment array
      // (segment offset + count), not a character count.
75    private int miLineLength;
      // Index of the character currently being examined.
76    private int miPosition;
      // True when the previous match was an escape sequence.
77    private boolean mbEscaped;
78  
79    public TokenMarker() {
80      mhRuleSets = new Hashtable(64);
81      moPattern = new Segment();
82    }
83  
84    public void addRuleSet(String asName, ParserRuleSet aoRules ) {
85      if( aoRules == null) return;
86  
87      if( asName == null ) asName = "MAIN";
88  
89      mhRuleSets.put( msRulePrefix.concat( asName ), aoRules );
90  
91      if( "MAIN".equals( asName ) ) moMainRuleSet = aoRules;
92    }
93  
94    public ParserRuleSet getMainRuleSet() {
95      return moMainRuleSet;
96    }
97  
98    public ParserRuleSet getRuleSet( String asName ) {
99      ParserRuleSet oRules;
100 
101     oRules = (ParserRuleSet)mhRuleSets.get( asName );
102 
103     if( oRules == null && !asName.startsWith( msRulePrefix ) ) {
104       int iDelimiter = asName.indexOf( "::" );
105 
106       String sSyntaxName = asName.substring( 0, iDelimiter );
107 
108       Syntax oSyntax = SyntaxFactory.getSyntax( sSyntaxName );
109       if( oSyntax == null) {
110         Log.write( Log.ERROR, "Unknown syntax: " + sSyntaxName );
111         oRules = null;
112       } else {
113         TokenMarker oMarker = oSyntax.getTokenMarker();
114         oRules = oMarker.getRuleSet( asName );
115         mhRuleSets.put( asName, oRules );
116       }
117 
118     }
119 
120     if( oRules == null ) Log.write( Log.ERROR, "Unresolved delegate target: " + asName );
121 
122     return oRules;
123   }
124 
125   public String getName() {
126     return msName;
127   }
128 
129   public void setName( String asName ) {
130     if( asName == null) throw new NullPointerException();
131 
132     msName = asName;
133     msRulePrefix = asName.concat( "::" );
134   }
135 
136   /**
137   Do not call this method directly. This class is not thread safe.
138   Call SyntaxDocument.markTokens() instead.
139   @param aoContext The new <code>SyntaxLineContext</code> of the line to mark.
140   */
141   public SyntaxLineContext markTokens( Segment aoSegment, SyntaxLineContext aoContext ) {
142     moContext = aoContext;
143     SyntaxTokenList aoTokenList = new SyntaxTokenList();
144 
145     miLastOffset = miLastKeyword = aoSegment.offset;
146     miLineLength = aoSegment.count + aoSegment.offset;
147 
148     int iTerminateChar = moContext.moRules.getTerminateChar();
149     int iSearchLimit = ( iTerminateChar >= 0 && iTerminateChar < aoSegment.count)
150       ? aoSegment.offset + iTerminateChar : miLineLength;
151 
152     mbEscaped = false;
153 
154     boolean bKeepGoing;
155     boolean bTempEscaped;
156     Segment oTempPattern;
157     ParserRule oRule;
158     SyntaxLineContext oTempContext;
159 
160     for( miPosition = aoSegment.offset; miPosition < iSearchLimit; miPosition++ ) {
161       // If we are not in the top level context, we are delegated
162       if( moContext.moParent != null ) {
163         oTempContext = moContext;
164 
165         moContext = moContext.moParent;
166 
167         moPattern.array = moContext.moInRule.maSearchChars;
168         moPattern.count = moContext.moInRule.maSequenceLengths[1];
169         moPattern.offset = moContext.moInRule.maSequenceLengths[0];
170 
171         bKeepGoing = handleRule( aoTokenList, aoSegment, moContext.moInRule );
172 
173         moContext = oTempContext;
174 
175         if( !bKeepGoing ) {
176           if( mbEscaped ) {
177             mbEscaped = false;
178           } else {
179             if( miPosition != miLastOffset ) {
180               if( moContext.moInRule == null ) {
181                 markKeyword( aoTokenList, aoSegment, miLastKeyword, miPosition );
182                 aoTokenList.addToken(
183                   miPosition - miLastOffset,
184                   moContext.moRules.getDefault(),
185                   moContext.moRules
186                 );
187               } else if( ( moContext.moInRule.miAction & ( NO_LINE_BREAK | NO_WORD_BREAK ) ) == 0 ) {
188                 aoTokenList.addToken(
189                   miPosition - miLastOffset,
190                   moContext.moInRule.myToken,
191                   moContext.moRules
192                 );
193               } else {
194                 aoTokenList.addToken(
195                   miPosition - miLastOffset,
196                   Token.INVALID,
197                   moContext.moRules
198                 );
199               }
200             }
201 
202             moContext = (SyntaxLineContext)moContext.moParent.clone();
203 
204             if( ( moContext.moInRule.miAction & EXCLUDE_MATCH ) == EXCLUDE_MATCH ) {
205               aoTokenList.addToken(
206                 moPattern.count,
207                 moContext.moRules.getDefault(),
208                 moContext.moRules
209               );
210             } else {
211               aoTokenList.addToken(
212                 moPattern.count,
213                 moContext.moInRule.myToken,
214                 moContext.moRules
215               );
216             }
217 
218             moContext.moInRule = null;
219 
220             miLastKeyword = miLastOffset = miPosition + moPattern.count;
221           }
222 
223           // Move postion to last character of match sequence.
224           miPosition += ( moPattern.count - 1);
225 
226           continue;
227         }
228       }
229 
230       // Check the escape rule for the current context.
231       if( ( oRule = moContext.moRules.getEscapeRule() ) != null) {
232         // Assign tempPattern to mutable "buffer" moPattern.
233         oTempPattern = moPattern;
234 
235         // Swap in the escape moPattern.
236         moPattern = moContext.moRules.getEscapePattern();
237 
238         bTempEscaped = mbEscaped;
239 
240         bKeepGoing = handleRule( aoTokenList, aoSegment, oRule);
241 
242         // Swap back the buffer moPattern.
243         moPattern = oTempPattern;
244 
245         if( !bKeepGoing ) {
246           if( bTempEscaped ) mbEscaped = false;
247           continue;
248         }
249       }
250 
251       // If we are inside a span, check for its end sequence.
252       oRule = moContext.moInRule;
253       if( oRule != null && ( oRule.miAction & SPAN ) == SPAN ) {
254         moPattern.array = oRule.maSearchChars;
255         moPattern.count = oRule.maSequenceLengths[1];
256         moPattern.offset = oRule.maSequenceLengths[0];
257 
258         // If we match the end of the span, or if this is a "hard" span,
259         // continue to the next character; otherwise, check all
260         // applicable rules below.
261         if( !handleRule( aoTokenList, aoSegment, oRule ) || ( oRule.miAction & SOFT_SPAN ) == 0 ) {
262           mbEscaped = false;
263           continue;
264         }
265       }
266 
267       // Now check every rule.
268       oRule = moContext.moRules.getRules( aoSegment.array[ miPosition ] );
269       while( oRule != null) {
270         moPattern.array = oRule.maSearchChars;
271 
272         if( moContext.moInRule == oRule && ( oRule.miAction & SPAN ) == SPAN ) {
273           moPattern.count = oRule.maSequenceLengths[1];
274           moPattern.offset = oRule.maSequenceLengths[0];
275         } else {
276           moPattern.count = oRule.maSequenceLengths[0];
277           moPattern.offset = 0;
278         }
279 
280         // Stop checking rules if there was a match and go to next position.
281         if( !handleRule( aoTokenList, aoSegment, oRule ) ) break;
282 
283         oRule = oRule.moNext;
284       }
285 
286       mbEscaped = false;
287     }
288     // Done scanning line for tokens.
289 
290     // Check for keywords at the end of the line.
291     if( moContext.moInRule == null ) markKeyword( aoTokenList, aoSegment, miLastKeyword, miLineLength);
292 
293     // Mark all remaining characters.
294     if( miLastOffset != miLineLength) {
295       if( moContext.moInRule == null ) {
296         aoTokenList.addToken(
297           miLineLength - miLastOffset,
298           moContext.moRules.getDefault(),
299           moContext.moRules
300         );
301       } else if(
302         ( moContext.moInRule.miAction & SPAN ) == SPAN &&
303         ( moContext.moInRule.miAction & ( NO_LINE_BREAK | NO_WORD_BREAK ) ) != 0
304       ) {
305         aoTokenList.addToken(
306           miLineLength - miLastOffset,
307           Token.INVALID,
308           moContext.moRules
309         );
310         moContext.moInRule = null;
311       } else {
312         aoTokenList.addToken(
313           miLineLength - miLastOffset,
314           moContext.moInRule.myToken,
315           moContext.moRules
316         );
317 
318         if( ( moContext.moInRule.miAction & MARK_FOLLOWING ) == MARK_FOLLOWING ) {
319           moContext.moInRule = null;
320         }
321       }
322     }
323 
324     aoTokenList.addToken( 0, Token.END, moContext.moRules );
325 
326     moContext.moTokenList = aoTokenList;
327 
328     return moContext;
329   }
330 
331   /**
332   Checks if the rule matches the line at the current position
333   and handles the rule if so.

      The character sequence to compare is taken from the <code>moPattern</code>
      field, which the caller must set up before the call. On a match the method
      may append tokens, update <code>miLastOffset</code>,
      <code>miLastKeyword</code> and <code>mbEscaped</code>, replace or modify
      <code>moContext</code>, and advance <code>miPosition</code> to the last
      character of the matched sequence.

334   @param aoTokenList List of tokens in line.
335   @param aoSegment Segment to check rule against
336   @param aoCheckRule ParserRule to check against line
337   @return true: Keep checking other rules. <br>false: Stop checking other rules.
338   */
339   private boolean handleRule(
340     SyntaxTokenList aoTokenList,
341     Segment aoSegment,
342     ParserRule aoCheckRule
343   ) {
        // An empty pattern never matches.
344     if( moPattern.count == 0 ) return true;
345 
        // Not enough characters left on the line for the pattern to fit.
346     if( miLineLength - miPosition < moPattern.count ) return true;
347 
        // Compare the pattern with the line, character by character. When the
        // current rule set ignores case, a pair also matches if lower-casing
        // either character makes the two equal.
348     char cA, cB;
349     for( int iIndex = 0; iIndex < moPattern.count; iIndex++) {
350       cA = moPattern.array[ moPattern.offset + iIndex ];
351       cB = aoSegment.array[ miPosition + iIndex ];
352 
353       // Break out and check the next rule if there is a mismatch.
354       if(
355         !(
356           cA == cB || moContext.moRules.getIgnoreCase() && (
357             Character.toLowerCase( cA ) == cB || cA == Character.toLowerCase( cB )
358           )
359         )
360       ) return true;
361     }
362 
        // The pattern matched. If an escape is pending, the match is consumed
        // without any effect (the flag is left for the caller to clear). If
        // the rule is itself an escape sequence, arm the escape flag.
363     if( mbEscaped ) {
364       miPosition += moPattern.count - 1;
365       return false;
366     } else if( ( aoCheckRule.miAction & IS_ESCAPE ) == IS_ESCAPE ) {
367       mbEscaped = true;
368       miPosition += moPattern.count - 1;
369       return false;
370     }
371 
372     //{{{ handle soft spans
        // A different rule matched while a soft in-rule is active: emit the
        // text collected so far and end the in-rule, so that the matched rule
        // is handled below as if no rule were active.
373     if(
374       moContext.moInRule != null &&
375       moContext.moInRule != aoCheckRule &&
376       ( moContext.moInRule.miAction & SOFT_SPAN ) != 0
377     ) {
378       if( ( moContext.moInRule.miAction & NO_WORD_BREAK ) == NO_WORD_BREAK ) {
379         aoTokenList.addToken(
380           miPosition - miLastOffset,
381           Token.INVALID,
382           moContext.moRules
383         );
384       } else {
385         aoTokenList.addToken(
386           miPosition - miLastOffset,
387           moContext.moInRule.myToken,
388           moContext.moRules
389         );
390       }
391       miLastOffset = miLastKeyword = miPosition;
392       moContext.moInRule = null;
393     }
394 
395     // Not inside a rule.
396     if( moContext.moInRule == null ) {
397       //Log.write( Log.CONFIG, "Not inside a rule..." );
          // An AT_LINE_START rule is rejected unless it starts at the segment
          // offset; for MARK_PREVIOUS rules the start of the pending keyword
          // is tested instead of the match position.
398       if( ( aoCheckRule.miAction & AT_LINE_START ) == AT_LINE_START ) {
399         if( ( ( ( aoCheckRule.miAction & MARK_PREVIOUS) != 0) ? miLastKeyword : miPosition ) != aoSegment.offset ) {
400           return true;
401         }
402       }
403 
          // The match ends the pending word; tokenize it if it is a number or
          // a keyword.
404       markKeyword( aoTokenList, aoSegment, miLastKeyword, miPosition);
405 
406       if( ( aoCheckRule.miAction & MARK_PREVIOUS ) != MARK_PREVIOUS ) {
            // The next keyword candidate starts after the matched sequence.
407         miLastKeyword = miPosition + moPattern.count;
408 
            // A WHITESPACE rule only separates keywords: no token is added and
            // miPosition is not advanced.
409         if( ( aoCheckRule.miAction & WHITESPACE ) == WHITESPACE ) {
410           return false;
411         }
412 
413         // Mark previous sequence as NULL (plain text).
414         if( miLastOffset < miPosition ) {
415           aoTokenList.addToken(
416             miPosition - miLastOffset,
417             moContext.moRules.getDefault(),
418             moContext.moRules
419           );
420         }
421       }
422 
423       switch( aoCheckRule.miAction & MAJOR_ACTIONS ) {
424         case 0: {
425           // This is a plain sequence rule.
426           aoTokenList.addToken(
427             moPattern.count,
428             aoCheckRule.myToken,
429             moContext.moRules
430           );
431 
432           miLastOffset = miPosition + moPattern.count;
433 
434           break;
435         }
436         case SPAN: {
              // The rule stays active until its end sequence is matched.
437           moContext.moInRule = aoCheckRule;
438 
439           // Non-delegated.
440           if( ( aoCheckRule.miAction & DELEGATE ) != DELEGATE ) {
441             if( ( aoCheckRule.miAction & EXCLUDE_MATCH ) == EXCLUDE_MATCH ) {
442               aoTokenList.addToken(
443                 moPattern.count,
444                 moContext.moRules.getDefault(),
445                 moContext.moRules
446               );
447               miLastOffset = miPosition + moPattern.count;
448             } else {
                  // The start sequence becomes part of the span's token, which
                  // is added once the span ends.
449               miLastOffset = miPosition;
450             }
451           } else {
                // The delegate rule-set name is stored in maSearchChars after
                // the first two sequences; its length is maSequenceLengths[2].
452             String sName = new String(
453               aoCheckRule.maSearchChars,
454               aoCheckRule.maSequenceLengths[0] + aoCheckRule.maSequenceLengths[1],
455               aoCheckRule.maSequenceLengths[2]
456             );
457 
458             ParserRuleSet oDelegateSet = getRuleSet( sName );
459 
                // NOTE(review): when the delegate set cannot be resolved, the
                // rule stays the in-rule but no token is added and no delegate
                // context is entered - confirm this is intended.
460             if( oDelegateSet != null ) {
461               if( ( aoCheckRule.miAction & EXCLUDE_MATCH) == EXCLUDE_MATCH ) {
462                 aoTokenList.addToken(
463                   moPattern.count,
464                   moContext.moRules.getDefault(),
465                   moContext.moRules
466                 );
467               } else {
468                 aoTokenList.addToken(
469                   moPattern.count,
470                   aoCheckRule.myToken,
471                   moContext.moRules
472                 );
473               }
474               miLastOffset = miPosition + moPattern.count;
475 
                  // Enter the delegate: a new context whose parent is the
                  // current one.
476               moContext = new SyntaxLineContext( oDelegateSet, moContext );
477             }
478           }
479 
480           break;
481         }
482         case EOL_SPAN: {
              // The rest of the line gets the rule's token; with EXCLUDE_MATCH
              // the matched sequence itself gets the default token.
483           if( ( aoCheckRule.miAction & EXCLUDE_MATCH ) == EXCLUDE_MATCH ) {
484             aoTokenList.addToken(
485               moPattern.count,
486               moContext.moRules.getDefault(),
487               moContext.moRules
488             );
489             aoTokenList.addToken(
490               miLineLength - (miPosition + moPattern.count),
491               aoCheckRule.myToken,
492               moContext.moRules
493             );
494           } else {
495             aoTokenList.addToken(
496               miLineLength - miPosition,
497               aoCheckRule.myToken,
498               moContext.moRules
499             );
500           }
              // The whole line is consumed; moving miPosition to the end stops
              // the scan loop in markTokens().
501           miLastOffset = miLineLength;
502           miLastKeyword = miLineLength;
503           miPosition = miLineLength;
504 
505           return false;
506         }
507         case MARK_PREVIOUS: {
              // Text before the pending keyword is plain text.
508           if( miLastKeyword > miLastOffset ) {
509             aoTokenList.addToken(
510               miLastKeyword - miLastOffset,
511               moContext.moRules.getDefault(),
512               moContext.moRules
513             );
514             miLastOffset = miLastKeyword;
515           }
516 
              // The pending keyword gets the rule's token; the matched
              // sequence is included in it unless EXCLUDE_MATCH is set.
517           if( ( aoCheckRule.miAction & EXCLUDE_MATCH ) == EXCLUDE_MATCH ) {
518             aoTokenList.addToken(
519               miPosition - miLastOffset,
520               aoCheckRule.myToken,
521               moContext.moRules
522             );
523             aoTokenList.addToken(
524               moPattern.count,
525               moContext.moRules.getDefault(),
526               moContext.moRules
527             );
528           } else {
529             aoTokenList.addToken(
530               miPosition - miLastOffset + moPattern.count,
531               aoCheckRule.myToken,
532               moContext.moRules
533             );
534           }
535 
536           miLastOffset = miPosition + moPattern.count;
537           break;
538         }
539         case MARK_FOLLOWING: {
              // The rule stays active; the text that follows is given its
              // token when the rule is ended later.
540           moContext.moInRule = aoCheckRule;
541           if( ( aoCheckRule.miAction & EXCLUDE_MATCH ) == EXCLUDE_MATCH ) {
542             aoTokenList.addToken(
543               moPattern.count,
544               moContext.moRules.getDefault(),
545               moContext.moRules
546             );
547             miLastOffset = miPosition + moPattern.count;
548           } else {
549             miLastOffset = miPosition;
550           }
551           break;
552         }
553         default: {
554           throw new InternalError( "Unhandled major action" );
555         }
556       }
557 
558       miLastKeyword = miLastOffset;
559 
560       // Move position to last character of match sequence.
561       miPosition += ( moPattern.count - 1 );
562 
563       // Break out of inner for loop to check next char.
564       return false;
565     } else if( ( aoCheckRule.miAction & SPAN ) == SPAN ) {
          // A rule is active and the matched rule is a span - presumably the
          // end sequence of the span in progress. For a DELEGATE span nothing
          // is added here; markTokens() handles that case itself.
566       if( ( aoCheckRule.miAction & DELEGATE ) != DELEGATE ) {
567         moContext.moInRule = null;
568         //Log.write( Log.CONFIG, "Using delegate: " + new String( aoCheckRule.maSearchChars ) );
569         if( ( aoCheckRule.miAction & EXCLUDE_MATCH ) == EXCLUDE_MATCH ) {
570           aoTokenList.addToken(
571             miPosition - miLastOffset,
572             aoCheckRule.myToken,
573             moContext.moRules
574           );
575           aoTokenList.addToken(
576             moPattern.count,
577             moContext.moRules.getDefault(),
578             moContext.moRules
579           );
580         } else {
581           //Log.write( Log.CONFIG, "Adding token: " + aoCheckRule.myToken );
582           aoTokenList.addToken(
583             (miPosition + moPattern.count) - miLastOffset,
584             aoCheckRule.myToken,
585             moContext.moRules
586           );
587         }
588 
589         miLastKeyword = miLastOffset = miPosition + moPattern.count;
590 
591         // Move position to last character of match sequence.
592         miPosition += (moPattern.count - 1);
593       }
594 
595       // Break out of inner for loop to check next char.
596       return false;
597     }
598 
        // A rule is active and the matched rule is not a span: no effect.
599     return true;
600   }
601 
602   private void markKeyword(
603     SyntaxTokenList aoTokenList,
604     Segment aoSegment,
605     int aiStart,
606     int aiEnd
607   ) {
608     KeywordMap oKeywords = moContext.moRules.getKeywords();
609 
610     int iLength = aiEnd - aiStart;
611 
612     // Do digits.
613 
614     // Right now, this is hardcoded to handle these cases:
615     //   1234
616     //   0x1234abcf
617     //   1234l
618     //   12.34f
619     //   12.34d
620     // In the future, we need some sort of regexp mechanism.
621 
622     if( moContext.moRules.getHighlightDigits() ) {
623       char[] aCharacters = aoSegment.array;
624       boolean bDigit = true;
625       boolean bOctal = false;
626       boolean bHex = false;
627       boolean bSeenSomeDigits = false;
628 
629 loop:
630       for(int iIndex = 0; iIndex < iLength; iIndex++) {
631         char cChar = aCharacters[ aiStart+ iIndex ];
632         switch( cChar ) {
633           case '0': {
634             if( iIndex == 0) bOctal = true;
635             bSeenSomeDigits = true;
636             continue loop;
637           }
638           case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': {
639             bSeenSomeDigits = true;
640             continue loop;
641           }
642           case 'x': case 'X': {
643             if( bOctal && iIndex == 1) {
644               bHex = true;
645               continue loop;
646             } else {
647               break;
648             }
649           }
650           case 'd': case 'D': case 'f': case 'F': {
651             if( bHex ) {
652               continue loop;
653             } else if( iIndex == iLength - 1 && bSeenSomeDigits ) {
654               continue loop;
655             } else {
656               break;
657             }
658           }
659           case 'l': case 'L': {
660             if( iIndex == iLength - 1 && bSeenSomeDigits ) {
661               continue loop;
662             } else {
663               break;
664             }
665           }
666           case 'e': case 'E': {
667             if( bSeenSomeDigits ) {
668               continue loop;
669             } else {
670               break;
671             }
672           }
673           case 'a': case 'A': case 'b': case 'B': case 'c': case 'C': {
674             if( bHex ) {
675               continue loop;
676             } else {
677               break;
678             }
679           }
680           case '.': case '-': {
681             continue loop;
682             // Normally, this shouldn't be necessary, because most modes
683             // define '.' and '-' SEQs. However, in props mode, we can't
684             // define such a SEQ because it would break the AT_LINE_START
685             // MARK_PREVIOUS rule.
686           }
687           default: {
688             break;
689           }
690         }
691 
692         // if we ended up here, then we have found a
693         // non-bDigit character.
694         bDigit = false;
695         break loop;
696       }
697 
698       // If we got this far with bDigit = true, then the keyword
699       // consists of all digits. Add it as such.
700       if( bDigit && bSeenSomeDigits ) {
701         if( aiStart != miLastOffset ) {
702           aoTokenList.addToken(
703             aiStart - miLastOffset,
704             moContext.moRules.getDefault(),
705             moContext.moRules
706           );
707         }
708         aoTokenList.addToken( iLength, Token.DIGIT, moContext.moRules );
709         miLastKeyword = miLastOffset = aiEnd;
710 
711         return;
712       }
713     }
714 
715     if( oKeywords != null) {
716       byte yID = oKeywords.lookup( aoSegment, aiStart, iLength );
717 
718       if( yID != Token.NULL ) {
719         if( aiStart != miLastOffset ) {
720           aoTokenList.addToken(
721             aiStart - miLastOffset,
722             moContext.moRules.getDefault(),
723             moContext.moRules
724           );
725         }
726         aoTokenList.addToken( iLength, yID, moContext.moRules );
727         miLastKeyword = miLastOffset = aiEnd;
728       }
729     }
730   }
731 
732 }