Save This Page
Home » sitemesh-2.3 » com.opensymphony.module » sitemesh » html » tokenizer » [javadoc | source]
    1   /*
    2    * IF YOU ARE HAVING TROUBLE COMPILING THIS CLASS, IT IS PROBABLY BECAUSE Lexer.java IS MISSING.
    3    *
    4    * Use 'ant jflex' to generate the file, which will reside in build/java
    5    */
    6   
    7   package com.opensymphony.module.sitemesh.html.tokenizer;
    8   
    9   import com.opensymphony.module.sitemesh.html.Tag;
   10   import com.opensymphony.module.sitemesh.html.Text;
   11   import com.opensymphony.module.sitemesh.html.util.CharArray;
   12   import com.opensymphony.module.sitemesh.util.CharArrayReader;
   13   
   14   import java.io.IOException;
   15   import java.util.ArrayList;
   16   import java.util.List;
   17   
   18   /**
   19    * Looks for patterns of tokens in the Lexer and translates these to calls to pass to the TokenHandler.
   20    *
   21    * @author Joe Walnes
   22    * @see TagTokenizer
   23    */
   24   class Parser extends Lexer implements Text, Tag {
   25   
   26       private final CharArray attributeBuffer = new CharArray(64);
   27       private int pushbackToken = -1;
   28       private String pushbackText;
   29   
   30       public final static short SLASH=257;
   31       public final static short WHITESPACE=258;
   32       public final static short EQUALS=259;
   33       public final static short QUOTE=260;
   34       public final static short WORD=261;
   35       public final static short TEXT=262;
   36       public final static short QUOTED=263;
   37       public final static short LT=264;
   38       public final static short GT=265;
   39       public final static short LT_OPEN_MAGIC_COMMENT=266;
   40       public final static short LT_CLOSE_MAGIC_COMMENT=267;
   41   
   42       private final char[] input;
   43   
   44       private TokenHandler handler;
   45   
   46       private int position;
   47       private int length;
   48   
   49       private String name;
   50       private int type;
   51       private final List attributes = new ArrayList();
   52   
   53       public Parser(char[] input, TokenHandler handler) {
   54           super(new CharArrayReader(input));
   55           this.input = input;
   56           this.handler = handler;
   57       }
   58   
   59       private String text() {
   60           if (pushbackToken == -1) {
   61               return yytext();
   62           } else {
   63               return pushbackText;
   64           }
   65       }
   66   
   67       private void skipWhiteSpace() throws IOException {
   68           while (true) {
   69               int next;
   70               if (pushbackToken == -1) {
   71                   next = yylex();
   72               } else {
   73                   next = pushbackToken;
   74                   pushbackToken = -1;
   75               }
   76               if (next != Parser.WHITESPACE) {
   77                   pushBack(next);
   78                   break;
   79               }
   80           }
   81       }
   82   
   83       private void pushBack(int next) {
   84           if (pushbackToken != -1) {
   85               reportError("Cannot pushback more than once", line(), column());
   86           }
   87           pushbackToken = next;
   88           if (next == Parser.WORD || next == Parser.QUOTED || next == Parser.SLASH || next == Parser.EQUALS) {
   89               pushbackText = yytext();
   90           } else {
   91               pushbackText = null;
   92           }
   93       }
   94   
   95       public void start() {
   96           try {
   97               while (true) {
   98                   int token;
   99                   if (pushbackToken == -1) {
  100                       token = yylex();
  101                   } else {
  102                       token = pushbackToken;
  103                       pushbackToken = -1;
  104                   }
  105                   if (token == 0) {
  106                       // EOF
  107                       return;
  108                   } else if (token == Parser.TEXT) {
  109                       // Got some text
  110                       parsedText(position(), length());
  111                   } else if (token == Parser.LT) {
  112                       // Token "<" - start of tag
  113                       parseTag(Tag.OPEN);
  114                   } else if (token == Parser.LT_OPEN_MAGIC_COMMENT) {
  115                       // Token "<!--[" - start of open magic comment
  116                       parseTag(Tag.OPEN_MAGIC_COMMENT);
  117                   } else if (token == Parser.LT_CLOSE_MAGIC_COMMENT) {
  118                       // Token "<![" - start of close magic comment
  119                       parseTag(Tag.CLOSE_MAGIC_COMMENT);
  120                   } else {
  121                       reportError("Unexpected token from lexer, was expecting TEXT or LT", line(), column());
  122                   }
  123               }
  124           } catch (IOException e) {
  125               throw new RuntimeException(e);
  126           }
  127       }
  128   
  129       private void parseTag(int type) throws IOException {
  130           // Start parsing a TAG
  131   
  132           int start = position();
  133           skipWhiteSpace();
  134           int token;
  135           if (pushbackToken == -1) {
  136               token = yylex();
  137           } else {
  138               token = pushbackToken;
  139               pushbackToken = -1;
  140           }
  141           String name;
  142   
  143           if (token == Parser.SLASH) {
  144               // Token "/" - it's a closing tag
  145               type = Tag.CLOSE;
  146               if (pushbackToken == -1) {
  147                   token = yylex();
  148               } else {
  149                   token = pushbackToken;
  150                   pushbackToken = -1;
  151               }
  152           }
  153   
  154           if (token == Parser.WORD) {
  155               // Token WORD - name of tag
  156               name = text();
  157   
  158               if (handler.shouldProcessTag(name)) {
  159                   parseFullTag(type, name, start);
  160               } else {
  161   
  162                   // don't care about this tag... scan to the end and treat it as text
  163                   while(true)  {
  164                       if (pushbackToken == -1) {
  165                           token = yylex();
  166                       } else {
  167                           token = pushbackToken;
  168                           pushbackToken = -1;
  169                       }
  170                       if (token == Parser.GT) {
  171                           pushBack(yylex()); // take and replace the next token, so the position is correct  
  172                           parsedText(start, position() - start);
  173                           return;
  174                       }
  175                   }
  176               }
  177   
  178           } else if (token == Parser.GT) {
  179               // Token ">" - an illegal <> or <  > tag. Ignore
  180           } else {
  181               reportError("Could not recognise tag", line(), column());
  182           }
  183       }
  184   
  185       private void parseFullTag(int type, String name, int start) throws IOException {
  186           int token;
  187           while (true) {
  188               skipWhiteSpace();
  189               if (pushbackToken == -1) {
  190                   token = yylex();
  191               } else {
  192                   token = pushbackToken;
  193                   pushbackToken = -1;
  194               }
  195               pushBack(token);
  196   
  197               if (token == Parser.SLASH || token == Parser.GT) {
  198                   break; // no more attributes here
  199               } else if (token == Parser.WORD) {
  200                   parseAttribute(); // start of an attribute
  201               } else {
  202                   reportError("XXY", line(), column());
  203               }
  204           }
  205   
  206           if (pushbackToken == -1) {
  207               token = yylex();
  208           } else {
  209               token = pushbackToken;
  210               pushbackToken = -1;
  211           }
  212           if (token == Parser.SLASH) {
  213               // Token "/" - it's an empty tag
  214               type = Tag.EMPTY;
  215               if (pushbackToken == -1) {
  216                   token = yylex();
  217               } else {
  218                   token = pushbackToken;
  219                   pushbackToken = -1;
  220               }
  221           }
  222   
  223           if (token == Parser.GT) {
  224               // Token ">" - YAY! end of tag.. process it!
  225               parsedTag(type, name, start, position() - start + 1);
  226           } else {
  227               reportError("Expected end of tag", line(), column());
  228           }
  229       }
  230   
  231       private void parseAttribute() throws IOException {
  232           int token;
  233           if (pushbackToken == -1) {
  234               token = yylex();
  235           } else {
  236               token = pushbackToken;
  237               pushbackToken = -1;
  238           }
  239           // Token WORD - start of an attribute
  240           String attributeName = text();
  241           skipWhiteSpace();
  242           if (pushbackToken == -1) {
  243               token = yylex();
  244           } else {
  245               token = pushbackToken;
  246               pushbackToken = -1;
  247           }
  248           if (token == Parser.EQUALS) {
  249               // Token "=" - the attribute has a value
  250               skipWhiteSpace();
  251               if (pushbackToken == -1) {
  252                   token = yylex();
  253               } else {
  254                   token = pushbackToken;
  255                   pushbackToken = -1;
  256               }
  257               if (token == Parser.QUOTED) {
  258                   // token QUOTED - a quoted literal as the attribute value
  259                   parsedAttribute(attributeName, text(), true);
  260               } else if (token == Parser.WORD || token == Parser.SLASH) {
  261                   // unquoted word
  262                   attributeBuffer.clear();
  263                   attributeBuffer.append(text());
  264                   while (true) {
  265                       int next;
  266                       if (pushbackToken == -1) {
  267                           next = yylex();
  268                       } else {
  269                           next = pushbackToken;
  270                           pushbackToken = -1;
  271                       }
  272                       if (next == Parser.WORD || next == Parser.EQUALS || next == Parser.SLASH) {
  273                           attributeBuffer.append(text());
  274                           // TODO: how to handle <a x=c/> ?
  275                       } else {
  276                           pushBack(next);
  277                           break;
  278                       }
  279                   }
  280                   parsedAttribute(attributeName, attributeBuffer.toString(), false);
  281               } else if (token == Parser.SLASH || token == Parser.GT) {
  282                   // no more attributes
  283                   pushBack(token);
  284               } else {
  285                   reportError("Illegal attribute value", line(), column());
  286               }
  287           } else if (token == Parser.SLASH || token == Parser.GT || token == Parser.WORD) {
  288               // it was a value-less HTML style attribute
  289               parsedAttribute(attributeName, null, false);
  290               pushBack(token);
  291           } else {
  292               reportError("Illegal attribute name", line(), column());
  293           }
  294       }
  295   
  296       public void parsedText(int position, int length) {
  297           this.position = position;
  298           this.length = length;
  299           handler.text((Text) this);
  300       }
  301   
  302       public void parsedTag(int type, String name, int start, int length) {
  303           this.type = type;
  304           this.name = name;
  305           this.position = start;
  306           this.length = length;
  307           handler.tag((Tag) this);
  308           attributes.clear();
  309       }
  310   
  311       public void parsedAttribute(String name, String value, boolean quoted) {
  312           attributes.add(name);
  313           if (quoted) {
  314               attributes.add(value.substring(1, value.length() - 1));
  315           } else {
  316               attributes.add(value);
  317           }
  318       }
  319   
  320       protected void reportError(String message, int line, int column) {
  321   //        System.out.println(message);
  322           handler.warning(message, line, column);
  323       }
  324   
  325       public String getName() {
  326           return name;
  327       }
  328   
  329       public int getType() {
  330           return type;
  331       }
  332   
  333       public String getContents() {
  334           return new String(input, position, length);
  335       }
  336   
  337       public void writeTo(CharArray out) {
  338           out.append(input, position, length);
  339       }
  340   
  341       public int getAttributeCount() {
  342           return attributes == null ? 0 : attributes.size() / 2;
  343       }
  344   
  345       public String getAttributeName(int index) {
  346           return (String) attributes.get(index * 2);
  347       }
  348   
  349       public String getAttributeValue(int index) {
  350           return (String) attributes.get(index * 2 + 1);
  351       }
  352   
  353       public String getAttributeValue(String name) {
  354           // todo: optimize
  355           if (attributes == null) {
  356               return null;
  357           }
  358           final int len = attributes.size();
  359           for (int i = 0; i < len; i+=2) {
  360               if (name.equalsIgnoreCase((String) attributes.get(i))) {
  361                   return (String) attributes.get(i + 1);
  362               }
  363           }
  364           return null;
  365       }
  366   
  367       public boolean hasAttribute(String name) {
  368           return getAttributeValue(name) != null;
  369       }
  370   
  371   }

Save This Page
Home » sitemesh-2.3 » com.opensymphony.module » sitemesh » html » tokenizer » [javadoc | source]