Save This Page
Home » apache-tomcat-6.0.16-src » org.apache » jasper » compiler » [javadoc | source]
    1   /*
    2    * Licensed to the Apache Software Foundation (ASF) under one or more
    3    * contributor license agreements.  See the NOTICE file distributed with
    4    * this work for additional information regarding copyright ownership.
    5    * The ASF licenses this file to You under the Apache License, Version 2.0
    6    * (the "License"); you may not use this file except in compliance with
    7    * the License.  You may obtain a copy of the License at
    8    * 
    9    *      http://www.apache.org/licenses/LICENSE-2.0
   10    * 
   11    * Unless required by applicable law or agreed to in writing, software
   12    * distributed under the License is distributed on an "AS IS" BASIS,
   13    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   14    * See the License for the specific language governing permissions and
   15    * limitations under the License.
   16    */
   17   
   18   package org.apache.jasper.compiler;
   19   
   20   import java.io.FileNotFoundException;
   21   import java.io.IOException;
   22   import java.io.InputStreamReader;
   23   import java.net.JarURLConnection;
   24   import java.net.URL;
   25   import java.util.Stack;
   26   import java.util.jar.JarFile;
   27   
   28   import org.apache.jasper.JasperException;
   29   import org.apache.jasper.JspCompilationContext;
   30   import org.apache.jasper.xmlparser.XMLEncodingDetector;
   31   import org.xml.sax.Attributes;
   32   
   33   /**
   34    * Controller for the parsing of a JSP page.
   35    * <p>
   36    * The same ParserController instance is used for a JSP page and any JSP
   37    * segments included by it (via an include directive), where each segment may
   38    * be provided in standard or XML syntax. This class selects and invokes the
   39    * appropriate parser for the JSP page and its included segments.
   40    *
   41    * @author Pierre Delisle
   42    * @author Jan Luehe
   43    */
   44   class ParserController implements TagConstants {
   45   
   46       private static final String CHARSET = "charset=";
   47   
   48       private JspCompilationContext ctxt;
   49       private Compiler compiler;
   50       private ErrorDispatcher err;
   51   
   52       /*
   53        * Indicates the syntax (XML or standard) of the file being processed
   54        */
   55       private boolean isXml;
   56   
   57       /*
   58        * A stack to keep track of the 'current base directory'
   59        * for include directives that refer to relative paths.
   60        */
   61       private Stack baseDirStack = new Stack();
   62   
   63       private boolean isEncodingSpecifiedInProlog;
   64       private boolean isBomPresent;
   65       private int skip;
   66   
   67       private String sourceEnc;
   68   
   69       private boolean isDefaultPageEncoding;
   70       private boolean isTagFile;
   71       private boolean directiveOnly;
   72   
   73       /*
   74        * Constructor
   75        */
   76       public ParserController(JspCompilationContext ctxt, Compiler compiler) {
   77           this.ctxt = ctxt; 
   78           this.compiler = compiler;
   79           this.err = compiler.getErrorDispatcher();
   80       }
   81   
   82       public JspCompilationContext getJspCompilationContext () {
   83           return ctxt;
   84       }
   85   
   86       public Compiler getCompiler () {
   87           return compiler;
   88       }
   89   
   90       /**
   91        * Parses a JSP page or tag file. This is invoked by the compiler.
   92        *
   93        * @param inFileName The path to the JSP page or tag file to be parsed.
   94        */
   95       public Node.Nodes parse(String inFileName)
   96       throws FileNotFoundException, JasperException, IOException {
   97           // If we're parsing a packaged tag file or a resource included by it
   98           // (using an include directive), ctxt.getTagFileJar() returns the 
   99           // JAR file from which to read the tag file or included resource,
  100           // respectively.
  101           isTagFile = ctxt.isTagFile();
  102           directiveOnly = false;
  103           return doParse(inFileName, null, ctxt.getTagFileJarUrl());
  104       }
  105   
  106       /**
  107        * Processes an include directive with the given path.
  108        *
  109        * @param inFileName The path to the resource to be included.
  110        * @param parent The parent node of the include directive.
  111        * @param jarFile The JAR file from which to read the included resource,
  112        * or null of the included resource is to be read from the filesystem
  113        */
  114       public Node.Nodes parse(String inFileName, Node parent,
  115               URL jarFileUrl)
  116       throws FileNotFoundException, JasperException, IOException {
  117           // For files that are statically included, isTagfile and directiveOnly
  118           // remain unchanged.
  119           return doParse(inFileName, parent, jarFileUrl);
  120       }
  121   
  122       /**
  123        * Extracts tag file directive information from the tag file with the
  124        * given name.
  125        *
  126        * This is invoked by the compiler 
  127        *
  128        * @param inFileName The name of the tag file to be parsed.
  129        */
  130       public Node.Nodes parseTagFileDirectives(String inFileName)
  131       throws FileNotFoundException, JasperException, IOException {
  132           boolean isTagFileSave = isTagFile;
  133           boolean directiveOnlySave = directiveOnly;
  134           isTagFile = true;
  135           directiveOnly = true;
  136           Node.Nodes page = doParse(inFileName, null,
  137                   ctxt.getTagFileJarUrl(inFileName));
  138           directiveOnly = directiveOnlySave;
  139           isTagFile = isTagFileSave;
  140           return page;
  141       }
  142   
  143       /**
  144        * Parses the JSP page or tag file with the given path name.
  145        *
  146        * @param inFileName The name of the JSP page or tag file to be parsed.
  147        * @param parent The parent node (non-null when processing an include
  148        * directive)
  149        * @param isTagFile true if file to be parsed is tag file, and false if it
  150        * is a regular JSP page
  151        * @param directivesOnly true if the file to be parsed is a tag file and
  152        * we are only interested in the directives needed for constructing a
  153        * TagFileInfo.
  154        * @param jarFile The JAR file from which to read the JSP page or tag file,
  155        * or null if the JSP page or tag file is to be read from the filesystem
  156        */
  157       private Node.Nodes doParse(String inFileName,
  158               Node parent,
  159               URL jarFileUrl)
  160       throws FileNotFoundException, JasperException, IOException {
  161   
  162           Node.Nodes parsedPage = null;
  163           isEncodingSpecifiedInProlog = false;
  164           isBomPresent = false;
  165           isDefaultPageEncoding = false;
  166   
  167           JarFile jarFile = getJarFile(jarFileUrl);
  168           String absFileName = resolveFileName(inFileName);
  169           String jspConfigPageEnc = getJspConfigPageEncoding(absFileName);
  170   
  171           // Figure out what type of JSP document and encoding type we are
  172           // dealing with
  173           determineSyntaxAndEncoding(absFileName, jarFile, jspConfigPageEnc);
  174   
  175           if (parent != null) {
  176               // Included resource, add to dependent list
  177               compiler.getPageInfo().addDependant(absFileName);
  178           }
  179   
  180           if ((isXml && isEncodingSpecifiedInProlog) || isBomPresent) {
  181               /*
  182                * Make sure the encoding explicitly specified in the XML
  183                * prolog (if any) matches that in the JSP config element
  184                * (if any), treating "UTF-16", "UTF-16BE", and "UTF-16LE" as
  185                * identical.
  186                */
  187               if (jspConfigPageEnc != null && !jspConfigPageEnc.equals(sourceEnc)
  188                       && (!jspConfigPageEnc.startsWith("UTF-16")
  189                               || !sourceEnc.startsWith("UTF-16"))) {
  190                   err.jspError("jsp.error.prolog_config_encoding_mismatch",
  191                           sourceEnc, jspConfigPageEnc);
  192               }
  193           }
  194   
  195           // Dispatch to the appropriate parser
  196           if (isXml) {
  197               // JSP document (XML syntax)
  198               // InputStream for jspx page is created and properly closed in
  199               // JspDocumentParser.
  200               parsedPage = JspDocumentParser.parse(this, absFileName,
  201                       jarFile, parent,
  202                       isTagFile, directiveOnly,
  203                       sourceEnc,
  204                       jspConfigPageEnc,
  205                       isEncodingSpecifiedInProlog,
  206                       isBomPresent);
  207           } else {
  208               // Standard syntax
  209               InputStreamReader inStreamReader = null;
  210               try {
  211                   inStreamReader = JspUtil.getReader(absFileName, sourceEnc,
  212                           jarFile, ctxt, err, skip);
  213                   JspReader jspReader = new JspReader(ctxt, absFileName,
  214                           sourceEnc, inStreamReader,
  215                           err);
  216                   parsedPage = Parser.parse(this, jspReader, parent, isTagFile,
  217                           directiveOnly, jarFileUrl,
  218                           sourceEnc, jspConfigPageEnc,
  219                           isDefaultPageEncoding, isBomPresent);
  220               } finally {
  221                   if (inStreamReader != null) {
  222                       try {
  223                           inStreamReader.close();
  224                       } catch (Exception any) {
  225                       }
  226                   }
  227               }
  228           }
  229   
  230           if (jarFile != null) {
  231               try {
  232                   jarFile.close();
  233               } catch (Throwable t) {}
  234           }
  235   
  236           baseDirStack.pop();
  237   
  238           return parsedPage;
  239       }
  240   
  241       /*
  242        * Checks to see if the given URI is matched by a URL pattern specified in
  243        * a jsp-property-group in web.xml, and if so, returns the value of the
  244        * <page-encoding> element.
  245        *
  246        * @param absFileName The URI to match
  247        *
  248        * @return The value of the <page-encoding> attribute of the 
  249        * jsp-property-group with matching URL pattern
  250        */
  251       private String getJspConfigPageEncoding(String absFileName)
  252       throws JasperException {
  253   
  254           JspConfig jspConfig = ctxt.getOptions().getJspConfig();
  255           JspConfig.JspProperty jspProperty
  256               = jspConfig.findJspProperty(absFileName);
  257           return jspProperty.getPageEncoding();
  258       }
  259   
  260       /**
  261        * Determines the syntax (standard or XML) and page encoding properties
  262        * for the given file, and stores them in the 'isXml' and 'sourceEnc'
  263        * instance variables, respectively.
  264        */
  265       private void determineSyntaxAndEncoding(String absFileName,
  266               JarFile jarFile,
  267               String jspConfigPageEnc)
  268       throws JasperException, IOException {
  269   
  270           isXml = false;
  271   
  272           /*
  273            * 'true' if the syntax (XML or standard) of the file is given
  274            * from external information: either via a JSP configuration element,
  275            * the ".jspx" suffix, or the enclosing file (for included resources)
  276            */
  277           boolean isExternal = false;
  278   
  279           /*
  280            * Indicates whether we need to revert from temporary usage of
  281            * "ISO-8859-1" back to "UTF-8"
  282            */
  283           boolean revert = false;
  284   
  285           JspConfig jspConfig = ctxt.getOptions().getJspConfig();
  286           JspConfig.JspProperty jspProperty = jspConfig.findJspProperty(
  287                   absFileName);
  288           if (jspProperty.isXml() != null) {
  289               // If <is-xml> is specified in a <jsp-property-group>, it is used.
  290               isXml = JspUtil.booleanValue(jspProperty.isXml());
  291               isExternal = true;
  292           } else if (absFileName.endsWith(".jspx")
  293                   || absFileName.endsWith(".tagx")) {
  294               isXml = true;
  295               isExternal = true;
  296           }
  297   
  298           if (isExternal && !isXml) {
  299               // JSP (standard) syntax. Use encoding specified in jsp-config
  300               // if provided.
  301               sourceEnc = jspConfigPageEnc;
  302               if (sourceEnc != null) {
  303                   return;
  304               }
  305               // We don't know the encoding, so use BOM to determine it
  306               sourceEnc = "ISO-8859-1";
  307           } else {
  308               // XML syntax or unknown, (auto)detect encoding ...
  309               Object[] ret = XMLEncodingDetector.getEncoding(absFileName,
  310                       jarFile, ctxt, err);
  311               sourceEnc = (String) ret[0];
  312               if (((Boolean) ret[1]).booleanValue()) {
  313                   isEncodingSpecifiedInProlog = true;
  314               }
  315               if (((Boolean) ret[2]).booleanValue()) {
  316                   isBomPresent = true;
  317               }
  318               skip = ((Integer) ret[3]).intValue();
  319   
  320               if (!isXml && sourceEnc.equals("UTF-8")) {
  321                   /*
  322                    * We don't know if we're dealing with XML or standard syntax.
  323                    * Therefore, we need to check to see if the page contains
  324                    * a <jsp:root> element.
  325                    *
  326                    * We need to be careful, because the page may be encoded in
  327                    * ISO-8859-1 (or something entirely different), and may
  328                    * contain byte sequences that will cause a UTF-8 converter to
  329                    * throw exceptions. 
  330                    *
  331                    * It is safe to use a source encoding of ISO-8859-1 in this
  332                    * case, as there are no invalid byte sequences in ISO-8859-1,
  333                    * and the byte/character sequences we're looking for (i.e.,
  334                    * <jsp:root>) are identical in either encoding (both UTF-8
  335                    * and ISO-8859-1 are extensions of ASCII).
  336                    */
  337                   sourceEnc = "ISO-8859-1";
  338                   revert = true;
  339               }
  340           }
  341   
  342           if (isXml) {
  343               // (This implies 'isExternal' is TRUE.)
  344               // We know we're dealing with a JSP document (via JSP config or
  345               // ".jspx" suffix), so we're done.
  346               return;
  347           }
  348   
  349           /*
  350            * At this point, 'isExternal' or 'isXml' is FALSE.
  351            * Search for jsp:root action, in order to determine if we're dealing 
  352            * with XML or standard syntax (unless we already know what we're 
  353            * dealing with, i.e., when 'isExternal' is TRUE and 'isXml' is FALSE).
  354            * No check for XML prolog, since nothing prevents a page from
  355            * outputting XML and still using JSP syntax (in this case, the 
  356            * XML prolog is treated as template text).
  357            */
  358           JspReader jspReader = null;
  359           try {
  360               jspReader = new JspReader(ctxt, absFileName, sourceEnc, jarFile,
  361                       err);
  362           } catch (FileNotFoundException ex) {
  363               throw new JasperException(ex);
  364           }
  365           jspReader.setSingleFile(true);
  366           Mark startMark = jspReader.mark();
  367           if (!isExternal) {
  368               jspReader.reset(startMark);
  369               if (hasJspRoot(jspReader)) {
  370                   if (revert) {
  371                       sourceEnc = "UTF-8";
  372                   }
  373                   isXml = true;
  374                   return;
  375               } else {
  376                   if (revert && isBomPresent) {
  377                       sourceEnc = "UTF-8";
  378                   }
  379                   isXml = false;
  380               }
  381           }
  382   
  383           /*
  384            * At this point, we know we're dealing with JSP syntax.
  385            * If an XML prolog is provided, it's treated as template text.
  386            * Determine the page encoding from the page directive, unless it's
  387            * specified via JSP config.
  388            */
  389           if (!isBomPresent) {
  390               sourceEnc = jspConfigPageEnc;
  391               if (sourceEnc == null) {
  392                   sourceEnc = getPageEncodingForJspSyntax(jspReader, startMark);
  393                   if (sourceEnc == null) {
  394                       // Default to "ISO-8859-1" per JSP spec
  395                       sourceEnc = "ISO-8859-1";
  396                       isDefaultPageEncoding = true;
  397                   }
  398               }
  399           }
  400           
  401       }
  402   
  403       /*
  404        * Determines page source encoding for page or tag file in JSP syntax,
  405        * by reading (in this order) the value of the 'pageEncoding' page
  406        * directive attribute, or the charset value of the 'contentType' page
  407        * directive attribute.
  408        *
  409        * @return The page encoding, or null if not found
  410        */
  411       private String getPageEncodingForJspSyntax(JspReader jspReader,
  412               Mark startMark)
  413       throws JasperException {
  414   
  415           String encoding = null;
  416           String saveEncoding = null;
  417   
  418           jspReader.reset(startMark);
  419   
  420           /*
  421            * Determine page encoding from directive of the form <%@ page %>,
  422            * <%@ tag %>, <jsp:directive.page > or <jsp:directive.tag >.
  423            */
  424           while (true) {
  425               if (jspReader.skipUntil("<") == null) {
  426                   break;
  427               }
  428               // If this is a comment, skip until its end
  429               if (jspReader.matches("%--")) {
  430                   if (jspReader.skipUntil("--%>") == null) {
  431                       // error will be caught in Parser
  432                       break;
  433                   }
  434                   continue;
  435               }
  436               boolean isDirective = jspReader.matches("%@");
  437               if (isDirective) {
  438                   jspReader.skipSpaces();
  439               }
  440               else {
  441                   isDirective = jspReader.matches("jsp:directive.");
  442               }
  443               if (!isDirective) {
  444                   continue;
  445               }
  446   
  447               // compare for "tag ", so we don't match "taglib"
  448               if (jspReader.matches("tag ") || jspReader.matches("page")) {
  449   
  450                   jspReader.skipSpaces();
  451                   Attributes attrs = Parser.parseAttributes(this, jspReader);
  452                   encoding = getPageEncodingFromDirective(attrs, "pageEncoding");
  453                   if (encoding != null) {
  454                       break;
  455                   }
  456                   encoding = getPageEncodingFromDirective(attrs, "contentType");
  457                   if (encoding != null) {
  458                       saveEncoding = encoding;
  459                   }
  460               }
  461           }
  462   
  463           if (encoding == null) {
  464               encoding = saveEncoding;
  465           }
  466   
  467           return encoding;
  468       }
  469   
  470       /*
  471        * Scans the given attributes for the attribute with the given name,
  472        * which is either 'pageEncoding' or 'contentType', and returns the
  473        * specified page encoding.
  474        *
  475        * In the case of 'contentType', the page encoding is taken from the
  476        * content type's 'charset' component.
  477        *
  478        * @param attrs The page directive attributes
  479        * @param attrName The name of the attribute to search for (either
  480        * 'pageEncoding' or 'contentType')
  481        *
  482        * @return The page encoding, or null
  483        */
  484       private String getPageEncodingFromDirective(Attributes attrs,
  485               String attrName) {
  486           String value = attrs.getValue(attrName);
  487           if (attrName.equals("pageEncoding")) {
  488               return value;
  489           }
  490   
  491           // attrName = contentType
  492           String contentType = value;
  493           String encoding = null;
  494           if (contentType != null) {
  495               int loc = contentType.indexOf(CHARSET);
  496               if (loc != -1) {
  497                   encoding = contentType.substring(loc + CHARSET.length());
  498               }
  499           }
  500   
  501           return encoding;
  502       }
  503   
  504       /*
  505        * Resolve the name of the file and update baseDirStack() to keep track of
  506        * the current base directory for each included file.
  507        * The 'root' file is always an 'absolute' path, so no need to put an
  508        * initial value in the baseDirStack.
  509        */
  510       private String resolveFileName(String inFileName) {
  511           String fileName = inFileName.replace('\\', '/');
  512           boolean isAbsolute = fileName.startsWith("/");
  513           fileName = isAbsolute ? fileName 
  514                   : (String) baseDirStack.peek() + fileName;
  515           String baseDir = 
  516               fileName.substring(0, fileName.lastIndexOf("/") + 1);
  517           baseDirStack.push(baseDir);
  518           return fileName;
  519       }
  520   
  521       /*
  522        * Checks to see if the given page contains, as its first element, a <root>
  523        * element whose prefix is bound to the JSP namespace, as in:
  524        *
  525        * <wombat:root xmlns:wombat="http://java.sun.com/JSP/Page" version="1.2">
  526        *   ...
  527        * </wombat:root>
  528        *
  529        * @param reader The reader for this page
  530        *
  531        * @return true if this page contains a root element whose prefix is bound
  532        * to the JSP namespace, and false otherwise
  533        */
  534       private boolean hasJspRoot(JspReader reader) throws JasperException {
  535   
  536           // <prefix>:root must be the first element
  537           Mark start = null;
  538           while ((start = reader.skipUntil("<")) != null) {
  539               int c = reader.nextChar();
  540               if (c != '!' && c != '?') break;
  541           }
  542           if (start == null) {
  543               return false;
  544           }
  545           Mark stop = reader.skipUntil(":root");
  546           if (stop == null) {
  547               return false;
  548           }
  549           // call substring to get rid of leading '<'
  550           String prefix = reader.getText(start, stop).substring(1);
  551   
  552           start = stop;
  553           stop = reader.skipUntil(">");
  554           if (stop == null) {
  555               return false;
  556           }
  557   
  558           // Determine namespace associated with <root> element's prefix
  559           String root = reader.getText(start, stop);
  560           String xmlnsDecl = "xmlns:" + prefix;
  561           int index = root.indexOf(xmlnsDecl);
  562           if (index == -1) {
  563               return false;
  564           }
  565           index += xmlnsDecl.length();
  566           while (index < root.length()
  567                   && Character.isWhitespace(root.charAt(index))) {
  568               index++;
  569           }
  570           if (index < root.length() && root.charAt(index) == '=') {
  571               index++;
  572               while (index < root.length()
  573                       && Character.isWhitespace(root.charAt(index))) {
  574                   index++;
  575               }
  576               if (index < root.length() && root.charAt(index++) == '"'
  577                   && root.regionMatches(index, JSP_URI, 0,
  578                           JSP_URI.length())) {
  579                   return true;
  580               }
  581           }
  582   
  583           return false;
  584       }
  585   
  586       private JarFile getJarFile(URL jarFileUrl) throws IOException {
  587           JarFile jarFile = null;
  588   
  589           if (jarFileUrl != null) {
  590               JarURLConnection conn = (JarURLConnection) jarFileUrl.openConnection();
  591               conn.setUseCaches(false);
  592               conn.connect();
  593               jarFile = conn.getJarFile();
  594           }
  595   
  596           return jarFile;
  597       }
  598   
  599   }

Save This Page
Home » apache-tomcat-6.0.16-src » org.apache » jasper » compiler » [javadoc | source]