Save This Page
Home » apache-tomcat-6.0.26-src » org.apache » jasper » compiler » [javadoc | source]
    1   /*
    2    * Licensed to the Apache Software Foundation (ASF) under one or more
    3    * contributor license agreements.  See the NOTICE file distributed with
    4    * this work for additional information regarding copyright ownership.
    5    * The ASF licenses this file to You under the Apache License, Version 2.0
    6    * (the "License"); you may not use this file except in compliance with
    7    * the License.  You may obtain a copy of the License at
    8    * 
    9    *      http://www.apache.org/licenses/LICENSE-2.0
   10    * 
   11    * Unless required by applicable law or agreed to in writing, software
   12    * distributed under the License is distributed on an "AS IS" BASIS,
   13    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   14    * See the License for the specific language governing permissions and
   15    * limitations under the License.
   16    */
   17   
   18   package org.apache.jasper.compiler;
   19   
   20   import java.io.FileNotFoundException;
   21   import java.io.IOException;
   22   import java.io.InputStreamReader;
   23   import java.net.JarURLConnection;
   24   import java.net.URL;
   25   import java.util.Stack;
   26   import java.util.jar.JarFile;
   27   
   28   import org.apache.jasper.JasperException;
   29   import org.apache.jasper.JspCompilationContext;
   30   import org.apache.jasper.xmlparser.XMLEncodingDetector;
   31   import org.xml.sax.Attributes;
   32   
   33   /**
   34    * Controller for the parsing of a JSP page.
   35    * <p>
   36    * The same ParserController instance is used for a JSP page and any JSP
   37    * segments included by it (via an include directive), where each segment may
   38    * be provided in standard or XML syntax. This class selects and invokes the
   39    * appropriate parser for the JSP page and its included segments.
   40    *
   41    * @author Pierre Delisle
   42    * @author Jan Luehe
   43    */
   44   class ParserController implements TagConstants {
   45   
   46       private static final String CHARSET = "charset=";
   47   
   48       private JspCompilationContext ctxt;
   49       private Compiler compiler;
   50       private ErrorDispatcher err;
   51   
   52       /*
   53        * Indicates the syntax (XML or standard) of the file being processed
   54        */
   55       private boolean isXml;
   56   
   57       /*
   58        * A stack to keep track of the 'current base directory'
   59        * for include directives that refer to relative paths.
   60        */
   61       private Stack baseDirStack = new Stack();
   62   
   63       private boolean isEncodingSpecifiedInProlog;
   64       private boolean isBomPresent;
   65       private int skip;
   66   
   67       private String sourceEnc;
   68   
   69       private boolean isDefaultPageEncoding;
   70       private boolean isTagFile;
   71       private boolean directiveOnly;
   72   
   73       /*
   74        * Constructor
   75        */
   76       public ParserController(JspCompilationContext ctxt, Compiler compiler) {
   77           this.ctxt = ctxt; 
   78           this.compiler = compiler;
   79           this.err = compiler.getErrorDispatcher();
   80       }
   81   
   82       public JspCompilationContext getJspCompilationContext () {
   83           return ctxt;
   84       }
   85   
   86       public Compiler getCompiler () {
   87           return compiler;
   88       }
   89   
   90       /**
   91        * Parses a JSP page or tag file. This is invoked by the compiler.
   92        *
   93        * @param inFileName The path to the JSP page or tag file to be parsed.
   94        */
   95       public Node.Nodes parse(String inFileName)
   96       throws FileNotFoundException, JasperException, IOException {
   97           // If we're parsing a packaged tag file or a resource included by it
   98           // (using an include directive), ctxt.getTagFileJar() returns the 
   99           // JAR file from which to read the tag file or included resource,
  100           // respectively.
  101           isTagFile = ctxt.isTagFile();
  102           directiveOnly = false;
  103           return doParse(inFileName, null, ctxt.getTagFileJarUrl());
  104       }
  105   
  106       /**
  107        * Parses the directives of a JSP page or tag file. This is invoked by the
  108        * compiler.
  109        *
  110        * @param inFileName The path to the JSP page or tag file to be parsed.
  111        */
  112       public Node.Nodes parseDirectives(String inFileName)
  113       throws FileNotFoundException, JasperException, IOException {
  114           // If we're parsing a packaged tag file or a resource included by it
  115           // (using an include directive), ctxt.getTagFileJar() returns the 
  116           // JAR file from which to read the tag file or included resource,
  117           // respectively.
  118           isTagFile = ctxt.isTagFile();
  119           directiveOnly = true;
  120           return doParse(inFileName, null, ctxt.getTagFileJarUrl());
  121       }
  122   
  123   
  124       /**
  125        * Processes an include directive with the given path.
  126        *
  127        * @param inFileName The path to the resource to be included.
  128        * @param parent The parent node of the include directive.
  129        * @param jarFile The JAR file from which to read the included resource,
  130        * or null of the included resource is to be read from the filesystem
  131        */
  132       public Node.Nodes parse(String inFileName, Node parent,
  133               URL jarFileUrl)
  134       throws FileNotFoundException, JasperException, IOException {
  135           // For files that are statically included, isTagfile and directiveOnly
  136           // remain unchanged.
  137           return doParse(inFileName, parent, jarFileUrl);
  138       }
  139   
  140       /**
  141        * Extracts tag file directive information from the tag file with the
  142        * given name.
  143        *
  144        * This is invoked by the compiler 
  145        *
  146        * @param inFileName The name of the tag file to be parsed.
  147        * @deprecated Use {@link #parseTagFileDirectives(String, URL)}
  148        *             See https://issues.apache.org/bugzilla/show_bug.cgi?id=46471
  149        */
  150       public Node.Nodes parseTagFileDirectives(String inFileName)
  151       throws FileNotFoundException, JasperException, IOException {
  152           return parseTagFileDirectives(
  153                   inFileName, ctxt.getTagFileJarUrl(inFileName));
  154       }
  155   
  156       /**
  157        * Extracts tag file directive information from the given tag file.
  158        *
  159        * This is invoked by the compiler 
  160        *
  161        * @param inFileName    The name of the tag file to be parsed.
  162        * @param tagFileJarUrl The location of the tag file.
  163        */
  164       public Node.Nodes parseTagFileDirectives(String inFileName,
  165               URL tagFileJarUrl)
  166               throws FileNotFoundException, JasperException, IOException {
  167           boolean isTagFileSave = isTagFile;
  168           boolean directiveOnlySave = directiveOnly;
  169           isTagFile = true;
  170           directiveOnly = true;
  171           Node.Nodes page = doParse(inFileName, null, tagFileJarUrl);
  172           directiveOnly = directiveOnlySave;
  173           isTagFile = isTagFileSave;
  174           return page;
  175       }
  176   
  177       /**
  178        * Parses the JSP page or tag file with the given path name.
  179        *
  180        * @param inFileName The name of the JSP page or tag file to be parsed.
  181        * @param parent The parent node (non-null when processing an include
  182        * directive)
  183        * @param isTagFile true if file to be parsed is tag file, and false if it
  184        * is a regular JSP page
  185        * @param directivesOnly true if the file to be parsed is a tag file and
  186        * we are only interested in the directives needed for constructing a
  187        * TagFileInfo.
  188        * @param jarFile The JAR file from which to read the JSP page or tag file,
  189        * or null if the JSP page or tag file is to be read from the filesystem
  190        */
  191       private Node.Nodes doParse(String inFileName,
  192               Node parent,
  193               URL jarFileUrl)
  194       throws FileNotFoundException, JasperException, IOException {
  195   
  196           Node.Nodes parsedPage = null;
  197           isEncodingSpecifiedInProlog = false;
  198           isBomPresent = false;
  199           isDefaultPageEncoding = false;
  200   
  201           JarFile jarFile = getJarFile(jarFileUrl);
  202           String absFileName = resolveFileName(inFileName);
  203           String jspConfigPageEnc = getJspConfigPageEncoding(absFileName);
  204   
  205           // Figure out what type of JSP document and encoding type we are
  206           // dealing with
  207           determineSyntaxAndEncoding(absFileName, jarFile, jspConfigPageEnc);
  208   
  209           if (parent != null) {
  210               // Included resource, add to dependent list
  211               if (jarFile == null) {
  212                   compiler.getPageInfo().addDependant(absFileName);
  213               } else {
  214                   compiler.getPageInfo().addDependant(
  215                           jarFileUrl.toExternalForm() + absFileName.substring(1));
  216               }
  217           }
  218   
  219           if ((isXml && isEncodingSpecifiedInProlog) || isBomPresent) {
  220               /*
  221                * Make sure the encoding explicitly specified in the XML
  222                * prolog (if any) matches that in the JSP config element
  223                * (if any), treating "UTF-16", "UTF-16BE", and "UTF-16LE" as
  224                * identical.
  225                */
  226               if (jspConfigPageEnc != null && !jspConfigPageEnc.equals(sourceEnc)
  227                       && (!jspConfigPageEnc.startsWith("UTF-16")
  228                               || !sourceEnc.startsWith("UTF-16"))) {
  229                   err.jspError("jsp.error.prolog_config_encoding_mismatch",
  230                           sourceEnc, jspConfigPageEnc);
  231               }
  232           }
  233   
  234           // Dispatch to the appropriate parser
  235           if (isXml) {
  236               // JSP document (XML syntax)
  237               // InputStream for jspx page is created and properly closed in
  238               // JspDocumentParser.
  239               parsedPage = JspDocumentParser.parse(this, absFileName,
  240                       jarFile, parent,
  241                       isTagFile, directiveOnly,
  242                       sourceEnc,
  243                       jspConfigPageEnc,
  244                       isEncodingSpecifiedInProlog,
  245                       isBomPresent);
  246           } else {
  247               // Standard syntax
  248               InputStreamReader inStreamReader = null;
  249               try {
  250                   inStreamReader = JspUtil.getReader(absFileName, sourceEnc,
  251                           jarFile, ctxt, err, skip);
  252                   JspReader jspReader = new JspReader(ctxt, absFileName,
  253                           sourceEnc, inStreamReader,
  254                           err);
  255                   parsedPage = Parser.parse(this, jspReader, parent, isTagFile,
  256                           directiveOnly, jarFileUrl,
  257                           sourceEnc, jspConfigPageEnc,
  258                           isDefaultPageEncoding, isBomPresent);
  259               } finally {
  260                   if (inStreamReader != null) {
  261                       try {
  262                           inStreamReader.close();
  263                       } catch (Exception any) {
  264                       }
  265                   }
  266               }
  267           }
  268   
  269           if (jarFile != null) {
  270               try {
  271                   jarFile.close();
  272               } catch (Throwable t) {}
  273           }
  274   
  275           baseDirStack.pop();
  276   
  277           return parsedPage;
  278       }
  279   
  280       /*
  281        * Checks to see if the given URI is matched by a URL pattern specified in
  282        * a jsp-property-group in web.xml, and if so, returns the value of the
  283        * <page-encoding> element.
  284        *
  285        * @param absFileName The URI to match
  286        *
  287        * @return The value of the <page-encoding> attribute of the 
  288        * jsp-property-group with matching URL pattern
  289        */
  290       private String getJspConfigPageEncoding(String absFileName)
  291       throws JasperException {
  292   
  293           JspConfig jspConfig = ctxt.getOptions().getJspConfig();
  294           JspConfig.JspProperty jspProperty
  295               = jspConfig.findJspProperty(absFileName);
  296           return jspProperty.getPageEncoding();
  297       }
  298   
  299       /**
  300        * Determines the syntax (standard or XML) and page encoding properties
  301        * for the given file, and stores them in the 'isXml' and 'sourceEnc'
  302        * instance variables, respectively.
  303        */
  304       private void determineSyntaxAndEncoding(String absFileName,
  305               JarFile jarFile,
  306               String jspConfigPageEnc)
  307       throws JasperException, IOException {
  308   
  309           isXml = false;
  310   
  311           /*
  312            * 'true' if the syntax (XML or standard) of the file is given
  313            * from external information: either via a JSP configuration element,
  314            * the ".jspx" suffix, or the enclosing file (for included resources)
  315            */
  316           boolean isExternal = false;
  317   
  318           /*
  319            * Indicates whether we need to revert from temporary usage of
  320            * "ISO-8859-1" back to "UTF-8"
  321            */
  322           boolean revert = false;
  323   
  324           JspConfig jspConfig = ctxt.getOptions().getJspConfig();
  325           JspConfig.JspProperty jspProperty = jspConfig.findJspProperty(
  326                   absFileName);
  327           if (jspProperty.isXml() != null) {
  328               // If <is-xml> is specified in a <jsp-property-group>, it is used.
  329               isXml = JspUtil.booleanValue(jspProperty.isXml());
  330               isExternal = true;
  331           } else if (absFileName.endsWith(".jspx")
  332                   || absFileName.endsWith(".tagx")) {
  333               isXml = true;
  334               isExternal = true;
  335           }
  336   
  337           if (isExternal && !isXml) {
  338               // JSP (standard) syntax. Use encoding specified in jsp-config
  339               // if provided.
  340               sourceEnc = jspConfigPageEnc;
  341               if (sourceEnc != null) {
  342                   return;
  343               }
  344               // We don't know the encoding, so use BOM to determine it
  345               sourceEnc = "ISO-8859-1";
  346           } else {
  347               // XML syntax or unknown, (auto)detect encoding ...
  348               Object[] ret = XMLEncodingDetector.getEncoding(absFileName,
  349                       jarFile, ctxt, err);
  350               sourceEnc = (String) ret[0];
  351               if (((Boolean) ret[1]).booleanValue()) {
  352                   isEncodingSpecifiedInProlog = true;
  353               }
  354               if (((Boolean) ret[2]).booleanValue()) {
  355                   isBomPresent = true;
  356               }
  357               skip = ((Integer) ret[3]).intValue();
  358   
  359               if (!isXml && sourceEnc.equals("UTF-8")) {
  360                   /*
  361                    * We don't know if we're dealing with XML or standard syntax.
  362                    * Therefore, we need to check to see if the page contains
  363                    * a <jsp:root> element.
  364                    *
  365                    * We need to be careful, because the page may be encoded in
  366                    * ISO-8859-1 (or something entirely different), and may
  367                    * contain byte sequences that will cause a UTF-8 converter to
  368                    * throw exceptions. 
  369                    *
  370                    * It is safe to use a source encoding of ISO-8859-1 in this
  371                    * case, as there are no invalid byte sequences in ISO-8859-1,
  372                    * and the byte/character sequences we're looking for (i.e.,
  373                    * <jsp:root>) are identical in either encoding (both UTF-8
  374                    * and ISO-8859-1 are extensions of ASCII).
  375                    */
  376                   sourceEnc = "ISO-8859-1";
  377                   revert = true;
  378               }
  379           }
  380   
  381           if (isXml) {
  382               // (This implies 'isExternal' is TRUE.)
  383               // We know we're dealing with a JSP document (via JSP config or
  384               // ".jspx" suffix), so we're done.
  385               return;
  386           }
  387   
  388           /*
  389            * At this point, 'isExternal' or 'isXml' is FALSE.
  390            * Search for jsp:root action, in order to determine if we're dealing 
  391            * with XML or standard syntax (unless we already know what we're 
  392            * dealing with, i.e., when 'isExternal' is TRUE and 'isXml' is FALSE).
  393            * No check for XML prolog, since nothing prevents a page from
  394            * outputting XML and still using JSP syntax (in this case, the 
  395            * XML prolog is treated as template text).
  396            */
  397           JspReader jspReader = null;
  398           try {
  399               jspReader = new JspReader(ctxt, absFileName, sourceEnc, jarFile,
  400                       err);
  401           } catch (FileNotFoundException ex) {
  402               throw new JasperException(ex);
  403           }
  404           jspReader.setSingleFile(true);
  405           Mark startMark = jspReader.mark();
  406           if (!isExternal) {
  407               jspReader.reset(startMark);
  408               if (hasJspRoot(jspReader)) {
  409                   if (revert) {
  410                       sourceEnc = "UTF-8";
  411                   }
  412                   isXml = true;
  413                   return;
  414               } else {
  415                   if (revert && isBomPresent) {
  416                       sourceEnc = "UTF-8";
  417                   }
  418                   isXml = false;
  419               }
  420           }
  421   
  422           /*
  423            * At this point, we know we're dealing with JSP syntax.
  424            * If an XML prolog is provided, it's treated as template text.
  425            * Determine the page encoding from the page directive, unless it's
  426            * specified via JSP config.
  427            */
  428           if (!isBomPresent) {
  429               sourceEnc = jspConfigPageEnc;
  430               if (sourceEnc == null) {
  431                   sourceEnc = getPageEncodingForJspSyntax(jspReader, startMark);
  432                   if (sourceEnc == null) {
  433                       // Default to "ISO-8859-1" per JSP spec
  434                       sourceEnc = "ISO-8859-1";
  435                       isDefaultPageEncoding = true;
  436                   }
  437               }
  438           }
  439           
  440       }
  441   
  442       /*
  443        * Determines page source encoding for page or tag file in JSP syntax,
  444        * by reading (in this order) the value of the 'pageEncoding' page
  445        * directive attribute, or the charset value of the 'contentType' page
  446        * directive attribute.
  447        *
  448        * @return The page encoding, or null if not found
  449        */
  450       private String getPageEncodingForJspSyntax(JspReader jspReader,
  451               Mark startMark)
  452       throws JasperException {
  453   
  454           String encoding = null;
  455           String saveEncoding = null;
  456   
  457           jspReader.reset(startMark);
  458   
  459           /*
  460            * Determine page encoding from directive of the form <%@ page %>,
  461            * <%@ tag %>, <jsp:directive.page > or <jsp:directive.tag >.
  462            */
  463           while (true) {
  464               if (jspReader.skipUntil("<") == null) {
  465                   break;
  466               }
  467               // If this is a comment, skip until its end
  468               if (jspReader.matches("%--")) {
  469                   if (jspReader.skipUntil("--%>") == null) {
  470                       // error will be caught in Parser
  471                       break;
  472                   }
  473                   continue;
  474               }
  475               boolean isDirective = jspReader.matches("%@");
  476               if (isDirective) {
  477                   jspReader.skipSpaces();
  478               }
  479               else {
  480                   isDirective = jspReader.matches("jsp:directive.");
  481               }
  482               if (!isDirective) {
  483                   continue;
  484               }
  485   
  486               // compare for "tag ", so we don't match "taglib"
  487               if (jspReader.matches("tag ") || jspReader.matches("page")) {
  488   
  489                   jspReader.skipSpaces();
  490                   Attributes attrs = Parser.parseAttributes(this, jspReader);
  491                   encoding = getPageEncodingFromDirective(attrs, "pageEncoding");
  492                   if (encoding != null) {
  493                       break;
  494                   }
  495                   encoding = getPageEncodingFromDirective(attrs, "contentType");
  496                   if (encoding != null) {
  497                       saveEncoding = encoding;
  498                   }
  499               }
  500           }
  501   
  502           if (encoding == null) {
  503               encoding = saveEncoding;
  504           }
  505   
  506           return encoding;
  507       }
  508   
  509       /*
  510        * Scans the given attributes for the attribute with the given name,
  511        * which is either 'pageEncoding' or 'contentType', and returns the
  512        * specified page encoding.
  513        *
  514        * In the case of 'contentType', the page encoding is taken from the
  515        * content type's 'charset' component.
  516        *
  517        * @param attrs The page directive attributes
  518        * @param attrName The name of the attribute to search for (either
  519        * 'pageEncoding' or 'contentType')
  520        *
  521        * @return The page encoding, or null
  522        */
  523       private String getPageEncodingFromDirective(Attributes attrs,
  524               String attrName) {
  525           String value = attrs.getValue(attrName);
  526           if (attrName.equals("pageEncoding")) {
  527               return value;
  528           }
  529   
  530           // attrName = contentType
  531           String contentType = value;
  532           String encoding = null;
  533           if (contentType != null) {
  534               int loc = contentType.indexOf(CHARSET);
  535               if (loc != -1) {
  536                   encoding = contentType.substring(loc + CHARSET.length());
  537               }
  538           }
  539   
  540           return encoding;
  541       }
  542   
  543       /*
  544        * Resolve the name of the file and update baseDirStack() to keep track of
  545        * the current base directory for each included file.
  546        * The 'root' file is always an 'absolute' path, so no need to put an
  547        * initial value in the baseDirStack.
  548        */
  549       private String resolveFileName(String inFileName) {
  550           String fileName = inFileName.replace('\\', '/');
  551           boolean isAbsolute = fileName.startsWith("/");
  552           fileName = isAbsolute ? fileName 
  553                   : (String) baseDirStack.peek() + fileName;
  554           String baseDir = 
  555               fileName.substring(0, fileName.lastIndexOf("/") + 1);
  556           baseDirStack.push(baseDir);
  557           return fileName;
  558       }
  559   
  560       /*
  561        * Checks to see if the given page contains, as its first element, a <root>
  562        * element whose prefix is bound to the JSP namespace, as in:
  563        *
  564        * <wombat:root xmlns:wombat="http://java.sun.com/JSP/Page" version="1.2">
  565        *   ...
  566        * </wombat:root>
  567        *
  568        * @param reader The reader for this page
  569        *
  570        * @return true if this page contains a root element whose prefix is bound
  571        * to the JSP namespace, and false otherwise
  572        */
  573       private boolean hasJspRoot(JspReader reader) throws JasperException {
  574   
  575           // <prefix>:root must be the first element
  576           Mark start = null;
  577           while ((start = reader.skipUntil("<")) != null) {
  578               int c = reader.nextChar();
  579               if (c != '!' && c != '?') break;
  580           }
  581           if (start == null) {
  582               return false;
  583           }
  584           Mark stop = reader.skipUntil(":root");
  585           if (stop == null) {
  586               return false;
  587           }
  588           // call substring to get rid of leading '<'
  589           String prefix = reader.getText(start, stop).substring(1);
  590   
  591           start = stop;
  592           stop = reader.skipUntil(">");
  593           if (stop == null) {
  594               return false;
  595           }
  596   
  597           // Determine namespace associated with <root> element's prefix
  598           String root = reader.getText(start, stop);
  599           String xmlnsDecl = "xmlns:" + prefix;
  600           int index = root.indexOf(xmlnsDecl);
  601           if (index == -1) {
  602               return false;
  603           }
  604           index += xmlnsDecl.length();
  605           while (index < root.length()
  606                   && Character.isWhitespace(root.charAt(index))) {
  607               index++;
  608           }
  609           if (index < root.length() && root.charAt(index) == '=') {
  610               index++;
  611               while (index < root.length()
  612                       && Character.isWhitespace(root.charAt(index))) {
  613                   index++;
  614               }
  615               if (index < root.length() && root.charAt(index++) == '"'
  616                   && root.regionMatches(index, JSP_URI, 0,
  617                           JSP_URI.length())) {
  618                   return true;
  619               }
  620           }
  621   
  622           return false;
  623       }
  624   
  625       private JarFile getJarFile(URL jarFileUrl) throws IOException {
  626           JarFile jarFile = null;
  627   
  628           if (jarFileUrl != null) {
  629               JarURLConnection conn = (JarURLConnection) jarFileUrl.openConnection();
  630               conn.setUseCaches(false);
  631               conn.connect();
  632               jarFile = conn.getJarFile();
  633           }
  634   
  635           return jarFile;
  636       }
  637   
  638   }

Save This Page
Home » apache-tomcat-6.0.26-src » org.apache » jasper » compiler » [javadoc | source]