1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.jasper.compiler;
19
20 import java.io.FileNotFoundException;
21 import java.io.IOException;
22 import java.io.InputStreamReader;
23 import java.net.JarURLConnection;
24 import java.net.URL;
25 import java.util.Stack;
26 import java.util.jar.JarFile;
27
28 import org.apache.jasper.JasperException;
29 import org.apache.jasper.JspCompilationContext;
30 import org.apache.jasper.xmlparser.XMLEncodingDetector;
31 import org.xml.sax.Attributes;
32
33 /**
34 * Controller for the parsing of a JSP page.
35 * <p>
36 * The same ParserController instance is used for a JSP page and any JSP
37 * segments included by it (via an include directive), where each segment may
38 * be provided in standard or XML syntax. This class selects and invokes the
39 * appropriate parser for the JSP page and its included segments.
40 *
41 * @author Pierre Delisle
42 * @author Jan Luehe
43 */
44 class ParserController implements TagConstants {
45
46 private static final String CHARSET = "charset=";
47
48 private JspCompilationContext ctxt;
49 private Compiler compiler;
50 private ErrorDispatcher err;
51
52 /*
53 * Indicates the syntax (XML or standard) of the file being processed
54 */
55 private boolean isXml;
56
57 /*
58 * A stack to keep track of the 'current base directory'
59 * for include directives that refer to relative paths.
60 */
61 private Stack baseDirStack = new Stack();
62
63 private boolean isEncodingSpecifiedInProlog;
64 private boolean isBomPresent;
65 private int skip;
66
67 private String sourceEnc;
68
69 private boolean isDefaultPageEncoding;
70 private boolean isTagFile;
71 private boolean directiveOnly;
72
/**
 * Creates a controller bound to the given compilation context and
 * compiler. The error dispatcher used for reporting is taken from the
 * compiler.
 *
 * @param ctxt The compilation context of the page being compiled
 * @param compiler The compiler driving the parse
 */
public ParserController(JspCompilationContext ctxt, Compiler compiler) {
    this.compiler = compiler;
    this.ctxt = ctxt;
    this.err = this.compiler.getErrorDispatcher();
}
81
82 public JspCompilationContext getJspCompilationContext () {
83 return ctxt;
84 }
85
86 public Compiler getCompiler () {
87 return compiler;
88 }
89
90 /**
91 * Parses a JSP page or tag file. This is invoked by the compiler.
92 *
93 * @param inFileName The path to the JSP page or tag file to be parsed.
94 */
95 public Node.Nodes parse(String inFileName)
96 throws FileNotFoundException, JasperException, IOException {
97 // If we're parsing a packaged tag file or a resource included by it
98 // (using an include directive), ctxt.getTagFileJar() returns the
99 // JAR file from which to read the tag file or included resource,
100 // respectively.
101 isTagFile = ctxt.isTagFile();
102 directiveOnly = false;
103 return doParse(inFileName, null, ctxt.getTagFileJarUrl());
104 }
105
106 /**
107 * Parses the directives of a JSP page or tag file. This is invoked by the
108 * compiler.
109 *
110 * @param inFileName The path to the JSP page or tag file to be parsed.
111 */
112 public Node.Nodes parseDirectives(String inFileName)
113 throws FileNotFoundException, JasperException, IOException {
114 // If we're parsing a packaged tag file or a resource included by it
115 // (using an include directive), ctxt.getTagFileJar() returns the
116 // JAR file from which to read the tag file or included resource,
117 // respectively.
118 isTagFile = ctxt.isTagFile();
119 directiveOnly = true;
120 return doParse(inFileName, null, ctxt.getTagFileJarUrl());
121 }
122
123
124 /**
125 * Processes an include directive with the given path.
126 *
127 * @param inFileName The path to the resource to be included.
128 * @param parent The parent node of the include directive.
129 * @param jarFile The JAR file from which to read the included resource,
130 * or null of the included resource is to be read from the filesystem
131 */
132 public Node.Nodes parse(String inFileName, Node parent,
133 URL jarFileUrl)
134 throws FileNotFoundException, JasperException, IOException {
135 // For files that are statically included, isTagfile and directiveOnly
136 // remain unchanged.
137 return doParse(inFileName, parent, jarFileUrl);
138 }
139
140 /**
141 * Extracts tag file directive information from the tag file with the
142 * given name.
143 *
144 * This is invoked by the compiler
145 *
146 * @param inFileName The name of the tag file to be parsed.
147 * @deprecated Use {@link #parseTagFileDirectives(String, URL)}
148 * See https://issues.apache.org/bugzilla/show_bug.cgi?id=46471
149 */
150 public Node.Nodes parseTagFileDirectives(String inFileName)
151 throws FileNotFoundException, JasperException, IOException {
152 return parseTagFileDirectives(
153 inFileName, ctxt.getTagFileJarUrl(inFileName));
154 }
155
156 /**
157 * Extracts tag file directive information from the given tag file.
158 *
159 * This is invoked by the compiler
160 *
161 * @param inFileName The name of the tag file to be parsed.
162 * @param tagFileJarUrl The location of the tag file.
163 */
164 public Node.Nodes parseTagFileDirectives(String inFileName,
165 URL tagFileJarUrl)
166 throws FileNotFoundException, JasperException, IOException {
167 boolean isTagFileSave = isTagFile;
168 boolean directiveOnlySave = directiveOnly;
169 isTagFile = true;
170 directiveOnly = true;
171 Node.Nodes page = doParse(inFileName, null, tagFileJarUrl);
172 directiveOnly = directiveOnlySave;
173 isTagFile = isTagFileSave;
174 return page;
175 }
176
177 /**
178 * Parses the JSP page or tag file with the given path name.
179 *
180 * @param inFileName The name of the JSP page or tag file to be parsed.
181 * @param parent The parent node (non-null when processing an include
182 * directive)
183 * @param isTagFile true if file to be parsed is tag file, and false if it
184 * is a regular JSP page
185 * @param directivesOnly true if the file to be parsed is a tag file and
186 * we are only interested in the directives needed for constructing a
187 * TagFileInfo.
188 * @param jarFile The JAR file from which to read the JSP page or tag file,
189 * or null if the JSP page or tag file is to be read from the filesystem
190 */
191 private Node.Nodes doParse(String inFileName,
192 Node parent,
193 URL jarFileUrl)
194 throws FileNotFoundException, JasperException, IOException {
195
196 Node.Nodes parsedPage = null;
197 isEncodingSpecifiedInProlog = false;
198 isBomPresent = false;
199 isDefaultPageEncoding = false;
200
201 JarFile jarFile = getJarFile(jarFileUrl);
202 String absFileName = resolveFileName(inFileName);
203 String jspConfigPageEnc = getJspConfigPageEncoding(absFileName);
204
205 // Figure out what type of JSP document and encoding type we are
206 // dealing with
207 determineSyntaxAndEncoding(absFileName, jarFile, jspConfigPageEnc);
208
209 if (parent != null) {
210 // Included resource, add to dependent list
211 if (jarFile == null) {
212 compiler.getPageInfo().addDependant(absFileName);
213 } else {
214 compiler.getPageInfo().addDependant(
215 jarFileUrl.toExternalForm() + absFileName.substring(1));
216 }
217 }
218
219 if ((isXml && isEncodingSpecifiedInProlog) || isBomPresent) {
220 /*
221 * Make sure the encoding explicitly specified in the XML
222 * prolog (if any) matches that in the JSP config element
223 * (if any), treating "UTF-16", "UTF-16BE", and "UTF-16LE" as
224 * identical.
225 */
226 if (jspConfigPageEnc != null && !jspConfigPageEnc.equals(sourceEnc)
227 && (!jspConfigPageEnc.startsWith("UTF-16")
228 || !sourceEnc.startsWith("UTF-16"))) {
229 err.jspError("jsp.error.prolog_config_encoding_mismatch",
230 sourceEnc, jspConfigPageEnc);
231 }
232 }
233
234 // Dispatch to the appropriate parser
235 if (isXml) {
236 // JSP document (XML syntax)
237 // InputStream for jspx page is created and properly closed in
238 // JspDocumentParser.
239 parsedPage = JspDocumentParser.parse(this, absFileName,
240 jarFile, parent,
241 isTagFile, directiveOnly,
242 sourceEnc,
243 jspConfigPageEnc,
244 isEncodingSpecifiedInProlog,
245 isBomPresent);
246 } else {
247 // Standard syntax
248 InputStreamReader inStreamReader = null;
249 try {
250 inStreamReader = JspUtil.getReader(absFileName, sourceEnc,
251 jarFile, ctxt, err, skip);
252 JspReader jspReader = new JspReader(ctxt, absFileName,
253 sourceEnc, inStreamReader,
254 err);
255 parsedPage = Parser.parse(this, jspReader, parent, isTagFile,
256 directiveOnly, jarFileUrl,
257 sourceEnc, jspConfigPageEnc,
258 isDefaultPageEncoding, isBomPresent);
259 } finally {
260 if (inStreamReader != null) {
261 try {
262 inStreamReader.close();
263 } catch (Exception any) {
264 }
265 }
266 }
267 }
268
269 if (jarFile != null) {
270 try {
271 jarFile.close();
272 } catch (Throwable t) {}
273 }
274
275 baseDirStack.pop();
276
277 return parsedPage;
278 }
279
280 /*
281 * Checks to see if the given URI is matched by a URL pattern specified in
282 * a jsp-property-group in web.xml, and if so, returns the value of the
283 * <page-encoding> element.
284 *
285 * @param absFileName The URI to match
286 *
287 * @return The value of the <page-encoding> attribute of the
288 * jsp-property-group with matching URL pattern
289 */
290 private String getJspConfigPageEncoding(String absFileName)
291 throws JasperException {
292
293 JspConfig jspConfig = ctxt.getOptions().getJspConfig();
294 JspConfig.JspProperty jspProperty
295 = jspConfig.findJspProperty(absFileName);
296 return jspProperty.getPageEncoding();
297 }
298
299 /**
300 * Determines the syntax (standard or XML) and page encoding properties
301 * for the given file, and stores them in the 'isXml' and 'sourceEnc'
302 * instance variables, respectively.
303 */
304 private void determineSyntaxAndEncoding(String absFileName,
305 JarFile jarFile,
306 String jspConfigPageEnc)
307 throws JasperException, IOException {
308
309 isXml = false;
310
311 /*
312 * 'true' if the syntax (XML or standard) of the file is given
313 * from external information: either via a JSP configuration element,
314 * the ".jspx" suffix, or the enclosing file (for included resources)
315 */
316 boolean isExternal = false;
317
318 /*
319 * Indicates whether we need to revert from temporary usage of
320 * "ISO-8859-1" back to "UTF-8"
321 */
322 boolean revert = false;
323
324 JspConfig jspConfig = ctxt.getOptions().getJspConfig();
325 JspConfig.JspProperty jspProperty = jspConfig.findJspProperty(
326 absFileName);
327 if (jspProperty.isXml() != null) {
328 // If <is-xml> is specified in a <jsp-property-group>, it is used.
329 isXml = JspUtil.booleanValue(jspProperty.isXml());
330 isExternal = true;
331 } else if (absFileName.endsWith(".jspx")
332 || absFileName.endsWith(".tagx")) {
333 isXml = true;
334 isExternal = true;
335 }
336
337 if (isExternal && !isXml) {
338 // JSP (standard) syntax. Use encoding specified in jsp-config
339 // if provided.
340 sourceEnc = jspConfigPageEnc;
341 if (sourceEnc != null) {
342 return;
343 }
344 // We don't know the encoding, so use BOM to determine it
345 sourceEnc = "ISO-8859-1";
346 } else {
347 // XML syntax or unknown, (auto)detect encoding ...
348 Object[] ret = XMLEncodingDetector.getEncoding(absFileName,
349 jarFile, ctxt, err);
350 sourceEnc = (String) ret[0];
351 if (((Boolean) ret[1]).booleanValue()) {
352 isEncodingSpecifiedInProlog = true;
353 }
354 if (((Boolean) ret[2]).booleanValue()) {
355 isBomPresent = true;
356 }
357 skip = ((Integer) ret[3]).intValue();
358
359 if (!isXml && sourceEnc.equals("UTF-8")) {
360 /*
361 * We don't know if we're dealing with XML or standard syntax.
362 * Therefore, we need to check to see if the page contains
363 * a <jsp:root> element.
364 *
365 * We need to be careful, because the page may be encoded in
366 * ISO-8859-1 (or something entirely different), and may
367 * contain byte sequences that will cause a UTF-8 converter to
368 * throw exceptions.
369 *
370 * It is safe to use a source encoding of ISO-8859-1 in this
371 * case, as there are no invalid byte sequences in ISO-8859-1,
372 * and the byte/character sequences we're looking for (i.e.,
373 * <jsp:root>) are identical in either encoding (both UTF-8
374 * and ISO-8859-1 are extensions of ASCII).
375 */
376 sourceEnc = "ISO-8859-1";
377 revert = true;
378 }
379 }
380
381 if (isXml) {
382 // (This implies 'isExternal' is TRUE.)
383 // We know we're dealing with a JSP document (via JSP config or
384 // ".jspx" suffix), so we're done.
385 return;
386 }
387
388 /*
389 * At this point, 'isExternal' or 'isXml' is FALSE.
390 * Search for jsp:root action, in order to determine if we're dealing
391 * with XML or standard syntax (unless we already know what we're
392 * dealing with, i.e., when 'isExternal' is TRUE and 'isXml' is FALSE).
393 * No check for XML prolog, since nothing prevents a page from
394 * outputting XML and still using JSP syntax (in this case, the
395 * XML prolog is treated as template text).
396 */
397 JspReader jspReader = null;
398 try {
399 jspReader = new JspReader(ctxt, absFileName, sourceEnc, jarFile,
400 err);
401 } catch (FileNotFoundException ex) {
402 throw new JasperException(ex);
403 }
404 jspReader.setSingleFile(true);
405 Mark startMark = jspReader.mark();
406 if (!isExternal) {
407 jspReader.reset(startMark);
408 if (hasJspRoot(jspReader)) {
409 if (revert) {
410 sourceEnc = "UTF-8";
411 }
412 isXml = true;
413 return;
414 } else {
415 if (revert && isBomPresent) {
416 sourceEnc = "UTF-8";
417 }
418 isXml = false;
419 }
420 }
421
422 /*
423 * At this point, we know we're dealing with JSP syntax.
424 * If an XML prolog is provided, it's treated as template text.
425 * Determine the page encoding from the page directive, unless it's
426 * specified via JSP config.
427 */
428 if (!isBomPresent) {
429 sourceEnc = jspConfigPageEnc;
430 if (sourceEnc == null) {
431 sourceEnc = getPageEncodingForJspSyntax(jspReader, startMark);
432 if (sourceEnc == null) {
433 // Default to "ISO-8859-1" per JSP spec
434 sourceEnc = "ISO-8859-1";
435 isDefaultPageEncoding = true;
436 }
437 }
438 }
439
440 }
441
442 /*
443 * Determines page source encoding for page or tag file in JSP syntax,
444 * by reading (in this order) the value of the 'pageEncoding' page
445 * directive attribute, or the charset value of the 'contentType' page
446 * directive attribute.
447 *
448 * @return The page encoding, or null if not found
449 */
450 private String getPageEncodingForJspSyntax(JspReader jspReader,
451 Mark startMark)
452 throws JasperException {
453
454 String encoding = null;
455 String saveEncoding = null;
456
457 jspReader.reset(startMark);
458
459 /*
460 * Determine page encoding from directive of the form <%@ page %>,
461 * <%@ tag %>, <jsp:directive.page > or <jsp:directive.tag >.
462 */
463 while (true) {
464 if (jspReader.skipUntil("<") == null) {
465 break;
466 }
467 // If this is a comment, skip until its end
468 if (jspReader.matches("%--")) {
469 if (jspReader.skipUntil("--%>") == null) {
470 // error will be caught in Parser
471 break;
472 }
473 continue;
474 }
475 boolean isDirective = jspReader.matches("%@");
476 if (isDirective) {
477 jspReader.skipSpaces();
478 }
479 else {
480 isDirective = jspReader.matches("jsp:directive.");
481 }
482 if (!isDirective) {
483 continue;
484 }
485
486 // compare for "tag ", so we don't match "taglib"
487 if (jspReader.matches("tag ") || jspReader.matches("page")) {
488
489 jspReader.skipSpaces();
490 Attributes attrs = Parser.parseAttributes(this, jspReader);
491 encoding = getPageEncodingFromDirective(attrs, "pageEncoding");
492 if (encoding != null) {
493 break;
494 }
495 encoding = getPageEncodingFromDirective(attrs, "contentType");
496 if (encoding != null) {
497 saveEncoding = encoding;
498 }
499 }
500 }
501
502 if (encoding == null) {
503 encoding = saveEncoding;
504 }
505
506 return encoding;
507 }
508
509 /*
510 * Scans the given attributes for the attribute with the given name,
511 * which is either 'pageEncoding' or 'contentType', and returns the
512 * specified page encoding.
513 *
514 * In the case of 'contentType', the page encoding is taken from the
515 * content type's 'charset' component.
516 *
517 * @param attrs The page directive attributes
518 * @param attrName The name of the attribute to search for (either
519 * 'pageEncoding' or 'contentType')
520 *
521 * @return The page encoding, or null
522 */
523 private String getPageEncodingFromDirective(Attributes attrs,
524 String attrName) {
525 String value = attrs.getValue(attrName);
526 if (attrName.equals("pageEncoding")) {
527 return value;
528 }
529
530 // attrName = contentType
531 String contentType = value;
532 String encoding = null;
533 if (contentType != null) {
534 int loc = contentType.indexOf(CHARSET);
535 if (loc != -1) {
536 encoding = contentType.substring(loc + CHARSET.length());
537 }
538 }
539
540 return encoding;
541 }
542
543 /*
544 * Resolve the name of the file and update baseDirStack() to keep track of
545 * the current base directory for each included file.
546 * The 'root' file is always an 'absolute' path, so no need to put an
547 * initial value in the baseDirStack.
548 */
549 private String resolveFileName(String inFileName) {
550 String fileName = inFileName.replace('\\', '/');
551 boolean isAbsolute = fileName.startsWith("/");
552 fileName = isAbsolute ? fileName
553 : (String) baseDirStack.peek() + fileName;
554 String baseDir =
555 fileName.substring(0, fileName.lastIndexOf("/") + 1);
556 baseDirStack.push(baseDir);
557 return fileName;
558 }
559
560 /*
561 * Checks to see if the given page contains, as its first element, a <root>
562 * element whose prefix is bound to the JSP namespace, as in:
563 *
564 * <wombat:root xmlns:wombat="http://java.sun.com/JSP/Page" version="1.2">
565 * ...
566 * </wombat:root>
567 *
568 * @param reader The reader for this page
569 *
570 * @return true if this page contains a root element whose prefix is bound
571 * to the JSP namespace, and false otherwise
572 */
573 private boolean hasJspRoot(JspReader reader) throws JasperException {
574
575 // <prefix>:root must be the first element
576 Mark start = null;
577 while ((start = reader.skipUntil("<")) != null) {
578 int c = reader.nextChar();
579 if (c != '!' && c != '?') break;
580 }
581 if (start == null) {
582 return false;
583 }
584 Mark stop = reader.skipUntil(":root");
585 if (stop == null) {
586 return false;
587 }
588 // call substring to get rid of leading '<'
589 String prefix = reader.getText(start, stop).substring(1);
590
591 start = stop;
592 stop = reader.skipUntil(">");
593 if (stop == null) {
594 return false;
595 }
596
597 // Determine namespace associated with <root> element's prefix
598 String root = reader.getText(start, stop);
599 String xmlnsDecl = "xmlns:" + prefix;
600 int index = root.indexOf(xmlnsDecl);
601 if (index == -1) {
602 return false;
603 }
604 index += xmlnsDecl.length();
605 while (index < root.length()
606 && Character.isWhitespace(root.charAt(index))) {
607 index++;
608 }
609 if (index < root.length() && root.charAt(index) == '=') {
610 index++;
611 while (index < root.length()
612 && Character.isWhitespace(root.charAt(index))) {
613 index++;
614 }
615 if (index < root.length() && root.charAt(index++) == '"'
616 && root.regionMatches(index, JSP_URI, 0,
617 JSP_URI.length())) {
618 return true;
619 }
620 }
621
622 return false;
623 }
624
625 private JarFile getJarFile(URL jarFileUrl) throws IOException {
626 JarFile jarFile = null;
627
628 if (jarFileUrl != null) {
629 JarURLConnection conn = (JarURLConnection) jarFileUrl.openConnection();
630 conn.setUseCaches(false);
631 conn.connect();
632 jarFile = conn.getJarFile();
633 }
634
635 return jarFile;
636 }
637
638 }