1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.jasper.compiler;
19
20 import java.io.FileNotFoundException;
21 import java.io.IOException;
22 import java.io.InputStreamReader;
23 import java.net.JarURLConnection;
24 import java.net.URL;
25 import java.util.Stack;
26 import java.util.jar.JarFile;
27
28 import org.apache.jasper.JasperException;
29 import org.apache.jasper.JspCompilationContext;
30 import org.apache.jasper.xmlparser.XMLEncodingDetector;
31 import org.xml.sax.Attributes;
32
33 /**
34 * Controller for the parsing of a JSP page.
35 * <p>
36 * The same ParserController instance is used for a JSP page and any JSP
37 * segments included by it (via an include directive), where each segment may
38 * be provided in standard or XML syntax. This class selects and invokes the
39 * appropriate parser for the JSP page and its included segments.
40 *
41 * @author Pierre Delisle
42 * @author Jan Luehe
43 */
44 class ParserController implements TagConstants {
45
/*
 * Token searched for inside a 'contentType' directive value; the text
 * following it is taken as the page encoding.
 */
private static final String CHARSET = "charset=";

/* Compilation context and compiler supplied to the constructor. */
private JspCompilationContext ctxt;
private Compiler compiler;
/* Error dispatcher obtained from the compiler in the constructor. */
private ErrorDispatcher err;

/*
 * Indicates the syntax (XML or standard) of the file being processed
 */
private boolean isXml;

/*
 * A stack to keep track of the 'current base directory'
 * for include directives that refer to relative paths.
 */
private Stack baseDirStack = new Stack();

/*
 * Results of encoding auto-detection for the file being processed, as
 * reported by XMLEncodingDetector.getEncoding().
 * NOTE(review): 'skip' is handed to JspUtil.getReader(); presumably it is
 * the number of leading bytes (e.g. a BOM) to skip -- confirm.
 */
private boolean isEncodingSpecifiedInProlog;
private boolean isBomPresent;
private int skip;

/* Source encoding selected for the file being processed. */
private String sourceEnc;

/*
 * 'isDefaultPageEncoding' is set when no encoding could be determined and
 * "ISO-8859-1" was applied as the default. 'isTagFile' and 'directiveOnly'
 * are set by the public parse entry points and forwarded to the parsers.
 */
private boolean isDefaultPageEncoding;
private boolean isTagFile;
private boolean directiveOnly;
72
73 /*
74 * Constructor
75 */
76 public ParserController(JspCompilationContext ctxt, Compiler compiler) {
77 this.ctxt = ctxt;
78 this.compiler = compiler;
79 this.err = compiler.getErrorDispatcher();
80 }
81
82 public JspCompilationContext getJspCompilationContext () {
83 return ctxt;
84 }
85
86 public Compiler getCompiler () {
87 return compiler;
88 }
89
90 /**
91 * Parses a JSP page or tag file. This is invoked by the compiler.
92 *
93 * @param inFileName The path to the JSP page or tag file to be parsed.
94 */
95 public Node.Nodes parse(String inFileName)
96 throws FileNotFoundException, JasperException, IOException {
97 // If we're parsing a packaged tag file or a resource included by it
98 // (using an include directive), ctxt.getTagFileJar() returns the
99 // JAR file from which to read the tag file or included resource,
100 // respectively.
101 isTagFile = ctxt.isTagFile();
102 directiveOnly = false;
103 return doParse(inFileName, null, ctxt.getTagFileJarUrl());
104 }
105
106 /**
107 * Processes an include directive with the given path.
108 *
109 * @param inFileName The path to the resource to be included.
110 * @param parent The parent node of the include directive.
111 * @param jarFile The JAR file from which to read the included resource,
112 * or null of the included resource is to be read from the filesystem
113 */
114 public Node.Nodes parse(String inFileName, Node parent,
115 URL jarFileUrl)
116 throws FileNotFoundException, JasperException, IOException {
117 // For files that are statically included, isTagfile and directiveOnly
118 // remain unchanged.
119 return doParse(inFileName, parent, jarFileUrl);
120 }
121
122 /**
123 * Extracts tag file directive information from the tag file with the
124 * given name.
125 *
126 * This is invoked by the compiler
127 *
128 * @param inFileName The name of the tag file to be parsed.
129 */
130 public Node.Nodes parseTagFileDirectives(String inFileName)
131 throws FileNotFoundException, JasperException, IOException {
132 boolean isTagFileSave = isTagFile;
133 boolean directiveOnlySave = directiveOnly;
134 isTagFile = true;
135 directiveOnly = true;
136 Node.Nodes page = doParse(inFileName, null,
137 ctxt.getTagFileJarUrl(inFileName));
138 directiveOnly = directiveOnlySave;
139 isTagFile = isTagFileSave;
140 return page;
141 }
142
143 /**
144 * Parses the JSP page or tag file with the given path name.
145 *
146 * @param inFileName The name of the JSP page or tag file to be parsed.
147 * @param parent The parent node (non-null when processing an include
148 * directive)
149 * @param isTagFile true if file to be parsed is tag file, and false if it
150 * is a regular JSP page
151 * @param directivesOnly true if the file to be parsed is a tag file and
152 * we are only interested in the directives needed for constructing a
153 * TagFileInfo.
154 * @param jarFile The JAR file from which to read the JSP page or tag file,
155 * or null if the JSP page or tag file is to be read from the filesystem
156 */
157 private Node.Nodes doParse(String inFileName,
158 Node parent,
159 URL jarFileUrl)
160 throws FileNotFoundException, JasperException, IOException {
161
162 Node.Nodes parsedPage = null;
163 isEncodingSpecifiedInProlog = false;
164 isBomPresent = false;
165 isDefaultPageEncoding = false;
166
167 JarFile jarFile = getJarFile(jarFileUrl);
168 String absFileName = resolveFileName(inFileName);
169 String jspConfigPageEnc = getJspConfigPageEncoding(absFileName);
170
171 // Figure out what type of JSP document and encoding type we are
172 // dealing with
173 determineSyntaxAndEncoding(absFileName, jarFile, jspConfigPageEnc);
174
175 if (parent != null) {
176 // Included resource, add to dependent list
177 compiler.getPageInfo().addDependant(absFileName);
178 }
179
180 if ((isXml && isEncodingSpecifiedInProlog) || isBomPresent) {
181 /*
182 * Make sure the encoding explicitly specified in the XML
183 * prolog (if any) matches that in the JSP config element
184 * (if any), treating "UTF-16", "UTF-16BE", and "UTF-16LE" as
185 * identical.
186 */
187 if (jspConfigPageEnc != null && !jspConfigPageEnc.equals(sourceEnc)
188 && (!jspConfigPageEnc.startsWith("UTF-16")
189 || !sourceEnc.startsWith("UTF-16"))) {
190 err.jspError("jsp.error.prolog_config_encoding_mismatch",
191 sourceEnc, jspConfigPageEnc);
192 }
193 }
194
195 // Dispatch to the appropriate parser
196 if (isXml) {
197 // JSP document (XML syntax)
198 // InputStream for jspx page is created and properly closed in
199 // JspDocumentParser.
200 parsedPage = JspDocumentParser.parse(this, absFileName,
201 jarFile, parent,
202 isTagFile, directiveOnly,
203 sourceEnc,
204 jspConfigPageEnc,
205 isEncodingSpecifiedInProlog,
206 isBomPresent);
207 } else {
208 // Standard syntax
209 InputStreamReader inStreamReader = null;
210 try {
211 inStreamReader = JspUtil.getReader(absFileName, sourceEnc,
212 jarFile, ctxt, err, skip);
213 JspReader jspReader = new JspReader(ctxt, absFileName,
214 sourceEnc, inStreamReader,
215 err);
216 parsedPage = Parser.parse(this, jspReader, parent, isTagFile,
217 directiveOnly, jarFileUrl,
218 sourceEnc, jspConfigPageEnc,
219 isDefaultPageEncoding, isBomPresent);
220 } finally {
221 if (inStreamReader != null) {
222 try {
223 inStreamReader.close();
224 } catch (Exception any) {
225 }
226 }
227 }
228 }
229
230 if (jarFile != null) {
231 try {
232 jarFile.close();
233 } catch (Throwable t) {}
234 }
235
236 baseDirStack.pop();
237
238 return parsedPage;
239 }
240
241 /*
242 * Checks to see if the given URI is matched by a URL pattern specified in
243 * a jsp-property-group in web.xml, and if so, returns the value of the
244 * <page-encoding> element.
245 *
246 * @param absFileName The URI to match
247 *
248 * @return The value of the <page-encoding> attribute of the
249 * jsp-property-group with matching URL pattern
250 */
251 private String getJspConfigPageEncoding(String absFileName)
252 throws JasperException {
253
254 JspConfig jspConfig = ctxt.getOptions().getJspConfig();
255 JspConfig.JspProperty jspProperty
256 = jspConfig.findJspProperty(absFileName);
257 return jspProperty.getPageEncoding();
258 }
259
/**
 * Determines the syntax (standard or XML) and page encoding properties
 * for the given file, and stores them in the 'isXml' and 'sourceEnc'
 * instance variables, respectively.
 * <p>
 * Depending on the path taken, this method also sets
 * 'isEncodingSpecifiedInProlog', 'isBomPresent' and 'skip' (from the
 * result of XMLEncodingDetector.getEncoding()) and
 * 'isDefaultPageEncoding' (when "ISO-8859-1" is applied as the default).
 * The two boolean detection flags are only ever switched to true here;
 * this method never clears them.
 *
 * @param absFileName The resolved path of the file to examine
 * @param jarFile The JAR file to read the file from; passed through to
 * XMLEncodingDetector and JspReader
 * @param jspConfigPageEnc The page encoding configured for this file via
 * JSP config, or null if none
 *
 * @throws JasperException among other cases, when the file cannot be
 * found while creating the JspReader (the FileNotFoundException is
 * wrapped)
 */
private void determineSyntaxAndEncoding(String absFileName,
                                        JarFile jarFile,
                                        String jspConfigPageEnc)
    throws JasperException, IOException {

    isXml = false;

    /*
     * 'true' if the syntax (XML or standard) of the file is given
     * from external information: either via a JSP configuration element,
     * the ".jspx" suffix, or the enclosing file (for included resources)
     */
    boolean isExternal = false;

    /*
     * Indicates whether we need to revert from temporary usage of
     * "ISO-8859-1" back to "UTF-8"
     */
    boolean revert = false;

    JspConfig jspConfig = ctxt.getOptions().getJspConfig();
    JspConfig.JspProperty jspProperty = jspConfig.findJspProperty(
                                                        absFileName);
    if (jspProperty.isXml() != null) {
        // If <is-xml> is specified in a <jsp-property-group>, it is used.
        isXml = JspUtil.booleanValue(jspProperty.isXml());
        isExternal = true;
    } else if (absFileName.endsWith(".jspx")
            || absFileName.endsWith(".tagx")) {
        // The ".jspx" and ".tagx" suffixes both select XML syntax.
        isXml = true;
        isExternal = true;
    }

    if (isExternal && !isXml) {
        // JSP (standard) syntax. Use encoding specified in jsp-config
        // if provided.
        sourceEnc = jspConfigPageEnc;
        if (sourceEnc != null) {
            return;
        }
        // We don't know the encoding yet. No detection is run on this
        // path: "ISO-8859-1" is used provisionally to read the file, and
        // the final encoding is taken from the page directive (or the
        // default) further below.
        sourceEnc = "ISO-8859-1";
    } else {
        // XML syntax or unknown, (auto)detect encoding ...
        // ret[0] = encoding name, ret[1] = encoding given in prolog,
        // ret[2] = BOM present, ret[3] = value stored in 'skip'.
        Object[] ret = XMLEncodingDetector.getEncoding(absFileName,
                                                       jarFile, ctxt, err);
        sourceEnc = (String) ret[0];
        if (((Boolean) ret[1]).booleanValue()) {
            isEncodingSpecifiedInProlog = true;
        }
        if (((Boolean) ret[2]).booleanValue()) {
            isBomPresent = true;
        }
        skip = ((Integer) ret[3]).intValue();

        if (!isXml && sourceEnc.equals("UTF-8")) {
            /*
             * We don't know if we're dealing with XML or standard syntax.
             * Therefore, we need to check to see if the page contains
             * a <jsp:root> element.
             *
             * We need to be careful, because the page may be encoded in
             * ISO-8859-1 (or something entirely different), and may
             * contain byte sequences that will cause a UTF-8 converter to
             * throw exceptions.
             *
             * It is safe to use a source encoding of ISO-8859-1 in this
             * case, as there are no invalid byte sequences in ISO-8859-1,
             * and the byte/character sequences we're looking for (i.e.,
             * <jsp:root>) are identical in either encoding (both UTF-8
             * and ISO-8859-1 are extensions of ASCII).
             */
            sourceEnc = "ISO-8859-1";
            revert = true;
        }
    }

    if (isXml) {
        // (This implies 'isExternal' is TRUE.)
        // We know we're dealing with a JSP document (via JSP config or
        // ".jspx" suffix), so we're done.
        return;
    }

    /*
     * At this point, 'isExternal' or 'isXml' is FALSE.
     * Search for jsp:root action, in order to determine if we're dealing
     * with XML or standard syntax (unless we already know what we're
     * dealing with, i.e., when 'isExternal' is TRUE and 'isXml' is FALSE).
     * No check for XML prolog, since nothing prevents a page from
     * outputting XML and still using JSP syntax (in this case, the
     * XML prolog is treated as template text).
     */
    JspReader jspReader = null;
    try {
        jspReader = new JspReader(ctxt, absFileName, sourceEnc, jarFile,
                                  err);
    } catch (FileNotFoundException ex) {
        throw new JasperException(ex);
    }
    jspReader.setSingleFile(true);
    // Remember the start position so later scans can rewind to it.
    Mark startMark = jspReader.mark();
    if (!isExternal) {
        jspReader.reset(startMark);
        if (hasJspRoot(jspReader)) {
            // JSP document: undo the temporary switch to ISO-8859-1.
            if (revert) {
                sourceEnc = "UTF-8";
            }
            isXml = true;
            return;
        } else {
            // Standard syntax: UTF-8 is only restored when a BOM was
            // found; otherwise the encoding is determined below.
            if (revert && isBomPresent) {
                sourceEnc = "UTF-8";
            }
            isXml = false;
        }
    }

    /*
     * At this point, we know we're dealing with JSP syntax.
     * If an XML prolog is provided, it's treated as template text.
     * Determine the page encoding from the page directive, unless it's
     * specified via JSP config.
     */
    if (!isBomPresent) {
        sourceEnc = jspConfigPageEnc;
        if (sourceEnc == null) {
            sourceEnc = getPageEncodingForJspSyntax(jspReader, startMark);
            if (sourceEnc == null) {
                // Default to "ISO-8859-1" per JSP spec
                sourceEnc = "ISO-8859-1";
                isDefaultPageEncoding = true;
            }
        }
    }

}
402
403 /*
404 * Determines page source encoding for page or tag file in JSP syntax,
405 * by reading (in this order) the value of the 'pageEncoding' page
406 * directive attribute, or the charset value of the 'contentType' page
407 * directive attribute.
408 *
409 * @return The page encoding, or null if not found
410 */
411 private String getPageEncodingForJspSyntax(JspReader jspReader,
412 Mark startMark)
413 throws JasperException {
414
415 String encoding = null;
416 String saveEncoding = null;
417
418 jspReader.reset(startMark);
419
420 /*
421 * Determine page encoding from directive of the form <%@ page %>,
422 * <%@ tag %>, <jsp:directive.page > or <jsp:directive.tag >.
423 */
424 while (true) {
425 if (jspReader.skipUntil("<") == null) {
426 break;
427 }
428 // If this is a comment, skip until its end
429 if (jspReader.matches("%--")) {
430 if (jspReader.skipUntil("--%>") == null) {
431 // error will be caught in Parser
432 break;
433 }
434 continue;
435 }
436 boolean isDirective = jspReader.matches("%@");
437 if (isDirective) {
438 jspReader.skipSpaces();
439 }
440 else {
441 isDirective = jspReader.matches("jsp:directive.");
442 }
443 if (!isDirective) {
444 continue;
445 }
446
447 // compare for "tag ", so we don't match "taglib"
448 if (jspReader.matches("tag ") || jspReader.matches("page")) {
449
450 jspReader.skipSpaces();
451 Attributes attrs = Parser.parseAttributes(this, jspReader);
452 encoding = getPageEncodingFromDirective(attrs, "pageEncoding");
453 if (encoding != null) {
454 break;
455 }
456 encoding = getPageEncodingFromDirective(attrs, "contentType");
457 if (encoding != null) {
458 saveEncoding = encoding;
459 }
460 }
461 }
462
463 if (encoding == null) {
464 encoding = saveEncoding;
465 }
466
467 return encoding;
468 }
469
470 /*
471 * Scans the given attributes for the attribute with the given name,
472 * which is either 'pageEncoding' or 'contentType', and returns the
473 * specified page encoding.
474 *
475 * In the case of 'contentType', the page encoding is taken from the
476 * content type's 'charset' component.
477 *
478 * @param attrs The page directive attributes
479 * @param attrName The name of the attribute to search for (either
480 * 'pageEncoding' or 'contentType')
481 *
482 * @return The page encoding, or null
483 */
484 private String getPageEncodingFromDirective(Attributes attrs,
485 String attrName) {
486 String value = attrs.getValue(attrName);
487 if (attrName.equals("pageEncoding")) {
488 return value;
489 }
490
491 // attrName = contentType
492 String contentType = value;
493 String encoding = null;
494 if (contentType != null) {
495 int loc = contentType.indexOf(CHARSET);
496 if (loc != -1) {
497 encoding = contentType.substring(loc + CHARSET.length());
498 }
499 }
500
501 return encoding;
502 }
503
504 /*
505 * Resolve the name of the file and update baseDirStack() to keep track of
506 * the current base directory for each included file.
507 * The 'root' file is always an 'absolute' path, so no need to put an
508 * initial value in the baseDirStack.
509 */
510 private String resolveFileName(String inFileName) {
511 String fileName = inFileName.replace('\\', '/');
512 boolean isAbsolute = fileName.startsWith("/");
513 fileName = isAbsolute ? fileName
514 : (String) baseDirStack.peek() + fileName;
515 String baseDir =
516 fileName.substring(0, fileName.lastIndexOf("/") + 1);
517 baseDirStack.push(baseDir);
518 return fileName;
519 }
520
521 /*
522 * Checks to see if the given page contains, as its first element, a <root>
523 * element whose prefix is bound to the JSP namespace, as in:
524 *
525 * <wombat:root xmlns:wombat="http://java.sun.com/JSP/Page" version="1.2">
526 * ...
527 * </wombat:root>
528 *
529 * @param reader The reader for this page
530 *
531 * @return true if this page contains a root element whose prefix is bound
532 * to the JSP namespace, and false otherwise
533 */
534 private boolean hasJspRoot(JspReader reader) throws JasperException {
535
536 // <prefix>:root must be the first element
537 Mark start = null;
538 while ((start = reader.skipUntil("<")) != null) {
539 int c = reader.nextChar();
540 if (c != '!' && c != '?') break;
541 }
542 if (start == null) {
543 return false;
544 }
545 Mark stop = reader.skipUntil(":root");
546 if (stop == null) {
547 return false;
548 }
549 // call substring to get rid of leading '<'
550 String prefix = reader.getText(start, stop).substring(1);
551
552 start = stop;
553 stop = reader.skipUntil(">");
554 if (stop == null) {
555 return false;
556 }
557
558 // Determine namespace associated with <root> element's prefix
559 String root = reader.getText(start, stop);
560 String xmlnsDecl = "xmlns:" + prefix;
561 int index = root.indexOf(xmlnsDecl);
562 if (index == -1) {
563 return false;
564 }
565 index += xmlnsDecl.length();
566 while (index < root.length()
567 && Character.isWhitespace(root.charAt(index))) {
568 index++;
569 }
570 if (index < root.length() && root.charAt(index) == '=') {
571 index++;
572 while (index < root.length()
573 && Character.isWhitespace(root.charAt(index))) {
574 index++;
575 }
576 if (index < root.length() && root.charAt(index++) == '"'
577 && root.regionMatches(index, JSP_URI, 0,
578 JSP_URI.length())) {
579 return true;
580 }
581 }
582
583 return false;
584 }
585
586 private JarFile getJarFile(URL jarFileUrl) throws IOException {
587 JarFile jarFile = null;
588
589 if (jarFileUrl != null) {
590 JarURLConnection conn = (JarURLConnection) jarFileUrl.openConnection();
591 conn.setUseCaches(false);
592 conn.connect();
593 jarFile = conn.getJarFile();
594 }
595
596 return jarFile;
597 }
598
599 }