Source code: org/apache/batik/util/ParsedURL.java
1 /*
2
3 Copyright 2001-2003 The Apache Software Foundation
4
5 Licensed under the Apache License, Version 2.0 (the "License");
6 you may not use this file except in compliance with the License.
7 You may obtain a copy of the License at
8
9 http://www.apache.org/licenses/LICENSE-2.0
10
11 Unless required by applicable law or agreed to in writing, software
12 distributed under the License is distributed on an "AS IS" BASIS,
13 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 See the License for the specific language governing permissions and
15 limitations under the License.
16
17 */
18 package org.apache.batik.util;
19
20 import java.io.IOException;
21 import java.io.InputStream;
22 import java.net.URL;
23 import java.util.ArrayList;
24 import java.util.HashMap;
25 import java.util.Iterator;
26 import java.util.List;
27 import java.util.Map;
28
29 import org.apache.batik.Version;
30
31 /**
32 * This class is used as a replacement for java.net.URL. This is done
33 * for several reasons. First unlike java.net.URL this class will
34 * accept and parse as much of a URL as possible, without throwing a
35 * MalformedURL exception. This makes it extreamly useful for simply
36 * parsing a URL string (hence it's name).
37 *
38 * Second it allows for extension of the protocols supported by the
39 * URL parser. Batik uses this to support the 'Data' protocol.
40 *
41 * Third by default it checks the streams that it opens to see if they
42 * are GZIP compressed, if so it automatically uncompresses them
43 * (avoiding opening the stream twice in the processes).
44 *
45 * It is worth noting that most real work is defered to the
46 * ParsedURLData class to which most methods are forwarded. This is
47 * done because it allows a constructor interface to ParsedURL (mostly
48 * for compatability with core URL), in spite of the fact that the
49 * real implemenation uses the protocol handlers as factories for
50 * protocol specific instances of the ParsedURLData class.
51 *
52 * @author <a href="mailto:deweese@apache.org">Thomas DeWeese</a>
53 * @version $Id: ParsedURL.java,v 1.20 2004/10/08 16:12:01 deweese Exp $
54 */
55 public class ParsedURL {
56
57 /**
58 * The data class we defer most things to.
59 */
60 ParsedURLData data;
61
62 /**
63 * The user agent to associate with this URL
64 */
65 String userAgent;
66
67 /**
68 * This maps between protocol names and ParsedURLProtocolHandler instances.
69 */
70 private static Map handlersMap = null;
71
72 /**
73 * The default protocol handler. This handler is used when
74 * other handlers fail or no match for a protocol can be
75 * found.
76 */
77 private static ParsedURLProtocolHandler defaultHandler
78 = new ParsedURLDefaultProtocolHandler();
79
80 private static String globalUserAgent = "Batik/"+Version.getVersion();
81
82 public static String getGlobalUserAgent() { return globalUserAgent; }
83
84 public static void setGlobalUserAgent(String userAgent) {
85 globalUserAgent = userAgent;
86 }
87
88 /**
89 * Returns the shared instance of HandlersMap. This method is
90 * also responsible for initializing the handler map if this is
91 * the first time it has been requested since the class was
92 * loaded.
93 */
94 private static synchronized Map getHandlersMap() {
95 if (handlersMap != null) return handlersMap;
96
97 handlersMap = new HashMap();
98 registerHandler(new ParsedURLDataProtocolHandler());
99 registerHandler(new ParsedURLJarProtocolHandler());
100
101 Iterator iter = Service.providers(ParsedURLProtocolHandler.class);
102 while (iter.hasNext()) {
103 ParsedURLProtocolHandler handler;
104 handler = (ParsedURLProtocolHandler)iter.next();
105
106 // System.out.println("Handler: " + handler);
107 registerHandler(handler);
108 }
109
110
111 return handlersMap;
112
113 }
114
115 /**
116 * Returns the handler for a particular protocol. If protocol is
117 * <tt>null</tt> or no match is found in the handlers map it
118 * returns the default protocol handler.
119 * @param protocol The protocol to get a handler for.
120 */
121 public static synchronized ParsedURLProtocolHandler getHandler
122 (String protocol) {
123 if (protocol == null)
124 return defaultHandler;
125
126 Map handlers = getHandlersMap();
127 ParsedURLProtocolHandler ret;
128 ret = (ParsedURLProtocolHandler)handlers.get(protocol);
129 if (ret == null)
130 ret = defaultHandler;
131 return ret;
132 }
133
134 /**
135 * Registers a Protocol handler by adding it to the handlers map.
136 * If the given protocol handler returns <tt>null</tt> as it's
137 * supported protocol then it is registered as the default
138 * protocol handler.
139 * @param handler the new Protocol Handler to register
140 */
141 public static synchronized void registerHandler
142 (ParsedURLProtocolHandler handler) {
143 if (handler.getProtocolHandled() == null) {
144 defaultHandler = handler;
145 return;
146 }
147
148 Map handlers = getHandlersMap();
149 handlers.put(handler.getProtocolHandled(), handler);
150 }
151
152 /**
153 * This is a utility function others can call that checks if
154 * is is a GZIP stream if so it returns a GZIPInputStream that
155 * will decode the contents, otherwise it returns (or a
156 * buffered version of is) untouched.
157 * @param is Stream that may potentially be a GZIP stream.
158 */
159 public static InputStream checkGZIP(InputStream is)
160 throws IOException {
161 return ParsedURLData.checkGZIP(is);
162 }
163
164 /**
165 * Construct a ParsedURL from the given url string.
166 * @param urlStr The string to try and parse as a URL
167 */
168 public ParsedURL(String urlStr) {
169 this.userAgent = getGlobalUserAgent();
170 this.data = parseURL(urlStr);
171 }
172
173 /**
174 * Construct a ParsedURL from the given java.net.URL instance.
175 * This is useful if you already have a valid java.net.URL
176 * instance. This bypasses most of the parsing and hence is
177 * quicker and less prone to reinterpretation than converting the
178 * URL to a string before construction.
179 *
180 * @param url The URL to "mimic".
181 */
182 public ParsedURL(URL url) {
183 this.userAgent = getGlobalUserAgent();
184 this.data = new ParsedURLData(url);
185 }
186
187 /**
188 * Construct a sub URL from two strings.
189 * @param baseStr The 'parent' URL. Should be complete.
190 * @param urlStr The 'sub' URL may be complete or partial.
191 * the missing pieces will be taken from the baseStr.
192 */
193 public ParsedURL(String baseStr, String urlStr) {
194 this.userAgent = getGlobalUserAgent();
195 if (baseStr != null)
196 this.data = parseURL(baseStr, urlStr);
197 else
198 this.data = parseURL(urlStr);
199 }
200
201 /**
202 * Construct a sub URL from a base URL and a string for the sub url.
203 * @param baseURL The 'parent' URL.
204 * @param urlStr The 'sub' URL may be complete or partial.
205 * the missing pieces will be taken from the baseURL.
206 */
207 public ParsedURL(URL baseURL, String urlStr) {
208 this.userAgent = getGlobalUserAgent();
209
210 if (baseURL != null)
211 this.data = parseURL(new ParsedURL(baseURL), urlStr);
212 else
213 this.data = parseURL(urlStr);
214 }
215
216 /**
217 * Construct a sub URL from a base ParsedURL and a string for the sub url.
218 * @param baseURL The 'parent' URL.
219 * @param urlStr The 'sub' URL may be complete or partial.
220 * the missing pieces will be taken from the baseURL.
221 */
222 public ParsedURL(ParsedURL baseURL, String urlStr) {
223 this.userAgent = baseURL.getUserAgent();
224 if (baseURL != null)
225 this.data = parseURL(baseURL, urlStr);
226 else
227 this.data = parseURL(urlStr);
228 }
229
230 /**
231 * Return a string rep of the URL (can be passed back into the
232 * constructor if desired).
233 */
234 public String toString() {
235 return data.toString();
236 }
237
238 /**
239 * Implement Object.equals.
240 * Relies heavily on the contained ParsedURLData's implementation
241 * of equals.
242 */
243 public boolean equals(Object obj) {
244 if (obj == null) return false;
245 if (! (obj instanceof ParsedURL))
246 return false;
247 ParsedURL purl = (ParsedURL)obj;
248 return data.equals(purl.data);
249 }
250
251 /**
252 * Implement Object.hashCode.
253 * Relies on the contained ParsedURLData's implementation
254 * of hashCode.
255 */
256 public int hashCode() {
257 return data.hashCode();
258 }
259
260 /**
261 * Returns true if the URL looks well formed and complete.
262 * This does not garuntee that the stream can be opened but
263 * is a good indication that things aren't totally messed up.
264 */
265 public boolean complete() {
266 return data.complete();
267 }
268
269 /**
270 * Return the user agent current associated with this url (or
271 * null if none).
272 */
273 public String getUserAgent() {
274 return userAgent;
275 }
276 /**
277 * Sets the user agent associated with this url (null clears
278 * any associated user agent).
279 */
280 public void setUserAgent(String userAgent) {
281 this.userAgent = userAgent;
282 }
283
284 /**
285 * Returns the protocol for this URL.
286 * The protocol is everything upto the first ':'.
287 */
288 public String getProtocol() {
289 if (data.protocol == null) return null;
290 return new String(data.protocol);
291 }
292
293 /**
294 * Returns the host for this URL, if any, <tt>null</tt> if there isn't
295 * one or it doesn't make sense for the protocol.
296 */
297 public String getHost() {
298 if (data.host == null) return null;
299 return new String(data.host);
300 }
301
302 /**
303 * Returns the port on the host to connect to, if it was specified
304 * in the url that was parsed, otherwise returns -1.
305 */
306 public int getPort() { return data.port; }
307
308 /**
309 * Returns the path for this URL, if any (where appropriate for
310 * the protocol this also includes the file, not just directory).
311 * Note that getPath appears in JDK 1.3 as a synonym for getFile
312 * from JDK 1.2.
313 */
314 public String getPath() {
315 if (data.path == null) return null;
316 return new String(data.path);
317 }
318
319 /**
320 * Returns the 'fragment' reference in the URL.
321 */
322 public String getRef() {
323 if (data.ref == null) return null;
324 return new String(data.ref);
325 }
326
327
328 /**
329 * Returns the URL up to and include the port number on
330 * the host. Does not include the path or fragment pieces.
331 */
332 public String getPortStr() {
333 return data.getPortStr();
334 }
335
336 /**
337 * Returns the content type if available. This is only available
338 * for some protocols.
339 */
340 public String getContentType() {
341 return data.getContentType(userAgent);
342 }
343
344 /**
345 * Returns the content encoding if available. This is only available
346 * for some protocols.
347 */
348 public String getContentEncoding() {
349 return data.getContentEncoding(userAgent);
350 }
351
352 /**
353 * Attempt to open the stream checking for common compression
354 * types, and automatically decompressing them if found.
355 */
356 public InputStream openStream() throws IOException {
357 return data.openStream(userAgent, null);
358 }
359
360 /**
361 * Attempt to open the stream checking for common compression
362 * types, and automatically decompressing them if found.
363 * @param mimeType The expected mime type of the content
364 * in the returned InputStream (mapped to Http accept
365 * header among other possabilities).
366 */
367 public InputStream openStream(String mimeType) throws IOException {
368 List mt = new ArrayList(1);
369 mt.add(mimeType);
370 return data.openStream(userAgent, mt.iterator());
371 }
372
373 /**
374 * Attempt to open the stream checking for common compression
375 * types, and automatically decompressing them if found.
376 * @param mimeTypes The expected mime types of the content
377 * in the returned InputStream (mapped to Http accept
378 * header among other possabilities).
379 */
380 public InputStream openStream(String [] mimeTypes) throws IOException {
381 List mt = new ArrayList(mimeTypes.length);
382 for (int i=0; i<mimeTypes.length; i++)
383 mt.add(mimeTypes[i]);
384 return data.openStream(userAgent, mt.iterator());
385 }
386
387 /**
388 * Attempt to open the stream checking for common compression
389 * types, and automatically decompressing them if found.
390 * @param mimeTypes The expected mime types of the content
391 * in the returned InputStream (mapped to Http accept
392 * header among other possabilities). The elements of
393 * the iterator must be strings.
394 */
395 public InputStream openStream(Iterator mimeTypes) throws IOException {
396 return data.openStream(userAgent, mimeTypes);
397 }
398
399 /**
400 * Attempt to open the stream, does no checking for compression
401 * types.
402 */
403 public InputStream openStreamRaw() throws IOException {
404 return data.openStreamRaw(userAgent, null);
405 }
406
407 /**
408 * Attempt to open the stream, does no checking for compression
409 * types.
410 * @param mimeType The expected mime type of the content
411 * in the returned InputStream (mapped to Http accept
412 * header among other possabilities).
413 */
414 public InputStream openStreamRaw(String mimeType) throws IOException {
415 List mt = new ArrayList(1);
416 mt.add(mimeType);
417 return data.openStreamRaw(userAgent, mt.iterator());
418 }
419
420 /**
421 * Attempt to open the stream, does no checking for comression
422 * types.
423 * @param mimeTypes The expected mime types of the content
424 * in the returned InputStream (mapped to Http accept
425 * header among other possabilities).
426 */
427 public InputStream openStreamRaw(String [] mimeTypes) throws IOException {
428 List mt = new ArrayList(mimeTypes.length);
429 for (int i=0; i<mimeTypes.length; i++)
430 mt.add(mimeTypes[i]);
431 return data.openStreamRaw(userAgent, mt.iterator());
432 }
433
434 /**
435 * Attempt to open the stream, does no checking for comression
436 * types.
437 * @param mimeTypes The expected mime types of the content
438 * in the returned InputStream (mapped to Http accept
439 * header among other possabilities). The elements of
440 * the iterator must be strings.
441 */
442 public InputStream openStreamRaw(Iterator mimeTypes) throws IOException {
443 return data.openStreamRaw(userAgent, mimeTypes);
444 }
445
446 public boolean sameFile(ParsedURL other) {
447 return data.sameFile(other.data);
448 }
449
450
451 /**
452 * Parse out the protocol from a url string. Used internally to
453 * select the proper handler, all other parsing is done by
454 * the selected protocol handler.
455 */
456 protected static String getProtocol(String urlStr) {
457 if (urlStr == null) return null;
458 int idx = 0, len = urlStr.length();
459
460 if (len == 0) return null;
461
462 // Protocol is only allowed to include -+.a-zA-Z
463 // So as soon as we hit something else we know we
464 // are done (if it is a ':' then we have protocol otherwise
465 // we don't.
466 char ch = urlStr.charAt(idx);
467 while ((ch == '-') ||
468 (ch == '+') ||
469 (ch == '.') ||
470 ((ch >= 'a') && (ch <= 'z')) ||
471 ((ch >= 'A') && (ch <= 'Z'))) {
472 idx++;
473 if (idx == len) {
474 ch=0;
475 break;
476 }
477 ch = urlStr.charAt(idx);
478 }
479 if (ch == ':') {
480 // Has a protocol spec...
481 return urlStr.substring(0, idx).toLowerCase();
482 }
483 return null;
484 }
485
486 /**
487 * Factory method to construct an appropriate subclass of ParsedURLData
488 * @param urlStr the string to parse.
489 */
490 public static ParsedURLData parseURL(String urlStr) {
491 ParsedURLProtocolHandler handler = getHandler(getProtocol(urlStr));
492 return handler.parseURL(urlStr);
493 }
494
495 /**
496 * Factory method to construct an appropriate subclass of ParsedURLData,
497 * for a sub url.
498 * @param baseStr The base URL string to parse.
499 * @param urlStr the sub URL string to parse.
500 */
501 public static ParsedURLData parseURL(String baseStr, String urlStr) {
502 if (baseStr == null)
503 return parseURL(urlStr);
504
505 ParsedURL purl = new ParsedURL(baseStr);
506 return parseURL(purl, urlStr);
507 }
508
509 /**
510 * Factory method to construct an appropriate subclass of ParsedURLData,
511 * for a sub url.
512 * @param baseURL The base ParsedURL to parse.
513 * @param urlStr the sub URL string to parse.
514 */
515 public static ParsedURLData parseURL(ParsedURL baseURL, String urlStr) {
516 if (baseURL == null)
517 return parseURL(urlStr);
518
519 String protocol = getProtocol(urlStr);
520 if (protocol == null)
521 protocol = baseURL.getProtocol();
522 ParsedURLProtocolHandler handler = getHandler(protocol);
523 return handler.parseURL(baseURL, urlStr);
524 }
525 }