Docjar: A Java Source and Document Engine    com.*    java.*    javax.*    org.*    all    new    plug-in

Quick Search    Search Deep

Source code: com/hp/hpl/jena/rdf/arp/URI.java


1   /*
2    * The Apache Software License, Version 1.1
3    *
4    *
5    * Copyright (c) 1999,2001 The Apache Software Foundation.  All rights
6    * reserved.
7    *
8    * Redistribution and use in source and binary forms, with or without
9    * modification, are permitted provided that the following conditions
10   * are met:
11   *
12   * 1. Redistributions of source code must retain the above copyright
13   *    notice, this list of conditions and the following disclaimer.
14   *
15   * 2. Redistributions in binary form must reproduce the above copyright
16   *    notice, this list of conditions and the following disclaimer in
17   *    the documentation and/or other materials provided with the
18   *    distribution.
19   *
20   * 3. The end-user documentation included with the redistribution,
21   *    if any, must include the following acknowledgment:
22   *       "This product includes software developed by the
23   *        Apache Software Foundation (http://www.apache.org/)."
24   *    Alternately, this acknowledgment may appear in the software itself,
25   *    if and wherever such third-party acknowledgments normally appear.
26   *
27   * 4. The names "Xerces" and "Apache Software Foundation" must
28   *    not be used to endorse or promote products derived from this
29   *    software without prior written permission. For written
30   *    permission, please contact apache@apache.org.
31   *
32   * 5. Products derived from this software may not be called "Apache",
33   *    nor may "Apache" appear in their name, without prior written
34   *    permission of the Apache Software Foundation.
35   *
36   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
37   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
40   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
42   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
43   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
44   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
45   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
46   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47   * SUCH DAMAGE.
48   * ====================================================================
49   *
50   * This software consists of voluntary contributions made by many
51   * individuals on behalf of the Apache Software Foundation and was
52   * originally based on software copyright (c) 1999, iClick Inc.,
53   * http://www.apache.org.  For more information on the Apache Software
54   * Foundation, please see <http://www.apache.org/>.
55   */
56  
57  /* Modified by Jeremy J. Carroll HP
58   *
59   * Was originally org/apache/xerces/utils/URI.java in Xerces 1.4.4
60   *
61   */
62  
63  package com.hp.hpl.jena.rdf.arp;
64  
65  import java.io.Serializable;
66  import org.apache.commons.logging.Log;
67  import org.apache.commons.logging.LogFactory;
68  
69  
70  /**********************************************************************
71  * A class to represent a Uniform Resource Identifier (URI). This class
72  * is designed to handle the parsing of URIs and provide access to
73  * the various components (scheme, host, port, userinfo, path, query
74  * string and fragment) that may constitute a URI.
75  * <p>
76  * Parsing of a URI specification is done according to the URI
77  * syntax described in RFC 2396
78  * <http://www.ietf.org/rfc/rfc2396.txt?number=2396>. Every URI consists
79  * of a scheme, followed by a colon (':'), followed by a scheme-specific
80  * part. For URIs that follow the "generic URI" syntax, the scheme-
81  * specific part begins with two slashes ("//") and may be followed
82  * by an authority segment (comprised of user information, host, and
83  * port), path segment, query segment and fragment. Note that RFC 2396
84  * no longer specifies the use of the parameters segment and excludes
85  * the "user:password" syntax as part of the authority segment. If
86  * "user:password" appears in a URI, the entire user/password string
87  * is stored as userinfo.
88  * <p>
89  * For URIs that do not follow the "generic URI" syntax (e.g. mailto),
90  * the entire scheme-specific part is treated as the "path" portion
91  * of the URI.
92  * <p>
93  * Note that, unlike the java.net.URL class, this class does not provide
94  * any built-in network access functionality nor does it provide any
95  * scheme-specific functionality (for example, it does not know a
96  * default port for a specific scheme). Rather, it only knows the
97  * grammar and basic set of operations that can be applied to a URI.
98  *
99  * @version  $Id: URI.java,v 1.17 2004/07/13 14:50:24 chris-dollin Exp $
100 *
101 **********************************************************************/
102 public class URI implements Serializable {
103   
104   static Log logger = LogFactory.getLog(URI.class);
105 
106   /** reserved characters */
107   private static final String RESERVED_CHARACTERS = ";/?:@&=+$,[]";
108 
109   /** URI punctuation mark characters - these, combined with
110       alphanumerics, constitute the "unreserved" characters */
111   private static final String MARK_CHARACTERS = "-_.!~*'() ";
112 
113   /** scheme can be composed of alphanumerics and these characters */
114   private static final String SCHEME_CHARACTERS = "+-.";
115 
116   /** userinfo can be composed of unreserved, escaped and these
117       characters */
118   private static final String USERINFO_CHARACTERS = ";:&=+$,";
119 
120   /** Stores the scheme (usually the protocol) for this URI. */
121   private String m_scheme = null;
122 
123   /** If specified, stores the userinfo for this URI; otherwise null */
124   private String m_userinfo = null;
125 
126   /** If specified, stores the host for this URI; otherwise null */
127   private String m_host = null;
128 
129   /** If specified, stores the port for this URI; otherwise null */
130   private String m_port = null;
131   private int n_port = -1;
132 
133   /** If specified, stores the path for this URI; otherwise null */
134   private String m_path = null;
135   /** Lazily assigned variable being: ??
136      * { m_path +"a", dir, parent-dir, grand-parent-dir }
137      * Each component is lazily assigned too.
138    */
139   private String m_subPaths[] = null;
140 
141   /** If specified, stores the query string for this URI; otherwise
142       null.  */
143   private String m_queryString = null;
144 
145   /** If specified, stores the fragment for this URI; otherwise null */
146   private String m_fragment = null;
147 
148   private static boolean DEBUG = false;
149 
150   /**
151   * Construct a new and uninitialized URI.
152   */
153   public URI() {
154   }
155 
156   /**
157    * Construct a new URI from another URI. All fields for this URI are
158    * set equal to the fields of the URI passed in.
159    *
160    * @param p_other the URI to copy (cannot be null)
161    */
162   public URI(URI p_other) {
163     initialize(p_other);
164   }
165 
166   /**
167    * Construct a new URI from a URI specification string. If the
168    * specification follows the "generic URI" syntax, (two slashes
169    * following the first colon), the specification will be parsed
170    * accordingly - setting the scheme, userinfo, host,port, path, query
171    * string and fragment fields as necessary. If the specification does
172    * not follow the "generic URI" syntax, the specification is parsed
173    * into a scheme and scheme-specific part (stored as the path) only.
174    *
175    * @param p_uriSpec the URI specification string (cannot be null or
176    *                  empty)
177    *
178    * @exception MalformedURIException if p_uriSpec violates any syntax
179    *                                   rules
180    */
181   public URI(String p_uriSpec) throws MalformedURIException {
182     this((URI) null, p_uriSpec);
183   }
184 
185   /**
186    * Construct a new URI from a base URI and a URI specification string.
187    * The URI specification string may be a relative URI.
188    *
189    * @param p_base the base URI (cannot be null if p_uriSpec is null or
190    *               empty)
191    * @param p_uriSpec the URI specification string (cannot be null or
192    *                  empty if p_base is null)
193    *
194    * @exception MalformedURIException if p_uriSpec violates any syntax
195    *                                  rules
196    */
197   public URI(URI p_base, String p_uriSpec) throws MalformedURIException {
198     initialize(p_base, p_uriSpec);
199   }
200 
201   /**
202    * Construct a new URI that does not follow the generic URI syntax.
203    * Only the scheme and scheme-specific part (stored as the path) are
204    * initialized.
205    *
206    * @param p_scheme the URI scheme (cannot be null or empty)
207    * @param p_schemeSpecificPart the scheme-specific part (cannot be
208    *                             null or empty)
209    *
210    * @exception MalformedURIException if p_scheme violates any
211    *                                  syntax rules
212    */
213   public URI(String p_scheme, String p_schemeSpecificPart)
214     throws MalformedURIException {
215     if (p_scheme == null || p_scheme.length() == 0) {
216       throw new MalformedURIException("Cannot construct URI with null/empty scheme!");
217     }
218     if (p_schemeSpecificPart == null
219       || p_schemeSpecificPart.length() == 0) {
220       throw new MalformedURIException("Cannot construct URI with null/empty scheme-specific part!");
221     }
222     setScheme(p_scheme);
223     setPath(p_schemeSpecificPart);
224   }
225 
226   /**
227    * Construct a new URI that follows the generic URI syntax from its
228    * component parts. Each component is validated for syntax and some
229    * basic semantic checks are performed as well.  See the individual
230    * setter methods for specifics.
231    *
232    * @param p_scheme the URI scheme (cannot be null or empty)
233    * @param p_host the hostname or IPv4 address for the URI
234    * @param p_path the URI path - if the path contains '?' or '#',
235    *               then the query string and/or fragment will be
236    *               set from the path; however, if the query and
237    *               fragment are specified both in the path and as
238    *               separate parameters, an exception is thrown
239    * @param p_queryString the URI query string (cannot be specified
240    *                      if path is null)
241    * @param p_fragment the URI fragment (cannot be specified if path
242    *                   is null)
243    *
244    * @exception MalformedURIException if any of the parameters violates
245    *                                  syntax rules or semantic rules
246    */
247   public URI(
248     String p_scheme,
249     String p_host,
250     String p_path,
251     String p_queryString,
252     String p_fragment)
253     throws MalformedURIException {
254     this(p_scheme, null, p_host, -1, p_path, p_queryString, p_fragment);
255   }
256 
257   /**
258    * Construct a new URI that follows the generic URI syntax from its
259    * component parts. Each component is validated for syntax and some
260    * basic semantic checks are performed as well.  See the individual
261    * setter methods for specifics.
262    *
263    * @param p_scheme the URI scheme (cannot be null or empty)
264    * @param p_userinfo the URI userinfo (cannot be specified if host
265    *                   is null)
266    * @param p_host the hostname or IPv4 address for the URI
267    * @param p_port the URI port (may be -1 for "unspecified"; cannot
268    *               be specified if host is null)
269    * @param p_path the URI path - if the path contains '?' or '#',
270    *               then the query string and/or fragment will be
271    *               set from the path; however, if the query and
272    *               fragment are specified both in the path and as
273    *               separate parameters, an exception is thrown
274    * @param p_queryString the URI query string (cannot be specified
275    *                      if path is null)
276    * @param p_fragment the URI fragment (cannot be specified if path
277    *                   is null)
278    *
279    * @exception MalformedURIException if any of the parameters violates
280    *                                  syntax rules or semantic rules
281    */
282   public URI(
283     String p_scheme,
284     String p_userinfo,
285     String p_host,
286     int p_port,
287     String p_path,
288     String p_queryString,
289     String p_fragment)
290     throws MalformedURIException {
291     if (p_scheme == null || p_scheme.length() == 0) {
292       throw new MalformedURIException("Scheme is required!");
293     }
294 
295     if (p_host == null) {
296       if (p_userinfo != null) {
297         throw new MalformedURIException("Userinfo may not be specified if host is not specified!");
298       }
299       if (p_port != -1) {
300         throw new MalformedURIException("Port may not be specified if host is not specified!");
301       }
302     }
303 
304     if (p_path != null) {
305       if (p_path.indexOf('?') != -1 && p_queryString != null) {
306         throw new MalformedURIException("Query string cannot be specified in path and query string!");
307       }
308 
309       if (p_path.indexOf('#') != -1 && p_fragment != null) {
310         throw new MalformedURIException("Fragment cannot be specified in both the path and fragment!");
311       }
312     }
313 
314     setScheme(p_scheme);
315     setHost(p_host);
316     setPort(p_port);
317     m_port = "" + p_port;
318     setUserinfo(p_userinfo);
319     setPath(p_path);
320     setQueryString(p_queryString);
321     setFragment(p_fragment);
322   }
323 
324   /**
325    * Initialize all fields of this URI from another URI.
326    *
327    * @param p_other the URI to copy (cannot be null)
328    */
329   private void initialize(URI p_other) {
330     m_scheme = p_other.getScheme();
331     m_userinfo = p_other.getUserinfo();
332     m_host = p_other.getHost();
333     m_port = p_other.m_port;
334     n_port = p_other.n_port;
335     m_path = p_other.getPath();
336     m_queryString = p_other.getQueryString();
337     m_fragment = p_other.getFragment();
338   }
339 
340   /**
341    * Initializes this URI from a base URI and a URI specification string.
342    * See RFC 2396 Section 4 and Appendix B for specifications on parsing
343    * the URI and Section 5 for specifications on resolving relative URIs
344    * and relative paths.
345    *
346    * @param p_base the base URI (may be null if p_uriSpec is an absolute
347    *               URI)
348    * @param p_uriSpec the URI spec string which may be an absolute or
349    *                  relative URI (can only be null/empty if p_base
350    *                  is not null)
351    *
352    * @exception MalformedURIException if p_base is null and p_uriSpec
353    *                                  is not an absolute URI or if
354    *                                  p_uriSpec violates syntax rules
355    */
356   private void initialize(URI p_base, String p_uriSpec)
357     throws MalformedURIException {
358     if (p_base == null && (p_uriSpec == null || p_uriSpec.length() == 0)) {
359       throw new RelativeURIException("Cannot initialize URI with empty parameters.");
360     }
361 
362     // just make a copy of the base if spec is empty
363     if (p_uriSpec == null || p_uriSpec.length() == 0) {
364       initialize(p_base);
365       return;
366     }
367 
368     String uriSpec = p_uriSpec;
369     int uriSpecLen = uriSpec.length();
370     int index = 0;
371 
372     // Check for scheme, which must be before '/', '?' or '#'. Also handle
373     // names with DOS drive letters ('D:'), so 1-character schemes are not
374     // allowed.
375     int colonIdx = uriSpec.indexOf(':');
376     int slashIdx = uriSpec.indexOf('/');
377     int queryIdx = uriSpec.indexOf('?');
378     int fragmentIdx = uriSpec.indexOf('#');
379 
380     if ((colonIdx < 2)
381       || (colonIdx > slashIdx && slashIdx != -1)
382       || (colonIdx > queryIdx && queryIdx != -1)
383       || (colonIdx > fragmentIdx && fragmentIdx != -1)) {
384       // We need to do the relative URI algorithm:
385 
386       // jjc: the spec says:
387       // 'URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]'
388       // My understanding is that if there is only the fragment
389       // then this is a relative URI.
390       if (p_base == null // del jjc: && fragmentIdx != 0
391       ) {
392         // Nothing to be relative against.
393         throw new RelativeURIException("No scheme found in URI '" + p_uriSpec + "'" );
394       } else {
395         if ((!p_base.isGenericURI()) && fragmentIdx != 0)
396           // Can't be relative against opaque URI (except using the #frag).
397           throw new MalformedURIException("Cannot apply relative URI to an opaque URI");
398       }
399     } else {
400       initializeScheme(uriSpec);
401       index = m_scheme.length() + 1;
402     }
403 
404     // two slashes means generic URI syntax, so we get the authority
405     if (((index + 1) < uriSpecLen)
406       && (uriSpec.substring(index).startsWith("//"))) {
407       index += 2;
408       int startPos = index;
409 
410       // get authority - everything up to path, query or fragment
411       char testChar = '\0';
412       while (index < uriSpecLen) {
413         testChar = uriSpec.charAt(index);
414         if (testChar == '/' || testChar == '?' || testChar == '#') {
415           break;
416         }
417         index++;
418       }
419 
420       // if we found authority, parse it out, otherwise we set the
421       // host to empty string
422       if (index > startPos) {
423         initializeAuthority(uriSpec.substring(startPos, index));
424       } else {
425         m_host = "";
426       }
427     }
428 
429     initializePath(uriSpec.substring(index));
430 
431     // Resolve relative URI to base URI - see RFC 2396 Section 5.2
432     // In some cases, it might make more sense to throw an exception
433     // (when scheme is specified is the string spec and the base URI
434     // is also specified, for example), but we're just following the
435     // RFC specifications
436     if (p_base != null) {
437 
438       // check to see if this is the current doc - RFC 2396 5.2 #2
439       // note that this is slightly different from the RFC spec in that
440       // we don't include the check for query string being null
441       // - this handles cases where the urispec is just a query
442       // string or a fragment (e.g. "?y" or "#s") -
443       // see <http://www.ics.uci.edu/~fielding/url/test1.html> which
444       // identified this as a bug in the RFC
445       if (m_path.length() == 0 && m_scheme == null && m_host == null) {
446         m_scheme = p_base.getScheme();
447         m_userinfo = p_base.getUserinfo();
448         m_host = p_base.getHost();
449         m_port = p_base.m_port;
450         n_port = p_base.getPort();
451         m_path = p_base.getPath();
452 
453         if (m_queryString == null) {
454           m_queryString = p_base.getQueryString();
455         }
456         return;
457       }
458 
459       // check for scheme - RFC 2396 5.2 #3
460       // if we found a scheme, it means absolute URI, so we're done
461       if (m_scheme == null) {
462         m_scheme = p_base.getScheme();
463       } else {
464         return;
465       }
466 
467       // check for authority - RFC 2396 5.2 #4
468       // if we found a host, then we've got a network path, so we're done
469       if (m_host == null) {
470         m_userinfo = p_base.getUserinfo();
471         m_host = p_base.getHost();
472         m_port = p_base.m_port;
473         n_port = p_base.getPort();
474       } else {
475         return;
476       }
477 
478       // check for absolute path - RFC 2396 5.2 #5
479       if (m_path.length() > 0 && m_path.startsWith("/")) {
480         return;
481       }
482 
483       // if we get to this point, we need to resolve relative path
484       // RFC 2396 5.2 #6
485         String path = // jjc new String();
486   "/"; // jjc ins
487       String basePath = p_base.getPath();
488 
489       // 6a - get all but the last segment of the base URI path
490       if (basePath != null) {
491         int lastSlash = basePath.lastIndexOf('/');
492         if (lastSlash != -1) {
493           path = basePath.substring(0, lastSlash + 1);
494         }
495       }
496 
497       // 6b - append the relative URI path
498       path = path.concat(m_path);
499 
500       // 6c - remove all "./" where "." is a complete path segment
501       index = -1;
502       while ((index = path.indexOf("/./")) != -1) {
503         path =
504           path.substring(0, index + 1).concat(
505             path.substring(index + 3));
506       }
507 
508       // 6d - remove "." if path ends with "." as a complete path segment
509       if (path.endsWith("/.")) {
510         path = path.substring(0, path.length() - 1);
511       }
512 
513       // 6e - remove all "<segment>/../" where "<segment>" is a complete
514       // path segment not equal to ".."
515       index = 1;
516       int segIndex = -1;
517 
518       while ((index = path.indexOf("/../", index)) > 0) {
519         segIndex = path.lastIndexOf('/', index - 1);
520         if (segIndex != -1
521           && !path.substring(segIndex + 1, index).equals("..")) {
522           path =
523             path.substring(0, segIndex).concat(
524               path.substring(index + 3));
525           index = segIndex;
526         } else {
527           index += 4;
528         }
529       }
530 
531       // 6f - remove ending "<segment>/.." where "<segment>" is a
532       // complete path segment
533       if (path.endsWith("/..")) {
534         index = path.length() - 3;
535         segIndex = path.lastIndexOf('/', index - 1);
536         if (segIndex != -1
537           && !path.substring(segIndex + 1, index).equals("..")) {
538           path = path.substring(0, segIndex + 1);
539         }
540       }
541 
542       m_path = path;
543     }
544   }
545 
546   /**
547    * Initialize the scheme for this URI from a URI string spec.
548    *
549    * @param p_uriSpec the URI specification (cannot be null)
550    *
551    * @exception MalformedURIException if URI does not have a conformant
552    *                                  scheme
553    */
554   private void initializeScheme(String p_uriSpec)
555     throws MalformedURIException {
556     int uriSpecLen = p_uriSpec.length();
557     int index = p_uriSpec.indexOf(':');
558 
559     if (index < 1)
560       throw new MalformedURIException("No scheme found in URI '" + p_uriSpec + "'" );
561 
562     if (index == uriSpecLen - 1)
563       throw new MalformedURIException( "A bare scheme name is not a URI: '" +  p_uriSpec + "'" );
564 
565     setScheme(p_uriSpec.substring(0, index));
566   }
567 
568   /**
569    * Initialize the authority (userinfo, host and port) for this
570    * URI from a URI string spec.
571    *
572    * @param p_uriSpec the URI specification (cannot be null)
573    *
574    * @exception MalformedURIException if p_uriSpec violates syntax rules
575    */
576   private void initializeAuthority(String p_uriSpec)
577     throws MalformedURIException {
578     int index = 0;
579     int start = 0;
580     int end = p_uriSpec.length();
581     char testChar = '\0';
582     String userinfo = null;
583 
584     // userinfo is everything up @
585     if (p_uriSpec.indexOf('@', start) != -1) {
586       while (index < end) {
587         testChar = p_uriSpec.charAt(index);
588         if (testChar == '@') {
589           break;
590         }
591         index++;
592       }
593       userinfo = p_uriSpec.substring(start, index);
594       index++;
595     }
596 
597     // host is everything up to ':'
598     String host = null;
599     start = index;
600     while (index < end) {
601       testChar = p_uriSpec.charAt(index);
602       if (testChar == ':') {
603         break;
604       }
605       index++;
606     }
607     host = p_uriSpec.substring(start, index);
608     int port = -1;
609     if (host.length() > 0) {
610       // port
611       if (testChar == ':') {
612         index++;
613         start = index;
614         while (index < end) {
615           index++;
616         }
617         String portStr = p_uriSpec.substring(start, index);
618         if (portStr.length() > 0) {
619           for (int i = 0; i < portStr.length(); i++) {
620             if (!isDigit(portStr.charAt(i))) {
621               throw new MalformedURIException(
622                 portStr
623                   + " is invalid. Port should only contain digits!");
624             }
625           }
626           try {
627             port = Integer.parseInt(portStr);
628             m_port = portStr;
629           } catch (NumberFormatException nfe) {
630             // can't happen
631           }
632         }
633       }
634     }
635     setHost(host);
636     setPort(port);
637     setUserinfo(userinfo);
638   }
639 
640   /**
641    * Initialize the path for this URI from a URI string spec.
642    *
643    * @param p_uriSpec the URI specification (cannot be null)
644    *
645    * @exception MalformedURIException if p_uriSpec violates syntax rules
646    */
647   private void initializePath(String p_uriSpec)
648     throws MalformedURIException {
649     if (p_uriSpec == null) {
650       throw new MalformedURIException("Cannot initialize path from null string!");
651     }
652 
653     int index = 0;
654     int start = 0;
655     int end = p_uriSpec.length();
656     char testChar = '\0';
657 
658     // path - everything up to query string or fragment
659     while (index < end) {
660       testChar = p_uriSpec.charAt(index);
661       if (testChar == '?' || testChar == '#') {
662         break;
663       }
664       // check for valid escape sequence
665       if (testChar == '%') {
666         if (index + 2 >= end
667           || !isHex(p_uriSpec.charAt(index + 1))
668           || !isHex(p_uriSpec.charAt(index + 2))) {
669           throw new MalformedURIException( "Path contains invalid escape sequence: " + p_uriSpec );
670         }
671       } else if (
672         !isReservedCharacter(testChar)
673           && !isUnreservedCharacter(testChar)) {
674         throw new MalformedURIException(
675           "Path '" + p_uriSpec + "' contains invalid character: " + testChar);
676       }
677       index++;
678     }
679     m_path = p_uriSpec.substring(start, index);
680 
681     // query - starts with ? and up to fragment or end
682     if (testChar == '?') {
683       index++;
684       start = index;
685       while (index < end) {
686         testChar = p_uriSpec.charAt(index);
687         if (testChar == '#') {
688           break;
689         }
690         if (testChar == '%') {
691           if (index + 2 >= end
692             || !isHex(p_uriSpec.charAt(index + 1))
693             || !isHex(p_uriSpec.charAt(index + 2))) {
694             throw new MalformedURIException("Query string contains invalid escape sequence in '" + p_uriSpec + "'" );
695           }
696         } else if (
697           !isReservedCharacter(testChar)
698             && !isUnreservedCharacter(testChar)) {
699           throw new MalformedURIException( "Query string contains invalid character '" + testChar + "' in '" + p_uriSpec + "'" );
700         }
701         index++;
702       }
703       m_queryString = p_uriSpec.substring(start, index);
704     }
705 
706     // fragment - starts with #
707     if (testChar == '#') {
708       index++;
709       start = index;
710       while (index < end) {
711         testChar = p_uriSpec.charAt(index);
712 
713         if (testChar == '%') {
714           if (index + 2 >= end
715             || !isHex(p_uriSpec.charAt(index + 1))
716             || !isHex(p_uriSpec.charAt(index + 2))) {
717             throw new MalformedURIException( "Fragment contains invalid escape sequence in '" + p_uriSpec + "'" );
718           }
719         } else if (
720           !isReservedCharacter(testChar)
721             && !isUnreservedCharacter(testChar)) {
722           throw new MalformedURIException(
723             "Fragment contains invalid character '" + testChar + "' in '" + p_uriSpec + "'" );
724         }
725         index++;
726       }
727       m_fragment = p_uriSpec.substring(start, index);
728     }
729   }
730 
731   /**
732    * Get the scheme for this URI.
733    *
734    * @return the scheme for this URI
735    */
736   public String getScheme() {
737     return m_scheme;
738   }
739 
740   /**
741    * Get the scheme-specific part for this URI (everything following the
742    * scheme and the first colon). See RFC 2396 Section 5.2 for spec.
743    *
744    * @return the scheme-specific part for this URI
745    */
746   public String getSchemeSpecificPart() {
747     StringBuffer schemespec = new StringBuffer();
748 
749     if (m_userinfo != null || m_host != null || m_port != null) {
750       schemespec.append("//");
751     }
752 
753     if (m_userinfo != null) {
754       schemespec.append(m_userinfo);
755       schemespec.append('@');
756     }
757 
758     if (m_host != null) {
759       schemespec.append(m_host);
760     }
761 
762     if (m_port != null) {
763       schemespec.append(':');
764       schemespec.append(m_port);
765     }
766 
767     if (m_path != null) {
768       schemespec.append((m_path));
769     }
770 
771     if (m_queryString != null) {
772       schemespec.append('?');
773       schemespec.append(m_queryString);
774     }
775 
776     if (m_fragment != null) {
777       schemespec.append('#');
778       schemespec.append(m_fragment);
779     }
780 
781     return schemespec.toString();
782   }
783 
784   /**
785    * Get the userinfo for this URI.
786    *
787    * @return the userinfo for this URI (null if not specified).
788    */
789   public String getUserinfo() {
790     return m_userinfo;
791   }
792 
793   /**
794   * Get the host for this URI.
795   *
796   * @return the host for this URI (null if not specified).
797   */
798   public String getHost() {
799     return m_host;
800   }
801 
802   /**
803    * Get the port for this URI.
804    *
805    * @return the port for this URI (-1 if not specified).
806    */
807   public int getPort() {
808     return n_port;
809   }
810 
811   /**
812    * Get the path for this URI (optionally with the query string and
813    * fragment).
814    *
815    * @param p_includeQueryString if true (and query string is not null),
816    *                             then a "?" followed by the query string
817    *                             will be appended
818    * @param p_includeFragment if true (and fragment is not null),
819    *                             then a "#" followed by the fragment
820    *                             will be appended
821    *
822    * @return the path for this URI possibly including the query string
823    *         and fragment
824    */
825   public String getPath(
826     boolean p_includeQueryString,
827     boolean p_includeFragment) {
828     StringBuffer pathString = new StringBuffer(m_path);
829 
830     if (p_includeQueryString && m_queryString != null) {
831       pathString.append('?');
832       pathString.append(m_queryString);
833     }
834 
835     if (p_includeFragment && m_fragment != null) {
836       pathString.append('#');
837       pathString.append(m_fragment);
838     }
839     return pathString.toString();
840   }
841 
842   /**
843    * Get the path for this URI. Note that the value returned is the path
844    * only and does not include the query string or fragment.
845    *
846    * @return the path for this URI.
847    */
848   public String getPath() {
849     return m_path;
850   }
851 
852   /**
853    * Get the query string for this URI.
854    *
855    * @return the query string for this URI. Null is returned if there
856    *         was no "?" in the URI spec, empty string if there was a
857    *         "?" but no query string following it.
858    */
859   public String getQueryString() {
860     return m_queryString;
861   }
862 
863   /**
864    * Get the fragment for this URI.
865    *
866    * @return the fragment for this URI. Null is returned if there
867    *         was no "#" in the URI spec, empty string if there was a
868    *         "#" but no fragment following it.
869    */
870   public String getFragment() {
871     return m_fragment;
872   }
873 
874   /**
875    * Set the scheme for this URI. The scheme is converted to lowercase
876    * before it is set.
877    *
878    * @param p_scheme the scheme for this URI (cannot be null)
879    *
880    * @exception MalformedURIException if p_scheme is not a conformant
881    *                                  scheme name
882    */
883   private void setScheme(String p_scheme) throws MalformedURIException {
884     if (p_scheme == null) {
885       throw new MalformedURIException("Cannot set scheme from null string!");
886     }
887     if (!isConformantSchemeName(p_scheme)) {
888       throw new MalformedURIException("The scheme '" + p_scheme + "' is not conformant.");
889     }
890 
891     m_scheme = p_scheme; //.toLowerCase();
892   }
893 
894   /**
895    * Set the userinfo for this URI. If a non-null value is passed in and
896    * the host value is null, then an exception is thrown.
897    *
898    * @param p_userinfo the userinfo for this URI
899    *
900    * @exception MalformedURIException if p_userinfo contains invalid
901    *                                  characters
902    */
903   private void setUserinfo(String p_userinfo) throws MalformedURIException {
904     if (p_userinfo == null) {
905       m_userinfo = null;
906     } else {
907       if (m_host == null) {
908         throw new MalformedURIException("Userinfo cannot be set when host is null!");
909       }
910 
911       // userinfo can contain alphanumerics, mark characters, escaped
912       // and ';',':','&','=','+','$',','
913       int index = 0;
914       int end = p_userinfo.length();
915       char testChar = '\0';
916       while (index < end) {
917         testChar = p_userinfo.charAt(index);
918         if (testChar == '%') {
919           if (index + 2 >= end
920             || !isHex(p_userinfo.charAt(index + 1))
921             || !isHex(p_userinfo.charAt(index + 2))) {
922             throw new MalformedURIException("Userinfo contains invalid escape sequence!");
923           }
924         } else if (
925           !isUnreservedCharacter(testChar)
926             && USERINFO_CHARACTERS.indexOf(testChar) == -1) {
927           throw new MalformedURIException(
928             "Userinfo contains invalid character:" + testChar);
929         }
930         index++;
931       }
932     }
933     m_userinfo = p_userinfo;
934   }
935 
936   /**
937   * Set the host for this URI. If null is passed in, the userinfo
938   * field is also set to null and the port is set to -1.
939   *
940   * @param p_host the host for this URI
941   *
942   * @exception MalformedURIException if p_host is not a valid IP
943   *                                  address or DNS hostname.
944   */
945   private void setHost(String p_host) throws MalformedURIException {
946     if (p_host == null || p_host.length() == 0) {
947       m_host = p_host;
948       m_userinfo = null;
949       m_port = null;
950       n_port = -1;
951     } else if (!isWellFormedAddress(p_host)) {
952       throw new MalformedURIException( "Host is not a well formed address in '" + p_host + "'" );
953     }
954     m_host = p_host;
955   }
956 
957   /**
958    * Set the port for this URI. -1 is used to indicate that the port is
959    * not specified, otherwise valid port numbers are  between 0 and 65535.
960    * If a valid port number is passed in and the host field is null,
961    * an exception is thrown.
962    *
963    * @param p_port the port number for this URI
964    *
965    * @exception MalformedURIException if p_port is not -1 and not a
966    *                                  valid port number
967    */
968   private void setPort(int p_port) throws MalformedURIException {
969     if (p_port >= 0 && p_port <= 65535) {
970       if (m_host == null) {
971         throw new MalformedURIException("Port cannot be set when host is null!");
972       }
973     } else if (p_port != -1) {
974       throw new MalformedURIException("Invalid port number!");
975     }
976     n_port = p_port;
977 
978   }
979 
980   /**
981    * Set the path for this URI. If the supplied path is null, then the
982    * query string and fragment are set to null as well. If the supplied
983    * path includes a query string and/or fragment, these fields will be
984    * parsed and set as well. Note that, for URIs following the "generic
985    * URI" syntax, the path specified should start with a slash.
986    * For URIs that do not follow the generic URI syntax, this method
987    * sets the scheme-specific part.
988    *
989    * @param p_path the path for this URI (may be null)
990    *
991    * @exception MalformedURIException if p_path contains invalid
992    *                                  characters
993    */
994   private void setPath(String p_path) throws MalformedURIException {
995     if (p_path == null) {
996       m_path = null;
997       m_queryString = null;
998       m_fragment = null;
999     } else {
1000      initializePath(p_path);
1001    }
1002  }
1003
1004  /**
1005   * Append to the end of the path of this URI. If the current path does
1006   * not end in a slash and the path to be appended does not begin with
1007   * a slash, a slash will be appended to the current path before the
1008   * new segment is added. Also, if the current path ends in a slash
1009   * and the new segment begins with a slash, the extra slash will be
1010   * removed before the new segment is appended.
1011   *
1012   * @param p_addToPath the new segment to be added to the current path
1013   *
1014   * @exception MalformedURIException if p_addToPath contains syntax
1015   *                                  errors
1016   */
1017  private void appendPath(String p_addToPath) throws MalformedURIException {
1018    if (p_addToPath == null || p_addToPath.length() == 0) {
1019      return;
1020    }
1021
1022    if (!isURIString(p_addToPath)) {
1023      throw new MalformedURIException("Path contains invalid character!");
1024    }
1025
1026    if (m_path == null || m_path.length() == 0) {
1027      if (p_addToPath.startsWith("/")) {
1028        m_path = p_addToPath;
1029      } else {
1030        m_path = "/" + p_addToPath;
1031      }
1032    } else if (m_path.endsWith("/")) {
1033      if (p_addToPath.startsWith("/")) {
1034        m_path = m_path.concat(p_addToPath.substring(1));
1035      } else {
1036        m_path = m_path.concat(p_addToPath);
1037      }
1038    } else {
1039      if (p_addToPath.startsWith("/")) {
1040        m_path = m_path.concat(p_addToPath);
1041      } else {
1042        m_path = m_path.concat("/" + p_addToPath);
1043      }
1044    }
1045  }
1046
1047  /**
1048   * Set the query string for this URI. A non-null value is valid only
1049   * if this is an URI conforming to the generic URI syntax and
1050   * the path value is not null.
1051   *
1052   * @param p_queryString the query string for this URI
1053   *
1054   * @exception MalformedURIException if p_queryString is not null and this
1055   *                                  URI does not conform to the generic
1056   *                                  URI syntax or if the path is null
1057   */
1058  private void setQueryString(String p_queryString)
1059    throws MalformedURIException {
1060    if (p_queryString == null) {
1061      m_queryString = null;
1062    } else if (!isGenericURI()) {
1063      throw new MalformedURIException("Query string can only be set for a generic URI!");
1064    } else if (getPath() == null) {
1065      throw new MalformedURIException("Query string cannot be set when path is null!");
1066    } else if (!isURIString(p_queryString)) {
1067      throw new MalformedURIException("Query string contains invalid character!");
1068    } else {
1069      m_queryString = p_queryString;
1070    }
1071  }
1072
1073  /**
1074   * Set the fragment for this URI. A non-null value is valid only
1075   * if this is a URI conforming to the generic URI syntax and
1076   * the path value is not null.
1077   *
1078   * @param p_fragment the fragment for this URI
1079   *
1080   * @exception MalformedURIException if p_fragment is not null and this
1081   *                                  URI does not conform to the generic
1082   *                                  URI syntax or if the path is null
1083   */
1084  public void setFragment(String p_fragment) throws MalformedURIException {
1085    if (p_fragment == null) {
1086      m_fragment = null;
1087    } else if (!isGenericURI()) {
1088      throw new MalformedURIException("Fragment can only be set for a generic URI!");
1089    } else if (getPath() == null) {
1090      throw new MalformedURIException("Fragment cannot be set when path is null!");
1091    } else if (!isURIString(p_fragment)) {
1092      throw new MalformedURIException("Fragment contains invalid character!");
1093    } else {
1094      m_fragment = p_fragment;
1095    }
1096  }
1097
1098  /**
1099   * Determines if the passed-in Object is equivalent to this URI.
1100   *
1101   * @param p_test the Object to test for equality.
1102   *
1103   * @return true if p_test is a URI with all values equal to this
1104   *         URI, false otherwise
1105   */
1106  public boolean equals(Object p_test) {
1107    if (p_test instanceof URI) {
1108      URI testURI = (URI) p_test;
1109      if (((m_scheme == null && testURI.m_scheme == null)
1110        || (m_scheme != null
1111          && testURI.m_scheme != null
1112          && m_scheme.equals(testURI.m_scheme)))
1113        && ((m_userinfo == null && testURI.m_userinfo == null)
1114          || (m_userinfo != null
1115            && testURI.m_userinfo != null
1116            && m_userinfo.equals(testURI.m_userinfo)))
1117        && ((m_host == null && testURI.m_host == null)
1118          || (m_host != null
1119            && testURI.m_host != null
1120            && m_host.equals(testURI.m_host)))
1121        && ((m_port == null && testURI.m_port == null)
1122          || (m_port != null
1123            && testURI.m_port != null
1124            && m_port.equals(testURI.m_port)))
1125        && ((m_path == null && testURI.m_path == null)
1126          || (m_path != null
1127            && testURI.m_path != null
1128            && m_path.equals(testURI.m_path)))
1129        && ((m_queryString == null && testURI.m_queryString == null)
1130          || (m_queryString != null
1131            && testURI.m_queryString != null
1132            && m_queryString.equals(testURI.m_queryString)))
1133        && ((m_fragment == null && testURI.m_fragment == null)
1134          || (m_fragment != null
1135            && testURI.m_fragment != null
1136            && m_fragment.equals(testURI.m_fragment)))) {
1137        return true;
1138      }
1139    }
1140    return false;
1141  }
1142
1143
1144    /**
1145        produce a human-consumable string for the URI
1146    */
1147  public String toString() 
1148        { return getURIString(); }
1149    
1150    /**
1151     * Get the URI as a string specification. See RFC 2396 Section 5.2.
1152     *
1153     * @return the URI string specification
1154     */
1155    public String getURIString() {
1156    StringBuffer uriSpecString = new StringBuffer();
1157
1158    if (m_scheme != null) {
1159      uriSpecString.append(m_scheme);
1160      uriSpecString.append(':');
1161    }
1162    uriSpecString.append(getSchemeSpecificPart());
1163    return uriSpecString.toString();
1164  }
1165
1166    public int hashCode() {
1167        return toString().hashCode();
1168    }
1169    
1170  /**
1171   * Get the indicator as to whether this URI uses the "generic URI"
1172   * syntax.
1173   *
1174   * @return true if this URI uses the "generic URI" syntax, false
1175   *         otherwise
1176   */
1177  public boolean isGenericURI() {
1178    // presence of the host (whether valid or empty) means
1179    // double-slashes which means generic uri
1180    return (m_host != null);
1181  }
1182
1183  /**
1184   * Determine whether a scheme conforms to the rules for a scheme name.
1185   * A scheme is conformant if it starts with an alphanumeric, and
1186   * contains only alphanumerics, '+','-' and '.'.
1187   *
1188   * @return true if the scheme is conformant, false otherwise
1189   */
1190  public static boolean isConformantSchemeName(String p_scheme) {
1191    if (p_scheme == null || p_scheme.length() == 0) {
1192      return false;
1193    }
1194
1195    if (!isAlpha(p_scheme.charAt(0))) {
1196      return false;
1197    }
1198
1199    char testChar;
1200    for (int i = 1; i < p_scheme.length(); i++) {
1201      testChar = p_scheme.charAt(i);
1202      if (!isAlphanum(testChar)
1203        && SCHEME_CHARACTERS.indexOf(testChar) == -1) {
1204        return false;
1205      }
1206    }
1207
1208    return true;
1209  }
1210
1211  /**
1212   * Determine whether a string is syntactically capable of representing
1213   * a valid IPv4 address or the domain name of a network host. A valid
1214   * IPv4 address consists of four decimal digit groups separated by a
1215   * '.'. A hostname consists of domain labels (each of which must
1216   * begin and end with an alphanumeric but may contain '-') separated
1217   & by a '.'. See RFC 2396 Section 3.2.2.
1218   *
1219   * @return true if the string is a syntactically valid IPv4 address
1220   *              or hostname
1221   */
1222  public static boolean isWellFormedAddress(String p_address) {
1223    if (p_address == null) {
1224      return false;
1225    }
1226
1227    String address = p_address;
1228    int addrLength = address.length();
1229    if (addrLength == 0 || addrLength > 255) {
1230      return false;
1231    }
1232
1233    if (address.startsWith(".") || address.startsWith("-")) {
1234      return false;
1235    }
1236
1237    // rightmost domain label starting with digit indicates IP address
1238    // since top level domain label can only start with an alpha
1239    // see RFC 2396 Section 3.2.2
1240    int index = address.lastIndexOf('.');
1241    if (address.endsWith(".")) {
1242      index = address.substring(0, index).lastIndexOf('.');
1243    }
1244
1245    if (index + 1 < addrLength && isDigit(p_address.charAt(index + 1))) {
1246      char testChar;
1247      int numDots = 0;
1248
1249      // make sure that 1) we see only digits and dot separators, 2) that
1250      // any dot separator is preceded and followed by a digit and
1251      // 3) that we find 3 dots
1252      for (int i = 0; i < addrLength; i++) {
1253        testChar = address.charAt(i);
1254        if (testChar == '.') {
1255          if (!isDigit(address.charAt(i - 1))
1256            || (i + 1 < addrLength
1257              && !isDigit(address.charAt(i + 1)))) {
1258            return false;
1259          }
1260          numDots++;
1261        } else if (!isDigit(testChar)) {
1262          return false;
1263        }
1264      }
1265      if (numDots != 3) {
1266        return false;
1267      }
1268    } else {
1269      // domain labels can contain alphanumerics and '-"
1270      // but must start and end with an alphanumeric
1271      char testChar;
1272
1273      for (int i = 0; i < addrLength; i++) {
1274        testChar = address.charAt(i);
1275        if (testChar == '.') {
1276          if (!isAlphanum(address.charAt(i - 1))) {
1277            return false;
1278          }
1279          if (i + 1 < addrLength
1280            && !isAlphanum(address.charAt(i + 1))) {
1281            return false;
1282          }
1283        } else if (!isAlphanum(testChar) && testChar != '-') {
1284          return false;
1285        }
1286      }
1287    }
1288    return true;
1289  }
1290
1291  /**
1292   * Determine whether a char is a digit.
1293   *
1294   * @return true if the char is betweeen '0' and '9', false otherwise
1295   */
1296  private static boolean isDigit(char p_char) {
1297    return p_char >= '0' && p_char <= '9';
1298  }
1299
1300  /**
1301   * Determine whether a character is a hexadecimal character.
1302   *
1303   * @return true if the char is betweeen '0' and '9', 'a' and 'f'
1304   *         or 'A' and 'F', false otherwise
1305   */
1306  private static boolean isHex(char p_char) {
1307    return (
1308      isDigit(p_char)
1309        || (p_char >= 'a' && p_char <= 'f')
1310        || (p_char >= 'A' && p_char <= 'F'));
1311  }
1312
1313  /**
1314   * Determine whether a char is an alphabetic character: a-z or A-Z
1315   *
1316   * @return true if the char is alphabetic, false otherwise
1317   */
1318  private static boolean isAlpha(char p_char) {
1319    return (
1320      (p_char >= 'a' && p_char <= 'z')
1321        || (p_char >= 'A' && p_char <= 'Z'));
1322  }
1323
1324  /**
1325   * Determine whether a char is an alphanumeric: 0-9, a-z or A-Z
1326   *
1327   * @return true if the char is alphanumeric, false otherwise
1328   */
1329  private static boolean isAlphanum(char p_char) {
1330    return (isAlpha(p_char) || isDigit(p_char));
1331  }
1332
1333  /**
1334   * Determine whether a character is a reserved character:
1335   * ';', '/', '?', ':', '@', '&', '=', '+', '$' or ','
1336   *
1337   * @return true if the string contains any reserved characters
1338   */
1339  private static boolean isReservedCharacter(char p_char) {
1340    return RESERVED_CHARACTERS.indexOf(p_char) != -1;
1341  }
1342
1343  /**
1344   * Determine whether a char is an unreserved character.
1345   *
1346   * @return true if the char is unreserved, false otherwise
1347   */
1348  private static boolean isUnreservedCharacter(char p_char) {
1349    return (!isReservedCharacter(p_char)) && "#%[]".indexOf(p_char) == -1;
1350    //   return (isAlphanum(p_char) ||
1351    //           MARK_CHARACTERS.indexOf(p_char) != -1);
1352  }
1353
1354  private boolean haveCheckedNFC = false;
1355  private boolean isNFC;
1356
1357  public boolean isNormalFormC() {
1358    if (!haveCheckedNFC) {
1359      isNFC = CharacterModel.isNormalFormC(toString());
1360      haveCheckedNFC = true;
1361    }
1362    return isNFC;
1363  }
1364  /**
1365   * Determine whether a given string contains only URI characters (also
1366   * called "uric" in RFC 2396). uric consist of all reserved
1367   * characters, unreserved characters and escaped characters.
1368   *
1369   * @return true if the string is comprised of uric, false otherwise
1370   */
1371  private static boolean isURIString(String p_uric) {
1372    if (p_uric == null) {
1373      return false;
1374    }
1375    int end = p_uric.length();
1376    char testChar = '\0';
1377    for (int i = 0; i < end; i++) {
1378      testChar = p_uric.charAt(i);
1379      if (testChar == '%') {
1380        if (i + 2 >= end
1381          || !isHex(p_uric.charAt(i + 1))
1382          || !isHex(p_uric.charAt(i + 2))) {
1383          return false;
1384        } else {
1385          i += 2;
1386          continue;
1387        }
1388      }
1389      if (isReservedCharacter(testChar)
1390        || isUnreservedCharacter(testChar)) {
1391        continue;
1392      } else {
1393        return false;
1394      }
1395    }
1396    return true;
1397  }
1398
1399  /* RELATIVIZE */
1400
1401  static final public int SAMEDOCUMENT = 1;
1402  static final public int NETWORK = 2;
1403  static final public int ABSOLUTE = 4;
1404  static final public int RELATIVE = 8;
1405  static final public int PARENT = 16;
1406  static final public int GRANDPARENT = 32;
1407
1408  private boolean equal(String s1, String s2) {
1409    return s1 == null ? s2 == null : s1.equals(s2);
1410  }
1411    static private int prefs[][] = {
1412        { RELATIVE, RELATIVE|PARENT|GRANDPARENT },
1413        { PARENT, PARENT|GRANDPARENT },
1414        { GRANDPARENT, GRANDPARENT }
1415    };
1416    static String exact[] = { ".", "..", "../.." };
1417    static String sub[] = { "", "../", "../../" };
1418  public String relativize(String abs, int flags) throws MalformedURIException {
1419    URI r;
1420      r = new URI(abs);
1421       // logger.info("<"+Util.substituteStandardEntities(abs)+">");
1422       // logger.info("<"+Util.substituteStandardEntities(r.m_path)+">");
1423    if (r.isGenericURI()) {
1424      boolean net = equal(r.m_scheme, m_scheme);
1425</