View Javadoc

1   /*
2    * $Header: /home/jerenkrantz/tmp/commons/commons-convert/cvs/home/cvs/jakarta-commons//httpclient/src/java/org/apache/commons/httpclient/URI.java,v 1.47 2004/05/13 04:03:25 mbecke Exp $
3    * $Revision: 372560 $
4    * $Date: 2006-01-26 11:07:06 -0500 (Thu, 26 Jan 2006) $
5    *
6    * ====================================================================
7    *
8    *  Copyright 2002-2004 The Apache Software Foundation
9    *
10   *  Licensed under the Apache License, Version 2.0 (the "License");
11   *  you may not use this file except in compliance with the License.
12   *  You may obtain a copy of the License at
13   *
14   *      http://www.apache.org/licenses/LICENSE-2.0
15   *
16   *  Unless required by applicable law or agreed to in writing, software
17   *  distributed under the License is distributed on an "AS IS" BASIS,
18   *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19   *  See the License for the specific language governing permissions and
20   *  limitations under the License.
21   * ====================================================================
22   *
23   * This software consists of voluntary contributions made by many
24   * individuals on behalf of the Apache Software Foundation.  For more
25   * information on the Apache Software Foundation, please see
26   * <http://www.apache.org/>.
27   *
28   */
29  
30  package org.apache.commons.httpclient;
31  
32  import java.io.IOException;
33  import java.io.ObjectInputStream;
34  import java.io.ObjectOutputStream;
35  import java.io.Serializable;
36  import java.util.Locale;
37  import java.util.BitSet;
38  import java.util.Hashtable;
39  
40  import org.apache.commons.codec.DecoderException;
41  import org.apache.commons.codec.net.URLCodec;
42  import org.apache.commons.httpclient.util.EncodingUtil;
43  
44  /***
45   * The interface for the URI(Uniform Resource Identifiers) version of RFC 2396.
46   * The purpose of this class is to support parsing a URI reference so that it
47   * can be extended for any specific protocol, together with the character
48   * encoding of the transporting protocol and the charset of the document.
49   * <p>
50   * A URI is always in an "escaped" form, since escaping or unescaping a
51   * completed URI might change its semantics.  
52   * <p>
53   * Implementers should be careful not to escape or unescape the same string
54   * more than once, since unescaping an already unescaped string might lead to
55   * misinterpreting a percent data character as another escaped character,
56   * or vice versa in the case of escaping an already escaped string.
57   * <p>
58   * In order to avoid these problems, data types are used as follows:
59   * <p><blockquote><pre>
60   *   URI character sequence: char
61   *   octet sequence: byte
62   *   original character sequence: String
63   * </pre></blockquote><p>
64   *
65   * So, a URI is a sequence of characters as an array of a char type, which
66   * is not always represented as a sequence of octets as an array of byte.
67   * <p>
68   * 
69   * URI Syntactic Components
70   * <p><blockquote><pre>
71   * - In general, written as follows:
72   *   Absolute URI = &lt;scheme&gt;:&lt;scheme-specific-part&gt;
73   *   Generic URI = &lt;scheme&gt;://&lt;authority&gt;&lt;path&gt;?&lt;query&gt;
74   *
75   * - Syntax
76   *   absoluteURI   = scheme ":" ( hier_part | opaque_part )
77   *   hier_part     = ( net_path | abs_path ) [ "?" query ]
78   *   net_path      = "//" authority [ abs_path ]
79   *   abs_path      = "/"  path_segments
80   * </pre></blockquote><p>
81   *
82   * The following examples illustrate URI that are in common use.
83   * <pre>
84   * ftp://ftp.is.co.za/rfc/rfc1808.txt
85   *    -- ftp scheme for File Transfer Protocol services
86   * gopher://spinaltap.micro.umn.edu/00/Weather/California/Los%20Angeles
87   *    -- gopher scheme for Gopher and Gopher+ Protocol services
88   * http://www.math.uio.no/faq/compression-faq/part1.html
89   *    -- http scheme for Hypertext Transfer Protocol services
90   * mailto:mduerst@ifi.unizh.ch
91   *    -- mailto scheme for electronic mail addresses
92   * news:comp.infosystems.www.servers.unix
93   *    -- news scheme for USENET news groups and articles
94   * telnet://melvyl.ucop.edu/
95   *    -- telnet scheme for interactive services via the TELNET Protocol
96   * </pre>
97   * Please, notice that there are many modifications from URL(RFC 1738) and
98   * relative URL(RFC 1808).
99   * <p>
100  * <b>The expressions for a URI</b>
101  * <p><pre>
102  * For escaped URI forms
103  *  - URI(char[]) // constructor
104  *  - char[] getRawXxx() // method
105  *  - String getEscapedXxx() // method
106  *  - String toString() // method
107  * <p>
108  * For unescaped URI forms
109  *  - URI(String) // constructor
110  *  - String getXXX() // method
111  * </pre><p>
112  *
113  * @author <a href="mailto:jericho@apache.org">Sung-Gu</a>
114  * @author <a href="mailto:mbowler@GargoyleSoftware.com">Mike Bowler</a>
115  * @version $Revision: 372560 $ $Date: 2002/03/14 15:14:01 
116  */
117 public class URI implements Cloneable, Comparable, Serializable {
118 
119 
120     // ----------------------------------------------------------- Constructors
121 
122     /*** Creates an empty instance for internal use; no component is parsed or assigned here. */
123     protected URI() {
124     }
125 
126     /***
127      * Constructs a URI by parsing the given string as a URI reference. The
128      * input string can be either in escaped or unescaped form.
129      *
130      * @param s URI character sequence
131      * @param escaped <tt>true</tt> if URI character sequence is in escaped form.
132      *                <tt>false</tt> otherwise.
133      * @param charset the charset string to do escape encoding, if required; it
134      *                is stored as the protocol charset of this instance
135      * @throws URIException If the URI cannot be created.
136      * @throws NullPointerException if input string is <code>null</code>
137      * 
138      * @see #getProtocolCharset
139      * 
140      * @since 3.0
141      */
142     public URI(String s, boolean escaped, String charset)
143         throws URIException, NullPointerException {
144         protocolCharset = charset; // assigned before the reference is parsed
145         parseUriReference(s, escaped);
146     }
147 
148     /***
149      * Constructs a URI by parsing the given string as a URI reference. The
150      * input string can be either in escaped or unescaped form.
151      * This constructor itself does not assign the protocol charset field.
152      * @param s URI character sequence
153      * @param escaped <tt>true</tt> if URI character sequence is in escaped form.
154      *                <tt>false</tt> otherwise.
155      * 
156      * @throws URIException If the URI cannot be created.
157      * @throws NullPointerException if input string is <code>null</code>
158      * 
159      * @see #getProtocolCharset
160      * 
161      * @since 3.0
162      */
163     public URI(String s, boolean escaped)
164         throws URIException, NullPointerException {
165         parseUriReference(s, escaped);
166     }
167 
168     /***
169      * Construct a URI as an escaped form of a character array with the given
170      * charset. The array is converted to a string and parsed as escaped input.
171      *
172      * @param escaped the URI character sequence
173      * @param charset the charset string to do escape encoding
174      * @throws URIException If the URI cannot be created.
175      * @throws NullPointerException if <code>escaped</code> is <code>null</code>
176      * @see #getProtocolCharset
177      * 
178      * @deprecated Use {@link #URI(String, boolean, String)}
179      */
180     public URI(char[] escaped, String charset) 
181         throws URIException, NullPointerException {
182         protocolCharset = charset; // assigned before the reference is parsed
183         parseUriReference(new String(escaped), true);
184     }
185 
186 
187     /***
188      * Construct a URI as an escaped form of a character array.
189      * A URI can be placed within double-quotes or angle brackets like
190      * "http://test.com/" and &lt;http://test.com/&gt;
191      * 
192      * @param escaped the URI character sequence; converted to a string and parsed as escaped input
193      * @throws URIException If the URI cannot be created.
194      * @throws NullPointerException if <code>escaped</code> is <code>null</code>
195      * @see #getDefaultProtocolCharset
196      * 
197      * @deprecated Use {@link #URI(String, boolean)}
198      */
199     public URI(char[] escaped) 
200         throws URIException, NullPointerException {
201         parseUriReference(new String(escaped), true);
202     }
203 
204 
205     /***
206      * Construct a URI from the given string with the given charset.
207      * The string is parsed as unescaped input.
208      * @param original the string to be represented to URI character sequence
209      * It is one of absoluteURI and relativeURI.
210      * @param charset the charset string to do escape encoding
211      * @throws URIException If the URI cannot be created.
212      * @see #getProtocolCharset
213      * 
214      * @deprecated Use {@link #URI(String, boolean, String)}
215      */
216     public URI(String original, String charset) throws URIException {
217         protocolCharset = charset; // assigned before the reference is parsed
218         parseUriReference(original, false);
219     }
220 
221 
222     /***
223      * Construct a URI from the given string, which is parsed as unescaped input.
224      * <p><blockquote><pre>
225      *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
226      * </pre></blockquote><p>
227      * A URI can be placed within double-quotes or angle brackets like
228      * "http://test.com/" and &lt;http://test.com/&gt;
229      *
230      * @param original the string to be represented to URI character sequence
231      * It is one of absoluteURI and relativeURI.
232      * @throws URIException If the URI cannot be created.
233      * @see #getDefaultProtocolCharset
234      * 
235      * @deprecated Use {@link #URI(String, boolean)}
236      */
237     public URI(String original) throws URIException {
238         parseUriReference(original, false);
239     }
240 
241 
242     /***
243      * Construct a general URI from the given components.
244      * <p><blockquote><pre>
245      *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
246      *   absoluteURI   = scheme ":" ( hier_part | opaque_part )
247      *   opaque_part   = uric_no_slash *uric
248      * </pre></blockquote><p>
249      * It's for absolute URI = &lt;scheme&gt;:&lt;scheme-specific-part&gt;#
250      * &lt;fragment&gt;. The resulting URI is flagged as having an opaque part.
251      *
252      * @param scheme the scheme string; lower-cased independently of the default locale
253      * @param schemeSpecificPart scheme_specific_part, encoded with the protocol charset
254      * @param fragment the fragment string, stored as given; may be <code>null</code>
255      * @throws URIException If the scheme is <code>null</code> or invalid, or the URI cannot be created.
256      * @see #getDefaultProtocolCharset
257      */
258     public URI(String scheme, String schemeSpecificPart, String fragment)
259         throws URIException {
260         // validate and construct the URI character sequence; the scheme is
261         // lower-cased with a fixed locale so a Turkish default cannot emit U+0131
262         if (scheme == null) {
263             throw new URIException(URIException.PARSING, "scheme required");
264         }
265         char[] s = scheme.toLowerCase(Locale.ENGLISH).toCharArray();
266         if (validate(s, URI.scheme)) {
267             _scheme = s; // is_absoluteURI
268         } else {
269             throw new URIException(URIException.PARSING, "incorrect scheme");
270         }
271         _opaque = encode(schemeSpecificPart, allowed_opaque_part,
272                 getProtocolCharset());
273         // Set flag
274         _is_opaque_part = true;
275         _fragment = fragment == null ? null : fragment.toCharArray();
276         setURI();
277     }
278 
279 
280     /***
281      * Construct a general URI from the given components.
282      * <p><blockquote><pre>
283      *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
284      *   absoluteURI   = scheme ":" ( hier_part | opaque_part )
285      *   relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
286      *   hier_part     = ( net_path | abs_path ) [ "?" query ]
287      * </pre></blockquote><p>
288      * It's for absolute URI = &lt;scheme&gt;:&lt;path&gt;?&lt;query&gt;#&lt;
289      * fragment&gt; and relative URI = &lt;path&gt;?&lt;query&gt;#&lt;fragment
290      * &gt;.
291      *
292      * @param scheme the scheme string
293      * @param authority the authority string
294      * @param path the path string
295      * @param query the query string
296      * @param fragment the fragment string
297      * @throws URIException If the new URI cannot be created.
298      * @see #getDefaultProtocolCharset
299      */
300     public URI(String scheme, String authority, String path, String query,
301                String fragment) throws URIException {
302 
303         // validate and contruct the URI character sequence
304         StringBuffer buff = new StringBuffer();
305         if (scheme != null) {
306             buff.append(scheme);
307             buff.append(':');
308         }
309         if (authority != null) {
310             buff.append("//");
311             buff.append(authority);
312         }
313         if (path != null) {  // accept empty path
314             if ((scheme != null || authority != null)
315                     && !path.startsWith("/")) {
316                 throw new URIException(URIException.PARSING,
317                         "abs_path requested");
318             }
319             buff.append(path);
320         }
321         if (query != null) {
322             buff.append('?');
323             buff.append(query);
324         }
325         if (fragment != null) {
326             buff.append('#');
327             buff.append(fragment);
328         }
329         parseUriReference(buff.toString(), false);
330     }
331 
332 
333     /***
334      * Construct a general URI from the given components.
335      * Path, query and fragment are passed on as <code>null</code>.
336      * @param scheme the scheme string
337      * @param userinfo the userinfo string
338      * @param host the host string
339      * @param port the port number
340      * @throws URIException If the new URI cannot be created.
341      * @see #getDefaultProtocolCharset
342      */
343     public URI(String scheme, String userinfo, String host, int port)
344         throws URIException {
345 
346         this(scheme, userinfo, host, port, null, null, null);
347     }
348 
349 
350     /***
351      * Construct a general URI from the given components.
352      * Query and fragment are passed on as <code>null</code>.
353      * @param scheme the scheme string
354      * @param userinfo the userinfo string
355      * @param host the host string
356      * @param port the port number
357      * @param path the path string
358      * @throws URIException If the new URI cannot be created.
359      * @see #getDefaultProtocolCharset
360      */
361     public URI(String scheme, String userinfo, String host, int port,
362             String path) throws URIException {
363 
364         this(scheme, userinfo, host, port, path, null, null);
365     }
366 
367 
368     /***
369      * Construct a general URI from the given components.
370      * The fragment is passed on as <code>null</code>.
371      * @param scheme the scheme string
372      * @param userinfo the userinfo string
373      * @param host the host string
374      * @param port the port number
375      * @param path the path string
376      * @param query the query string
377      * @throws URIException If the new URI cannot be created.
378      * @see #getDefaultProtocolCharset
379      */
380     public URI(String scheme, String userinfo, String host, int port,
381             String path, String query) throws URIException {
382 
383         this(scheme, userinfo, host, port, path, query, null);
384     }
385 
386 
387     /***
388      * Construct a general URI from the given components. The authority is built
389      * as [userinfo@]host[:port] and is <code>null</code> when host is <code>null</code>.
390      * @param scheme the scheme string
391      * @param userinfo the userinfo string; left out of the authority when <code>null</code>
392      * @param host the host string
393      * @param port the port number, or -1 to leave the port out of the authority
394      * @param path the path string
395      * @param query the query string
396      * @param fragment the fragment string
397      * @throws URIException If the new URI cannot be created.
398      * @see #getDefaultProtocolCharset
399      */
400     public URI(String scheme, String userinfo, String host, int port,
401             String path, String query, String fragment) throws URIException {
402 
403         this(scheme, (host == null) ? null 
404             : ((userinfo != null) ? userinfo + '@' : "") + host 
405                 + ((port != -1) ? ":" + port : ""), path, query, fragment);
406     }
407 
408 
409     /***
410      * Construct a general URI from the given components.
411      * The host is passed on as the authority and the query as <code>null</code>.
412      * @param scheme the scheme string
413      * @param host the host string
414      * @param path the path string
415      * @param fragment the fragment string
416      * @throws URIException If the new URI cannot be created.
417      * @see #getDefaultProtocolCharset
418      */
419     public URI(String scheme, String host, String path, String fragment)
420         throws URIException {
421 
422         this(scheme, host, path, null, fragment);
423     }
424 
425 
426     /***
427      * Construct a general URI with the given relative URI string.
428      * The string is first turned into a URI with the deprecated {@link #URI(String)}.
429      * @param base the base URI
430      * @param relative the relative URI string
431      * @throws URIException If the new URI cannot be created.
432      * 
433      * @deprecated Use {@link #URI(URI, String, boolean)}
434      */
435     public URI(URI base, String relative) throws URIException {
436         this(base, new URI(relative));
437     }
438 
439 
440     /***
441      * Construct a general URI with the given relative URI string.
442      * The string is first turned into a URI, which is then resolved against base.
443      * @param base the base URI
444      * @param relative the relative URI string
445      * @param escaped <tt>true</tt> if URI character sequence is in escaped form.
446      *                <tt>false</tt> otherwise.
447      *  
448      * @throws URIException If the new URI cannot be created.
449      * 
450      * @since 3.0
451      */
452     public URI(URI base, String relative, boolean escaped) throws URIException {
453         this(base, new URI(relative, escaped));
454     }
455 
456 
457     /***
458      * Construct a general URI with the given relative URI.
459      * <p><blockquote><pre>
460      *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
461      *   relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
462      * </pre></blockquote><p>
463      * Resolving Relative References to Absolute Form.
464      *
465      * <strong>Examples of Resolving Relative URI References</strong>
466      *
467      * Within an object with a well-defined base URI of
468      * <p><blockquote><pre>
469      *   http://a/b/c/d;p?q
470      * </pre></blockquote><p>
471      * the relative URI would be resolved as follows:
472      *
473      * Normal Examples
474      *
475      * <p><blockquote><pre>
476      *   g:h           =  g:h
477      *   g             =  http://a/b/c/g
478      *   ./g           =  http://a/b/c/g
479      *   g/            =  http://a/b/c/g/
480      *   /g            =  http://a/g
481      *   //g           =  http://g
482      *   ?y            =  http://a/b/c/?y
483      *   g?y           =  http://a/b/c/g?y
484      *   #s            =  (current document)#s
485      *   g#s           =  http://a/b/c/g#s
486      *   g?y#s         =  http://a/b/c/g?y#s
487      *   ;x            =  http://a/b/c/;x
488      *   g;x           =  http://a/b/c/g;x
489      *   g;x?y#s       =  http://a/b/c/g;x?y#s
490      *   .             =  http://a/b/c/
491      *   ./            =  http://a/b/c/
492      *   ..            =  http://a/b/
493      *   ../           =  http://a/b/
494      *   ../g          =  http://a/b/g
495      *   ../..         =  http://a/
496      *   ../../        =  http://a/ 
497      *   ../../g       =  http://a/g
498      * </pre></blockquote><p>
499      *
500      * Some URI schemes do not allow a hierarchical syntax matching the
501      * &lt;hier_part&gt; syntax, and thus cannot use relative references.
502      *
503      * @param base the base URI
504      * @param relative the relative URI
505      * @throws URIException If the new URI cannot be created.
506      */
507     public URI(URI base, URI relative) throws URIException {
508         // Builds this URI by combining the components of base and relative.
509         if (base._scheme == null) {
510             throw new URIException(URIException.PARSING, "base URI required");
511         }
512         if (base._scheme != null) { // always true here: the null case threw above
513             this._scheme = base._scheme;
514             this._authority = base._authority;
515         }
516         if (base._is_opaque_part || relative._is_opaque_part) { // opaque: no path resolution
517             this._scheme = base._scheme;
518             this._is_opaque_part = base._is_opaque_part 
519                 || relative._is_opaque_part;
520             this._opaque = relative._opaque; // opaque part and fragment come from relative only
521             this._fragment = relative._fragment;
522             this.setURI();
523             return; // early exit: the reparse at the end of the constructor is skipped
524         }
525         if (relative._scheme != null) { // relative has its own scheme: take its authority and path
526             this._scheme = relative._scheme;
527             this._is_net_path = relative._is_net_path;
528             this._authority = relative._authority;
529             if (relative._is_server) {
530                 this._is_server = relative._is_server;
531                 this._userinfo = relative._userinfo;
532                 this._host = relative._host;
533                 this._port = relative._port;
534             } else if (relative._is_reg_name) {
535                 this._is_reg_name = relative._is_reg_name;
536             }
537             this._is_abs_path = relative._is_abs_path;
538             this._is_rel_path = relative._is_rel_path;
539             this._path = relative._path;
540         } else if (base._authority != null && relative._scheme == null) { // 2nd test is implied by the else
541             this._is_net_path = base._is_net_path;
542             this._authority = base._authority;
543             if (base._is_server) {
544                 this._is_server = base._is_server;
545                 this._userinfo = base._userinfo;
546                 this._host = base._host;
547                 this._port = base._port;
548             } else if (base._is_reg_name) {
549                 this._is_reg_name = base._is_reg_name;
550             }
551         }
552         if (relative._authority != null) { // an authority on relative overrides what was copied above
553             this._is_net_path = relative._is_net_path;
554             this._authority = relative._authority;
555             if (relative._is_server) {
556                 this._is_server = relative._is_server;
557                 this._userinfo = relative._userinfo;
558                 this._host = relative._host;
559                 this._port = relative._port;
560             } else if (relative._is_reg_name) {
561                 this._is_reg_name = relative._is_reg_name;
562             }
563             this._is_abs_path = relative._is_abs_path;
564             this._is_rel_path = relative._is_rel_path;
565             this._path = relative._path;
566         }
567         // resolve the path and query if necessary
568         if (relative._scheme == null && relative._authority == null) {
569             if ((relative._path == null || relative._path.length == 0)
570                 && relative._query == null) {
571                 // handle a reference to the current document, see RFC 2396 
572                 // section 5.2 step 2
573                 this._path = base._path;
574                 this._query = base._query;
575             } else {
576                 this._path = resolvePath(base._path, relative._path);
577             }
578         }
579         // the query of relative, when present, replaces any query set above
580         if (relative._query != null) {
581             this._query = relative._query;
582         }
583         // the fragment is taken from relative only; base._fragment is never copied
584         if (relative._fragment != null) {
585             this._fragment = relative._fragment;
586         }
587         this.setURI(); // NOTE(review): presumably fills _uri from the components - confirm in setURI()
588         // reparse the newly built URI, this will ensure that all flags are set correctly.
589         // TODO there must be a better way to do this
590         parseUriReference(new String(_uri), true);
591     }
592 
593     // --------------------------------------------------- Instance Variables
594 
595     /*** Version ID for serialization; package-private, since no access modifier is given. */
596     static final long serialVersionUID = 604752400577948726L;
597 
598 
599     /***
600      * Cache for the hash code of this URI; the initial value is 0.
601      */
602     protected int hash = 0;
603 
604 
605     /***
606      * This Uniform Resource Identifier (URI), kept as a char array.
607      * The URI is always in an "escaped" form, since escaping or unescaping
608      * a completed URI might change its semantics.
609      */
610     protected char[] _uri = null;
611 
612 
613     /***
614      * The charset of the protocol used by this URI instance; null until assigned.
615      */
616     protected String protocolCharset = null;
617 
618 
619     /***
620      * The default charset of the protocol, initially "UTF-8".  RFC 2277, 2396
621      */
622     protected static String defaultProtocolCharset = "UTF-8";
623 
624 
625     /***
626      * The default charset of the document.  RFC 2277, 2396
627      * The platform's charset is used for the document by default.
628      */
629     protected static String defaultDocumentCharset = null;
630     protected static String defaultDocumentCharsetByLocale = null;
631     protected static String defaultDocumentCharsetByPlatform = null;
632     // Static initializer for defaultDocumentCharset
633     static {
634         Locale locale = Locale.getDefault();
635         // in order to support backward compatiblity
636         if (locale != null) {
637             defaultDocumentCharsetByLocale =
638                 LocaleToCharsetMap.getCharset(locale);
639             // set the default document charset
640             defaultDocumentCharset = defaultDocumentCharsetByLocale;
641         }
642         // in order to support platform encoding
643         try {
644             defaultDocumentCharsetByPlatform = System.getProperty("file.encoding");
645         } catch (SecurityException ignore) {
646         }
647         if (defaultDocumentCharset == null) {
648             // set the default document charset
649             defaultDocumentCharset = defaultDocumentCharsetByPlatform;
650         }
651     }
652 
653 
654     /***
655      * The scheme, kept as a char array; <code>null</code> until assigned.
656      */
657     protected char[] _scheme = null;
658 
659 
660     /***
661      * The opaque part, kept as a char array; <code>null</code> until assigned.
662      */
663     protected char[] _opaque = null;
664 
665 
666     /***
667      * The authority, kept as a char array; <code>null</code> until assigned.
668      */
669     protected char[] _authority = null;
670 
671 
672     /***
673      * The userinfo, kept as a char array; <code>null</code> until assigned.
674      */
675     protected char[] _userinfo = null;
676 
677 
678     /***
679      * The host, kept as a char array; <code>null</code> until assigned.
680      */
681     protected char[] _host = null;
682 
683 
684     /***
685      * The port; starts at -1, the value the constructors treat as "no port".
686      */
687     protected int _port = -1;
688 
689 
690     /***
691      * The path, kept as a char array; <code>null</code> until assigned.
692      */
693     protected char[] _path = null;
694 
695 
696     /***
697      * The query, kept as a char array; <code>null</code> until assigned.
698      */
699     protected char[] _query = null;
700 
701 
702     /***
703      * The fragment, kept as a char array; <code>null</code> until assigned.
704      */
705     protected char[] _fragment = null;
706 
707 
708     /***
709      * The root path: a one-element array holding '/'. The field is not final.
710      */
711     protected static char[] rootPath = { '/' };
712 
713     // ---------------------- Generous characters for each component validation
714 
715     /***
716      * The percent "%" character always has the reserved purpose of being the
717      * escape indicator, it must be escaped as "%25" in order to be used as
718      * data within a URI. This BitSet contains that single character only.
719      */
720     protected static final BitSet percent = new BitSet(256);
721     // Static initializer for percent: sets the bit for '%'
722     static {
723         percent.set('%');
724     }
725 
726 
727     /***
728      * BitSet holding the decimal digit characters.
729      * <p><blockquote><pre>
730      * digit    = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
731      * </pre></blockquote><p>
732      */
733     protected static final BitSet digit = new BitSet(256);
734     // Populate digit from a literal listing of its members
735     static {
736         final String members = "0123456789";
737         for (int k = 0; k < members.length(); k++) {
738             digit.set(members.charAt(k));
739         }
740     }
741 
742 
743     /***
744      * BitSet holding the letters 'a' to 'z' and 'A' to 'Z'.
745      * No character outside these two ranges is included.
746      * <p><blockquote><pre>
747      * alpha         = lowalpha | upalpha
748      * </pre></blockquote><p>
749      */
750     protected static final BitSet alpha = new BitSet(256);
751     // Populate alpha; each pass adds one letter in both of its cases
752     static {
753         final int letterCount = 'z' - 'a' + 1;
754         for (int offset = 0; offset < letterCount; offset++) {
755             alpha.set('a' + offset);
756             alpha.set('A' + offset);
757         }
758     }
759 
760 
761     /***
762      * BitSet for alphanum: the union of the alpha and digit sets.
763      * <p><blockquote><pre>
764      *  alphanum      = alpha | digit
765      * </pre></blockquote><p>
766      */
767     protected static final BitSet alphanum = new BitSet(256);
768     // Static initializer for alphanum: copies in the bits of alpha, then of digit
769     static {
770         alphanum.or(alpha);
771         alphanum.or(digit);
772     }
773 
774 
775     /***
776      * BitSet holding the hexadecimal digit characters in both letter cases.
777      * <p><blockquote><pre>
778      * hex           = digit | "A" | "B" | "C" | "D" | "E" | "F" |
779      *                         "a" | "b" | "c" | "d" | "e" | "f"
780      * </pre></blockquote><p>
781      */
782     protected static final BitSet hex = new BitSet(256);
783     // Populate hex: the letters a-f and A-F plus the decimal digits
784     static {
785         // the twelve letter forms are listed literally
786         final String letters = "abcdefABCDEF";
787         for (int k = 0; k < letters.length(); k++) {
788             hex.set(letters.charAt(k));
789         }
790         // the decimal digits are merged in from the digit set
791         hex.or(digit);
792     }
793 
794 
795     /***
796      * BitSet for escaped: the characters that can occur in an escape sequence.
797      * <p><blockquote><pre>
798      * escaped       = "%" hex hex
799      * </pre></blockquote><p>
800      */
801     protected static final BitSet escaped = new BitSet(256);
802     // Union of percent and hex; a set cannot express the "%" hex hex ordering itself
803     static {
804         escaped.or(percent);
805         escaped.or(hex);
806     }
807 
808 
809     /***
810      * BitSet for mark.
811      * <p><blockquote><pre>
812      * mark          = "-" | "_" | "." | "!" | "~" | "*" | "'" |
813      *                 "(" | ")"
814      * </pre></blockquote><p>
815      */
816     protected static final BitSet mark = new BitSet(256);
817     // Static initializer for mark
818     static {
819         mark.set('-');
820         mark.set('_');
821         mark.set('.');
822         mark.set('!');
823         mark.set('~');
824         mark.set('*');
825         mark.set('\'');
826         mark.set('(');
827         mark.set(')');
828     }
829 
830 
831     /***
832      * Data characters that are allowed in a URI but do not have a reserved
833      * purpose are called unreserved. This set is the union of alphanum and mark.
834      * <p><blockquote><pre>
835      * unreserved    = alphanum | mark
836      * </pre></blockquote><p>
837      */
838     protected static final BitSet unreserved = new BitSet(256);
839     // Static initializer for unreserved: copies in the bits of alphanum, then of mark
840     static {
841         unreserved.or(alphanum);
842         unreserved.or(mark);
843     }
844 
845 
846     /***
847      * BitSet for reserved.
848      * <p><blockquote><pre>
849      * reserved      = ";" | "/" | "?" | ":" | "@" | "&amp;" | "=" | "+" |
850      *                 "$" | ","
851      * </pre></blockquote><p>
852      */
853     protected static final BitSet reserved = new BitSet(256);
854     // Static initializer for reserved
855     static {
856         reserved.set(';');
857         reserved.set('/');
858         reserved.set('?');
859         reserved.set(':');
860         reserved.set('@');
861         reserved.set('&');
862         reserved.set('=');
863         reserved.set('+');
864         reserved.set('$');
865         reserved.set(',');
866     }
867 
868 
869     /***
870      * BitSet for uric: the union of the reserved, unreserved and escaped sets.
871      * <p><blockquote><pre>
872      * uric          = reserved | unreserved | escaped
873      * </pre></blockquote><p>
874      */
875     protected static final BitSet uric = new BitSet(256);
876     // Static initializer for uric: copies in the bits of the three sets in turn
877     static {
878         uric.or(reserved);
879         uric.or(unreserved);
880         uric.or(escaped);
881     }
882 
883 
884     /***
885      * BitSet for fragment (alias for uric).
886      * <p><blockquote><pre>
887      * fragment      = *uric
888      * </pre></blockquote><p>
889      */
890     protected static final BitSet fragment = uric; // same BitSet object as uric, not a copy
891 
892 
893     /***
894      * BitSet for query (alias for uric).
895      * <p><blockquote><pre>
896      * query         = *uric
897      * </pre></blockquote><p>
898      */
899     protected static final BitSet query = uric; // same BitSet object as uric, not a copy
900 
901 
902     /***
903      * BitSet for pchar.
904      * <p><blockquote><pre>
905      * pchar         = unreserved | escaped |
906      *                 ":" | "@" | "&amp;" | "=" | "+" | "$" | ","
907      * </pre></blockquote><p>
908      */
909     protected static final BitSet pchar = new BitSet(256);
910     // unreserved and escaped plus the seven literals : @ & = + $ ,
911     static {
912         pchar.or(unreserved);
913         pchar.or(escaped);
914         pchar.set(':');
915         pchar.set('@');
916         pchar.set('&');
917         pchar.set('=');
918         pchar.set('+');
919         pchar.set('$');
920         pchar.set(',');
921     }
922 
923 
924     /***
925      * BitSet for param (alias for pchar).
926      * <p><blockquote><pre>
927      * param         = *pchar
928      * </pre></blockquote><p>
929      */
930     protected static final BitSet param = pchar; // same BitSet object as pchar, not a copy
931 
932 
933     /***
934      * BitSet for segment.
935      * <p><blockquote><pre>
936      * segment       = *pchar *( ";" param )
937      * </pre></blockquote><p>
938      */
939     protected static final BitSet segment = new BitSet(256);
940     // pchar plus ';' (param is the same object as pchar, so the last or() adds no new bits)
941     static {
942         segment.or(pchar);
943         segment.set(';');
944         segment.or(param);
945     }
946 
947 
948     /***
949      * BitSet for path segments.
950      * <p><blockquote><pre>
951      * path_segments = segment *( "/" segment )
952      * </pre></blockquote><p>
953      */
954     protected static final BitSet path_segments = new BitSet(256);
955     // segment characters plus the '/' separator
956     static {
957         path_segments.set('/');
958         path_segments.or(segment);
959     }
960 
961 
962     /***
963      * URI absolute path.
964      * <p><blockquote><pre>
965      * abs_path      = "/"  path_segments
966      * </pre></blockquote><p>
967      */
968     protected static final BitSet abs_path = new BitSet(256);
969     // '/' plus path_segments (which already contains '/', so both sets hold the same characters)
970     static {
971         abs_path.set('/');
972         abs_path.or(path_segments);
973     }
974 
975 
976     /***
977      * URI bitset for encoding typical non-slash characters.
978      * <p><blockquote><pre>
979      * uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
980      *                 "&amp;" | "=" | "+" | "$" | ","
981      * </pre></blockquote><p>
982      */
983     protected static final BitSet uric_no_slash = new BitSet(256);
984     // uric_no_slash = unreserved | escaped | ; ? : @ & = + $ ,   (every uric character except '/')
985     static {
986         uric_no_slash.or(unreserved);
987         uric_no_slash.or(escaped);
988         uric_no_slash.set(';');
989         uric_no_slash.set('?');
990         uric_no_slash.set(':'); // was a second set(';'), which left ':' out of the set
991         uric_no_slash.set('@');
992         uric_no_slash.set('&');
993         uric_no_slash.set('=');
994         uric_no_slash.set('+');
995         uric_no_slash.set('$');
996         uric_no_slash.set(',');
997     }
998     
999 
1000     /***
1001      * URI bitset that combines uric_no_slash and uric.
1002      * <p><blockquote><pre>
1003      * opaque_part   = uric_no_slash *uric
1004      * </pre></blockquote><p>
1005      */
1006     protected static final BitSet opaque_part = new BitSet(256);
1007     // Union of uric_no_slash and uric
1008     static {
1009         // Generous: uric contributes '/', although the grammar bars it as the first character
1010         opaque_part.or(uric_no_slash);
1011         opaque_part.or(uric);
1012     }
1013     
1014 
1015     /***
1016      * URI bitset that combines absolute path and opaque part.
1017      * <p><blockquote><pre>
1018      * path          = [ abs_path | opaque_part ]
1019      * </pre></blockquote><p>
1020      */
1021     protected static final BitSet path = new BitSet(256);
1022     // Union of abs_path and opaque_part
1023     static {
1024         path.or(abs_path);
1025         path.or(opaque_part);
1026     }
1027 
1028 
1029     /***
1030      * Port, a logical alias for digit.
1031      */
1032     protected static final BitSet port = digit; // same BitSet object as digit, not a copy
1033 
1034 
1035     /***
1036      * Bitset that combines digit and dot for IPv4address.
1037      * <p><blockquote><pre>
1038      * IPv4address   = 1*digit "." 1*digit "." 1*digit "." 1*digit
1039      * </pre></blockquote><p>
1040      */
1041     protected static final BitSet IPv4address = new BitSet(256);
1042     // digit characters plus the '.' separator
1043     static {
1044         IPv4address.or(digit);
1045         IPv4address.set('.');
1046     }
1047 
1048 
1049     /***
1050      * RFC 2373.
1051      * <p><blockquote><pre>
1052      * IPv6address = hexpart [ ":" IPv4address ]
1053      * </pre></blockquote><p>
1054      */
1055     protected static final BitSet IPv6address = new BitSet(256);
1056     // hex characters, ':' and everything in IPv4address (digit and '.')
1057     static {
1058         IPv6address.or(hex); // hexpart
1059         IPv6address.set(':');
1060         IPv6address.or(IPv4address);
1061     }
1062 
1063 
1064     /***
1065      * RFC 2732, 2373.
1066      * <p><blockquote><pre>
1067      * IPv6reference   = "[" IPv6address "]"
1068      * </pre></blockquote><p>
1069      */
1070     protected static final BitSet IPv6reference = new BitSet(256);
1071     // IPv6address characters plus the enclosing '[' and ']'
1072     static {
1073         IPv6reference.set('[');
1074         IPv6reference.or(IPv6address);
1075         IPv6reference.set(']');
1076     }
1077 
1078 
1079     /***
1080      * BitSet for toplabel.
1081      * <p><blockquote><pre>
1082      * toplabel      = alpha | alpha *( alphanum | "-" ) alphanum
1083      * </pre></blockquote><p>
1084      */
1085     protected static final BitSet toplabel = new BitSet(256);
1086     // alphanum plus '-'; a character set only, so the grammar's positional rules are not enforced here
1087     static {
1088         toplabel.or(alphanum);
1089         toplabel.set('-');
1090     }
1091 
1092 
1093     /***
1094      * BitSet for domainlabel.
1095      * <p><blockquote><pre>
1096      * domainlabel   = alphanum | alphanum *( alphanum | "-" ) alphanum
1097      * </pre></blockquote><p>
1098      */
1099     protected static final BitSet domainlabel = toplabel; // same BitSet object as toplabel, not a copy
1100 
1101 
1102     /***
1103      * BitSet for hostname.
1104      * <p><blockquote><pre>
1105      * hostname      = *( domainlabel "." ) toplabel [ "." ]
1106      * </pre></blockquote><p>
1107      */
1108     protected static final BitSet hostname = new BitSet(256);
1109     // toplabel characters plus the '.' label separator
1110     static {
1111         hostname.or(toplabel);
1112         // hostname.or(domainlabel); -- redundant: domainlabel is the same object as toplabel
1113         hostname.set('.');
1114     }
1115 
1116 
1117     /***
1118      * BitSet for host.
1119      * <p><blockquote><pre>
1120      * host          = hostname | IPv4address | IPv6reference
1121      * </pre></blockquote><p>
1122      */
1123     protected static final BitSet host = new BitSet(256);
1124     // Union of hostname and IPv6reference
1125     static {
1126         host.or(hostname);
1127         // host.or(IPv4address); -- redundant: IPv6reference already contains the IPv4address characters
1128         host.or(IPv6reference); // IPv4address
1129     }
1130 
1131 
1132     /***
1133      * BitSet for hostport.
1134      * <p><blockquote><pre>
1135      * hostport      = host [ ":" port ]
1136      * </pre></blockquote><p>
1137      */
1138     protected static final BitSet hostport = new BitSet(256);
1139     // host characters, the ':' separator and the port characters
1140     static {
1141         hostport.or(host);
1142         hostport.set(':');
1143         hostport.or(port);
1144     }
1145 
1146 
1147     /***
1148      * Bitset for userinfo.
1149      * <p><blockquote><pre>
1150      * userinfo      = *( unreserved | escaped |
1151      *                    ";" | ":" | "&amp;" | "=" | "+" | "$" | "," )
1152      * </pre></blockquote><p>
1153      */
1154     protected static final BitSet userinfo = new BitSet(256);
1155     // unreserved and escaped plus the seven literals ; : & = + $ ,
1156     static {
1157         userinfo.or(unreserved);
1158         userinfo.or(escaped);
1159         userinfo.set(';');
1160         userinfo.set(':');
1161         userinfo.set('&');
1162         userinfo.set('=');
1163         userinfo.set('+');
1164         userinfo.set('$');
1165         userinfo.set(',');
1166     }
1167 
1168 
1169     /***
1170      * BitSet for within the userinfo component like user and password.
1171      */
1172     public static final BitSet within_userinfo = new BitSet(256);
1173     // Copy of userinfo with the delimiters ; : @ ? / cleared
1174     static {
1175         within_userinfo.or(userinfo);
1176         within_userinfo.clear(';'); // reserved within authority
1177         within_userinfo.clear(':');
1178         within_userinfo.clear('@');
1179         within_userinfo.clear('?');
1180         within_userinfo.clear('/');
1181     }
1182 
1183 
1184     /***
1185      * Bitset for server.
1186      * <p><blockquote><pre>
1187      * server        = [ [ userinfo "@" ] hostport ]
1188      * </pre></blockquote><p>
1189      */
1190     protected static final BitSet server = new BitSet(256);
1191     // userinfo characters, the '@' separator and the hostport characters
1192     static {
1193         server.or(userinfo);
1194         server.set('@');
1195         server.or(hostport);
1196     }
1197 
1198 
1199     /***
1200      * BitSet for reg_name.
1201      * <p><blockquote><pre>
1202      * reg_name      = 1*( unreserved | escaped | "$" | "," |
1203      *                     ";" | ":" | "@" | "&amp;" | "=" | "+" )
1204      * </pre></blockquote><p>
1205      */
1206     protected static final BitSet reg_name = new BitSet(256);
1207     // unreserved and escaped plus the eight literals $ , ; : @ & = +
1208     static {
1209         reg_name.or(unreserved);
1210         reg_name.or(escaped);
1211         reg_name.set('$');
1212         reg_name.set(',');
1213         reg_name.set(';');
1214         reg_name.set(':');
1215         reg_name.set('@');
1216         reg_name.set('&');
1217         reg_name.set('=');
1218         reg_name.set('+');
1219     }
1220 
1221 
1222     /***
1223      * BitSet for authority.
1224      * <p><blockquote><pre>
1225      * authority     = server | reg_name
1226      * </pre></blockquote><p>
1227      */
1228     protected static final BitSet authority = new BitSet(256);
1229     // Union of server and reg_name
1230     static {
1231         authority.or(server);
1232         authority.or(reg_name);
1233     }
1234 
1235 
1236     /***
1237      * BitSet for scheme.
1238      * <p><blockquote><pre>
1239      * scheme        = alpha *( alpha | digit | "+" | "-" | "." )
1240      * </pre></blockquote><p>
1241      */
1242     protected static final BitSet scheme = new BitSet(256);
1243     // alpha and digit plus '+', '-' and '.'; a character set only, so "must start with alpha" is not enforced here
1244     static {
1245         scheme.or(alpha);
1246         scheme.or(digit);
1247         scheme.set('+');
1248         scheme.set('-');
1249         scheme.set('.');
1250     }
1251 
1252 
1253     /***
1254      * BitSet for rel_segment.
1255      * <p><blockquote><pre>
1256      * rel_segment   = 1*( unreserved | escaped |
1257      *                     ";" | "@" | "&amp;" | "=" | "+" | "$" | "," )
1258      * </pre></blockquote><p>
1259      */
1260     protected static final BitSet rel_segment = new BitSet(256);
1261     // unreserved and escaped plus the seven literals ; @ & = + $ ,   (no ':')
1262     static {
1263         rel_segment.or(unreserved);
1264         rel_segment.or(escaped);
1265         rel_segment.set(';');
1266         rel_segment.set('@');
1267         rel_segment.set('&');
1268         rel_segment.set('=');
1269         rel_segment.set('+');
1270         rel_segment.set('$');
1271         rel_segment.set(',');
1272     }
1273 
1274 
1275     /***
1276      * BitSet for rel_path.
1277      * <p><blockquote><pre>
1278      * rel_path      = rel_segment [ abs_path ]
1279      * </pre></blockquote><p>
1280      */
1281     protected static final BitSet rel_path = new BitSet(256);
1282     // Union of rel_segment and abs_path
1283     static {
1284         rel_path.or(rel_segment);
1285         rel_path.or(abs_path);
1286     }
1287 
1288 
1289     /***
1290      * BitSet for net_path.
1291      * <p><blockquote><pre>
1292      * net_path      = "//" authority [ abs_path ]
1293      * </pre></blockquote><p>
1294      */
1295     protected static final BitSet net_path = new BitSet(256);
1296     // '/' (for the leading "//") plus the authority and abs_path characters
1297     static {
1298         net_path.set('/');
1299         net_path.or(authority);
1300         net_path.or(abs_path);
1301     }
1302     
1303 
1304     /***
1305      * BitSet for hier_part.
1306      * <p><blockquote><pre>
1307      * hier_part     = ( net_path | abs_path ) [ "?" query ]
1308      * </pre></blockquote><p>
1309      */
1310     protected static final BitSet hier_part = new BitSet(256);
1311     // Union of net_path, abs_path and query
1312     static {
1313         hier_part.or(net_path);
1314         hier_part.or(abs_path);
1315         // hier_part.set('?'); -- not needed: '?' already arrives with query (uric includes reserved)
1316         hier_part.or(query);
1317     }
1318 
1319 
1320     /***
1321      * BitSet for relativeURI.
1322      * <p><blockquote><pre>
1323      * relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
1324      * </pre></blockquote><p>
1325      */
1326     protected static final BitSet relativeURI = new BitSet(256);
1327     // Union of net_path, abs_path, rel_path and query
1328     static {
1329         relativeURI.or(net_path);
1330         relativeURI.or(abs_path);
1331         relativeURI.or(rel_path);
1332         // relativeURI.set('?'); -- not needed: '?' already arrives with query (uric includes reserved)
1333         relativeURI.or(query);
1334     }
1335 
1336 
1337     /***
1338      * BitSet for absoluteURI.
1339      * <p><blockquote><pre>
1340      * absoluteURI   = scheme ":" ( hier_part | opaque_part )
1341      * </pre></blockquote><p>
1342      */
1343     protected static final BitSet absoluteURI = new BitSet(256);
1344     // scheme characters, the ':' separator, and the hier_part and opaque_part characters
1345     static {
1346         absoluteURI.or(scheme);
1347         absoluteURI.set(':');
1348         absoluteURI.or(hier_part);
1349         absoluteURI.or(opaque_part);
1350     }
1351 
1352 
1353     /***
1354      * BitSet for URI-reference.
1355      * <p><blockquote><pre>
1356      * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1357      * </pre></blockquote><p>
1358      */
1359     protected static final BitSet URI_reference = new BitSet(256);
1360     // absoluteURI and relativeURI characters, the '#' separator and the fragment characters
1361     static {
1362         URI_reference.or(absoluteURI);
1363         URI_reference.or(relativeURI);
1364         URI_reference.set('#');
1365         URI_reference.or(fragment);
1366     }
1367 
1368     // ---------------------------- Characters disallowed within the URI syntax
1369     // Excluded US-ASCII Characters are like control, space, delims and unwise
1370 
1371     /***
1372      * BitSet for control.
1373      */
1374     public static final BitSet control = new BitSet(256);
1375     // Code points 0x00 through 0x1F inclusive, plus 0x7F
1376     static {
1377         for (int i = 0; i <= 0x1F; i++) {
1378             control.set(i);
1379         }
1380         control.set(0x7F);
1381     }
1382 
1383     /***
1384      * BitSet for space.
1385      */
1386     public static final BitSet space = new BitSet(256);
1387     // A single bit: 0x20, the US-ASCII space character
1388     static {
1389         space.set(0x20);
1390     }
1391 
1392 
1393     /***
1394      * BitSet for delims.
1395      */
1396     public static final BitSet delims = new BitSet(256);
1397     // One bit per delimiter character: < > # % "
1398     static {
1399         delims.set('<');
1400         delims.set('>');
1401         delims.set('#');
1402         delims.set('%');
1403         delims.set('"');
1404     }
1405 
1406 
1407     /***
1408      * BitSet for unwise.
1409      */
1410     public static final BitSet unwise = new BitSet(256);
1411     // One bit per RFC 2396 unwise character: { } | backslash ^ [ ] `
1412     static {
1413         unwise.set('{');
1414         unwise.set('}');
1415         unwise.set('|');
1416         unwise.set('\\'); // backslash, written as an escaped char literal
1417         unwise.set('^');
1418         unwise.set('[');
1419         unwise.set(']');
1420         unwise.set('`');
1421     }
1422 
1423 
1424     /***
1425      * Disallowed rel_path before escaping.
1426      */
1427     public static final BitSet disallowed_rel_path = new BitSet(256);
1428     // Set difference: the uric characters that rel_path does not contain
1429     static {
1430         disallowed_rel_path.or(uric);
1431         disallowed_rel_path.andNot(rel_path);
1432     }
1433 
1434 
1435     /***
1436      * Disallowed opaque_part before escaping.
1437      */
1438     public static final BitSet disallowed_opaque_part = new BitSet(256);
1439     // Set difference uric minus opaque_part. NOTE(review): opaque_part is built with or(uric), so this set ends up empty
1440     static {
1441         disallowed_opaque_part.or(uric);
1442         disallowed_opaque_part.andNot(opaque_part);
1443     }
1444 
1445     // ----------------------- Characters allowed within and for each component
1446 
1447     /***
1448      * Those characters that are allowed for the authority component.
1449      */
1450     public static final BitSet allowed_authority = new BitSet(256);
1451     // Copy of authority with the '%' bit cleared
1452     static {
1453         allowed_authority.or(authority);
1454         allowed_authority.clear('%');
1455     }
1456 
1457 
1458     /***
1459      * Those characters that are allowed for the opaque_part.
1460      */
1461     public static final BitSet allowed_opaque_part = new BitSet(256);
1462     // Copy of opaque_part with the '%' bit cleared
1463     static {
1464         allowed_opaque_part.or(opaque_part);
1465         allowed_opaque_part.clear('%');
1466     }
1467 
1468 
1469     /***
1470      * Those characters that are allowed for the reg_name.
1471      */
1472     public static final BitSet allowed_reg_name = new BitSet(256);
1473     // Copy of reg_name with the '%' bit cleared
1474     static {
1475         allowed_reg_name.or(reg_name);
1476         // allowed_reg_name.andNot(percent); -- alternative spelling, left disabled in favour of clear('%')
1477         allowed_reg_name.clear('%');
1478     }
1479 
1480 
1481     /***
1482      * Those characters that are allowed for the userinfo component.
1483      */
1484     public static final BitSet allowed_userinfo = new BitSet(256);
1485     // Copy of userinfo with the '%' bit cleared
1486     static {
1487         allowed_userinfo.or(userinfo);
1488         // allowed_userinfo.andNot(percent); -- alternative spelling, left disabled in favour of clear('%')
1489         allowed_userinfo.clear('%');
1490     }
1491 
1492 
1493     /***
1494      * Those characters that are allowed for within the userinfo component.
1495      */
1496     public static final BitSet allowed_within_userinfo = new BitSet(256);
1497     // Copy of within_userinfo with the '%' bit cleared
1498     static {
1499         allowed_within_userinfo.or(within_userinfo);
1500         allowed_within_userinfo.clear('%');
1501     }
1502 
1503 
1504     /***
1505      * Those characters that are allowed for the IPv6reference component.
1506      * The characters '[', ']' in IPv6reference should be excluded.
1507      */
1508     public static final BitSet allowed_IPv6reference = new BitSet(256);
1509     // Copy of IPv6reference with the enclosing brackets removed
1510     static {
1511         allowed_IPv6reference.or(IPv6reference);
1512         // allowed_IPv6reference.andNot(unwise); -- left disabled; only '[' and ']' are cleared below
1513         allowed_IPv6reference.clear('[');
1514         allowed_IPv6reference.clear(']');
1515     }
1516 
1517 
1518     /***
1519      * Those characters that are allowed for the host component.
1520      * The characters '[', ']' in IPv6reference should be excluded.
1521      */
1522     public static final BitSet allowed_host = new BitSet(256);
1523     // Union of hostname and allowed_IPv6reference (the bracket-free IPv6 set)
1524     static {
1525         allowed_host.or(hostname);
1526         allowed_host.or(allowed_IPv6reference);
1527     }
1528 
1529 
1530     /***
1531      * Those characters that are allowed for the authority component.
1532      */
1533     public static final BitSet allowed_within_authority = new BitSet(256);
1534     // Union of server and reg_name with the delimiters ; : @ ? / cleared
1535     static {
1536         allowed_within_authority.or(server);
1537         allowed_within_authority.or(reg_name);
1538         allowed_within_authority.clear(';');
1539         allowed_within_authority.clear(':');
1540         allowed_within_authority.clear('@');
1541         allowed_within_authority.clear('?');
1542         allowed_within_authority.clear('/');
1543     }
1544 
1545 
1546     /***
1547      * Those characters that are allowed for the abs_path.
1548      */
1549     public static final BitSet allowed_abs_path = new BitSet(256);
1550     // Copy of abs_path minus every bit in the percent set (presumably just '%' -- confirm at its declaration)
1551     static {
1552         allowed_abs_path.or(abs_path);
1553         // allowed_abs_path.set('/');  // already included
1554         allowed_abs_path.andNot(percent);
1555     }
1556 
1557 
1558     /***
1559      * Those characters that are allowed for the rel_path.
1560      */
1561     public static final BitSet allowed_rel_path = new BitSet(256);
1562     // Copy of rel_path with the '%' bit cleared
1563     static {
1564         allowed_rel_path.or(rel_path);
1565         allowed_rel_path.clear('%');
1566     }
1567 
1568 
1569     /***
1570      * Those characters that are allowed within the path.
1571      */
1572     public static final BitSet allowed_within_path = new BitSet(256);
1573     // Copy of abs_path with / ; = ? cleared. NOTE(review): unlike the sibling allowed_* sets, '%' is not cleared here
1574     static {
1575         allowed_within_path.or(abs_path);
1576         allowed_within_path.clear('/');
1577         allowed_within_path.clear(';');
1578         allowed_within_path.clear('=');
1579         allowed_within_path.clear('?');
1580     }
1581 
1582 
1583     /***
1584      * Those characters that are allowed for the query component.
1585      */
1586     public static final BitSet allowed_query = new BitSet(256);
1587     // Copy of uric with the '%' bit cleared
1588     static {
1589         allowed_query.or(uric);
1590         allowed_query.clear('%');
1591     }
1592 
1593 
1594     /***
1595      * Those characters that are allowed within the query component.
1596      */
1597     public static final BitSet allowed_within_query = new BitSet(256);
1598     // Copy of allowed_query with every reserved character removed
1599     static {
1600         allowed_within_query.or(allowed_query);
1601         allowed_within_query.andNot(reserved); // excluded 'reserved'
1602     }
1603 
1604 
1605     /***
1606      * Those characters that are allowed for the fragment component.
1607      */
1608     public static final BitSet allowed_fragment = new BitSet(256);
1609     // Copy of uric with the '%' bit cleared (same construction as allowed_query)
1610     static {
1611         allowed_fragment.or(uric);
1612         allowed_fragment.clear('%');
1613     }
1614 
1615     // ------------------------------------------- Flags for this URI-reference
1616 
1617     // TODO: Figure out what all these variables are for and provide javadoc
1618     // NOTE(review): instance flags with no initializer, so each starts out false. Each is named after a grammar production quoted beside it; presumably the parser sets it when that production matched -- confirm against the parsing code.
1619     // URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1620     // absoluteURI   = scheme ":" ( hier_part | opaque_part )
1621     protected boolean _is_hier_part;
1622     protected boolean _is_opaque_part;
1623     // relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
1624     // hier_part     = ( net_path | abs_path ) [ "?" query ]
1625     protected boolean _is_net_path;
1626     protected boolean _is_abs_path;
1627     protected boolean _is_rel_path;
1628     // net_path      = "//" authority [ abs_path ]
1629     // authority     = server | reg_name
1630     protected boolean _is_reg_name;
1631     protected boolean _is_server;  // = _has_server
1632     // server        = [ [ userinfo "@" ] hostport ]
1633     // host          = hostname | IPv4address | IPv6reference
1634     protected boolean _is_hostname;
1635     protected boolean _is_IPv4address;
1636     protected boolean _is_IPv6reference;
1637 
1638     // ------------------------------------------ Character and escape encoding
1639     
1640     /***
1641      * Encodes URI string.
1642      *
1643      * This is a two-step mapping: one from original characters to octets, and
1644      * subsequently a second from octets to URI characters:
1645      * <p><blockquote><pre>
1646      *   original character sequence->octet sequence->URI character sequence
1647      * </pre></blockquote><p>
1648      *
1649      * An escaped octet is encoded as a character triplet, consisting of the
1650      * percent character "%" followed by the two hexadecimal digits
1651      * representing the octet code. For example, "%20" is the escaped
1652      * encoding for the US-ASCII space character.
1653      * <p>
1654      * Conversion from the local filesystem character set to UTF-8 will
1655      * normally involve a two step process. First convert the local character
1656      * set to the UCS; then convert the UCS to UTF-8.
1657      * The first step in the process can be performed by maintaining a mapping
1658      * table that includes the local character set code and the corresponding
1659      * UCS code.
1660      * The next step is to convert the UCS character code to the UTF-8 encoding.
1661      * <p>
1662      * Mapping between vendor codepages can be done in a very similar manner
1663      * as described above.
1664      * <p>
1665      * The only time escape encodings can safely be made is when a URI is
1666      * being created from its component parts.  The escape and validate methods
1667      * are internally performed within this method.
1668      *
1669      * @param original the original character sequence
1670      * @param allowed those characters that are allowed within a component
1671      * @param charset the protocol charset
1672      * @return URI character sequence
1673      * @throws URIException unsupported character encoding (a null argument raises IllegalArgumentException instead)
1674      */
1675         
1676     protected static char[] encode(String original, BitSet allowed,
1677             String charset) throws URIException {
             // Null arguments raise the unchecked IllegalArgumentException, not URIException.
1678         if (original == null) {
1679             throw new IllegalArgumentException("Original string may not be null");
1680         }
1681         if (allowed == null) {
1682             throw new IllegalArgumentException("Allowed bitset may not be null");
1683         }
             // Step 1: original characters -> octets in the given charset (EncodingUtil.getBytes).
             // Step 2: octets -> URI characters; URLCodec.encodeUrl receives the allowed set
             // (presumably escaping every octet outside it as %XX -- see the URLCodec documentation).
1684         byte[] rawdata = URLCodec.encodeUrl(allowed, EncodingUtil.getBytes(original, charset));
             // The encoded octets are turned back into text via EncodingUtil.getAsciiString.
1685         return EncodingUtil.getAsciiString(rawdata).toCharArray();
1686     }
1687 
1688     /***
1689      * Decodes URI encoded string.
1690      *
1691      * This is a two-step mapping: one from URI characters to octets, and
1692      * subsequently a second from octets to original characters:
1693      * <p><blockquote><pre>
1694      *   URI character sequence->octet sequence->original character sequence
1695      * </pre></blockquote><p>
1696      *
1697      * A URI must be separated into its components before the escaped
1698      * characters within those components can be safely decoded.
1699      * <p>
1700      * Notice that there is a chance that URI characters that are non UTF-8
1701      * may be parsed as valid UTF-8.  A recent non-scientific analysis found
1702      * that EUC encoded Japanese words had a 2.7% false reading; SJIS had a
1703      * 0.0005% false reading; other encoding such as ASCII or KOI-8 have a 0%
1704      * false reading.
1705      * <p>
1706      * The percent "%" character always has the reserved purpose of being
1707      * the escape indicator, it must be escaped as "%25" in order to be used
1708      * as data within a URI.
1709      * <p>
1710      * The unescape method is internally performed within this method.
1711      *
1712      * @param component the URI character sequence
1713      * @param charset the protocol charset
1714      * @return original character sequence
1715      * @throws URIException incomplete trailing escape pattern or unsupported
1716      * character encoding
1717      */
1718     protected static String decode(char[] component, String charset) 
1719         throws URIException {
             // A null array raises the unchecked IllegalArgumentException, not URIException.
1720         if (component == null) {
1721             throw new IllegalArgumentException("Component array of chars may not be null");
1722         }
             // Delegate to the String overload; the chars are copied into a new String first.
1723         return decode(new String(component), charset);
1724     }
1725 
1726     /***
1727      * Decodes URI encoded string.
1728      *
1729      * This is a two-step mapping: one from URI characters to octets, and
1730      * subsequently a second from octets to original characters:
1731      * <p><blockquote><pre>
1732      *   URI character sequence->octet sequence->original character sequence
1733      * </pre></blockquote><p>
1734      *
1735      * A URI must be separated into its components before the escaped
1736      * characters within those components can be safely decoded.
1737      * <p>
1738      * Notice that there is a chance that URI characters that are non UTF-8
1739      * may be parsed as valid UTF-8.  A recent non-scientific analysis found
1740      * that EUC encoded Japanese words had a 2.7% false reading; SJIS had a
1741      * 0.0005% false reading; other encoding such as ASCII or KOI-8 have a 0%
1742      * false reading.
1743      * <p>
1744      * The percent "%" character always has the reserved purpose of being
1745      * the escape indicator, it must be escaped as "%25" in order to be used
1746      * as data within a URI.
1747      * <p>
1748      * The unescape method is internally performed within this method.
1749      *
1750      * @param component the URI character sequence
1751      * @param charset the protocol charset
1752      * @return original character sequence
1753      * @throws URIException incomplete trailing escape pattern or unsupported
1754      * character encoding
1755      * 
1756      * @since 3.0
1757      */
1758     protected static String decode(String component, String charset) 
1759         throws URIException {
1760         if (component == null) {
                 // NOTE(review): the message says "array of chars" although this overload takes a String.
1761             throw new IllegalArgumentException("Component array of chars may not be null");
1762         }
1763         byte[] rawdata = null;
1764         try { 
                 // URI characters -> octets: the component is taken as ASCII bytes and URL-decoded.
1765             rawdata = URLCodec.decodeUrl(EncodingUtil.getAsciiBytes(component));
1766         } catch (DecoderException e) {
                 // Only the message is carried over; the DecoderException is not chained as the cause.
1767             throw new URIException(e.getMessage());
1768         }
             // Octets -> original characters, interpreted in the requested charset.
1769         return EncodingUtil.getString(rawdata, charset);
1770     }
1771     /***
1772      * Pre-validate the unescaped URI string within a specific component.
1773      *
1774      * @param component the component string within the component
1775      * @param disallowed those characters disallowed within the component
1776      * @return if true, it doesn't have the disallowed characters
1777      * if false, the component is undefined or an incorrect one
1778      */
1779     protected boolean prevalidate(String component, BitSet disallowed) {
1780         // Scan the component for any character whose bit is set in disallowed
1781         if (component == null) {
1782             return false; // undefined
1783         }
1784         char[] target = component.toCharArray();
1785         for (int i = 0; i < target.length; i++) {
1786             if (disallowed.get(target[i])) {
1787                 return false; // first disallowed character ends the scan
1788             }
1789         }
1790         return true; // also reached for an empty component
1791     }
1792 
1793 
1794     /***
1795      * Validate the URI characters within a specific component.
1796      * Validation must be performed after escape encoding, or on a component
1797      * that does not include escaped characters.
1798      *
1799      * @param component the characters sequence within the component
1800      * @param generous those characters that are allowed within a component
1801      * @return if true, it's the correct URI character sequence
1802      */
1803     protected boolean validate(char[] component, BitSet generous) {
1804         // validate each component by generous characters
1805         return validate(component, 0, -1, generous);
1806     }
1807 
1808 
1809     /***
1810      * Validate the URI characters within a specific component.
1811      * The component must be performed after escape encoding. Or it doesn't
1812      * include escaped characters.
1813      * <p>
1814      * It's not that much strict, generous.  The strict validation might be 
1815      * performed before being called this method.
1816      *
1817      * @param component the characters sequence within the component
1818      * @param soffset the starting offset of the given component
1819      * @param eoffset the ending offset of the given component
1820      * if -1, it means the length of the component
1821      * @param generous those characters that are allowed within a component
1822      * @return if true, it's the correct URI character sequence
1823      */
1824     protected boolean validate(char[] component, int soffset, int eoffset,
1825             BitSet generous) {
1826         // validate each component by generous characters
1827         if (eoffset == -1) {
1828             eoffset = component.length - 1;
1829         }
1830         for (int i = soffset; i <= eoffset; i++) {
1831             if (!generous.get(component[i])) { 
1832                 return false;
1833             }
1834         }
1835         return true;
1836     }
1837 
1838 
1839     /***
1840      * In order to avoid any possilbity of conflict with non-ASCII characters,
1841      * Parse a URI reference as a <code>String</code> with the character
1842      * encoding of the local system or the document.
1843      * <p>
1844      * The following line is the regular expression for breaking-down a URI
1845      * reference into its components.
1846      * <p><blockquote><pre>
1847      *   ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1848      *    12            3  4          5       6  7        8 9
1849      * </pre></blockquote><p>
1850      * For example, matching the above expression to
1851      *   http://jakarta.apache.org/ietf/uri/#Related
1852      * results in the following subexpression matches:
1853      * <p><blockquote><pre>
1854      *               $1 = http:
1855      *  scheme    =  $2 = http
1856      *               $3 = //jakarta.apache.org
1857      *  authority =  $4 = jakarta.apache.org
1858      *  path      =  $5 = /ietf/uri/
1859      *               $6 = <undefined>
1860      *  query     =  $7 = <undefined>
1861      *               $8 = #Related
1862      *  fragment  =  $9 = Related
1863      * </pre></blockquote><p>
1864      *
1865      * @param original the original character sequence
1866      * @param escaped <code>true</code> if <code>original</code> is escaped
1867      * @throws URIException If an error occurs.
1868      */
1869     protected void parseUriReference(String original, boolean escaped)
1870         throws URIException {
1871 
1872         // validate and contruct the URI character sequence
1873         if (original == null) {
1874             throw new URIException("URI-Reference required");
1875         }
1876 
1877         /* @
1878          *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1879          */
1880         String tmp = original.trim();
1881         
1882         /*
1883          * The length of the string sequence of characters.
1884          * It may not be equal to the length of the byte array.
1885          */
1886         int length = tmp.length();
1887 
1888         /*
1889          * Remove the delimiters like angle brackets around an URI.
1890          */
1891         if (length > 0) {
1892             char[] firstDelimiter = { tmp.charAt(0) };
1893             if (validate(firstDelimiter, delims)) {
1894                 if (length >= 2) {
1895                     char[] lastDelimiter = { tmp.charAt(length - 1) };
1896                     if (validate(lastDelimiter, delims)) {
1897                         tmp = tmp.substring(1, length - 1);
1898                         length = length - 2;
1899                     }
1900                 }
1901             }
1902         }
1903 
1904         /*
1905          * The starting index
1906          */
1907         int from = 0;
1908 
1909         /*
1910          * The test flag whether the URI is started from the path component.
1911          */
1912         boolean isStartedFromPath = false;
1913         int atColon = tmp.indexOf(':');
1914         int atSlash = tmp.indexOf('/');
1915         if (atColon <= 0 || (atSlash >= 0 && atSlash < atColon)) {
1916             isStartedFromPath = true;
1917         }
1918 
1919         /*
1920          * <p><blockquote><pre>
1921          *     @@@@@@@@
1922          *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1923          * </pre></blockquote><p>
1924          */
1925         int at = indexFirstOf(tmp, isStartedFromPath ? "/?#" : ":/?#", from);
1926         if (at == -1) { 
1927             at = 0;
1928         }
1929 
1930         /*
1931          * Parse the scheme.
1932          * <p><blockquote><pre>
1933          *  scheme    =  $2 = http
1934          *              @
1935          *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1936          * </pre></blockquote><p>
1937          */
1938         if (at > 0 && at < length && tmp.charAt(at) == ':') {
1939             char[] target = tmp.substring(0, at).toLowerCase().toCharArray();
1940             if (validate(target, scheme)) {
1941                 _scheme = target;
1942             } else {
1943                 throw new URIException("incorrect scheme");
1944             }
1945             from = ++at;
1946         }
1947 
1948         /*
1949          * Parse the authority component.
1950          * <p><blockquote><pre>
1951          *  authority =  $4 = jakarta.apache.org
1952          *                  @@
1953          *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1954          * </pre></blockquote><p>
1955          */
1956         // Reset flags
1957         _is_net_path = _is_abs_path = _is_rel_path = _is_hier_part = false;
1958         if (0 <= at && at < length && tmp.charAt(at) == '/') {
1959             // Set flag
1960             _is_hier_part = true;
1961             if (at + 2 < length && tmp.charAt(at + 1) == '/') {
1962                 // the temporary index to start the search from
1963                 int next = indexFirstOf(tmp, "/?#", at + 2);
1964                 if (next == -1) {
1965                     next = (tmp.substring(at + 2).length() == 0) ? at + 2 
1966                         : tmp.length();
1967                 }
1968                 parseAuthority(tmp.substring(at + 2, next), escaped);
1969                 from = at = next;
1970                 // Set flag
1971                 _is_net_path = true;
1972             }
1973             if (from == at) {
1974                 // Set flag
1975                 _is_abs_path = true;
1976             }
1977         }
1978 
1979         /*
1980          * Parse the path component.
1981          * <p><blockquote><pre>
1982          *  path      =  $5 = /ietf/uri/
1983          *                                @@@@@@
1984          *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1985          * </pre></blockquote><p>
1986          */
1987         if (from < length) {
1988             // rel_path = rel_segment [ abs_path ]
1989             int next = indexFirstOf(tmp, "?#", from);
1990             if (next == -1) {
1991                 next = tmp.length();
1992             }
1993             if (!_is_abs_path) {
1994                 if (!escaped 
1995                     && prevalidate(tmp.substring(from, next), disallowed_rel_path) 
1996                     || escaped 
1997                     && validate(tmp.substring(from, next).toCharArray(), rel_path)) {
1998                     // Set flag
1999                     _is_rel_path = true;
2000                 } else if (!escaped 
2001                     && prevalidate(tmp.substring(from, next), disallowed_opaque_part) 
2002                     || escaped 
2003                     && validate(tmp.substring(from, next).toCharArray(), opaque_part)) {
2004                     // Set flag
2005                     _is_opaque_part = true;
2006                 } else {
2007                     // the path component may be empty
2008                     _path = null;
2009                 }
2010             }
2011             if (escaped) {
2012                 setRawPath(tmp.substring(from, next).toCharArray());
2013             } else {
2014                 setPath(tmp.substring(from, next));
2015             }
2016             at = next;
2017         }
2018 
2019         // set the charset to do escape encoding
2020         String charset = getProtocolCharset();
2021 
2022         /*
2023          * Parse the query component.
2024          * <p><blockquote><pre>
2025          *  query     =  $7 = <undefined>
2026          *                                        @@@@@@@@@
2027          *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
2028          * </pre></blockquote><p>
2029          */
2030         if (0 <= at && at + 1 < length && tmp.charAt(at) == '?') {
2031             int next = tmp.indexOf('#', at + 1);
2032             if (next == -1) {
2033                 next = tmp.length();
2034             }
2035             _query = (escaped) ? tmp.substring(at + 1, next).toCharArray() 
2036                 : encode(tmp.substring(at + 1, next), allowed_query, charset);
2037             at = next;
2038         }
2039 
2040         /*
2041          * Parse the fragment component.
2042          * <p><blockquote><pre>
2043          *  fragment  =  $9 = Related
2044          *                                                   @@@@@@@@
2045          *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
2046          * </pre></blockquote><p>
2047          */
2048         if (0 <= at && at + 1 <= length && tmp.charAt(at) == '#') {
2049             if (at + 1 == length) { // empty fragment
2050                 _fragment = "".toCharArray();
2051             } else {
2052                 _fragment = (escaped) ? tmp.substring(at + 1).toCharArray() 
2053                     : encode(tmp.substring(at + 1), allowed_fragment, charset);
2054             }
2055         }
2056 
2057         // set this URI.
2058         setURI();
2059     }
2060 
2061 
2062     /***
2063      * Get the earlier index that to be searched for the first occurrance in
2064      * one of any of the given string.
2065      *
2066      * @param s the string to be indexed
2067      * @param delims the delimiters used to index
2068      * @return the earlier index if there are delimiters
2069      */
2070     protected int indexFirstOf(String s, String delims) {
2071         return indexFirstOf(s, delims, -1);
2072     }
2073 
2074 
2075     /***
2076      * Get the earlier index that to be searched for the first occurrance in
2077      * one of any of the given string.
2078      *
2079      * @param s the string to be indexed
2080      * @param delims the delimiters used to index
2081      * @param offset the from index
2082      * @return the earlier index if there are delimiters
2083      */
2084     protected int indexFirstOf(String s, String delims, int offset) {
2085         if (s == null || s.length() == 0) {
2086             return -1;
2087         }
2088         if (delims == null || delims.length() == 0) {
2089             return -1;
2090         }
2091         // check boundaries
2092         if (offset < 0) {
2093             offset = 0;
2094         } else if (offset > s.length()) {
2095             return -1;
2096         }
2097         // s is never null
2098         int min = s.length();
2099         char[] delim = delims.toCharArray();
2100         for (int i = 0; i < delim.length; i++) {
2101             int at = s.indexOf(delim[i], offset);
2102             if (at >= 0 && at < min) {
2103                 min = at;
2104             }
2105         }
2106         return (min == s.length()) ? -1 : min;
2107     }
2108 
2109 
2110     /***
2111      * Get the earlier index that to be searched for the first occurrance in
2112      * one of any of the given array.
2113      *
2114      * @param s the character array to be indexed
2115      * @param delim the delimiter used to index
2116      * @return the ealier index if there are a delimiter
2117      */
2118     protected int indexFirstOf(char[] s, char delim) {
2119         return indexFirstOf(s, delim, 0);
2120     }
2121 
2122 
2123     /***
2124      * Get the earlier index that to be searched for the first occurrance in
2125      * one of any of the given array.
2126      *
2127      * @param s the character array to be indexed
2128      * @param delim the delimiter used to index
2129      * @param offset The offset.
2130      * @return the ealier index if there is a delimiter
2131      */
2132     protected int indexFirstOf(char[] s, char delim, int offset) {
2133         if (s == null || s.length == 0) {
2134             return -1;
2135         }
2136         // check boundaries
2137         if (offset < 0) {
2138             offset = 0;
2139         } else if (offset > s.length) {
2140             return -1;
2141         }
2142         for (int i = offset; i < s.length; i++) {
2143             if (s[i] == delim) {
2144                 return i;
2145             }
2146         }
2147         return -1;
2148     }
2149 
2150 
2151     /***
2152      * Parse the authority component.
2153      *
2154      * @param original the original character sequence of authority component
2155      * @param escaped <code>true</code> if <code>original</code> is escaped
2156      * @throws URIException If an error occurs.
2157      */
2158     protected void parseAuthority(String original, boolean escaped)
2159         throws URIException {
2160 
2161         // Reset flags
2162         _is_reg_name = _is_server =
2163         _is_hostname = _is_IPv4address = _is_IPv6reference = false;
2164 
2165         // set the charset to do escape encoding
2166         String charset = getProtocolCharset();
2167 
2168         boolean hasPort = true;
2169         int from = 0;
2170         int next = original.indexOf('@');
2171         if (next != -1) { // neither -1 and 0
2172             // each protocol extented from URI supports the specific userinfo
2173             _userinfo = (escaped) ? original.substring(0, next).toCharArray() 
2174                 : encode(original.substring(0, next), allowed_userinfo,
2175                         charset);
2176             from = next + 1;
2177         }
2178         next = original.indexOf('[', from);
2179         if (next >= from) {
2180             next = original.indexOf(']', from);
2181             if (next == -1) {
2182                 throw new URIException(URIException.PARSING, "IPv6reference");
2183             } else {
2184                 next++;
2185             }
2186             // In IPv6reference, '[', ']' should be excluded
2187             _host = (escaped) ? original.substring(from, next).toCharArray() 
2188                 : encode(original.substring(from, next), allowed_IPv6reference,
2189                         charset);
2190             // Set flag
2191             _is_IPv6reference = true;
2192         } else { // only for !_is_IPv6reference
2193             next = original.indexOf(':', from);
2194             if (next == -1) {
2195                 next = original.length();
2196                 hasPort = false;
2197             }
2198             // REMINDME: it doesn't need the pre-validation
2199             _host = original.substring(from, next).toCharArray();
2200             if (validate(_host, IPv4address)) {
2201                 // Set flag
2202                 _is_IPv4address = true;
2203             } else if (validate(_host, hostname)) {
2204                 // Set flag
2205                 _is_hostname = true;
2206             } else {
2207                 // Set flag
2208                 _is_reg_name = true;
2209             }
2210         }
2211         if (_is_reg_name) {
2212             // Reset flags for a server-based naming authority
2213             _is_server = _is_hostname = _is_IPv4address =
2214             _is_IPv6reference = false;
2215             // set a registry-based naming authority
2216             _authority = (escaped) ? original.toString().toCharArray() 
2217                 : encode(original.toString(), allowed_reg_name, charset);
2218         } else {
2219             if (original.length() - 1 > next && hasPort 
2220                 && original.charAt(next) == ':') { // not empty
2221                 from = next + 1;
2222                 try {
2223                     _port = Integer.parseInt(original.substring(from));
2224                 } catch (NumberFormatException error) {
2225                     throw new URIException(URIException.PARSING,
2226                             "invalid port number");
2227                 }
2228             }
2229             // set a server-based naming authority
2230             StringBuffer buf = new StringBuffer();
2231             if (_userinfo != null) { // has_userinfo
2232                 buf.append(_userinfo);
2233                 buf.append('@');
2234             }
2235             if (_host != null) {
2236                 buf.append(_host);
2237                 if (_port != -1) {
2238                     buf.append(':');
2239                     buf.append(_port);
2240                 }
2241             }
2242             _authority = buf.toString().toCharArray();
2243             // Set flag
2244             _is_server = true;
2245         }
2246     }
2247 
2248 
2249     /***
2250      * Once it's parsed successfully, set this URI.
2251      *
2252      * @see #getRawURI
2253      */
2254     protected void setURI() {
2255         // set _uri
2256         StringBuffer buf = new StringBuffer();
2257         // ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
2258         if (_scheme != null) {
2259             buf.append(_scheme);
2260             buf.append(':');
2261         }
2262         if (_is_net_path) {
2263             buf.append("//");
2264             if (_authority != null) { // has_authority
2265                 buf.append(_authority);
2266             }
2267         }
2268         if (_opaque != null && _is_opaque_part) {
2269             buf.append(_opaque);
2270         } else if (_path != null) {
2271             // _is_hier_part or _is_relativeURI
2272             if (_path.length != 0) {
2273                 buf.append(_path);
2274             }
2275         }
2276         if (_query != null) { // has_query
2277             buf.append('?');
2278             buf.append(_query);
2279         }
2280         // ignore the fragment identifier
2281         _uri = buf.toString().toCharArray();
2282         hash = 0;
2283     }
2284 
2285     // ----------------------------------------------------------- Test methods
2286   
2287 
2288     /***
2289      * Tell whether or not this URI is absolute.
2290      *
2291      * @return true iif this URI is absoluteURI
2292      */
2293     public boolean isAbsoluteURI() {
2294         return (_scheme != null);
2295     }
2296   
2297 
2298     /***
2299      * Tell whether or not this URI is relative.
2300      *
2301      * @return true iif this URI is relativeURI
2302      */
2303     public boolean isRelativeURI() {
2304         return (_scheme == null);
2305     }
2306 
2307 
2308     /***
2309      * Tell whether or not the absoluteURI of this URI is hier_part.
2310      *
2311      * @return true iif the absoluteURI is hier_part
2312      */
2313     public boolean isHierPart() {
2314         return _is_hier_part;
2315     }
2316 
2317 
2318     /***
2319      * Tell whether or not the absoluteURI of this URI is opaque_part.
2320      *
2321      * @return true iif the absoluteURI is opaque_part
2322      */
2323     public boolean isOpaquePart() {
2324         return _is_opaque_part;
2325     }
2326 
2327 
2328     /***
2329      * Tell whether or not the relativeURI or heir_part of this URI is net_path.
2330      * It's the same function as the has_authority() method.
2331      *
2332      * @return true iif the relativeURI or heir_part is net_path
2333      * @see #hasAuthority
2334      */
2335     public boolean isNetPath() {
2336         return _is_net_path || (_authority != null);
2337     }
2338 
2339 
2340     /***
2341      * Tell whether or not the relativeURI or hier_part of this URI is abs_path.
2342      *
2343      * @return true iif the relativeURI or hier_part is abs_path
2344      */
2345     public boolean isAbsPath() {
2346         return _is_abs_path;
2347     }
2348 
2349 
2350     /***
2351      * Tell whether or not the relativeURI of this URI is rel_path.
2352      *
2353      * @return true iif the relativeURI is rel_path
2354      */
2355     public boolean isRelPath() {
2356         return _is_rel_path;
2357     }
2358 
2359 
2360     /***
2361      * Tell whether or not this URI has authority.
2362      * It's the same function as the is_net_path() method.
2363      *
2364      * @return true iif this URI has authority
2365      * @see #isNetPath
2366      */
2367     public boolean hasAuthority() {
2368         return (_authority != null) || _is_net_path;
2369     }
2370 
2371     /***
2372      * Tell whether or not the authority component of this URI is reg_name.
2373      *
2374      * @return true iif the authority component is reg_name
2375      */
2376     public boolean isRegName() {
2377         return _is_reg_name;
2378     }
2379   
2380 
2381     /***
2382      * Tell whether or not the authority component of this URI is server.
2383      *
2384      * @return true iif the authority component is server
2385      */
2386     public boolean isServer() {
2387         return _is_server;
2388     }
2389   
2390 
2391     /***
2392      * Tell whether or not this URI has userinfo.
2393      *
2394      * @return true iif this URI has userinfo
2395      */
2396     public boolean hasUserinfo() {
2397         return (_userinfo != null);
2398     }
2399   
2400 
2401     /***
2402      * Tell whether or not the host part of this URI is hostname.
2403      *
2404      * @return true iif the host part is hostname
2405      */
2406     public boolean isHostname() {
2407         return _is_hostname;
2408     }
2409 
2410 
2411     /***
2412      * Tell whether or not the host part of this URI is IPv4address.
2413      *
2414      * @return true iif the host part is IPv4address
2415      */
2416     public boolean isIPv4address() {
2417         return _is_IPv4address;
2418     }
2419 
2420 
2421     /***
2422      * Tell whether or not the host part of this URI is IPv6reference.
2423      *
2424      * @return true iif the host part is IPv6reference
2425      */
2426     public boolean isIPv6reference() {
2427         return _is_IPv6reference;
2428     }
2429 
2430 
2431     /***
2432      * Tell whether or not this URI has query.
2433      *
2434      * @return true iif this URI has query
2435      */
2436     public boolean hasQuery() {
2437         return (_query != null);
2438     }
2439    
2440 
2441     /***
2442      * Tell whether or not this URI has fragment.
2443      *
2444      * @return true iif this URI has fragment
2445      */
2446     public boolean hasFragment() {
2447         return (_fragment != null);
2448     }
2449    
2450    
2451     // ---------------------------------------------------------------- Charset
2452 
2453 
2454     /***
2455      * Set the default charset of the protocol.
2456      * <p>
2457      * The character set used to store files SHALL remain a local decision and
2458      * MAY depend on the capability of local operating systems. Prior to the
2459      * exchange of URIs they SHOULD be converted into a ISO/IEC 10646 format
2460      * and UTF-8 encoded. This approach, while allowing international exchange
2461      * of URIs, will still allow backward compatibility with older systems
2462      * because the code set positions for ASCII characters are identical to the
2463      * one byte sequence in UTF-8.
2464      * <p>
2465      * An individual URI scheme may require a single charset, define a default
2466      * charset, or provide a way to indicate the charset used.
2467      *
2468      * <p>
2469      * Always all the time, the setter method is always succeeded and throws
2470      * <code>DefaultCharsetChanged</code> exception.
2471      *
2472      * So API programmer must follow the following way:
2473      * <code><pre>
2474      *  import org.apache.util.URI$DefaultCharsetChanged;
2475      *      .
2476      *      .
2477      *      .
2478      *  try {
2479      *      URI.setDefaultProtocolCharset("UTF-8");
2480      *  } catch (DefaultCharsetChanged cc) {
2481      *      // CASE 1: the exception could be ignored, when it is set by user
2482      *      if (cc.getReasonCode() == DefaultCharsetChanged.PROTOCOL_CHARSET) {
2483      *      // CASE 2: let user know the default protocol charset changed
2484      *      } else {
2485      *      // CASE 2: let user know the default document charset changed
2486      *      }
2487      *  }
2488      *  </pre></code>
2489      *
2490      * The API programmer is responsible to set the correct charset.
2491      * And each application should remember its own charset to support.
2492      *
2493      * @param charset the default charset for each protocol
2494      * @throws DefaultCharsetChanged default charset changed
2495      */
2496     public static void setDefaultProtocolCharset(String charset) 
2497         throws DefaultCharsetChanged {
2498             
2499         defaultProtocolCharset = charset;
2500         throw new DefaultCharsetChanged(DefaultCharsetChanged.PROTOCOL_CHARSET,
2501                 "the default protocol charset changed");
2502     }
2503 
2504 
2505     /***
2506      * Get the default charset of the protocol.
2507      * <p>
2508      * An individual URI scheme may require a single charset, define a default
2509      * charset, or provide a way to indicate the charset used.
2510      * <p>
2511      * To work globally either requires support of a number of character sets
2512      * and to be able to convert between them, or the use of a single preferred
2513      * character set.
2514      * For support of global compatibility it is STRONGLY RECOMMENDED that
2515      * clients and servers use UTF-8 encoding when exchanging URIs.
2516      *
2517      * @return the default charset string
2518      */
2519     public static String getDefaultProtocolCharset() {
2520         return defaultProtocolCharset;
2521     }
2522 
2523 
2524     /***
2525      * Get the protocol charset used by this current URI instance.
2526      * It was set by the constructor for this instance. If it was not set by
2527      * contructor, it will return the default protocol charset.
2528      *
2529      * @return the protocol charset string
2530      * @see #getDefaultProtocolCharset
2531      */
2532     public String getProtocolCharset() {
2533         return (protocolCharset != null) 
2534             ? protocolCharset 
2535             : defaultProtocolCharset;
2536     }
2537 
2538 
2539     /***
2540      * Set the default charset of the document.
2541      * <p>
2542      * Notice that it will be possible to contain mixed characters (e.g.
2543      * ftp://host/KoreanNamespace/ChineseResource). To handle the Bi-directional
2544      * display of these character sets, the protocol charset could be simply
2545      * used again. Because it's not yet implemented that the insertion of BIDI
2546      * control characters at different points during composition is extracted.
2547      * <p>
2548      *
2549      * Always all the time, the setter method is always succeeded and throws
2550      * <code>DefaultCharsetChanged</code> exception.
2551      *
2552      * So API programmer must follow the following way:
2553      * <code><pre>
2554      *  import org.apache.util.URI$DefaultCharsetChanged;
2555      *      .
2556      *      .
2557      *      .
2558      *  try {
2559      *      URI.setDefaultDocumentCharset("EUC-KR");
2560      *  } catch (DefaultCharsetChanged cc) {
2561      *      // CASE 1: the exception could be ignored, when it is set by user
2562      *      if (cc.getReasonCode() == DefaultCharsetChanged.DOCUMENT_CHARSET) {
2563      *      // CASE 2: let user know the default document charset changed
2564      *      } else {
2565      *      // CASE 2: let user know the default protocol charset changed
2566      *      }
2567      *  }
2568      *  </pre></code>
2569      *
2570      * The API programmer is responsible to set the correct charset.
2571      * And each application should remember its own charset to support.
2572      *
2573      * @param charset the default charset for the document
2574      * @throws DefaultCharsetChanged default charset changed
2575      */
2576     public static void setDefaultDocumentCharset(String charset) 
2577         throws DefaultCharsetChanged {
2578             
2579         defaultDocumentCharset = charset;
2580         throw new DefaultCharsetChanged(DefaultCharsetChanged.DOCUMENT_CHARSET,
2581                 "the default document charset changed");
2582     }
2583 
2584 
2585     /***
2586      * Get the recommended default charset of the document.
2587      *
2588      * @return the default charset string
2589      */
2590     public static String getDefaultDocumentCharset() {
2591         return defaultDocumentCharset;
2592     }
2593 
2594 
2595     /***
2596      * Get the default charset of the document by locale.
2597      *
2598      * @return the default charset string by locale
2599      */
2600     public static String getDefaultDocumentCharsetByLocale() {
2601         return defaultDocumentCharsetByLocale;
2602     }
2603 
2604 
2605     /***
2606      * Get the default charset of the document by platform.
2607      *
2608      * @return the default charset string by platform
2609      */
2610     public static String getDefaultDocumentCharsetByPlatform() {
2611         return defaultDocumentCharsetByPlatform;
2612     }
2613 
2614     // ------------------------------------------------------------- The scheme
2615 
2616     /***
2617      * Get the scheme.
2618      *
2619      * @return the scheme
2620      */
2621     public char[] getRawScheme() {
2622         return _scheme;
2623     }
2624 
2625 
2626     /***
2627      * Get the scheme.
2628      *
2629      * @return the scheme
2630      * null if undefined scheme
2631      */
2632     public String getScheme() {
2633         return (_scheme == null) ? null : new String(_scheme);
2634     }
2635 
2636     // ---------------------------------------------------------- The authority
2637 
2638     /***
2639      * Set the authority.  It can be one type of server, hostport, hostname,
2640      * IPv4address, IPv6reference and reg_name.
2641      * <p><blockquote><pre>
2642      *   authority     = server | reg_name
2643      * </pre></blockquote><p>
2644      *
2645      * @param escapedAuthority the raw escaped authority
2646      * @throws URIException If {@link 
2647      * #parseAuthority(java.lang.String,boolean)} fails
2648      * @throws NullPointerException null authority
2649      */
2650     public void setRawAuthority(char[] escapedAuthority) 
2651         throws URIException, NullPointerException {
2652             
2653         parseAuthority(new String(escapedAuthority), true);
2654         setURI();
2655     }
2656 
2657 
2658     /***
2659      * Set the authority.  It can be one type of server, hostport, hostname,
2660      * IPv4address, IPv6reference and reg_name.
2661      * Note that there is no setAuthority method by the escape encoding reason.
2662      *
2663      * @param escapedAuthority the escaped authority string
2664      * @throws URIException If {@link 
2665      * #parseAuthority(java.lang.String,boolean)} fails
2666      */
2667     public void setEscapedAuthority(String escapedAuthority)
2668         throws URIException {
2669 
2670         parseAuthority(escapedAuthority, true);
2671         setURI();
2672     }
2673 
2674 
2675     /***
2676      * Get the raw-escaped authority.
2677      *
2678      * @return the raw-escaped authority
2679      */
2680     public char[] getRawAuthority() {
2681         return _authority;
2682     }
2683 
2684 
2685     /***
2686      * Get the escaped authority.
2687      *
2688      * @return the escaped authority
2689      */
2690     public String getEscapedAuthority() {
2691         return (_authority == null) ? null : new String(_authority);
2692     }
2693 
2694 
2695     /***
2696      * Get the authority.
2697      *
2698      * @return the authority
2699      * @throws URIException If {@link #decode} fails
2700      */
2701     public String getAuthority() throws URIException {
2702         return (_authority == null) ? null : decode(_authority,
2703                 getProtocolCharset());
2704     }
2705 
2706     // ----------------------------------------------------------- The userinfo
2707 
2708     /***
2709      * Get the raw-escaped userinfo.
2710      *
2711      * @return the raw-escaped userinfo
2712      * @see #getAuthority
2713      */
2714     public char[] getRawUserinfo() {
2715         return _userinfo;
2716     }
2717 
2718 
2719     /***
2720      * Get the escaped userinfo.
2721      *
2722      * @return the escaped userinfo
2723      * @see #getAuthority
2724      */
2725     public String getEscapedUserinfo() {
2726         return (_userinfo == null) ? null : new String(_userinfo);
2727     }
2728 
2729 
2730     /***
2731      * Get the userinfo.
2732      *
2733      * @return the userinfo
2734      * @throws URIException If {@link #decode} fails
2735      * @see #getAuthority
2736      */
2737     public String getUserinfo() throws URIException {
2738         return (_userinfo == null) ? null : decode(_userinfo,
2739                 getProtocolCharset());
2740     }
2741 
2742     // --------------------------------------------------------------- The host
2743 
2744     /***
2745      * Get the host.
2746      * <p><blockquote><pre>
2747      *   host          = hostname | IPv4address | IPv6reference
2748      * </pre></blockquote><p>
2749      *
2750      * @return the host
2751      * @see #getAuthority
2752      */
2753     public char[] getRawHost() {
2754         return _host;
2755     }
2756 
2757 
2758     /***
2759      * Get the host.
2760      * <p><blockquote><pre>
2761      *   host          = hostname | IPv4address | IPv6reference
2762      * </pre></blockquote><p>
2763      *
2764      * @return the host
2765      * @throws URIException If {@link #decode} fails
2766      * @see #getAuthority
2767      */
2768     public String getHost() throws URIException {
2769         if (_host != null) {
2770             return decode(_host, getProtocolCharset());
2771         } else {
2772             return null;
2773         }
2774     }
2775 
2776     // --------------------------------------------------------------- The port
2777 
2778     /***
2779      * Get the port.  In order to get the specfic default port, the specific
2780      * protocol-supported class extended from the URI class should be used.
2781      * It has the server-based naming authority.
2782      *
2783      * @return the port
2784      * if -1, it has the default port for the scheme or the server-based
2785      * naming authority is not supported in the specific URI.
2786      */
2787     public int getPort() {
2788         return _port;
2789     }
2790 
2791     // --------------------------------------------------------------- The path
2792 
2793     /***
2794      * Set the raw-escaped path.
2795      *
2796      * @param escapedPath the path character sequence
2797      * @throws URIException encoding error or not proper for initial instance
2798      * @see #encode
2799      */
2800     public void setRawPath(char[] escapedPath) throws URIException {
2801         if (escapedPath == null || escapedPath.length == 0) {
2802             _path = _opaque = escapedPath;
2803             setURI();
2804             return;
2805         }
2806         // remove the fragment identifier
2807         escapedPath = removeFragmentIdentifier(escapedPath);
2808         if (_is_net_path || _is_abs_path) {
2809             if (escapedPath[0] != '/') {
2810                 throw new URIException(URIException.PARSING,
2811                         "not absolute path");
2812             }
2813             if (!validate(escapedPath, abs_path)) {
2814                 throw new URIException(URIException.ESCAPING,
2815                         "escaped absolute path not valid");
2816             }
2817             _path = escapedPath;
2818         } else if (_is_rel_path) {
2819             int at = indexFirstOf(escapedPath, '/');
2820             if (at == 0) {
2821                 throw new URIException(URIException.PARSING, "incorrect path");
2822             }
2823             if (at > 0 && !validate(escapedPath, 0, at - 1, rel_segment) 
2824                 && !validate(escapedPath, at, -1, abs_path) 
2825                 || at < 0 && !validate(escapedPath, 0, -1, rel_segment)) {
2826             
2827                 throw new URIException(URIException.ESCAPING,
2828                         "escaped relative path not valid");
2829             }
2830             _path = escapedPath;
2831         } else if (_is_opaque_part) {
2832             if (!uric_no_slash.get(escapedPath[0]) 
2833                 && !validate(escapedPath, 1, -1, uric)) {
2834                 throw new URIException(URIException.ESCAPING,
2835                     "escaped opaque part not valid");
2836             }
2837             _opaque = escapedPath;
2838         } else {
2839             throw new URIException(URIException.PARSING, "incorrect path");
2840         }
2841         setURI();
2842     }
2843 
2844 
2845     /***
2846      * Set the escaped path.
2847      *
2848      * @param escapedPath the escaped path string
2849      * @throws URIException encoding error or not proper for initial instance
2850      * @see #encode
2851      */
2852     public void setEscapedPath(String escapedPath) throws URIException {
2853         if (escapedPath == null) {
2854             _path = _opaque = null;
2855             setURI();
2856             return;
2857         }
2858         setRawPath(escapedPath.toCharArray());
2859     }
2860 
2861 
2862     /***
2863      * Set the path.
2864      *
2865      * @param path the path string
2866      * @throws URIException set incorrectly or fragment only
2867      * @see #encode
2868      */
2869     public void setPath(String path) throws URIException {
2870 
2871         if (path == null || path.length() == 0) {
2872             _path = _opaque = (path == null) ? null : path.toCharArray();
2873             setURI();
2874             return;
2875         }
2876         // set the charset to do escape encoding
2877         String charset = getProtocolCharset();
2878 
2879         if (_is_net_path || _is_abs_path) {
2880             _path = encode(path, allowed_abs_path, charset);
2881         } else if (_is_rel_path) {
2882             StringBuffer buff = new StringBuffer(path.length());
2883             int at = path.indexOf('/');
2884             if (at == 0) { // never 0
2885                 throw new URIException(URIException.PARSING,
2886                         "incorrect relative path");
2887             }
2888             if (at > 0) {
2889                 buff.append(encode(path.substring(0, at), allowed_rel_path,
2890                             charset));
2891                 buff.append(encode(path.substring(at), allowed_abs_path,
2892                             charset));
2893             } else {
2894                 buff.append(encode(path, allowed_rel_path, charset));
2895             }
2896             _path = buff.toString().toCharArray();
2897         } else if (_is_opaque_part) {
2898             StringBuffer buf = new StringBuffer();
2899             buf.insert(0, encode(path.substring(0, 1), uric_no_slash, charset));
2900             buf.insert(1, encode(path.substring(1), uric, charset));
2901             _opaque = buf.toString().toCharArray();
2902         } else {
2903             throw new URIException(URIException.PARSING, "incorrect path");
2904         }
2905         setURI();
2906     }
2907 
2908 
2909     /***
2910      * Resolve the base and relative path.
2911      *
2912      * @param basePath a character array of the basePath
2913      * @param relPath a character array of the relPath
2914      * @return the resolved path
2915      * @throws URIException no more higher path level to be resolved
2916      */
2917     protected char[] resolvePath(char[] basePath, char[] relPath)
2918         throws URIException {
2919 
2920         // REMINDME: paths are never null
2921         String base = (basePath == null) ? "" : new String(basePath);
2922         int at = base.lastIndexOf('/');
2923         if (at != -1) {
2924             basePath = base.substring(0, at + 1).toCharArray();
2925         }
2926         // _path could be empty
2927         if (relPath == null || relPath.length == 0) {
2928             return normalize(basePath);
2929         } else if (relPath[0] == '/') {
2930             return normalize(relPath);
2931         } else {
2932             StringBuffer buff = new StringBuffer(base.length() 
2933                 + relPath.length);
2934             buff.append((at != -1) ? base.substring(0, at + 1) : "/");
2935             buff.append(relPath);
2936             return normalize(buff.toString().toCharArray());
2937         }
2938     }
2939 
2940 
2941     /***
2942      * Get the raw-escaped current hierarchy level in the given path.
2943      * If the last namespace is a collection, the slash mark ('/') should be
2944      * ended with at the last character of the path string.
2945      *
2946      * @param path the path
2947      * @return the current hierarchy level
2948      * @throws URIException no hierarchy level
2949      */
2950     protected char[] getRawCurrentHierPath(char[] path) throws URIException {
2951 
2952         if (_is_opaque_part) {
2953             throw new URIException(URIException.PARSING, "no hierarchy level");
2954         }
2955         if (path == null) {
2956             throw new URIException(URIException.PARSING, "empty path");
2957         }
2958         String buff = new String(path);
2959         int first = buff.indexOf('/');
2960         int last = buff.lastIndexOf('/');
2961         if (last == 0) {
2962             return rootPath;
2963         } else if (first != last && last != -1) {
2964             return buff.substring(0, last).toCharArray();
2965         }
2966         // FIXME: it could be a document on the server side
2967         return path;
2968     }
2969 
2970 
2971     /***
2972      * Get the raw-escaped current hierarchy level.
2973      *
2974      * @return the raw-escaped current hierarchy level
2975      * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
2976      */
2977     public char[] getRawCurrentHierPath() throws URIException {
2978         return (_path == null) ? null : getRawCurrentHierPath(_path);
2979     }
2980  
2981 
2982     /***
2983      * Get the escaped current hierarchy level.
2984      *
2985      * @return the escaped current hierarchy level
2986      * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
2987      */
2988     public String getEscapedCurrentHierPath() throws URIException {
2989         char[] path = getRawCurrentHierPath();
2990         return (path == null) ? null : new String(path);
2991     }
2992  
2993 
2994     /***
2995      * Get the current hierarchy level.
2996      *
2997      * @return the current hierarchy level
2998      * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
2999      * @see #decode
3000      */
3001     public String getCurrentHierPath() throws URIException {
3002         char[] path = getRawCurrentHierPath();
3003         return (path == null) ? null : decode(path, getProtocolCharset());
3004     }
3005 
3006 
3007     /***
3008      * Get the level above the this hierarchy level.
3009      *
3010      * @return the raw above hierarchy level
3011      * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
3012      */
3013     public char[] getRawAboveHierPath() throws URIException {
3014         char[] path = getRawCurrentHierPath();
3015         return (path == null) ? null : getRawCurrentHierPath(path);
3016     }
3017 
3018 
3019     /***
3020      * Get the level above the this hierarchy level.
3021      *
3022      * @return the raw above hierarchy level
3023      * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
3024      */
3025     public String getEscapedAboveHierPath() throws URIException {
3026         char[] path = getRawAboveHierPath();
3027         return (path == null) ? null : new String(path);
3028     }
3029 
3030 
3031     /***
3032      * Get the level above the this hierarchy level.
3033      *
3034      * @return the above hierarchy level
3035      * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
3036      * @see #decode
3037      */
3038     public String getAboveHierPath() throws URIException {
3039         char[] path = getRawAboveHierPath();
3040         return (path == null) ? null : decode(path, getProtocolCharset());
3041     }
3042 
3043 
3044     /***
3045      * Get the raw-escaped path.
3046      * <p><blockquote><pre>
3047      *   path          = [ abs_path | opaque_part ]
3048      * </pre></blockquote><p>
3049      *
3050      * @return the raw-escaped path
3051      */
3052     public char[] getRawPath() {
3053         return _is_opaque_part ? _opaque : _path;
3054     }
3055 
3056 
3057     /***
3058      * Get the escaped path.
3059      * <p><blockquote><pre>
3060      *   path          = [ abs_path | opaque_part ]
3061      *   abs_path      = "/"  path_segments 
3062      *   opaque_part   = uric_no_slash *uric
3063      * </pre></blockquote><p>
3064      *
3065      * @return the escaped path string
3066      */
3067     public String getEscapedPath() {
3068         char[] path = getRawPath();
3069         return (path == null) ? null : new String(path);
3070     }
3071 
3072 
3073     /***
3074      * Get the path.
3075      * <p><blockquote><pre>
3076      *   path          = [ abs_path | opaque_part ]
3077      * </pre></blockquote><p>
3078      * @return the path string
3079      * @throws URIException If {@link #decode} fails.
3080      * @see #decode
3081      */
3082     public String getPath() throws URIException { 
3083         char[] path =  getRawPath();
3084         return (path == null) ? null : decode(path, getProtocolCharset());
3085     }
3086 
3087 
3088     /***
3089      * Get the raw-escaped basename of the path.
3090      *
3091      * @return the raw-escaped basename
3092      */
3093     public char[] getRawName() {
3094         if (_path == null) { 
3095             return null;
3096         }
3097 
3098         int at = 0;
3099         for (int i = _path.length - 1; i >= 0; i--) {
3100             if (_path[i] == '/') {
3101                 at = i + 1;
3102                 break;
3103             }
3104         }
3105         int len = _path.length - at;
3106         char[] basename =  new char[len];
3107         System.arraycopy(_path, at, basename, 0, len);
3108         return basename;
3109     }
3110 
3111 
3112     /***
3113      * Get the escaped basename of the path.
3114      *
3115      * @return the escaped basename string
3116      */
3117     public String getEscapedName() {
3118         char[] basename = getRawName();
3119         return (basename == null) ? null : new String(basename);
3120     }
3121 
3122 
3123     /***
3124      * Get the basename of the path.
3125      *
3126      * @return the basename string
3127      * @throws URIException incomplete trailing escape pattern or unsupported
3128      * character encoding
3129      * @see #decode
3130      */
3131     public String getName() throws URIException {
3132         char[] basename = getRawName();
3133         return (basename == null) ? null : decode(getRawName(),
3134                 getProtocolCharset());
3135     }
3136 
3137     // ----------------------------------------------------- The path and query 
3138 
3139     /***
3140      * Get the raw-escaped path and query.
3141      *
3142      * @return the raw-escaped path and query
3143      */
3144     public char[] getRawPathQuery() {
3145 
3146         if (_path == null && _query == null) {
3147             return null;
3148         }
3149         StringBuffer buff = new StringBuffer();
3150         if (_path != null) {
3151             buff.append(_path);
3152         }
3153         if (_query != null) {
3154             buff.append('?');
3155             buff.append(_query);
3156         }
3157         return buff.toString().toCharArray();
3158     }
3159 
3160 
3161     /***
3162      * Get the escaped query.
3163      *
3164      * @return the escaped path and query string
3165      */
3166     public String getEscapedPathQuery() {
3167         char[] rawPathQuery = getRawPathQuery();
3168         return (rawPathQuery == null) ? null : new String(rawPathQuery);
3169     }
3170 
3171 
3172     /***
3173      * Get the path and query.
3174      *
3175      * @return the path and query string.
3176      * @throws URIException incomplete trailing escape pattern or unsupported
3177      * character encoding
3178      * @see #decode
3179      */
3180     public String getPathQuery() throws URIException {
3181         char[] rawPathQuery = getRawPathQuery();
3182         return (rawPathQuery == null) ? null : decode(rawPathQuery,
3183                 getProtocolCharset());
3184     }
3185 
3186     // -------------------------------------------------------------- The query 
3187 
3188     /***
3189      * Set the raw-escaped query.
3190      *
3191      * @param escapedQuery the raw-escaped query
3192      * @throws URIException escaped query not valid
3193      */
3194     public void setRawQuery(char[] escapedQuery) throws URIException {
3195         if (escapedQuery == null || escapedQuery.length == 0) {
3196             _query = escapedQuery;
3197             setURI();
3198             return;
3199         }
3200         // remove the fragment identifier
3201         escapedQuery = removeFragmentIdentifier(escapedQuery);
3202         if (!validate(escapedQuery, query)) {
3203             throw new URIException(URIException.ESCAPING,
3204                     "escaped query not valid");
3205         }
3206         _query = escapedQuery;
3207         setURI();
3208     }
3209 
3210 
3211     /***
3212      * Set the escaped query string.
3213      *
3214      * @param escapedQuery the escaped query string
3215      * @throws URIException escaped query not valid
3216      */
3217     public void setEscapedQuery(String escapedQuery) throws URIException {
3218         if (escapedQuery == null) {
3219             _query = null;
3220             setURI();
3221             return;
3222         }
3223         setRawQuery(escapedQuery.toCharArray());
3224     }
3225 
3226 
3227     /***
3228      * Set the query.
3229      * <p>
3230      * When a query string is not misunderstood the reserved special characters
3231      * ("&amp;", "=", "+", ",", and "$") within a query component, it is
3232      * recommended to use in encoding the whole query with this method.
3233      * <p>
3234      * The additional APIs for the special purpose using by the reserved
3235      * special characters used in each protocol are implemented in each protocol
3236      * classes inherited from <code>URI</code>.  So refer to the same-named APIs
3237      * implemented in each specific protocol instance.
3238      *
3239      * @param query the query string.
3240      * @throws URIException incomplete trailing escape pattern or unsupported
3241      * character encoding
3242      * @see #encode
3243      */
3244     public void setQuery(String query) throws URIException {
3245         if (query == null || query.length() == 0) {
3246             _query = (query == null) ? null : query.toCharArray();
3247             setURI();
3248             return;
3249         }
3250         setRawQuery(encode(query, allowed_query, getProtocolCharset()));
3251     }
3252 
3253 
3254     /***
3255      * Get the raw-escaped query.
3256      *
3257      * @return the raw-escaped query
3258      */
3259     public char[] getRawQuery() {
3260         return _query;
3261     }
3262 
3263 
3264     /***
3265      * Get the escaped query.
3266      *
3267      * @return the escaped query string
3268      */
3269     public String getEscapedQuery() {
3270         return (_query == null) ? null : new String(_query);
3271     }
3272 
3273 
3274     /***
3275      * Get the query.
3276      *
3277      * @return the query string.
3278      * @throws URIException incomplete trailing escape pattern or unsupported
3279      * character encoding
3280      * @see #decode
3281      */
3282     public String getQuery() throws URIException {
3283         return (_query == null) ? null : decode(_query, getProtocolCharset());
3284     }
3285 
3286     // ----------------------------------------------------------- The fragment 
3287 
3288     /***
3289      * Set the raw-escaped fragment.
3290      *
3291      * @param escapedFragment the raw-escaped fragment
3292      * @throws URIException escaped fragment not valid
3293      */
3294     public void setRawFragment(char[] escapedFragment) throws URIException {
3295         if (escapedFragment == null || escapedFragment.length == 0) {
3296             _fragment = escapedFragment;
3297             hash = 0;
3298             return;
3299         }
3300         if (!validate(escapedFragment, fragment)) {
3301             throw new URIException(URIException.ESCAPING,
3302                     "escaped fragment not valid");
3303         }
3304         _fragment = escapedFragment;
3305         hash = 0;
3306     }
3307 
3308 
3309     /***
3310      * Set the escaped fragment string.
3311      *
3312      * @param escapedFragment the escaped fragment string
3313      * @throws URIException escaped fragment not valid
3314      */
3315     public void setEscapedFragment(String escapedFragment) throws URIException {
3316         if (escapedFragment == null) {
3317             _fragment = null;
3318             hash = 0;
3319             return;
3320         }
3321         setRawFragment(escapedFragment.toCharArray());
3322     }
3323 
3324 
3325     /***
3326      * Set the fragment.
3327      *
3328      * @param fragment the fragment string.
3329      * @throws URIException If an error occurs.
3330      */
3331     public void setFragment(String fragment) throws URIException {
3332         if (fragment == null || fragment.length() == 0) {
3333             _fragment = (fragment == null) ? null : fragment.toCharArray();
3334             hash = 0;
3335             return;
3336         }
3337         _fragment = encode(fragment, allowed_fragment, getProtocolCharset());
3338         hash = 0;
3339     }
3340 
3341 
3342     /***
3343      * Get the raw-escaped fragment.
3344      * <p>
3345      * The optional fragment identifier is not part of a URI, but is often used
3346      * in conjunction with a URI.
3347      * <p>
3348      * The format and interpretation of fragment identifiers is dependent on
3349      * the media type [RFC2046] of the retrieval result.
3350      * <p>
3351      * A fragment identifier is only meaningful when a URI reference is
3352      * intended for retrieval and the result of that retrieval is a document
3353      * for which the identified fragment is consistently defined.
3354      *
3355      * @return the raw-escaped fragment
3356      */
3357     public char[] getRawFragment() {
3358         return _fragment;
3359     }
3360 
3361 
3362     /***
3363      * Get the escaped fragment.
3364      *
3365      * @return the escaped fragment string
3366      */
3367     public String getEscapedFragment() {
3368         return (_fragment == null) ? null : new String(_fragment);
3369     }
3370 
3371 
3372     /***
3373      * Get the fragment.
3374      *
3375      * @return the fragment string
3376      * @throws URIException incomplete trailing escape pattern or unsupported
3377      * character encoding
3378      * @see #decode
3379      */
3380     public String getFragment() throws URIException {
3381         return (_fragment == null) ? null : decode(_fragment,
3382                 getProtocolCharset());
3383     }
3384 
3385     // ------------------------------------------------------------- Utilities 
3386 
3387     /***
3388      * Remove the fragment identifier of the given component.
3389      *
3390      * @param component the component that a fragment may be included
3391      * @return the component that the fragment identifier is removed
3392      */
3393     protected char[] removeFragmentIdentifier(char[] component) {
3394         if (component == null) { 
3395             return null;
3396         }
3397         int lastIndex = new String(component).indexOf('#');
3398         if (lastIndex != -1) {
3399             component = new String(component).substring(0,
3400                     lastIndex).toCharArray();
3401         }
3402         return component;
3403     }
3404 
3405 
3406     /***
3407      * Normalize the given hier path part.
3408      * 
3409      * <p>Algorithm taken from URI reference parser at 
3410      * http://www.apache.org/~fielding/uri/rev-2002/issues.html.
3411      *
3412      * @param path the path to normalize
3413      * @return the normalized path
3414      * @throws URIException no more higher path level to be normalized
3415      */
3416     protected char[] normalize(char[] path) throws URIException {
3417 
3418         if (path == null) { 
3419             return null;
3420         }
3421 
3422         String normalized = new String(path);
3423 
3424         // If the buffer begins with "./" or "../", the "." or ".." is removed.
3425         if (normalized.startsWith("./")) {
3426             normalized = normalized.substring(1);
3427         } else if (normalized.startsWith("../")) {
3428             normalized = normalized.substring(2);
3429         } else if (normalized.startsWith("..")) {
3430             normalized = normalized.substring(2);
3431         }
3432 
3433         // All occurrences of "/./" in the buffer are replaced with "/"
3434         int index = -1;
3435         while ((index = normalized.indexOf("/./")) != -1) {
3436             normalized = normalized.substring(0, index) + normalized.substring(index + 2);
3437         }
3438 
3439         // If the buffer ends with "/.", the "." is removed.
3440         if (normalized.endsWith("/.")) {
3441             normalized = normalized.substring(0, normalized.length() - 1);
3442         }
3443 
3444         int startIndex = 0;
3445 
3446         // All occurrences of "/<segment>/../" in the buffer, where ".."
3447         // and <segment> are complete path segments, are iteratively replaced
3448         // with "/" in order from left to right until no matching pattern remains.
3449         // If the buffer ends with "/<segment>/..", that is also replaced
3450         // with "/".  Note that <segment> may be empty.
3451         while ((index = normalized.indexOf("/../", startIndex)) != -1) {
3452             int slashIndex = normalized.lastIndexOf('/', index - 1);
3453             if (slashIndex >= 0) {
3454                 normalized = normalized.substring(0, slashIndex) + normalized.substring(index + 3);
3455             } else {
3456                 startIndex = index + 3;   
3457             }
3458         }
3459         if (normalized.endsWith("/..")) {
3460             int slashIndex = normalized.lastIndexOf('/', normalized.length() - 4);
3461             if (slashIndex >= 0) {
3462                 normalized = normalized.substring(0, slashIndex + 1);
3463             }
3464         }
3465 
3466         // All prefixes of "<segment>/../" in the buffer, where ".."
3467         // and <segment> are complete path segments, are iteratively replaced
3468         // with "/" in order from left to right until no matching pattern remains.
3469         // If the buffer ends with "<segment>/..", that is also replaced
3470         // with "/".  Note that <segment> may be empty.
3471         while ((index = normalized.indexOf("/../")) != -1) {
3472             int slashIndex = normalized.lastIndexOf('/', index - 1);
3473             if (slashIndex >= 0) {
3474                 break;
3475             } else {
3476                 normalized = normalized.substring(index + 3);
3477             }
3478         }
3479         if (normalized.endsWith("/..")) {
3480             int slashIndex = normalized.lastIndexOf('/', normalized.length() - 4);
3481             if (slashIndex < 0) {
3482                 normalized = "/";
3483             }
3484         }
3485 
3486         return normalized.toCharArray();
3487     }
3488 
3489 
3490     /***
3491      * Normalizes the path part of this URI.  Normalization is only meant to be performed on 
3492      * URIs with an absolute path.  Calling this method on a relative path URI will have no
3493      * effect.
3494      *
3495      * @throws URIException no more higher path level to be normalized
3496      * 
3497      * @see #isAbsPath()
3498      */
3499     public void normalize() throws URIException {
3500         if (isAbsPath()) {
3501             _path = normalize(_path);
3502             setURI();
3503         }
3504     }
3505 
3506 
3507     /***
3508      * Test if the first array is equal to the second array.
3509      *
3510      * @param first the first character array
3511      * @param second the second character array
3512      * @return true if they're equal
3513      */
3514     protected boolean equals(char[] first, char[] second) {
3515 
3516         if (first == null && second == null) {
3517             return true;
3518         }
3519         if (first == null || second == null) {
3520             return false;
3521         }
3522         if (first.length != second.length) {
3523             return false;
3524         }
3525         for (int i = 0; i < first.length; i++) {
3526             if (first[i] != second[i]) {
3527                 return false;
3528             }
3529         }
3530         return true;
3531     }
3532 
3533 
3534     /***
3535      * Test an object if this URI is equal to another.
3536      *
3537      * @param obj an object to compare
3538      * @return true if two URI objects are equal
3539      */
3540     public boolean equals(Object obj) {
3541 
3542         // normalize and test each components
3543         if (obj == this) {
3544             return true;
3545         }
3546         if (!(obj instanceof URI)) {
3547             return false;
3548         }
3549         URI another = (URI) obj;
3550         // scheme
3551         if (!equals(_scheme, another._scheme)) {
3552             return false;
3553         }
3554         // is_opaque_part or is_hier_part?  and opaque
3555         if (!equals(_opaque, another._opaque)) {
3556             return false;
3557         }
3558         // is_hier_part
3559         // has_authority
3560         if (!equals(_authority, another._authority)) {
3561             return false;
3562         }
3563         // path
3564         if (!equals(_path, another._path)) {
3565             return false;
3566         }
3567         // has_query
3568         if (!equals(_query, another._query)) {
3569             return false;
3570         }
3571         // has_fragment?  should be careful of the only fragment case.
3572         if (!equals(_fragment, another._fragment)) {
3573             return false;
3574         }
3575         return true;
3576     }
3577 
3578     // ---------------------------------------------------------- Serialization
3579 
3580     /***
3581      * Write the content of this URI.
3582      *
3583      * @param oos the object-output stream
3584      * @throws IOException If an IO problem occurs.
3585      */
3586     protected void writeObject(ObjectOutputStream oos)
3587         throws IOException {
3588 
3589         oos.defaultWriteObject();
3590     }
3591 
3592 
3593     /***
3594      * Read a URI.
3595      *
3596      * @param ois the object-input stream
3597      * @throws ClassNotFoundException If one of the classes specified in the
3598      * input stream cannot be found.
3599      * @throws IOException If an IO problem occurs.
3600      */
3601     protected void readObject(ObjectInputStream ois)
3602         throws ClassNotFoundException, IOException {
3603 
3604         ois.defaultReadObject();
3605     }
3606 
3607     // -------------------------------------------------------------- Hash code
3608 
3609     /***
3610      * Return a hash code for this URI.
3611      *
3612      * @return a has code value for this URI
3613      */
3614     public int hashCode() {
3615         if (hash == 0) {
3616             char[] c = _uri;
3617             if (c != null) {
3618                 for (int i = 0, len = c.length; i < len; i++) {
3619                     hash = 31 * hash + c[i];
3620                 }
3621             }
3622             c = _fragment;
3623             if (c != null) {
3624                 for (int i = 0, len = c.length; i < len; i++) {
3625                     hash = 31 * hash + c[i];
3626                 }
3627             }
3628         }
3629         return hash;
3630     }
3631 
3632     // ------------------------------------------------------------- Comparison 
3633 
3634     /***
3635      * Compare this URI to another object. 
3636      *
3637      * @param obj the object to be compared.
3638      * @return 0, if it's same,
3639      * -1, if failed, first being compared with in the authority component
3640      * @throws ClassCastException not URI argument
3641      */
3642     public int compareTo(Object obj) throws ClassCastException {
3643 
3644         URI another = (URI) obj;
3645         if (!equals(_authority, another.getRawAuthority())) { 
3646             return -1;
3647         }
3648         return toString().compareTo(another.toString());
3649     }
3650 
3651     // ------------------------------------------------------------------ Clone
3652 
3653     /***
3654      * Create and return a copy of this object, the URI-reference containing
3655      * the userinfo component.  Notice that the whole URI-reference including
3656      * the userinfo component counld not be gotten as a <code>String</code>.
3657      * <p>
3658      * To copy the identical <code>URI</code> object including the userinfo
3659      * component, it should be used.
3660      *
3661      * @return a clone of this instance
3662      */
3663     public synchronized Object clone() {
3664 
3665         URI instance = new URI();
3666 
3667         instance._uri = _uri;
3668         instance._scheme = _scheme;
3669         instance._opaque = _opaque;
3670         instance._authority = _authority;
3671         instance._userinfo = _userinfo;
3672         instance._host = _host;
3673         instance._port = _port;
3674         instance._path = _path;
3675         instance._query = _query;
3676         instance._fragment = _fragment;
3677         // the charset to do escape encoding for this instance
3678         instance.protocolCharset = protocolCharset;
3679         // flags
3680         instance._is_hier_part = _is_hier_part;
3681         instance._is_opaque_part = _is_opaque_part;
3682         instance._is_net_path = _is_net_path;
3683         instance._is_abs_path = _is_abs_path;
3684         instance._is_rel_path = _is_rel_path;
3685         instance._is_reg_name = _is_reg_name;
3686         instance._is_server = _is_server;
3687         instance._is_hostname = _is_hostname;
3688         instance._is_IPv4address = _is_IPv4address;
3689         instance._is_IPv6reference = _is_IPv6reference;
3690 
3691         return instance;
3692     }
3693 
3694     // ------------------------------------------------------------ Get the URI
3695 
3696     /***
3697      * It can be gotten the URI character sequence. It's raw-escaped.
3698      * For the purpose of the protocol to be transported, it will be useful.
3699      * <p>
3700      * It is clearly unwise to use a URL that contains a password which is
3701      * intended to be secret. In particular, the use of a password within
3702      * the 'userinfo' component of a URL is strongly disrecommended except
3703      * in those rare cases where the 'password' parameter is intended to be
3704      * public.
3705      * <p>
3706      * When you want to get each part of the userinfo, you need to use the
3707      * specific methods in the specific URL. It depends on the specific URL.
3708      *
3709      * @return the URI character sequence
3710      */
3711     public char[] getRawURI() {
3712         return _uri;
3713     }
3714 
3715 
3716     /***
3717      * It can be gotten the URI character sequence. It's escaped.
3718      * For the purpose of the protocol to be transported, it will be useful.
3719      *
3720      * @return the escaped URI string
3721      */
3722     public String getEscapedURI() {
3723         return (_uri == null) ? null : new String(_uri);
3724     }
3725     
3726 
3727     /***
3728      * It can be gotten the URI character sequence.
3729      *
3730      * @return the original URI string
3731      * @throws URIException incomplete trailing escape pattern or unsupported
3732      * character encoding
3733      * @see #decode
3734      */
3735     public String getURI() throws URIException {
3736         return (_uri == null) ? null : decode(_uri, getProtocolCharset());
3737     }
3738 
3739 
3740     /***
3741      * Get the URI reference character sequence.
3742      *
3743      * @return the URI reference character sequence
3744      */
3745     public char[] getRawURIReference() {
3746         if (_fragment == null) { 
3747             return _uri;
3748         }
3749         if (_uri == null) { 
3750             return _fragment;
3751         }
3752         // if _uri != null &&  _fragment != null
3753         String uriReference = new String(_uri) + "#" + new String(_fragment);
3754         return uriReference.toCharArray();
3755     }
3756 
3757 
3758     /***
3759      * Get the escaped URI reference string.
3760      *
3761      * @return the escaped URI reference string
3762      */
3763     public String getEscapedURIReference() {
3764         char[] uriReference = getRawURIReference();
3765         return (uriReference == null) ? null : new String(uriReference);
3766     }
3767 
3768 
3769     /***
3770      * Get the original URI reference string.
3771      *
3772      * @return the original URI reference string
3773      * @throws URIException If {@link #decode} fails.
3774      */
3775     public String getURIReference() throws URIException {
3776         char[] uriReference = getRawURIReference();
3777         return (uriReference == null) ? null : decode(uriReference,
3778                 getProtocolCharset());
3779     }
3780 
3781 
3782     /***
3783      * Get the escaped URI string.
3784      * <p>
3785      * On the document, the URI-reference form is only used without the userinfo
3786      * component like http://jakarta.apache.org/ by the security reason.
3787      * But the URI-reference form with the userinfo component could be parsed.
3788      * <p>
3789      * In other words, this URI and any its subclasses must not expose the
3790      * URI-reference expression with the userinfo component like
3791      * http://user:password@hostport/restricted_zone.<br>
3792      * It means that the API client programmer should extract each user and
3793      * password to access manually.  Probably it will be supported in the each
3794      * subclass, however, not a whole URI-reference expression.
3795      *
3796      * @return the escaped URI string
3797      * @see #clone()
3798      */
3799     public String toString() {
3800         return getEscapedURI();
3801     }
3802 
3803 
3804     // ------------------------------------------------------------ Inner class
3805 
3806     /*** 
3807      * The charset-changed normal operation to represent to be required to
3808      * alert to user the fact the default charset is changed.
3809      */
3810     public static class DefaultCharsetChanged extends RuntimeException {
3811 
3812         // ------------------------------------------------------- constructors
3813 
3814         /***
3815          * The constructor with a reason string and its code arguments.
3816          *
3817          * @param reasonCode the reason code
3818          * @param reason the reason
3819          */
3820         public DefaultCharsetChanged(int reasonCode, String reason) {
3821             super(reason);
3822             this.reason = reason;
3823             this.reasonCode = reasonCode;
3824         }
3825 
3826         // ---------------------------------------------------------- constants
3827 
3828         /*** No specified reason code. */
3829         public static final int UNKNOWN = 0;
3830 
3831         /*** Protocol charset changed. */
3832         public static final int PROTOCOL_CHARSET = 1;
3833 
3834         /*** Document charset changed. */
3835         public static final int DOCUMENT_CHARSET = 2;
3836 
3837         // ------------------------------------------------- instance variables
3838 
3839         /*** The reason code. */
3840         private int reasonCode;
3841 
3842         /*** The reason message. */
3843         private String reason;
3844 
3845         // ------------------------------------------------------------ methods
3846 
3847         /***
3848          * Get the reason code.
3849          *
3850          * @return the reason code
3851          */
3852         public int getReasonCode() {
3853             return reasonCode;
3854         }
3855 
3856         /***
3857          * Get the reason message.
3858          *
3859          * @return the reason message
3860          */
3861         public String getReason() {
3862             return reason;
3863         }
3864 
3865     }
3866 
3867 
3868     /*** 
3869      * A mapping to determine the (somewhat arbitrarily) preferred charset for a
3870      * given locale.  Supports all locales recognized in JDK 1.1.
3871      * <p>
3872      * The distribution of this class is Servlets.com.    It was originally
3873      * written by Jason Hunter [jhunter at acm.org] and used by with permission.
3874      */
3875     public static class LocaleToCharsetMap {
3876 
3877         /*** A mapping of language code to charset */
3878         private static final Hashtable LOCALE_TO_CHARSET_MAP;
3879         static {
3880             LOCALE_TO_CHARSET_MAP = new Hashtable();
3881             LOCALE_TO_CHARSET_MAP.put("ar", "ISO-8859-6");
3882             LOCALE_TO_CHARSET_MAP.put("be", "ISO-8859-5");
3883             LOCALE_TO_CHARSET_MAP.put("bg", "ISO-8859-5");
3884             LOCALE_TO_CHARSET_MAP.put("ca", "ISO-8859-1");
3885             LOCALE_TO_CHARSET_MAP.put("cs", "ISO-8859-2");
3886             LOCALE_TO_CHARSET_MAP.put("da", "ISO-8859-1");
3887             LOCALE_TO_CHARSET_MAP.put("de", "ISO-8859-1");
3888             LOCALE_TO_CHARSET_MAP.put("el", "ISO-8859-7");
3889             LOCALE_TO_CHARSET_MAP.put("en", "ISO-8859-1");
3890             LOCALE_TO_CHARSET_MAP.put("es", "ISO-8859-1");
3891             LOCALE_TO_CHARSET_MAP.put("et", "ISO-8859-1");
3892             LOCALE_TO_CHARSET_MAP.put("fi", "ISO-8859-1");
3893             LOCALE_TO_CHARSET_MAP.put("fr", "ISO-8859-1");
3894             LOCALE_TO_CHARSET_MAP.put("hr", "ISO-8859-2");
3895             LOCALE_TO_CHARSET_MAP.put("hu", "ISO-8859-2");
3896             LOCALE_TO_CHARSET_MAP.put("is", "ISO-8859-1");
3897             LOCALE_TO_CHARSET_MAP.put("it", "ISO-8859-1");
3898             LOCALE_TO_CHARSET_MAP.put("iw", "ISO-8859-8");
3899             LOCALE_TO_CHARSET_MAP.put("ja", "Shift_JIS");
3900             LOCALE_TO_CHARSET_MAP.put("ko", "EUC-KR");
3901             LOCALE_TO_CHARSET_MAP.put("lt", "ISO-8859-2");
3902             LOCALE_TO_CHARSET_MAP.put("lv", "ISO-8859-2");
3903             LOCALE_TO_CHARSET_MAP.put("mk", "ISO-8859-5");
3904             LOCALE_TO_CHARSET_MAP.put("nl", "ISO-8859-1");
3905             LOCALE_TO_CHARSET_MAP.put("no", "ISO-8859-1");
3906             LOCALE_TO_CHARSET_MAP.put("pl", "ISO-8859-2");
3907             LOCALE_TO_CHARSET_MAP.put("pt", "ISO-8859-1");
3908             LOCALE_TO_CHARSET_MAP.put("ro", "ISO-8859-2");
3909             LOCALE_TO_CHARSET_MAP.put("ru", "ISO-8859-5");
3910             LOCALE_TO_CHARSET_MAP.put("sh", "ISO-8859-5");
3911             LOCALE_TO_CHARSET_MAP.put("sk", "ISO-8859-2");
3912             LOCALE_TO_CHARSET_MAP.put("sl", "ISO-8859-2");
3913             LOCALE_TO_CHARSET_MAP.put("sq", "ISO-8859-2");
3914             LOCALE_TO_CHARSET_MAP.put("sr", "ISO-8859-5");
3915             LOCALE_TO_CHARSET_MAP.put("sv", "ISO-8859-1");
3916             LOCALE_TO_CHARSET_MAP.put("tr", "ISO-8859-9");
3917             LOCALE_TO_CHARSET_MAP.put("uk", "ISO-8859-5");
3918             LOCALE_TO_CHARSET_MAP.put("zh", "GB2312");
3919             LOCALE_TO_CHARSET_MAP.put("zh_TW", "Big5");
3920         }
3921        
3922         /***
3923          * Get the preferred charset for the given locale.
3924          *
3925          * @param locale the locale
3926          * @return the preferred charset or null if the locale is not
3927          * recognized.
3928          */
3929         public static String getCharset(Locale locale) {
3930             // try for an full name match (may include country)
3931             String charset =
3932                 (String) LOCALE_TO_CHARSET_MAP.get(locale.toString());
3933             if (charset != null) { 
3934                 return charset;
3935             }
3936            
3937             // if a full name didn't match, try just the language
3938             charset = (String) LOCALE_TO_CHARSET_MAP.get(locale.getLanguage());
3939             return charset;  // may be null
3940         }
3941 
3942     }
3943 
3944 }
3945