URI.java revision 1c039d71d3879f39e3a75b8788e656f7b4f88f08
1/*
2 *  Licensed to the Apache Software Foundation (ASF) under one or more
3 *  contributor license agreements.  See the NOTICE file distributed with
4 *  this work for additional information regarding copyright ownership.
5 *  The ASF licenses this file to You under the Apache License, Version 2.0
6 *  (the "License"); you may not use this file except in compliance with
7 *  the License.  You may obtain a copy of the License at
8 *
9 *     http://www.apache.org/licenses/LICENSE-2.0
10 *
11 *  Unless required by applicable law or agreed to in writing, software
12 *  distributed under the License is distributed on an "AS IS" BASIS,
13 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 *  See the License for the specific language governing permissions and
15 *  limitations under the License.
16 */
17
18package java.net;
19
20import java.io.IOException;
21import java.io.ObjectInputStream;
22import java.io.ObjectOutputStream;
23import java.io.Serializable;
24import java.util.Locale;
25import libcore.net.UriCodec;
26
27/**
28 * This class represents an instance of a URI as defined by
29 * <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>.
30 */
31public final class URI implements Comparable<URI>, Serializable {
32
33    private static final long serialVersionUID = -6052424284110960213l;
34
35    static final String UNRESERVED = "_-!.~\'()*";
36    static final String PUNCTUATION = ",;:$&+=";
37
38    static final UriCodec USER_INFO_ENCODER = new PartEncoder("");
39    static final UriCodec PATH_ENCODER = new PartEncoder("/@");
40    static final UriCodec AUTHORITY_ENCODER = new PartEncoder("@[]");
41
42    /** for java.net.URL, which foolishly combines these two parts */
43    static final UriCodec FILE_AND_QUERY_ENCODER = new PartEncoder("/@?");
44
45    /** for query, fragment, and scheme-specific part */
46    static final UriCodec ALL_LEGAL_ENCODER = new PartEncoder("?/[]@");
47
48    /** Retains all ASCII chars including delimiters. */
49    private static final UriCodec ASCII_ONLY = new UriCodec() {
50        @Override protected boolean isRetained(char c) {
51            return c <= 127;
52        }
53    };
54
55    /**
56     * Encodes the unescaped characters of {@code s} that are not permitted.
57     * Permitted characters are:
58     * <ul>
59     *   <li>Unreserved characters in <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>.
60     *   <li>{@code extraOkayChars},
61     *   <li>non-ASCII, non-control, non-whitespace characters
62     * </ul>
63     */
64    private static class PartEncoder extends UriCodec {
65        private final String extraLegalCharacters;
66
67        PartEncoder(String extraLegalCharacters) {
68            this.extraLegalCharacters = extraLegalCharacters;
69        }
70
71        @Override protected boolean isRetained(char c) {
72            return UNRESERVED.indexOf(c) != -1
73                    || PUNCTUATION.indexOf(c) != -1
74                    || extraLegalCharacters.indexOf(c) != -1
75                    || (c > 127 && !Character.isSpaceChar(c) && !Character.isISOControl(c));
76        }
77    }
78
79    private String string;
80    private transient String scheme;
81    private transient String schemeSpecificPart;
82    private transient String authority;
83    private transient String userInfo;
84    private transient String host;
85    private transient int port = -1;
86    private transient String path;
87    private transient String query;
88    private transient String fragment;
89    private transient boolean opaque;
90    private transient boolean absolute;
91    private transient boolean serverAuthority = false;
92
93    private transient int hash = -1;
94
95    private URI() {}
96
97    /**
98     * Creates a new URI instance according to the given string {@code uri}.
99     *
100     * @param uri
101     *            the textual URI representation to be parsed into a URI object.
102     * @throws URISyntaxException
103     *         if the given {@code uri} isn't an
104     *         <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a> URI
105     *         or could not be parsed correctly.
106     */
107    public URI(String uri) throws URISyntaxException {
108        parseURI(uri, false);
109    }
110
/**
 * Creates a new URI instance from a scheme, a scheme-specific part and a
 * fragment. The non-null components are first joined into a temporary
 * string of the form
 * <p>
 * {@code [scheme:]scheme-specific-part[#fragment]}
 * <p>
 * with {@code ssp} and {@code frag} passed through the encoder, and that
 * string is then parsed.
 *
 * @param scheme the scheme part of the URI.
 * @param ssp the scheme-specific-part of the URI.
 * @param frag the fragment part of the URI.
 * @throws URISyntaxException if the resulting URI isn't an
 *         <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a> URI
 *         or could not be parsed correctly.
 */
public URI(String scheme, String ssp, String frag) throws URISyntaxException {
    StringBuilder text = new StringBuilder();

    if (scheme != null) {
        text.append(scheme).append(':');
    }

    if (ssp != null) {
        ALL_LEGAL_ENCODER.appendEncoded(text, ssp);
    }

    if (frag != null) {
        text.append('#');
        ALL_LEGAL_ENCODER.appendEncoded(text, frag);
    }

    String assembled = text.toString();
    parseURI(assembled, false);
}
145
/**
 * Creates a new URI instance from server-based components. The non-null
 * components are first joined into a temporary string of the form
 * <p>
 * {@code [scheme:][user-info@]host[:port][path][?query][#fragment]}
 * <p>
 * and that string is then parsed, requiring a server-based authority.
 * When every string component is {@code null}, nothing is parsed and the
 * path is set to the empty string.
 *
 * @param scheme the scheme part of the URI.
 * @param userInfo the user information of the URI for authentication and
 *        authorization.
 * @param host the host name of the URI. A host containing {@code ':'} but
 *        no square brackets is wrapped in {@code [...]}.
 * @param port the port number of the URI, or -1 for none.
 * @param path the path to the resource on the host.
 * @param query the query part of the URI to specify parameters for the
 *        resource.
 * @param fragment the fragment part of the URI.
 * @throws URISyntaxException if a scheme is given together with a
 *         non-empty path that does not start with {@code '/'}, or if the
 *         resulting URI isn't an
 *         <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a> URI
 *         or could not be parsed correctly.
 */
public URI(String scheme, String userInfo, String host, int port,
        String path, String query, String fragment)
        throws URISyntaxException {

    boolean noStringComponents = scheme == null && userInfo == null && host == null
            && path == null && query == null && fragment == null;
    if (noStringComponents) {
        this.path = "";
        return;
    }

    boolean relativePath = path != null && path.length() > 0 && path.charAt(0) != '/';
    if (scheme != null && relativePath) {
        throw new URISyntaxException(path, "Relative path");
    }

    StringBuilder text = new StringBuilder();
    if (scheme != null) {
        text.append(scheme).append(':');
    }

    // Any authority component at all calls for the "//" prefix.
    boolean hasAuthorityPart = userInfo != null || host != null || port != -1;
    if (hasAuthorityPart) {
        text.append("//");
    }

    if (userInfo != null) {
        USER_INFO_ENCODER.appendEncoded(text, userInfo);
        text.append('@');
    }

    if (host != null) {
        // An IPv6 literal that hasn't been enclosed in square brackets
        // gets them added here.
        boolean bareIpv6 = host.indexOf(':') != -1
                && host.indexOf(']') == -1
                && host.indexOf('[') == -1;
        if (bareIpv6) {
            text.append('[').append(host).append(']');
        } else {
            text.append(host);
        }
    }

    if (port != -1) {
        text.append(':').append(port);
    }

    if (path != null) {
        PATH_ENCODER.appendEncoded(text, path);
    }

    if (query != null) {
        text.append('?');
        ALL_LEGAL_ENCODER.appendEncoded(text, query);
    }

    if (fragment != null) {
        text.append('#');
        ALL_LEGAL_ENCODER.appendEncoded(text, fragment);
    }

    parseURI(text.toString(), true);
}
235
/**
 * Creates a new URI instance from a scheme, host, path and fragment. This
 * is shorthand for the seven-argument constructor with a {@code null}
 * user-info, a port of -1 and a {@code null} query.
 *
 * @param scheme the scheme part of the URI.
 * @param host the host name of the URI.
 * @param path the path to the resource on the host.
 * @param fragment the fragment part of the URI.
 * @throws URISyntaxException if the resulting URI isn't an
 *         <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a> URI
 *         or could not be parsed correctly.
 */
public URI(String scheme, String host, String path, String fragment) throws URISyntaxException {
    this(scheme, null, host, -1, path, null, fragment);
}
259
/**
 * Creates a new URI instance from hierarchical components with an
 * unparsed authority. The non-null components are first joined into a
 * temporary string of the form
 * <p>
 * {@code [scheme:][//authority][path][?query][#fragment]}
 * <p>
 * and that string is then parsed without requiring a server-based
 * authority.
 *
 * @param scheme the scheme part of the URI.
 * @param authority the authority part of the URI.
 * @param path the path to the resource on the host.
 * @param query the query part of the URI to specify parameters for the
 *        resource.
 * @param fragment the fragment part of the URI.
 * @throws URISyntaxException if a scheme is given together with a
 *         non-empty path that does not start with {@code '/'}, or if the
 *         resulting URI isn't an
 *         <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a> URI
 *         or could not be parsed correctly.
 */
public URI(String scheme, String authority, String path, String query,
        String fragment) throws URISyntaxException {
    boolean relativePath = path != null && path.length() > 0 && path.charAt(0) != '/';
    if (scheme != null && relativePath) {
        throw new URISyntaxException(path, "Relative path");
    }

    StringBuilder text = new StringBuilder();

    if (scheme != null) {
        text.append(scheme).append(':');
    }

    if (authority != null) {
        text.append("//");
        AUTHORITY_ENCODER.appendEncoded(text, authority);
    }

    if (path != null) {
        PATH_ENCODER.appendEncoded(text, path);
    }

    if (query != null) {
        text.append('?');
        ALL_LEGAL_ENCODER.appendEncoded(text, query);
    }

    if (fragment != null) {
        text.append('#');
        ALL_LEGAL_ENCODER.appendEncoded(text, fragment);
    }

    String assembled = text.toString();
    parseURI(assembled, false);
}
314
315    private void parseURI(String uri, boolean forceServer) throws URISyntaxException {
316        String temp = uri;
317        // assign uri string to the input value per spec
318        string = uri;
319        int index, index1, index2, index3;
320        // parse into Fragment, Scheme, and SchemeSpecificPart
321        // then parse SchemeSpecificPart if necessary
322
323        // Fragment
324        index = temp.indexOf('#');
325        if (index != -1) {
326            // remove the fragment from the end
327            fragment = temp.substring(index + 1);
328            validateFragment(uri, fragment, index + 1);
329            temp = temp.substring(0, index);
330        }
331
332        // Scheme and SchemeSpecificPart
333        index = index1 = temp.indexOf(':');
334        index2 = temp.indexOf('/');
335        index3 = temp.indexOf('?');
336
337        // if a '/' or '?' occurs before the first ':' the uri has no
338        // specified scheme, and is therefore not absolute
339        if (index != -1 && (index2 >= index || index2 == -1)
340                && (index3 >= index || index3 == -1)) {
341            // the characters up to the first ':' comprise the scheme
342            absolute = true;
343            scheme = temp.substring(0, index);
344            if (scheme.length() == 0) {
345                throw new URISyntaxException(uri, "Scheme expected", index);
346            }
347            validateScheme(uri, scheme, 0);
348            schemeSpecificPart = temp.substring(index + 1);
349            if (schemeSpecificPart.length() == 0) {
350                throw new URISyntaxException(uri, "Scheme-specific part expected", index + 1);
351            }
352        } else {
353            absolute = false;
354            schemeSpecificPart = temp;
355        }
356
357        if (scheme == null || schemeSpecificPart.length() > 0
358                && schemeSpecificPart.charAt(0) == '/') {
359            opaque = false;
360            // the URI is hierarchical
361
362            // Query
363            temp = schemeSpecificPart;
364            index = temp.indexOf('?');
365            if (index != -1) {
366                query = temp.substring(index + 1);
367                temp = temp.substring(0, index);
368                validateQuery(uri, query, index2 + 1 + index);
369            }
370
371            // Authority and Path
372            if (temp.startsWith("//")) {
373                index = temp.indexOf('/', 2);
374                if (index != -1) {
375                    authority = temp.substring(2, index);
376                    path = temp.substring(index);
377                } else {
378                    authority = temp.substring(2);
379                    if (authority.length() == 0 && query == null
380                            && fragment == null) {
381                        throw new URISyntaxException(uri, "Authority expected", uri.length());
382                    }
383
384                    path = "";
385                    // nothing left, so path is empty (not null, path should
386                    // never be null)
387                }
388
389                if (authority.length() == 0) {
390                    authority = null;
391                } else {
392                    validateAuthority(uri, authority, index1 + 3);
393                }
394            } else { // no authority specified
395                path = temp;
396            }
397
398            int pathIndex = 0;
399            if (index2 > -1) {
400                pathIndex += index2;
401            }
402            if (index > -1) {
403                pathIndex += index;
404            }
405            validatePath(uri, path, pathIndex);
406        } else { // if not hierarchical, URI is opaque
407            opaque = true;
408            validateSsp(uri, schemeSpecificPart, index2 + 2 + index);
409        }
410
411        parseAuthority(forceServer);
412    }
413
414    private void validateScheme(String uri, String scheme, int index)
415            throws URISyntaxException {
416        // first char needs to be an alpha char
417        char ch = scheme.charAt(0);
418        if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'))) {
419            throw new URISyntaxException(uri, "Illegal character in scheme", 0);
420        }
421
422        try {
423            UriCodec.validateSimple(scheme, "+-.");
424        } catch (URISyntaxException e) {
425            throw new URISyntaxException(uri, "Illegal character in scheme", index + e.getIndex());
426        }
427    }
428
429    private void validateSsp(String uri, String ssp, int index)
430            throws URISyntaxException {
431        try {
432            ALL_LEGAL_ENCODER.validate(ssp);
433        } catch (URISyntaxException e) {
434            throw new URISyntaxException(uri,
435                    e.getReason() + " in schemeSpecificPart", index + e.getIndex());
436        }
437    }
438
439    private void validateAuthority(String uri, String authority, int index)
440            throws URISyntaxException {
441        try {
442            AUTHORITY_ENCODER.validate(authority);
443        } catch (URISyntaxException e) {
444            throw new URISyntaxException(uri, e.getReason() + " in authority", index + e.getIndex());
445        }
446    }
447
448    private void validatePath(String uri, String path, int index)
449            throws URISyntaxException {
450        try {
451            PATH_ENCODER.validate(path);
452        } catch (URISyntaxException e) {
453            throw new URISyntaxException(uri, e.getReason() + " in path", index + e.getIndex());
454        }
455    }
456
457    private void validateQuery(String uri, String query, int index)
458            throws URISyntaxException {
459        try {
460            ALL_LEGAL_ENCODER.validate(query);
461        } catch (URISyntaxException e) {
462            throw new URISyntaxException(uri, e.getReason() + " in query", index + e.getIndex());
463
464        }
465    }
466
467    private void validateFragment(String uri, String fragment, int index)
468            throws URISyntaxException {
469        try {
470            ALL_LEGAL_ENCODER.validate(fragment);
471        } catch (URISyntaxException e) {
472            throw new URISyntaxException(uri, e.getReason() + " in fragment", index + e.getIndex());
473        }
474    }
475
476    /**
477     * Parse the authority string into its component parts: user info,
478     * host, and port. This operation doesn't apply to registry URIs, and
479     * calling it on such <i>may</i> result in a syntax exception.
480     *
481     * @param forceServer true to always throw if the authority cannot be
482     *     parsed. If false, this method may still throw for some kinds of
483     *     errors; this unpredictable behavior is consistent with the RI.
484     */
485    private void parseAuthority(boolean forceServer) throws URISyntaxException {
486        if (authority == null) {
487            return;
488        }
489
490        String tempUserInfo = null;
491        String temp = authority;
492        int index = temp.indexOf('@');
493        int hostIndex = 0;
494        if (index != -1) {
495            // remove user info
496            tempUserInfo = temp.substring(0, index);
497            validateUserInfo(authority, tempUserInfo, 0);
498            temp = temp.substring(index + 1); // host[:port] is left
499            hostIndex = index + 1;
500        }
501
502        index = temp.lastIndexOf(':');
503        int endIndex = temp.indexOf(']');
504
505        String tempHost;
506        int tempPort = -1;
507        if (index != -1 && endIndex < index) {
508            // determine port and host
509            tempHost = temp.substring(0, index);
510
511            if (index < (temp.length() - 1)) { // port part is not empty
512                try {
513                    tempPort = Integer.parseInt(temp.substring(index + 1));
514                    if (tempPort < 0) {
515                        if (forceServer) {
516                            throw new URISyntaxException(authority,
517                                    "Invalid port number", hostIndex + index + 1);
518                        }
519                        return;
520                    }
521                } catch (NumberFormatException e) {
522                    if (forceServer) {
523                        throw new URISyntaxException(authority,
524                                "Invalid port number", hostIndex + index + 1);
525                    }
526                    return;
527                }
528            }
529        } else {
530            tempHost = temp;
531        }
532
533        if (tempHost.isEmpty()) {
534            if (forceServer) {
535                throw new URISyntaxException(authority, "Expected host", hostIndex);
536            }
537            return;
538        }
539
540        if (!isValidHost(forceServer, tempHost)) {
541            return;
542        }
543
544        // this is a server based uri,
545        // fill in the userInfo, host and port fields
546        userInfo = tempUserInfo;
547        host = tempHost;
548        port = tempPort;
549        serverAuthority = true;
550    }
551
552    private void validateUserInfo(String uri, String userInfo, int index)
553            throws URISyntaxException {
554        for (int i = 0; i < userInfo.length(); i++) {
555            char ch = userInfo.charAt(i);
556            if (ch == ']' || ch == '[') {
557                throw new URISyntaxException(uri, "Illegal character in userInfo", index + i);
558            }
559        }
560    }
561
562    /**
563     * Returns true if {@code host} is a well-formed host name or IP address.
564     *
565     * @param forceServer true to always throw if the host cannot be parsed. If
566     *     false, this method may still throw for some kinds of errors; this
567     *     unpredictable behavior is consistent with the RI.
568     */
569    private boolean isValidHost(boolean forceServer, String host) throws URISyntaxException {
570        if (host.startsWith("[")) {
571            // IPv6 address
572            if (!host.endsWith("]")) {
573                throw new URISyntaxException(host,
574                        "Expected a closing square bracket for IPv6 address", 0);
575            }
576            if (InetAddress.isNumeric(host)) {
577                // If it's numeric, the presence of square brackets guarantees
578                // that it's a numeric IPv6 address.
579                return true;
580            }
581            throw new URISyntaxException(host, "Malformed IPv6 address");
582        }
583
584        // '[' and ']' can only be the first char and last char
585        // of the host name
586        if (host.indexOf('[') != -1 || host.indexOf(']') != -1) {
587            throw new URISyntaxException(host, "Illegal character in host name", 0);
588        }
589
590        int index = host.lastIndexOf('.');
591        if (index < 0 || index == host.length() - 1
592                || !Character.isDigit(host.charAt(index + 1))) {
593            // domain name
594            if (isValidDomainName(host)) {
595                return true;
596            }
597            if (forceServer) {
598                throw new URISyntaxException(host, "Illegal character in host name", 0);
599            }
600            return false;
601        }
602
603        // IPv4 address?
604        try {
605            InetAddress ia = InetAddress.parseNumericAddress(host);
606            if (ia instanceof Inet4Address) {
607                return true;
608            }
609        } catch (IllegalArgumentException ex) {
610        }
611
612        if (forceServer) {
613            throw new URISyntaxException(host, "Malformed IPv4 address", 0);
614        }
615        return false;
616    }
617
618    private boolean isValidDomainName(String host) {
619        try {
620            UriCodec.validateSimple(host, "-.");
621        } catch (URISyntaxException e) {
622            return false;
623        }
624
625        String lastLabel = null;
626        for (String token : host.split("\\.")) {
627            lastLabel = token;
628            if (lastLabel.startsWith("-") || lastLabel.endsWith("-")) {
629                return false;
630            }
631        }
632
633        if (lastLabel == null) {
634            return false;
635        }
636
637        if (!lastLabel.equals(host)) {
638            char ch = lastLabel.charAt(0);
639            if (ch >= '0' && ch <= '9') {
640                return false;
641            }
642        }
643        return true;
644    }
645
646    /**
647     * Compares this URI with the given argument {@code uri}. This method will
648     * return a negative value if this URI instance is less than the given
649     * argument and a positive value if this URI instance is greater than the
650     * given argument. The return value {@code 0} indicates that the two
651     * instances represent the same URI. To define the order the single parts of
652     * the URI are compared with each other. String components will be ordered
653     * in the natural case-sensitive way. A hierarchical URI is less than an
654     * opaque URI and if one part is {@code null} the URI with the undefined
655     * part is less than the other one.
656     *
657     * @param uri
658     *            the URI this instance has to compare with.
659     * @return the value representing the order of the two instances.
660     */
661    public int compareTo(URI uri) {
662        int ret;
663
664        // compare schemes
665        if (scheme == null && uri.scheme != null) {
666            return -1;
667        } else if (scheme != null && uri.scheme == null) {
668            return 1;
669        } else if (scheme != null && uri.scheme != null) {
670            ret = scheme.compareToIgnoreCase(uri.scheme);
671            if (ret != 0) {
672                return ret;
673            }
674        }
675
676        // compare opacities
677        if (!opaque && uri.opaque) {
678            return -1;
679        } else if (opaque && !uri.opaque) {
680            return 1;
681        } else if (opaque && uri.opaque) {
682            ret = schemeSpecificPart.compareTo(uri.schemeSpecificPart);
683            if (ret != 0) {
684                return ret;
685            }
686        } else {
687
688            // otherwise both must be hierarchical
689
690            // compare authorities
691            if (authority != null && uri.authority == null) {
692                return 1;
693            } else if (authority == null && uri.authority != null) {
694                return -1;
695            } else if (authority != null && uri.authority != null) {
696                if (host != null && uri.host != null) {
697                    // both are server based, so compare userInfo, host, port
698                    if (userInfo != null && uri.userInfo == null) {
699                        return 1;
700                    } else if (userInfo == null && uri.userInfo != null) {
701                        return -1;
702                    } else if (userInfo != null && uri.userInfo != null) {
703                        ret = userInfo.compareTo(uri.userInfo);
704                        if (ret != 0) {
705                            return ret;
706                        }
707                    }
708
709                    // userInfo's are the same, compare hostname
710                    ret = host.compareToIgnoreCase(uri.host);
711                    if (ret != 0) {
712                        return ret;
713                    }
714
715                    // compare port
716                    if (port != uri.port) {
717                        return port - uri.port;
718                    }
719                } else { // one or both are registry based, compare the whole
720                    // authority
721                    ret = authority.compareTo(uri.authority);
722                    if (ret != 0) {
723                        return ret;
724                    }
725                }
726            }
727
728            // authorities are the same
729            // compare paths
730            ret = path.compareTo(uri.path);
731            if (ret != 0) {
732                return ret;
733            }
734
735            // compare queries
736
737            if (query != null && uri.query == null) {
738                return 1;
739            } else if (query == null && uri.query != null) {
740                return -1;
741            } else if (query != null && uri.query != null) {
742                ret = query.compareTo(uri.query);
743                if (ret != 0) {
744                    return ret;
745                }
746            }
747        }
748
749        // everything else is identical, so compare fragments
750        if (fragment != null && uri.fragment == null) {
751            return 1;
752        } else if (fragment == null && uri.fragment != null) {
753            return -1;
754        } else if (fragment != null && uri.fragment != null) {
755            ret = fragment.compareTo(uri.fragment);
756            if (ret != 0) {
757                return ret;
758            }
759        }
760
761        // identical
762        return 0;
763    }
764
765    /**
766     * Returns the URI formed by parsing {@code uri}. This method behaves
767     * identically to the string constructor but throws a different exception
768     * on failure. The constructor fails with a checked {@link
769     * URISyntaxException}; this method fails with an unchecked {@link
770     * IllegalArgumentException}.
771     */
772    public static URI create(String uri) {
773        try {
774            return new URI(uri);
775        } catch (URISyntaxException e) {
776            throw new IllegalArgumentException(e.getMessage());
777        }
778    }
779
780    private URI duplicate() {
781        URI clone = new URI();
782        clone.absolute = absolute;
783        clone.authority = authority;
784        clone.fragment = fragment;
785        clone.host = host;
786        clone.opaque = opaque;
787        clone.path = path;
788        clone.port = port;
789        clone.query = query;
790        clone.scheme = scheme;
791        clone.schemeSpecificPart = schemeSpecificPart;
792        clone.userInfo = userInfo;
793        clone.serverAuthority = serverAuthority;
794        return clone;
795    }
796
797    /*
798     * Takes a string that may contain hex sequences like %F1 or %2b and
799     * converts the hex values following the '%' to lowercase
800     */
801    private String convertHexToLowerCase(String s) {
802        StringBuilder result = new StringBuilder("");
803        if (s.indexOf('%') == -1) {
804            return s;
805        }
806
807        int index, prevIndex = 0;
808        while ((index = s.indexOf('%', prevIndex)) != -1) {
809            result.append(s.substring(prevIndex, index + 1));
810            result.append(s.substring(index + 1, index + 3).toLowerCase(Locale.US));
811            index += 3;
812            prevIndex = index;
813        }
814        return result.toString();
815    }
816
817    /**
818     * Returns true if {@code first} and {@code second} are equal after
819     * unescaping hex sequences like %F1 and %2b.
820     */
821    private boolean escapedEquals(String first, String second) {
822        if (first.indexOf('%') != second.indexOf('%')) {
823            return first.equals(second);
824        }
825
826        int index, prevIndex = 0;
827        while ((index = first.indexOf('%', prevIndex)) != -1
828                && second.indexOf('%', prevIndex) == index) {
829            boolean match = first.substring(prevIndex, index).equals(
830                    second.substring(prevIndex, index));
831            if (!match) {
832                return false;
833            }
834
835            match = first.substring(index + 1, index + 3).equalsIgnoreCase(
836                    second.substring(index + 1, index + 3));
837            if (!match) {
838                return false;
839            }
840
841            index += 3;
842            prevIndex = index;
843        }
844        return first.substring(prevIndex).equals(second.substring(prevIndex));
845    }
846
847    /**
848     * Compares this URI instance with the given argument {@code o} and
849     * determines if both are equal. Two URI instances are equal if all single
850     * parts are identical in their meaning.
851     *
852     * @param o
853     *            the URI this instance has to be compared with.
854     * @return {@code true} if both URI instances point to the same resource,
855     *         {@code false} otherwise.
856     */
857    @Override
858    public boolean equals(Object o) {
859        if (!(o instanceof URI)) {
860            return false;
861        }
862        URI uri = (URI) o;
863
864        if (uri.fragment == null && fragment != null || uri.fragment != null
865                && fragment == null) {
866            return false;
867        } else if (uri.fragment != null && fragment != null) {
868            if (!escapedEquals(uri.fragment, fragment)) {
869                return false;
870            }
871        }
872
873        if (uri.scheme == null && scheme != null || uri.scheme != null
874                && scheme == null) {
875            return false;
876        } else if (uri.scheme != null && scheme != null) {
877            if (!uri.scheme.equalsIgnoreCase(scheme)) {
878                return false;
879            }
880        }
881
882        if (uri.opaque && opaque) {
883            return escapedEquals(uri.schemeSpecificPart,
884                    schemeSpecificPart);
885        } else if (!uri.opaque && !opaque) {
886            if (!escapedEquals(path, uri.path)) {
887                return false;
888            }
889
890            if (uri.query != null && query == null || uri.query == null
891                    && query != null) {
892                return false;
893            } else if (uri.query != null && query != null) {
894                if (!escapedEquals(uri.query, query)) {
895                    return false;
896                }
897            }
898
899            if (uri.authority != null && authority == null
900                    || uri.authority == null && authority != null) {
901                return false;
902            } else if (uri.authority != null && authority != null) {
903                if (uri.host != null && host == null || uri.host == null
904                        && host != null) {
905                    return false;
906                } else if (uri.host == null && host == null) {
907                    // both are registry based, so compare the whole authority
908                    return escapedEquals(uri.authority, authority);
909                } else { // uri.host != null && host != null, so server-based
910                    if (!host.equalsIgnoreCase(uri.host)) {
911                        return false;
912                    }
913
914                    if (port != uri.port) {
915                        return false;
916                    }
917
918                    if (uri.userInfo != null && userInfo == null
919                            || uri.userInfo == null && userInfo != null) {
920                        return false;
921                    } else if (uri.userInfo != null && userInfo != null) {
922                        return escapedEquals(userInfo, uri.userInfo);
923                    } else {
924                        return true;
925                    }
926                }
927            } else {
928                // no authority
929                return true;
930            }
931
932        } else {
933            // one is opaque, the other hierarchical
934            return false;
935        }
936    }
937
938    /**
939     * Gets the decoded authority part of this URI.
940     *
941     * @return the decoded authority part or {@code null} if undefined.
942     */
943    public String getAuthority() {
944        return decode(authority);
945    }
946
947    /**
948     * Gets the decoded fragment part of this URI.
949     *
950     * @return the decoded fragment part or {@code null} if undefined.
951     */
952    public String getFragment() {
953        return decode(fragment);
954    }
955
956    /**
957     * Gets the host part of this URI.
958     *
959     * @return the host part or {@code null} if undefined.
960     */
961    public String getHost() {
962        return host;
963    }
964
965    /**
966     * Gets the decoded path part of this URI.
967     *
968     * @return the decoded path part or {@code null} if undefined.
969     */
970    public String getPath() {
971        return decode(path);
972    }
973
974    /**
975     * Gets the port number of this URI.
976     *
977     * @return the port number or {@code -1} if undefined.
978     */
979    public int getPort() {
980        return port;
981    }
982
983    /** @hide */
984    public int getEffectivePort() {
985        return getEffectivePort(scheme, port);
986    }
987
988    /**
989     * Returns the port to use for {@code scheme} connections will use when
990     * {@link #getPort} returns {@code specifiedPort}.
991     *
992     * @hide
993     */
994    public static int getEffectivePort(String scheme, int specifiedPort) {
995        if (specifiedPort != -1) {
996            return specifiedPort;
997        }
998
999        if ("http".equalsIgnoreCase(scheme)) {
1000            return 80;
1001        } else if ("https".equalsIgnoreCase(scheme)) {
1002            return 443;
1003        } else {
1004            return -1;
1005        }
1006    }
1007
1008    /**
1009     * Gets the decoded query part of this URI.
1010     *
1011     * @return the decoded query part or {@code null} if undefined.
1012     */
1013    public String getQuery() {
1014        return decode(query);
1015    }
1016
1017    /**
1018     * Gets the authority part of this URI in raw form.
1019     *
1020     * @return the encoded authority part or {@code null} if undefined.
1021     */
1022    public String getRawAuthority() {
1023        return authority;
1024    }
1025
1026    /**
1027     * Gets the fragment part of this URI in raw form.
1028     *
1029     * @return the encoded fragment part or {@code null} if undefined.
1030     */
1031    public String getRawFragment() {
1032        return fragment;
1033    }
1034
1035    /**
1036     * Gets the path part of this URI in raw form.
1037     *
1038     * @return the encoded path part or {@code null} if undefined.
1039     */
1040    public String getRawPath() {
1041        return path;
1042    }
1043
1044    /**
1045     * Gets the query part of this URI in raw form.
1046     *
1047     * @return the encoded query part or {@code null} if undefined.
1048     */
1049    public String getRawQuery() {
1050        return query;
1051    }
1052
1053    /**
1054     * Gets the scheme-specific part of this URI in raw form.
1055     *
1056     * @return the encoded scheme-specific part or {@code null} if undefined.
1057     */
1058    public String getRawSchemeSpecificPart() {
1059        return schemeSpecificPart;
1060    }
1061
1062    /**
1063     * Gets the user-info part of this URI in raw form.
1064     *
1065     * @return the encoded user-info part or {@code null} if undefined.
1066     */
1067    public String getRawUserInfo() {
1068        return userInfo;
1069    }
1070
1071    /**
1072     * Gets the scheme part of this URI.
1073     *
1074     * @return the scheme part or {@code null} if undefined.
1075     */
1076    public String getScheme() {
1077        return scheme;
1078    }
1079
1080    /**
1081     * Gets the decoded scheme-specific part of this URI.
1082     *
1083     * @return the decoded scheme-specific part or {@code null} if undefined.
1084     */
1085    public String getSchemeSpecificPart() {
1086        return decode(schemeSpecificPart);
1087    }
1088
1089    /**
1090     * Gets the decoded user-info part of this URI.
1091     *
1092     * @return the decoded user-info part or {@code null} if undefined.
1093     */
1094    public String getUserInfo() {
1095        return decode(userInfo);
1096    }
1097
1098    /**
1099     * Gets the hashcode value of this URI instance.
1100     *
1101     * @return the appropriate hashcode value.
1102     */
1103    @Override
1104    public int hashCode() {
1105        if (hash == -1) {
1106            hash = getHashString().hashCode();
1107        }
1108        return hash;
1109    }
1110
1111    /**
1112     * Indicates whether this URI is absolute, which means that a scheme part is
1113     * defined in this URI.
1114     *
1115     * @return {@code true} if this URI is absolute, {@code false} otherwise.
1116     */
1117    public boolean isAbsolute() {
1118        return absolute;
1119    }
1120
1121    /**
1122     * Indicates whether this URI is opaque or not. An opaque URI is absolute
1123     * and has a scheme-specific part which does not start with a slash
1124     * character. All parts except scheme, scheme-specific and fragment are
1125     * undefined.
1126     *
1127     * @return {@code true} if the URI is opaque, {@code false} otherwise.
1128     */
1129    public boolean isOpaque() {
1130        return opaque;
1131    }
1132
1133    /*
1134     * normalize path, and return the resulting string
1135     */
1136    private String normalize(String path) {
1137        // count the number of '/'s, to determine number of segments
1138        int index = -1;
1139        int pathLength = path.length();
1140        int size = 0;
1141        if (pathLength > 0 && path.charAt(0) != '/') {
1142            size++;
1143        }
1144        while ((index = path.indexOf('/', index + 1)) != -1) {
1145            if (index + 1 < pathLength && path.charAt(index + 1) != '/') {
1146                size++;
1147            }
1148        }
1149
1150        String[] segList = new String[size];
1151        boolean[] include = new boolean[size];
1152
1153        // break the path into segments and store in the list
1154        int current = 0;
1155        int index2;
1156        index = (pathLength > 0 && path.charAt(0) == '/') ? 1 : 0;
1157        while ((index2 = path.indexOf('/', index + 1)) != -1) {
1158            segList[current++] = path.substring(index, index2);
1159            index = index2 + 1;
1160        }
1161
1162        // if current==size, then the last character was a slash
1163        // and there are no more segments
1164        if (current < size) {
1165            segList[current] = path.substring(index);
1166        }
1167
1168        // determine which segments get included in the normalized path
1169        for (int i = 0; i < size; i++) {
1170            include[i] = true;
1171            if (segList[i].equals("..")) {
1172                int remove = i - 1;
1173                // search back to find a segment to remove, if possible
1174                while (remove > -1 && !include[remove]) {
1175                    remove--;
1176                }
1177                // if we find a segment to remove, remove it and the ".."
1178                // segment
1179                if (remove > -1 && !segList[remove].equals("..")) {
1180                    include[remove] = false;
1181                    include[i] = false;
1182                }
1183            } else if (segList[i].equals(".")) {
1184                include[i] = false;
1185            }
1186        }
1187
1188        // put the path back together
1189        StringBuilder newPath = new StringBuilder();
1190        if (path.startsWith("/")) {
1191            newPath.append('/');
1192        }
1193
1194        for (int i = 0; i < segList.length; i++) {
1195            if (include[i]) {
1196                newPath.append(segList[i]);
1197                newPath.append('/');
1198            }
1199        }
1200
1201        // if we used at least one segment and the path previously ended with
1202        // a slash and the last segment is still used, then delete the extra
1203        // trailing '/'
1204        if (!path.endsWith("/") && segList.length > 0
1205                && include[segList.length - 1]) {
1206            newPath.deleteCharAt(newPath.length() - 1);
1207        }
1208
1209        String result = newPath.toString();
1210
1211        // check for a ':' in the first segment if one exists,
1212        // prepend "./" to normalize
1213        index = result.indexOf(':');
1214        index2 = result.indexOf('/');
1215        if (index != -1 && (index < index2 || index2 == -1)) {
1216            newPath.insert(0, "./");
1217            result = newPath.toString();
1218        }
1219        return result;
1220    }
1221
1222    /**
1223     * Normalizes the path part of this URI.
1224     *
1225     * @return an URI object which represents this instance with a normalized
1226     *         path.
1227     */
1228    public URI normalize() {
1229        if (opaque) {
1230            return this;
1231        }
1232        String normalizedPath = normalize(path);
1233        // if the path is already normalized, return this
1234        if (path.equals(normalizedPath)) {
1235            return this;
1236        }
1237        // get an exact copy of the URI re-calculate the scheme specific part
1238        // since the path of the normalized URI is different from this URI.
1239        URI result = duplicate();
1240        result.path = normalizedPath;
1241        result.setSchemeSpecificPart();
1242        return result;
1243    }
1244
1245    /**
1246     * Tries to parse the authority component of this URI to divide it into the
1247     * host, port, and user-info. If this URI is already determined as a
1248     * ServerAuthority this instance will be returned without changes.
1249     *
1250     * @return this instance with the components of the parsed server authority.
1251     * @throws URISyntaxException
1252     *             if the authority part could not be parsed as a server-based
1253     *             authority.
1254     */
1255    public URI parseServerAuthority() throws URISyntaxException {
1256        if (!serverAuthority) {
1257            parseAuthority(true);
1258        }
1259        return this;
1260    }
1261
1262    /**
1263     * Makes the given URI {@code relative} to a relative URI against the URI
1264     * represented by this instance.
1265     *
1266     * @param relative
1267     *            the URI which has to be relativized against this URI.
1268     * @return the relative URI.
1269     */
1270    public URI relativize(URI relative) {
1271        if (relative.opaque || opaque) {
1272            return relative;
1273        }
1274
1275        if (scheme == null ? relative.scheme != null : !scheme
1276                .equals(relative.scheme)) {
1277            return relative;
1278        }
1279
1280        if (authority == null ? relative.authority != null : !authority
1281                .equals(relative.authority)) {
1282            return relative;
1283        }
1284
1285        // normalize both paths
1286        String thisPath = normalize(path);
1287        String relativePath = normalize(relative.path);
1288
1289        /*
1290         * if the paths aren't equal, then we need to determine if this URI's
1291         * path is a parent path (begins with) the relative URI's path
1292         */
1293        if (!thisPath.equals(relativePath)) {
1294            // if this URI's path doesn't end in a '/', add one
1295            if (!thisPath.endsWith("/")) {
1296                thisPath = thisPath + '/';
1297            }
1298            /*
1299             * if the relative URI's path doesn't start with this URI's path,
1300             * then just return the relative URI; the URIs have nothing in
1301             * common
1302             */
1303            if (!relativePath.startsWith(thisPath)) {
1304                return relative;
1305            }
1306        }
1307
1308        URI result = new URI();
1309        result.fragment = relative.fragment;
1310        result.query = relative.query;
1311        // the result URI is the remainder of the relative URI's path
1312        result.path = relativePath.substring(thisPath.length());
1313        result.setSchemeSpecificPart();
1314        return result;
1315    }
1316
1317    /**
1318     * Resolves the given URI {@code relative} against the URI represented by
1319     * this instance.
1320     *
1321     * @param relative
1322     *            the URI which has to be resolved against this URI.
1323     * @return the resolved URI.
1324     */
1325    public URI resolve(URI relative) {
1326        if (relative.absolute || opaque) {
1327            return relative;
1328        }
1329
1330        URI result;
1331        if (relative.path.isEmpty() && relative.scheme == null
1332                && relative.authority == null && relative.query == null
1333                && relative.fragment != null) {
1334            // if the relative URI only consists of fragment,
1335            // the resolved URI is very similar to this URI,
1336            // except that it has the fragment from the relative URI.
1337            result = duplicate();
1338            result.fragment = relative.fragment;
1339            // no need to re-calculate the scheme specific part,
1340            // since fragment is not part of scheme specific part.
1341            return result;
1342        }
1343
1344        if (relative.authority != null) {
1345            // if the relative URI has authority,
1346            // the resolved URI is almost the same as the relative URI,
1347            // except that it has the scheme of this URI.
1348            result = relative.duplicate();
1349            result.scheme = scheme;
1350            result.absolute = absolute;
1351        } else {
1352            // since relative URI has no authority,
1353            // the resolved URI is very similar to this URI,
1354            // except that it has the query and fragment of the relative URI,
1355            // and the path is different.
1356            result = duplicate();
1357            result.fragment = relative.fragment;
1358            result.query = relative.query;
1359            if (relative.path.startsWith("/")) {
1360                result.path = relative.path;
1361            } else {
1362                // resolve a relative reference
1363                int endIndex = path.lastIndexOf('/') + 1;
1364                result.path = normalize(path.substring(0, endIndex)
1365                        + relative.path);
1366            }
1367            // re-calculate the scheme specific part since
1368            // query and path of the resolved URI is different from this URI.
1369            result.setSchemeSpecificPart();
1370        }
1371        return result;
1372    }
1373
1374    /**
1375     * Helper method used to re-calculate the scheme specific part of the
1376     * resolved or normalized URIs
1377     */
1378    private void setSchemeSpecificPart() {
1379        // ssp = [//authority][path][?query]
1380        StringBuilder ssp = new StringBuilder();
1381        if (authority != null) {
1382            ssp.append("//" + authority);
1383        }
1384        if (path != null) {
1385            ssp.append(path);
1386        }
1387        if (query != null) {
1388            ssp.append("?" + query);
1389        }
1390        schemeSpecificPart = ssp.toString();
1391        // reset string, so that it can be re-calculated correctly when asked.
1392        string = null;
1393    }
1394
1395    /**
1396     * Creates a new URI instance by parsing the given string {@code relative}
1397     * and resolves the created URI against the URI represented by this
1398     * instance.
1399     *
1400     * @param relative
1401     *            the given string to create the new URI instance which has to
1402     *            be resolved later on.
1403     * @return the created and resolved URI.
1404     */
1405    public URI resolve(String relative) {
1406        return resolve(create(relative));
1407    }
1408
1409    private String decode(String s) {
1410        return s != null ? UriCodec.decode(s) : null;
1411    }
1412
1413    /**
1414     * Returns the textual string representation of this URI instance using the
1415     * US-ASCII encoding.
1416     *
1417     * @return the US-ASCII string representation of this URI.
1418     */
1419    public String toASCIIString() {
1420        StringBuilder result = new StringBuilder();
1421        ASCII_ONLY.appendEncoded(result, toString());
1422        return result.toString();
1423    }
1424
1425    /**
1426     * Returns the textual string representation of this URI instance.
1427     *
1428     * @return the textual string representation of this URI.
1429     */
1430    @Override
1431    public String toString() {
1432        if (string == null) {
1433            StringBuilder result = new StringBuilder();
1434            if (scheme != null) {
1435                result.append(scheme);
1436                result.append(':');
1437            }
1438            if (opaque) {
1439                result.append(schemeSpecificPart);
1440            } else {
1441                if (authority != null) {
1442                    result.append("//");
1443                    result.append(authority);
1444                }
1445
1446                if (path != null) {
1447                    result.append(path);
1448                }
1449
1450                if (query != null) {
1451                    result.append('?');
1452                    result.append(query);
1453                }
1454            }
1455
1456            if (fragment != null) {
1457                result.append('#');
1458                result.append(fragment);
1459            }
1460
1461            string = result.toString();
1462        }
1463        return string;
1464    }
1465
1466    /*
1467     * Form a string from the components of this URI, similarly to the
1468     * toString() method. But this method converts scheme and host to lowercase,
1469     * and converts escaped octets to lowercase.
1470     */
1471    private String getHashString() {
1472        StringBuilder result = new StringBuilder();
1473        if (scheme != null) {
1474            result.append(scheme.toLowerCase(Locale.US));
1475            result.append(':');
1476        }
1477        if (opaque) {
1478            result.append(schemeSpecificPart);
1479        } else {
1480            if (authority != null) {
1481                result.append("//");
1482                if (host == null) {
1483                    result.append(authority);
1484                } else {
1485                    if (userInfo != null) {
1486                        result.append(userInfo + "@");
1487                    }
1488                    result.append(host.toLowerCase(Locale.US));
1489                    if (port != -1) {
1490                        result.append(":" + port);
1491                    }
1492                }
1493            }
1494
1495            if (path != null) {
1496                result.append(path);
1497            }
1498
1499            if (query != null) {
1500                result.append('?');
1501                result.append(query);
1502            }
1503        }
1504
1505        if (fragment != null) {
1506            result.append('#');
1507            result.append(fragment);
1508        }
1509
1510        return convertHexToLowerCase(result.toString());
1511    }
1512
1513    /**
1514     * Converts this URI instance to a URL.
1515     *
1516     * @return the created URL representing the same resource as this URI.
1517     * @throws MalformedURLException
1518     *             if an error occurs while creating the URL or no protocol
1519     *             handler could be found.
1520     */
1521    public URL toURL() throws MalformedURLException {
1522        if (!absolute) {
1523            throw new IllegalArgumentException("URI is not absolute: " + toString());
1524        }
1525        return new URL(toString());
1526    }
1527
1528    private void readObject(ObjectInputStream in) throws IOException,
1529            ClassNotFoundException {
1530        in.defaultReadObject();
1531        try {
1532            parseURI(string, false);
1533        } catch (URISyntaxException e) {
1534            throw new IOException(e.toString());
1535        }
1536    }
1537
1538    private void writeObject(ObjectOutputStream out) throws IOException,
1539            ClassNotFoundException {
1540        // call toString() to ensure the value of string field is calculated
1541        toString();
1542        out.defaultWriteObject();
1543    }
1544}
1545