URI.java revision 2d99ef561304174b8ae01a0a68d5b96d5edb9f10
1/*
2 *  Licensed to the Apache Software Foundation (ASF) under one or more
3 *  contributor license agreements.  See the NOTICE file distributed with
4 *  this work for additional information regarding copyright ownership.
5 *  The ASF licenses this file to You under the Apache License, Version 2.0
6 *  (the "License"); you may not use this file except in compliance with
7 *  the License.  You may obtain a copy of the License at
8 *
9 *     http://www.apache.org/licenses/LICENSE-2.0
10 *
11 *  Unless required by applicable law or agreed to in writing, software
12 *  distributed under the License is distributed on an "AS IS" BASIS,
13 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 *  See the License for the specific language governing permissions and
15 *  limitations under the License.
16 */
17
18package java.net;
19
20import java.io.IOException;
21import java.io.ObjectInputStream;
22import java.io.ObjectOutputStream;
23import java.io.Serializable;
24import java.util.Locale;
25import libcore.net.UriCodec;
26import libcore.net.url.UrlUtils;
27
28/**
29 * This class represents an instance of a URI as defined by
30 * <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>.
31 */
32public final class URI implements Comparable<URI>, Serializable {
33
34    private static final long serialVersionUID = -6052424284110960213l;
35
36    static final String UNRESERVED = "_-!.~\'()*";
37    static final String PUNCTUATION = ",;:$&+=";
38
39    static final UriCodec USER_INFO_ENCODER = new PartEncoder("");
40    static final UriCodec PATH_ENCODER = new PartEncoder("/@");
41    static final UriCodec AUTHORITY_ENCODER = new PartEncoder("@[]");
42
43    /** for java.net.URL, which foolishly combines these two parts */
44    static final UriCodec FILE_AND_QUERY_ENCODER = new PartEncoder("/@?");
45
46    /** for query, fragment, and scheme-specific part */
47    static final UriCodec ALL_LEGAL_ENCODER = new PartEncoder("?/[]@");
48
49    /** Retains all ASCII chars including delimiters. */
50    private static final UriCodec ASCII_ONLY = new UriCodec() {
51        @Override protected boolean isRetained(char c) {
52            return c <= 127;
53        }
54    };
55
56    /**
57     * Encodes the unescaped characters of {@code s} that are not permitted.
58     * Permitted characters are:
59     * <ul>
60     *   <li>Unreserved characters in <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>.
61     *   <li>{@code extraOkayChars},
62     *   <li>non-ASCII, non-control, non-whitespace characters
63     * </ul>
64     */
65    private static class PartEncoder extends UriCodec {
66        private final String extraLegalCharacters;
67
68        PartEncoder(String extraLegalCharacters) {
69            this.extraLegalCharacters = extraLegalCharacters;
70        }
71
72        @Override protected boolean isRetained(char c) {
73            return UNRESERVED.indexOf(c) != -1
74                    || PUNCTUATION.indexOf(c) != -1
75                    || extraLegalCharacters.indexOf(c) != -1
76                    || (c > 127 && !Character.isSpaceChar(c) && !Character.isISOControl(c));
77        }
78    }
79
80    private String string;
81    private transient String scheme;
82    private transient String schemeSpecificPart;
83    private transient String authority;
84    private transient String userInfo;
85    private transient String host;
86    private transient int port = -1;
87    private transient String path;
88    private transient String query;
89    private transient String fragment;
90    private transient boolean opaque;
91    private transient boolean absolute;
92    private transient boolean serverAuthority = false;
93
94    private transient int hash = -1;
95
96    private URI() {}
97
98    /**
99     * Creates a new URI instance according to the given string {@code uri}.
100     *
101     * @param uri
102     *            the textual URI representation to be parsed into a URI object.
103     * @throws URISyntaxException
104     *         if the given {@code uri} isn't an
105     *         <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a> URI
106     *         or could not be parsed correctly.
107     */
108    public URI(String uri) throws URISyntaxException {
109        parseURI(uri, false);
110    }
111
112    /**
113     * Creates a new URI instance using the given arguments. This constructor
114     * first creates a temporary URI string from the given components. This
115     * string will be parsed later on to create the URI instance.
116     * <p>
117     * {@code [scheme:]scheme-specific-part[#fragment]}
118     *
119     * @param scheme
120     *            the scheme part of the URI.
121     * @param ssp
122     *            the scheme-specific-part of the URI.
123     * @param frag
124     *            the fragment part of the URI.
125     * @throws URISyntaxException
126     *         if the resulting URI isn't an
127     *         <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a> URI
128     *         or could not be parsed correctly.
129     */
130    public URI(String scheme, String ssp, String frag) throws URISyntaxException {
131        StringBuilder uri = new StringBuilder();
132        if (scheme != null) {
133            uri.append(scheme);
134            uri.append(':');
135        }
136        if (ssp != null) {
137            ALL_LEGAL_ENCODER.appendEncoded(uri, ssp);
138        }
139        if (frag != null) {
140            uri.append('#');
141            ALL_LEGAL_ENCODER.appendEncoded(uri, frag);
142        }
143
144        parseURI(uri.toString(), false);
145    }
146
147    /**
148     * Creates a new URI instance using the given arguments. This constructor
149     * first creates a temporary URI string from the given components. This
150     * string will be parsed later on to create the URI instance.
151     * <p>
152     * {@code [scheme:][user-info@]host[:port][path][?query][#fragment]}
153     *
154     * @param scheme
155     *            the scheme part of the URI.
156     * @param userInfo
157     *            the user information of the URI for authentication and
158     *            authorization.
159     * @param host
160     *            the host name of the URI.
161     * @param port
162     *            the port number of the URI.
163     * @param path
164     *            the path to the resource on the host.
165     * @param query
166     *            the query part of the URI to specify parameters for the
167     *            resource.
168     * @param fragment
169     *            the fragment part of the URI.
170     * @throws URISyntaxException
171     *         if the resulting URI isn't an
172     *         <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a> URI
173     *         or could not be parsed correctly.
174     */
175    public URI(String scheme, String userInfo, String host, int port,
176            String path, String query, String fragment)
177            throws URISyntaxException {
178
179        if (scheme == null && userInfo == null && host == null && path == null
180                && query == null && fragment == null) {
181            this.path = "";
182            return;
183        }
184
185        if (scheme != null && path != null && path.length() > 0
186                && path.charAt(0) != '/') {
187            throw new URISyntaxException(path, "Relative path");
188        }
189
190        StringBuilder uri = new StringBuilder();
191        if (scheme != null) {
192            uri.append(scheme);
193            uri.append(':');
194        }
195
196        if (userInfo != null || host != null || port != -1) {
197            uri.append("//");
198        }
199
200        if (userInfo != null) {
201            USER_INFO_ENCODER.appendEncoded(uri, userInfo);
202            uri.append('@');
203        }
204
205        if (host != null) {
206            // check for IPv6 addresses that hasn't been enclosed
207            // in square brackets
208            if (host.indexOf(':') != -1 && host.indexOf(']') == -1
209                    && host.indexOf('[') == -1) {
210                host = "[" + host + "]";
211            }
212            uri.append(host);
213        }
214
215        if (port != -1) {
216            uri.append(':');
217            uri.append(port);
218        }
219
220        if (path != null) {
221            PATH_ENCODER.appendEncoded(uri, path);
222        }
223
224        if (query != null) {
225            uri.append('?');
226            ALL_LEGAL_ENCODER.appendEncoded(uri, query);
227        }
228
229        if (fragment != null) {
230            uri.append('#');
231            ALL_LEGAL_ENCODER.appendEncoded(uri, fragment);
232        }
233
234        parseURI(uri.toString(), true);
235    }
236
237    /**
238     * Creates a new URI instance using the given arguments. This constructor
239     * first creates a temporary URI string from the given components. This
240     * string will be parsed later on to create the URI instance.
241     * <p>
242     * {@code [scheme:]host[path][#fragment]}
243     *
244     * @param scheme
245     *            the scheme part of the URI.
246     * @param host
247     *            the host name of the URI.
248     * @param path
249     *            the path to the resource on the host.
250     * @param fragment
251     *            the fragment part of the URI.
252     * @throws URISyntaxException
253     *         if the resulting URI isn't an
254     *         <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a> URI
255     *         or could not be parsed correctly.
256     */
257    public URI(String scheme, String host, String path, String fragment) throws URISyntaxException {
258        this(scheme, null, host, -1, path, null, fragment);
259    }
260
261    /**
262     * Creates a new URI instance using the given arguments. This constructor
263     * first creates a temporary URI string from the given components. This
264     * string will be parsed later on to create the URI instance.
265     * <p>
266     * {@code [scheme:][//authority][path][?query][#fragment]}
267     *
268     * @param scheme
269     *            the scheme part of the URI.
270     * @param authority
271     *            the authority part of the URI.
272     * @param path
273     *            the path to the resource on the host.
274     * @param query
275     *            the query part of the URI to specify parameters for the
276     *            resource.
277     * @param fragment
278     *            the fragment part of the URI.
279     * @throws URISyntaxException
280     *         if the resulting URI isn't an
281     *         <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a> URI
282     *         or could not be parsed correctly.
283     */
284    public URI(String scheme, String authority, String path, String query,
285            String fragment) throws URISyntaxException {
286        if (scheme != null && path != null && path.length() > 0
287                && path.charAt(0) != '/') {
288            throw new URISyntaxException(path, "Relative path");
289        }
290
291        StringBuilder uri = new StringBuilder();
292        if (scheme != null) {
293            uri.append(scheme);
294            uri.append(':');
295        }
296        if (authority != null) {
297            uri.append("//");
298            AUTHORITY_ENCODER.appendEncoded(uri, authority);
299        }
300
301        if (path != null) {
302            PATH_ENCODER.appendEncoded(uri, path);
303        }
304        if (query != null) {
305            uri.append('?');
306            ALL_LEGAL_ENCODER.appendEncoded(uri, query);
307        }
308        if (fragment != null) {
309            uri.append('#');
310            ALL_LEGAL_ENCODER.appendEncoded(uri, fragment);
311        }
312
313        parseURI(uri.toString(), false);
314    }
315
316    private void parseURI(String uri, boolean forceServer) throws URISyntaxException {
317        String temp = uri;
318        // assign uri string to the input value per spec
319        string = uri;
320        int index, index1, index2, index3;
321        // parse into Fragment, Scheme, and SchemeSpecificPart
322        // then parse SchemeSpecificPart if necessary
323
324        // Fragment
325        index = temp.indexOf('#');
326        if (index != -1) {
327            // remove the fragment from the end
328            fragment = temp.substring(index + 1);
329            validateFragment(uri, fragment, index + 1);
330            temp = temp.substring(0, index);
331        }
332
333        // Scheme and SchemeSpecificPart
334        index = index1 = temp.indexOf(':');
335        index2 = temp.indexOf('/');
336        index3 = temp.indexOf('?');
337
338        // if a '/' or '?' occurs before the first ':' the uri has no
339        // specified scheme, and is therefore not absolute
340        if (index != -1 && (index2 >= index || index2 == -1)
341                && (index3 >= index || index3 == -1)) {
342            // the characters up to the first ':' comprise the scheme
343            absolute = true;
344            scheme = temp.substring(0, index);
345            if (scheme.length() == 0) {
346                throw new URISyntaxException(uri, "Scheme expected", index);
347            }
348            validateScheme(uri, scheme, 0);
349            schemeSpecificPart = temp.substring(index + 1);
350            if (schemeSpecificPart.length() == 0) {
351                throw new URISyntaxException(uri, "Scheme-specific part expected", index + 1);
352            }
353        } else {
354            absolute = false;
355            schemeSpecificPart = temp;
356        }
357
358        if (scheme == null || schemeSpecificPart.length() > 0
359                && schemeSpecificPart.charAt(0) == '/') {
360            opaque = false;
361            // the URI is hierarchical
362
363            // Query
364            temp = schemeSpecificPart;
365            index = temp.indexOf('?');
366            if (index != -1) {
367                query = temp.substring(index + 1);
368                temp = temp.substring(0, index);
369                validateQuery(uri, query, index2 + 1 + index);
370            }
371
372            // Authority and Path
373            if (temp.startsWith("//")) {
374                index = temp.indexOf('/', 2);
375                if (index != -1) {
376                    authority = temp.substring(2, index);
377                    path = temp.substring(index);
378                } else {
379                    authority = temp.substring(2);
380                    if (authority.length() == 0 && query == null
381                            && fragment == null) {
382                        throw new URISyntaxException(uri, "Authority expected", uri.length());
383                    }
384
385                    path = "";
386                    // nothing left, so path is empty (not null, path should
387                    // never be null)
388                }
389
390                if (authority.length() == 0) {
391                    authority = null;
392                } else {
393                    validateAuthority(uri, authority, index1 + 3);
394                }
395            } else { // no authority specified
396                path = temp;
397            }
398
399            int pathIndex = 0;
400            if (index2 > -1) {
401                pathIndex += index2;
402            }
403            if (index > -1) {
404                pathIndex += index;
405            }
406            validatePath(uri, path, pathIndex);
407        } else { // if not hierarchical, URI is opaque
408            opaque = true;
409            validateSsp(uri, schemeSpecificPart, index2 + 2 + index);
410        }
411
412        parseAuthority(forceServer);
413    }
414
415    private void validateScheme(String uri, String scheme, int index)
416            throws URISyntaxException {
417        // first char needs to be an alpha char
418        char ch = scheme.charAt(0);
419        if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'))) {
420            throw new URISyntaxException(uri, "Illegal character in scheme", 0);
421        }
422
423        try {
424            UriCodec.validateSimple(scheme, "+-.");
425        } catch (URISyntaxException e) {
426            throw new URISyntaxException(uri, "Illegal character in scheme", index + e.getIndex());
427        }
428    }
429
430    private void validateSsp(String uri, String ssp, int index)
431            throws URISyntaxException {
432        try {
433            ALL_LEGAL_ENCODER.validate(ssp);
434        } catch (URISyntaxException e) {
435            throw new URISyntaxException(uri,
436                    e.getReason() + " in schemeSpecificPart", index + e.getIndex());
437        }
438    }
439
440    private void validateAuthority(String uri, String authority, int index)
441            throws URISyntaxException {
442        try {
443            AUTHORITY_ENCODER.validate(authority);
444        } catch (URISyntaxException e) {
445            throw new URISyntaxException(uri, e.getReason() + " in authority", index + e.getIndex());
446        }
447    }
448
449    private void validatePath(String uri, String path, int index)
450            throws URISyntaxException {
451        try {
452            PATH_ENCODER.validate(path);
453        } catch (URISyntaxException e) {
454            throw new URISyntaxException(uri, e.getReason() + " in path", index + e.getIndex());
455        }
456    }
457
458    private void validateQuery(String uri, String query, int index)
459            throws URISyntaxException {
460        try {
461            ALL_LEGAL_ENCODER.validate(query);
462        } catch (URISyntaxException e) {
463            throw new URISyntaxException(uri, e.getReason() + " in query", index + e.getIndex());
464
465        }
466    }
467
468    private void validateFragment(String uri, String fragment, int index)
469            throws URISyntaxException {
470        try {
471            ALL_LEGAL_ENCODER.validate(fragment);
472        } catch (URISyntaxException e) {
473            throw new URISyntaxException(uri, e.getReason() + " in fragment", index + e.getIndex());
474        }
475    }
476
477    /**
478     * Parse the authority string into its component parts: user info,
479     * host, and port. This operation doesn't apply to registry URIs, and
480     * calling it on such <i>may</i> result in a syntax exception.
481     *
482     * @param forceServer true to always throw if the authority cannot be
483     *     parsed. If false, this method may still throw for some kinds of
484     *     errors; this unpredictable behavior is consistent with the RI.
485     */
486    private void parseAuthority(boolean forceServer) throws URISyntaxException {
487        if (authority == null) {
488            return;
489        }
490
491        String tempUserInfo = null;
492        String temp = authority;
493        int index = temp.indexOf('@');
494        int hostIndex = 0;
495        if (index != -1) {
496            // remove user info
497            tempUserInfo = temp.substring(0, index);
498            validateUserInfo(authority, tempUserInfo, 0);
499            temp = temp.substring(index + 1); // host[:port] is left
500            hostIndex = index + 1;
501        }
502
503        index = temp.lastIndexOf(':');
504        int endIndex = temp.indexOf(']');
505
506        String tempHost;
507        int tempPort = -1;
508        if (index != -1 && endIndex < index) {
509            // determine port and host
510            tempHost = temp.substring(0, index);
511
512            if (index < (temp.length() - 1)) { // port part is not empty
513                try {
514                    tempPort = Integer.parseInt(temp.substring(index + 1));
515                    if (tempPort < 0) {
516                        if (forceServer) {
517                            throw new URISyntaxException(authority,
518                                    "Invalid port number", hostIndex + index + 1);
519                        }
520                        return;
521                    }
522                } catch (NumberFormatException e) {
523                    if (forceServer) {
524                        throw new URISyntaxException(authority,
525                                "Invalid port number", hostIndex + index + 1);
526                    }
527                    return;
528                }
529            }
530        } else {
531            tempHost = temp;
532        }
533
534        if (tempHost.isEmpty()) {
535            if (forceServer) {
536                throw new URISyntaxException(authority, "Expected host", hostIndex);
537            }
538            return;
539        }
540
541        if (!isValidHost(forceServer, tempHost)) {
542            return;
543        }
544
545        // this is a server based uri,
546        // fill in the userInfo, host and port fields
547        userInfo = tempUserInfo;
548        host = tempHost;
549        port = tempPort;
550        serverAuthority = true;
551    }
552
553    private void validateUserInfo(String uri, String userInfo, int index)
554            throws URISyntaxException {
555        for (int i = 0; i < userInfo.length(); i++) {
556            char ch = userInfo.charAt(i);
557            if (ch == ']' || ch == '[') {
558                throw new URISyntaxException(uri, "Illegal character in userInfo", index + i);
559            }
560        }
561    }
562
563    /**
564     * Returns true if {@code host} is a well-formed host name or IP address.
565     *
566     * @param forceServer true to always throw if the host cannot be parsed. If
567     *     false, this method may still throw for some kinds of errors; this
568     *     unpredictable behavior is consistent with the RI.
569     */
570    private boolean isValidHost(boolean forceServer, String host) throws URISyntaxException {
571        if (host.startsWith("[")) {
572            // IPv6 address
573            if (!host.endsWith("]")) {
574                throw new URISyntaxException(host,
575                        "Expected a closing square bracket for IPv6 address", 0);
576            }
577            if (InetAddress.isNumeric(host)) {
578                // If it's numeric, the presence of square brackets guarantees
579                // that it's a numeric IPv6 address.
580                return true;
581            }
582            throw new URISyntaxException(host, "Malformed IPv6 address");
583        }
584
585        // '[' and ']' can only be the first char and last char
586        // of the host name
587        if (host.indexOf('[') != -1 || host.indexOf(']') != -1) {
588            throw new URISyntaxException(host, "Illegal character in host name", 0);
589        }
590
591        int index = host.lastIndexOf('.');
592        if (index < 0 || index == host.length() - 1
593                || !Character.isDigit(host.charAt(index + 1))) {
594            // domain name
595            if (isValidDomainName(host)) {
596                return true;
597            }
598            if (forceServer) {
599                throw new URISyntaxException(host, "Illegal character in host name", 0);
600            }
601            return false;
602        }
603
604        // IPv4 address?
605        try {
606            InetAddress ia = InetAddress.parseNumericAddress(host);
607            if (ia instanceof Inet4Address) {
608                return true;
609            }
610        } catch (IllegalArgumentException ex) {
611        }
612
613        if (forceServer) {
614            throw new URISyntaxException(host, "Malformed IPv4 address", 0);
615        }
616        return false;
617    }
618
619    private boolean isValidDomainName(String host) {
620        try {
621            UriCodec.validateSimple(host, "-.");
622        } catch (URISyntaxException e) {
623            return false;
624        }
625
626        String lastLabel = null;
627        for (String token : host.split("\\.")) {
628            lastLabel = token;
629            if (lastLabel.startsWith("-") || lastLabel.endsWith("-")) {
630                return false;
631            }
632        }
633
634        if (lastLabel == null) {
635            return false;
636        }
637
638        if (!lastLabel.equals(host)) {
639            char ch = lastLabel.charAt(0);
640            if (ch >= '0' && ch <= '9') {
641                return false;
642            }
643        }
644        return true;
645    }
646
647    /**
648     * Compares this URI with the given argument {@code uri}. This method will
649     * return a negative value if this URI instance is less than the given
650     * argument and a positive value if this URI instance is greater than the
651     * given argument. The return value {@code 0} indicates that the two
652     * instances represent the same URI. To define the order the single parts of
653     * the URI are compared with each other. String components will be ordered
654     * in the natural case-sensitive way. A hierarchical URI is less than an
655     * opaque URI and if one part is {@code null} the URI with the undefined
656     * part is less than the other one.
657     *
658     * @param uri
659     *            the URI this instance has to compare with.
660     * @return the value representing the order of the two instances.
661     */
662    public int compareTo(URI uri) {
663        int ret;
664
665        // compare schemes
666        if (scheme == null && uri.scheme != null) {
667            return -1;
668        } else if (scheme != null && uri.scheme == null) {
669            return 1;
670        } else if (scheme != null && uri.scheme != null) {
671            ret = scheme.compareToIgnoreCase(uri.scheme);
672            if (ret != 0) {
673                return ret;
674            }
675        }
676
677        // compare opacities
678        if (!opaque && uri.opaque) {
679            return -1;
680        } else if (opaque && !uri.opaque) {
681            return 1;
682        } else if (opaque && uri.opaque) {
683            ret = schemeSpecificPart.compareTo(uri.schemeSpecificPart);
684            if (ret != 0) {
685                return ret;
686            }
687        } else {
688
689            // otherwise both must be hierarchical
690
691            // compare authorities
692            if (authority != null && uri.authority == null) {
693                return 1;
694            } else if (authority == null && uri.authority != null) {
695                return -1;
696            } else if (authority != null && uri.authority != null) {
697                if (host != null && uri.host != null) {
698                    // both are server based, so compare userInfo, host, port
699                    if (userInfo != null && uri.userInfo == null) {
700                        return 1;
701                    } else if (userInfo == null && uri.userInfo != null) {
702                        return -1;
703                    } else if (userInfo != null && uri.userInfo != null) {
704                        ret = userInfo.compareTo(uri.userInfo);
705                        if (ret != 0) {
706                            return ret;
707                        }
708                    }
709
710                    // userInfo's are the same, compare hostname
711                    ret = host.compareToIgnoreCase(uri.host);
712                    if (ret != 0) {
713                        return ret;
714                    }
715
716                    // compare port
717                    if (port != uri.port) {
718                        return port - uri.port;
719                    }
720                } else { // one or both are registry based, compare the whole
721                    // authority
722                    ret = authority.compareTo(uri.authority);
723                    if (ret != 0) {
724                        return ret;
725                    }
726                }
727            }
728
729            // authorities are the same
730            // compare paths
731            ret = path.compareTo(uri.path);
732            if (ret != 0) {
733                return ret;
734            }
735
736            // compare queries
737
738            if (query != null && uri.query == null) {
739                return 1;
740            } else if (query == null && uri.query != null) {
741                return -1;
742            } else if (query != null && uri.query != null) {
743                ret = query.compareTo(uri.query);
744                if (ret != 0) {
745                    return ret;
746                }
747            }
748        }
749
750        // everything else is identical, so compare fragments
751        if (fragment != null && uri.fragment == null) {
752            return 1;
753        } else if (fragment == null && uri.fragment != null) {
754            return -1;
755        } else if (fragment != null && uri.fragment != null) {
756            ret = fragment.compareTo(uri.fragment);
757            if (ret != 0) {
758                return ret;
759            }
760        }
761
762        // identical
763        return 0;
764    }
765
766    /**
767     * Returns the URI formed by parsing {@code uri}. This method behaves
768     * identically to the string constructor but throws a different exception
769     * on failure. The constructor fails with a checked {@link
770     * URISyntaxException}; this method fails with an unchecked {@link
771     * IllegalArgumentException}.
772     */
773    public static URI create(String uri) {
774        try {
775            return new URI(uri);
776        } catch (URISyntaxException e) {
777            throw new IllegalArgumentException(e.getMessage());
778        }
779    }
780
781    private URI duplicate() {
782        URI clone = new URI();
783        clone.absolute = absolute;
784        clone.authority = authority;
785        clone.fragment = fragment;
786        clone.host = host;
787        clone.opaque = opaque;
788        clone.path = path;
789        clone.port = port;
790        clone.query = query;
791        clone.scheme = scheme;
792        clone.schemeSpecificPart = schemeSpecificPart;
793        clone.userInfo = userInfo;
794        clone.serverAuthority = serverAuthority;
795        return clone;
796    }
797
798    /*
799     * Takes a string that may contain hex sequences like %F1 or %2b and
800     * converts the hex values following the '%' to lowercase
801     */
802    private String convertHexToLowerCase(String s) {
803        StringBuilder result = new StringBuilder("");
804        if (s.indexOf('%') == -1) {
805            return s;
806        }
807
808        int index, prevIndex = 0;
809        while ((index = s.indexOf('%', prevIndex)) != -1) {
810            result.append(s.substring(prevIndex, index + 1));
811            result.append(s.substring(index + 1, index + 3).toLowerCase(Locale.US));
812            index += 3;
813            prevIndex = index;
814        }
815        return result.toString();
816    }
817
818    /**
819     * Returns true if {@code first} and {@code second} are equal after
820     * unescaping hex sequences like %F1 and %2b.
821     */
822    private boolean escapedEquals(String first, String second) {
823        if (first.indexOf('%') != second.indexOf('%')) {
824            return first.equals(second);
825        }
826
827        int index, prevIndex = 0;
828        while ((index = first.indexOf('%', prevIndex)) != -1
829                && second.indexOf('%', prevIndex) == index) {
830            boolean match = first.substring(prevIndex, index).equals(
831                    second.substring(prevIndex, index));
832            if (!match) {
833                return false;
834            }
835
836            match = first.substring(index + 1, index + 3).equalsIgnoreCase(
837                    second.substring(index + 1, index + 3));
838            if (!match) {
839                return false;
840            }
841
842            index += 3;
843            prevIndex = index;
844        }
845        return first.substring(prevIndex).equals(second.substring(prevIndex));
846    }
847
848    /**
849     * Compares this URI instance with the given argument {@code o} and
850     * determines if both are equal. Two URI instances are equal if all single
851     * parts are identical in their meaning.
852     *
853     * @param o
854     *            the URI this instance has to be compared with.
855     * @return {@code true} if both URI instances point to the same resource,
856     *         {@code false} otherwise.
857     */
858    @Override
859    public boolean equals(Object o) {
860        if (!(o instanceof URI)) {
861            return false;
862        }
863        URI uri = (URI) o;
864
865        if (uri.fragment == null && fragment != null || uri.fragment != null
866                && fragment == null) {
867            return false;
868        } else if (uri.fragment != null && fragment != null) {
869            if (!escapedEquals(uri.fragment, fragment)) {
870                return false;
871            }
872        }
873
874        if (uri.scheme == null && scheme != null || uri.scheme != null
875                && scheme == null) {
876            return false;
877        } else if (uri.scheme != null && scheme != null) {
878            if (!uri.scheme.equalsIgnoreCase(scheme)) {
879                return false;
880            }
881        }
882
883        if (uri.opaque && opaque) {
884            return escapedEquals(uri.schemeSpecificPart,
885                    schemeSpecificPart);
886        } else if (!uri.opaque && !opaque) {
887            if (!escapedEquals(path, uri.path)) {
888                return false;
889            }
890
891            if (uri.query != null && query == null || uri.query == null
892                    && query != null) {
893                return false;
894            } else if (uri.query != null && query != null) {
895                if (!escapedEquals(uri.query, query)) {
896                    return false;
897                }
898            }
899
900            if (uri.authority != null && authority == null
901                    || uri.authority == null && authority != null) {
902                return false;
903            } else if (uri.authority != null && authority != null) {
904                if (uri.host != null && host == null || uri.host == null
905                        && host != null) {
906                    return false;
907                } else if (uri.host == null && host == null) {
908                    // both are registry based, so compare the whole authority
909                    return escapedEquals(uri.authority, authority);
910                } else { // uri.host != null && host != null, so server-based
911                    if (!host.equalsIgnoreCase(uri.host)) {
912                        return false;
913                    }
914
915                    if (port != uri.port) {
916                        return false;
917                    }
918
919                    if (uri.userInfo != null && userInfo == null
920                            || uri.userInfo == null && userInfo != null) {
921                        return false;
922                    } else if (uri.userInfo != null && userInfo != null) {
923                        return escapedEquals(userInfo, uri.userInfo);
924                    } else {
925                        return true;
926                    }
927                }
928            } else {
929                // no authority
930                return true;
931            }
932
933        } else {
934            // one is opaque, the other hierarchical
935            return false;
936        }
937    }
938
939    /**
940     * Gets the decoded authority part of this URI.
941     *
942     * @return the decoded authority part or {@code null} if undefined.
943     */
944    public String getAuthority() {
945        return decode(authority);
946    }
947
948    /**
949     * Gets the decoded fragment part of this URI.
950     *
951     * @return the decoded fragment part or {@code null} if undefined.
952     */
953    public String getFragment() {
954        return decode(fragment);
955    }
956
957    /**
958     * Gets the host part of this URI.
959     *
960     * @return the host part or {@code null} if undefined.
961     */
962    public String getHost() {
963        return host;
964    }
965
966    /**
967     * Gets the decoded path part of this URI.
968     *
969     * @return the decoded path part or {@code null} if undefined.
970     */
971    public String getPath() {
972        return decode(path);
973    }
974
975    /**
976     * Gets the port number of this URI.
977     *
978     * @return the port number or {@code -1} if undefined.
979     */
980    public int getPort() {
981        return port;
982    }
983
984    /** @hide */
985    public int getEffectivePort() {
986        return getEffectivePort(scheme, port);
987    }
988
989    /**
990     * Returns the port to use for {@code scheme} connections will use when
991     * {@link #getPort} returns {@code specifiedPort}.
992     *
993     * @hide
994     */
995    public static int getEffectivePort(String scheme, int specifiedPort) {
996        if (specifiedPort != -1) {
997            return specifiedPort;
998        }
999
1000        if ("http".equalsIgnoreCase(scheme)) {
1001            return 80;
1002        } else if ("https".equalsIgnoreCase(scheme)) {
1003            return 443;
1004        } else {
1005            return -1;
1006        }
1007    }
1008
1009    /**
1010     * Gets the decoded query part of this URI.
1011     *
1012     * @return the decoded query part or {@code null} if undefined.
1013     */
1014    public String getQuery() {
1015        return decode(query);
1016    }
1017
1018    /**
1019     * Gets the authority part of this URI in raw form.
1020     *
1021     * @return the encoded authority part or {@code null} if undefined.
1022     */
1023    public String getRawAuthority() {
1024        return authority;
1025    }
1026
1027    /**
1028     * Gets the fragment part of this URI in raw form.
1029     *
1030     * @return the encoded fragment part or {@code null} if undefined.
1031     */
1032    public String getRawFragment() {
1033        return fragment;
1034    }
1035
1036    /**
1037     * Gets the path part of this URI in raw form.
1038     *
1039     * @return the encoded path part or {@code null} if undefined.
1040     */
1041    public String getRawPath() {
1042        return path;
1043    }
1044
1045    /**
1046     * Gets the query part of this URI in raw form.
1047     *
1048     * @return the encoded query part or {@code null} if undefined.
1049     */
1050    public String getRawQuery() {
1051        return query;
1052    }
1053
1054    /**
1055     * Gets the scheme-specific part of this URI in raw form.
1056     *
1057     * @return the encoded scheme-specific part or {@code null} if undefined.
1058     */
1059    public String getRawSchemeSpecificPart() {
1060        return schemeSpecificPart;
1061    }
1062
1063    /**
1064     * Gets the user-info part of this URI in raw form.
1065     *
1066     * @return the encoded user-info part or {@code null} if undefined.
1067     */
1068    public String getRawUserInfo() {
1069        return userInfo;
1070    }
1071
1072    /**
1073     * Gets the scheme part of this URI.
1074     *
1075     * @return the scheme part or {@code null} if undefined.
1076     */
1077    public String getScheme() {
1078        return scheme;
1079    }
1080
1081    /**
1082     * Gets the decoded scheme-specific part of this URI.
1083     *
1084     * @return the decoded scheme-specific part or {@code null} if undefined.
1085     */
1086    public String getSchemeSpecificPart() {
1087        return decode(schemeSpecificPart);
1088    }
1089
1090    /**
1091     * Gets the decoded user-info part of this URI.
1092     *
1093     * @return the decoded user-info part or {@code null} if undefined.
1094     */
1095    public String getUserInfo() {
1096        return decode(userInfo);
1097    }
1098
1099    /**
1100     * Gets the hashcode value of this URI instance.
1101     *
1102     * @return the appropriate hashcode value.
1103     */
1104    @Override
1105    public int hashCode() {
1106        if (hash == -1) {
1107            hash = getHashString().hashCode();
1108        }
1109        return hash;
1110    }
1111
1112    /**
1113     * Indicates whether this URI is absolute, which means that a scheme part is
1114     * defined in this URI.
1115     *
1116     * @return {@code true} if this URI is absolute, {@code false} otherwise.
1117     */
1118    public boolean isAbsolute() {
1119        return absolute;
1120    }
1121
1122    /**
1123     * Indicates whether this URI is opaque or not. An opaque URI is absolute
1124     * and has a scheme-specific part which does not start with a slash
1125     * character. All parts except scheme, scheme-specific and fragment are
1126     * undefined.
1127     *
1128     * @return {@code true} if the URI is opaque, {@code false} otherwise.
1129     */
1130    public boolean isOpaque() {
1131        return opaque;
1132    }
1133
1134    /**
1135     * Returns the normalized path.
1136     */
1137    private String normalize(String path, boolean discardRelativePrefix) {
1138        path = UrlUtils.canonicalizePath(path, discardRelativePrefix);
1139
1140        /*
1141         * If the path contains a colon before the first colon, prepend
1142         * "./" to differentiate the path from a scheme prefix.
1143         */
1144        int colon = path.indexOf(':');
1145        if (colon != -1) {
1146            int slash = path.indexOf('/');
1147            if (slash == -1 || colon < slash) {
1148                path = "./" + path;
1149            }
1150        }
1151
1152        return path;
1153    }
1154
1155    /**
1156     * Normalizes the path part of this URI.
1157     *
1158     * @return an URI object which represents this instance with a normalized
1159     *         path.
1160     */
1161    public URI normalize() {
1162        if (opaque) {
1163            return this;
1164        }
1165        String normalizedPath = normalize(path, false);
1166        // if the path is already normalized, return this
1167        if (path.equals(normalizedPath)) {
1168            return this;
1169        }
1170        // get an exact copy of the URI re-calculate the scheme specific part
1171        // since the path of the normalized URI is different from this URI.
1172        URI result = duplicate();
1173        result.path = normalizedPath;
1174        result.setSchemeSpecificPart();
1175        return result;
1176    }
1177
1178    /**
1179     * Tries to parse the authority component of this URI to divide it into the
1180     * host, port, and user-info. If this URI is already determined as a
1181     * ServerAuthority this instance will be returned without changes.
1182     *
1183     * @return this instance with the components of the parsed server authority.
1184     * @throws URISyntaxException
1185     *             if the authority part could not be parsed as a server-based
1186     *             authority.
1187     */
1188    public URI parseServerAuthority() throws URISyntaxException {
1189        if (!serverAuthority) {
1190            parseAuthority(true);
1191        }
1192        return this;
1193    }
1194
1195    /**
1196     * Makes the given URI {@code relative} to a relative URI against the URI
1197     * represented by this instance.
1198     *
1199     * @param relative
1200     *            the URI which has to be relativized against this URI.
1201     * @return the relative URI.
1202     */
1203    public URI relativize(URI relative) {
1204        if (relative.opaque || opaque) {
1205            return relative;
1206        }
1207
1208        if (scheme == null ? relative.scheme != null : !scheme
1209                .equals(relative.scheme)) {
1210            return relative;
1211        }
1212
1213        if (authority == null ? relative.authority != null : !authority
1214                .equals(relative.authority)) {
1215            return relative;
1216        }
1217
1218        // normalize both paths
1219        String thisPath = normalize(path, false);
1220        String relativePath = normalize(relative.path, false);
1221
1222        /*
1223         * if the paths aren't equal, then we need to determine if this URI's
1224         * path is a parent path (begins with) the relative URI's path
1225         */
1226        if (!thisPath.equals(relativePath)) {
1227            // drop everything after the last slash in this path
1228            thisPath = thisPath.substring(0, thisPath.lastIndexOf('/') + 1);
1229
1230            /*
1231             * if the relative URI's path doesn't start with this URI's path,
1232             * then just return the relative URI; the URIs have nothing in
1233             * common
1234             */
1235            if (!relativePath.startsWith(thisPath)) {
1236                return relative;
1237            }
1238        }
1239
1240        URI result = new URI();
1241        result.fragment = relative.fragment;
1242        result.query = relative.query;
1243        // the result URI is the remainder of the relative URI's path
1244        result.path = relativePath.substring(thisPath.length());
1245        result.setSchemeSpecificPart();
1246        return result;
1247    }
1248
1249    /**
1250     * Resolves the given URI {@code relative} against the URI represented by
1251     * this instance.
1252     *
1253     * @param relative
1254     *            the URI which has to be resolved against this URI.
1255     * @return the resolved URI.
1256     */
1257    public URI resolve(URI relative) {
1258        if (relative.absolute || opaque) {
1259            return relative;
1260        }
1261
1262        if (relative.authority != null) {
1263            // If the relative URI has an authority, the result is the relative
1264            // with this URI's scheme.
1265            URI result = relative.duplicate();
1266            result.scheme = scheme;
1267            result.absolute = absolute;
1268            return result;
1269        }
1270
1271        if (relative.path.isEmpty() && relative.scheme == null && relative.query == null) {
1272            // if the relative URI only consists of at most a fragment,
1273            URI result = duplicate();
1274            result.fragment = relative.fragment;
1275            return result;
1276        }
1277
1278        URI result = duplicate();
1279        result.fragment = relative.fragment;
1280        result.query = relative.query;
1281        String resolvedPath;
1282        if (relative.path.startsWith("/")) {
1283            // The relative URI has an absolute path; use it.
1284            resolvedPath = relative.path;
1285        } else if (relative.path.isEmpty()) {
1286            // The relative URI has no path; use the base path.
1287            resolvedPath = path;
1288        } else {
1289            // The relative URI has a relative path; combine the paths.
1290            int endIndex = path.lastIndexOf('/') + 1;
1291            resolvedPath = path.substring(0, endIndex) + relative.path;
1292        }
1293        result.path = UrlUtils.authoritySafePath(result.authority, normalize(resolvedPath, true));
1294        result.setSchemeSpecificPart();
1295        return result;
1296    }
1297
1298    /**
1299     * Helper method used to re-calculate the scheme specific part of the
1300     * resolved or normalized URIs
1301     */
1302    private void setSchemeSpecificPart() {
1303        // ssp = [//authority][path][?query]
1304        StringBuilder ssp = new StringBuilder();
1305        if (authority != null) {
1306            ssp.append("//" + authority);
1307        }
1308        if (path != null) {
1309            ssp.append(path);
1310        }
1311        if (query != null) {
1312            ssp.append("?" + query);
1313        }
1314        schemeSpecificPart = ssp.toString();
1315        // reset string, so that it can be re-calculated correctly when asked.
1316        string = null;
1317    }
1318
1319    /**
1320     * Creates a new URI instance by parsing the given string {@code relative}
1321     * and resolves the created URI against the URI represented by this
1322     * instance.
1323     *
1324     * @param relative
1325     *            the given string to create the new URI instance which has to
1326     *            be resolved later on.
1327     * @return the created and resolved URI.
1328     */
1329    public URI resolve(String relative) {
1330        return resolve(create(relative));
1331    }
1332
1333    private String decode(String s) {
1334        return s != null ? UriCodec.decode(s) : null;
1335    }
1336
1337    /**
1338     * Returns the textual string representation of this URI instance using the
1339     * US-ASCII encoding.
1340     *
1341     * @return the US-ASCII string representation of this URI.
1342     */
1343    public String toASCIIString() {
1344        StringBuilder result = new StringBuilder();
1345        ASCII_ONLY.appendEncoded(result, toString());
1346        return result.toString();
1347    }
1348
1349    /**
1350     * Returns the textual string representation of this URI instance.
1351     *
1352     * @return the textual string representation of this URI.
1353     */
1354    @Override
1355    public String toString() {
1356        if (string == null) {
1357            StringBuilder result = new StringBuilder();
1358            if (scheme != null) {
1359                result.append(scheme);
1360                result.append(':');
1361            }
1362            if (opaque) {
1363                result.append(schemeSpecificPart);
1364            } else {
1365                if (authority != null) {
1366                    result.append("//");
1367                    result.append(authority);
1368                }
1369
1370                if (path != null) {
1371                    result.append(path);
1372                }
1373
1374                if (query != null) {
1375                    result.append('?');
1376                    result.append(query);
1377                }
1378            }
1379
1380            if (fragment != null) {
1381                result.append('#');
1382                result.append(fragment);
1383            }
1384
1385            string = result.toString();
1386        }
1387        return string;
1388    }
1389
1390    /*
1391     * Form a string from the components of this URI, similarly to the
1392     * toString() method. But this method converts scheme and host to lowercase,
1393     * and converts escaped octets to lowercase.
1394     */
1395    private String getHashString() {
1396        StringBuilder result = new StringBuilder();
1397        if (scheme != null) {
1398            result.append(scheme.toLowerCase(Locale.US));
1399            result.append(':');
1400        }
1401        if (opaque) {
1402            result.append(schemeSpecificPart);
1403        } else {
1404            if (authority != null) {
1405                result.append("//");
1406                if (host == null) {
1407                    result.append(authority);
1408                } else {
1409                    if (userInfo != null) {
1410                        result.append(userInfo + "@");
1411                    }
1412                    result.append(host.toLowerCase(Locale.US));
1413                    if (port != -1) {
1414                        result.append(":" + port);
1415                    }
1416                }
1417            }
1418
1419            if (path != null) {
1420                result.append(path);
1421            }
1422
1423            if (query != null) {
1424                result.append('?');
1425                result.append(query);
1426            }
1427        }
1428
1429        if (fragment != null) {
1430            result.append('#');
1431            result.append(fragment);
1432        }
1433
1434        return convertHexToLowerCase(result.toString());
1435    }
1436
1437    /**
1438     * Converts this URI instance to a URL.
1439     *
1440     * @return the created URL representing the same resource as this URI.
1441     * @throws MalformedURLException
1442     *             if an error occurs while creating the URL or no protocol
1443     *             handler could be found.
1444     */
1445    public URL toURL() throws MalformedURLException {
1446        if (!absolute) {
1447            throw new IllegalArgumentException("URI is not absolute: " + toString());
1448        }
1449        return new URL(toString());
1450    }
1451
1452    private void readObject(ObjectInputStream in) throws IOException,
1453            ClassNotFoundException {
1454        in.defaultReadObject();
1455        try {
1456            parseURI(string, false);
1457        } catch (URISyntaxException e) {
1458            throw new IOException(e.toString());
1459        }
1460    }
1461
1462    private void writeObject(ObjectOutputStream out) throws IOException,
1463            ClassNotFoundException {
1464        // call toString() to ensure the value of string field is calculated
1465        toString();
1466        out.defaultWriteObject();
1467    }
1468}
1469