URI.java revision 56099d23fcb002b164bff8fb7f14d6ec0453509e
1/*
2 *  Licensed to the Apache Software Foundation (ASF) under one or more
3 *  contributor license agreements.  See the NOTICE file distributed with
4 *  this work for additional information regarding copyright ownership.
5 *  The ASF licenses this file to You under the Apache License, Version 2.0
6 *  (the "License"); you may not use this file except in compliance with
7 *  the License.  You may obtain a copy of the License at
8 *
9 *     http://www.apache.org/licenses/LICENSE-2.0
10 *
11 *  Unless required by applicable law or agreed to in writing, software
12 *  distributed under the License is distributed on an "AS IS" BASIS,
13 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 *  See the License for the specific language governing permissions and
15 *  limitations under the License.
16 */
17
18package java.net;
19
20import java.io.IOException;
21import java.io.ObjectInputStream;
22import java.io.ObjectOutputStream;
23import java.io.Serializable;
24import java.util.Locale;
25import libcore.net.UriCodec;
26
27/**
28 * This class represents an instance of a URI as defined by RFC 2396.
29 */
30public final class URI implements Comparable<URI>, Serializable {
31
32    private static final long serialVersionUID = -6052424284110960213l;
33
34    static final String UNRESERVED = "_-!.~\'()*";
35    static final String PUNCTUATION = ",;:$&+=";
36
37    static final UriCodec USER_INFO_ENCODER = new PartEncoder("");
38    static final UriCodec PATH_ENCODER = new PartEncoder("/@");
39    static final UriCodec AUTHORITY_ENCODER = new PartEncoder("@[]");
40
41    /** for java.net.URL, which foolishly combines these two parts */
42    static final UriCodec FILE_AND_QUERY_ENCODER = new PartEncoder("/@?");
43
44    /** for query, fragment, and scheme-specific part */
45    static final UriCodec ALL_LEGAL_ENCODER = new PartEncoder("?/[]@");
46
47    /** Retains all ASCII chars including delimiters. */
48    private static final UriCodec ASCII_ONLY = new UriCodec() {
49        @Override protected boolean isRetained(char c) {
50            return c <= 127;
51        }
52    };
53
54    /**
55     * Encodes the unescaped characters of {@code s} that are not permitted.
56     * Permitted characters are:
57     * <ul>
58     *   <li>Unreserved characters in RFC 2396.
59     *   <li>{@code extraOkayChars},
60     *   <li>non-ASCII, non-control, non-whitespace characters
61     * </ul>
62     */
63    private static class PartEncoder extends UriCodec {
64        private final String extraLegalCharacters;
65
66        PartEncoder(String extraLegalCharacters) {
67            this.extraLegalCharacters = extraLegalCharacters;
68        }
69
70        @Override protected boolean isRetained(char c) {
71            return UNRESERVED.indexOf(c) != -1
72                    || PUNCTUATION.indexOf(c) != -1
73                    || extraLegalCharacters.indexOf(c) != -1
74                    || (c > 127 && !Character.isSpaceChar(c) && !Character.isISOControl(c));
75        }
76    }
77
78    private String string;
79    private transient String scheme;
80    private transient String schemeSpecificPart;
81    private transient String authority;
82    private transient String userInfo;
83    private transient String host;
84    private transient int port = -1;
85    private transient String path;
86    private transient String query;
87    private transient String fragment;
88    private transient boolean opaque;
89    private transient boolean absolute;
90    private transient boolean serverAuthority = false;
91
92    private transient int hash = -1;
93
94    private URI() {}
95
96    /**
97     * Creates a new URI instance according to the given string {@code uri}.
98     *
99     * @param uri
100     *            the textual URI representation to be parsed into a URI object.
101     * @throws URISyntaxException
102     *             if the given string {@code uri} doesn't fit to the
103     *             specification RFC2396 or could not be parsed correctly.
104     */
105    public URI(String uri) throws URISyntaxException {
106        parseURI(uri, false);
107    }
108
109    /**
110     * Creates a new URI instance using the given arguments. This constructor
111     * first creates a temporary URI string from the given components. This
112     * string will be parsed later on to create the URI instance.
113     * <p>
114     * {@code [scheme:]scheme-specific-part[#fragment]}
115     *
116     * @param scheme
117     *            the scheme part of the URI.
118     * @param ssp
119     *            the scheme-specific-part of the URI.
120     * @param frag
121     *            the fragment part of the URI.
122     * @throws URISyntaxException
123     *             if the temporary created string doesn't fit to the
124     *             specification RFC2396 or could not be parsed correctly.
125     */
126    public URI(String scheme, String ssp, String frag)
127            throws URISyntaxException {
128        StringBuilder uri = new StringBuilder();
129        if (scheme != null) {
130            uri.append(scheme);
131            uri.append(':');
132        }
133        if (ssp != null) {
134            ALL_LEGAL_ENCODER.appendEncoded(uri, ssp);
135        }
136        if (frag != null) {
137            uri.append('#');
138            ALL_LEGAL_ENCODER.appendEncoded(uri, frag);
139        }
140
141        parseURI(uri.toString(), false);
142    }
143
144    /**
145     * Creates a new URI instance using the given arguments. This constructor
146     * first creates a temporary URI string from the given components. This
147     * string will be parsed later on to create the URI instance.
148     * <p>
149     * {@code [scheme:][user-info@]host[:port][path][?query][#fragment]}
150     *
151     * @param scheme
152     *            the scheme part of the URI.
153     * @param userInfo
154     *            the user information of the URI for authentication and
155     *            authorization.
156     * @param host
157     *            the host name of the URI.
158     * @param port
159     *            the port number of the URI.
160     * @param path
161     *            the path to the resource on the host.
162     * @param query
163     *            the query part of the URI to specify parameters for the
164     *            resource.
165     * @param fragment
166     *            the fragment part of the URI.
167     * @throws URISyntaxException
168     *             if the temporary created string doesn't fit to the
169     *             specification RFC2396 or could not be parsed correctly.
170     */
171    public URI(String scheme, String userInfo, String host, int port,
172            String path, String query, String fragment)
173            throws URISyntaxException {
174
175        if (scheme == null && userInfo == null && host == null && path == null
176                && query == null && fragment == null) {
177            this.path = "";
178            return;
179        }
180
181        if (scheme != null && path != null && path.length() > 0
182                && path.charAt(0) != '/') {
183            throw new URISyntaxException(path, "Relative path");
184        }
185
186        StringBuilder uri = new StringBuilder();
187        if (scheme != null) {
188            uri.append(scheme);
189            uri.append(':');
190        }
191
192        if (userInfo != null || host != null || port != -1) {
193            uri.append("//");
194        }
195
196        if (userInfo != null) {
197            USER_INFO_ENCODER.appendEncoded(uri, userInfo);
198            uri.append('@');
199        }
200
201        if (host != null) {
202            // check for IPv6 addresses that hasn't been enclosed
203            // in square brackets
204            if (host.indexOf(':') != -1 && host.indexOf(']') == -1
205                    && host.indexOf('[') == -1) {
206                host = "[" + host + "]";
207            }
208            uri.append(host);
209        }
210
211        if (port != -1) {
212            uri.append(':');
213            uri.append(port);
214        }
215
216        if (path != null) {
217            PATH_ENCODER.appendEncoded(uri, path);
218        }
219
220        if (query != null) {
221            uri.append('?');
222            ALL_LEGAL_ENCODER.appendEncoded(uri, query);
223        }
224
225        if (fragment != null) {
226            uri.append('#');
227            ALL_LEGAL_ENCODER.appendEncoded(uri, fragment);
228        }
229
230        parseURI(uri.toString(), true);
231    }
232
233    /**
234     * Creates a new URI instance using the given arguments. This constructor
235     * first creates a temporary URI string from the given components. This
236     * string will be parsed later on to create the URI instance.
237     * <p>
238     * {@code [scheme:]host[path][#fragment]}
239     *
240     * @param scheme
241     *            the scheme part of the URI.
242     * @param host
243     *            the host name of the URI.
244     * @param path
245     *            the path to the resource on the host.
246     * @param fragment
247     *            the fragment part of the URI.
248     * @throws URISyntaxException
249     *             if the temporary created string doesn't fit to the
250     *             specification RFC2396 or could not be parsed correctly.
251     */
252    public URI(String scheme, String host, String path, String fragment)
253            throws URISyntaxException {
254        this(scheme, null, host, -1, path, null, fragment);
255    }
256
257    /**
258     * Creates a new URI instance using the given arguments. This constructor
259     * first creates a temporary URI string from the given components. This
260     * string will be parsed later on to create the URI instance.
261     * <p>
262     * {@code [scheme:][//authority][path][?query][#fragment]}
263     *
264     * @param scheme
265     *            the scheme part of the URI.
266     * @param authority
267     *            the authority part of the URI.
268     * @param path
269     *            the path to the resource on the host.
270     * @param query
271     *            the query part of the URI to specify parameters for the
272     *            resource.
273     * @param fragment
274     *            the fragment part of the URI.
275     * @throws URISyntaxException
276     *             if the temporary created string doesn't fit to the
277     *             specification RFC2396 or could not be parsed correctly.
278     */
279    public URI(String scheme, String authority, String path, String query,
280            String fragment) throws URISyntaxException {
281        if (scheme != null && path != null && path.length() > 0
282                && path.charAt(0) != '/') {
283            throw new URISyntaxException(path, "Relative path");
284        }
285
286        StringBuilder uri = new StringBuilder();
287        if (scheme != null) {
288            uri.append(scheme);
289            uri.append(':');
290        }
291        if (authority != null) {
292            uri.append("//");
293            AUTHORITY_ENCODER.appendEncoded(uri, authority);
294        }
295
296        if (path != null) {
297            PATH_ENCODER.appendEncoded(uri, path);
298        }
299        if (query != null) {
300            uri.append('?');
301            ALL_LEGAL_ENCODER.appendEncoded(uri, query);
302        }
303        if (fragment != null) {
304            uri.append('#');
305            ALL_LEGAL_ENCODER.appendEncoded(uri, fragment);
306        }
307
308        parseURI(uri.toString(), false);
309    }
310
311    private void parseURI(String uri, boolean forceServer) throws URISyntaxException {
312        String temp = uri;
313        // assign uri string to the input value per spec
314        string = uri;
315        int index, index1, index2, index3;
316        // parse into Fragment, Scheme, and SchemeSpecificPart
317        // then parse SchemeSpecificPart if necessary
318
319        // Fragment
320        index = temp.indexOf('#');
321        if (index != -1) {
322            // remove the fragment from the end
323            fragment = temp.substring(index + 1);
324            validateFragment(uri, fragment, index + 1);
325            temp = temp.substring(0, index);
326        }
327
328        // Scheme and SchemeSpecificPart
329        index = index1 = temp.indexOf(':');
330        index2 = temp.indexOf('/');
331        index3 = temp.indexOf('?');
332
333        // if a '/' or '?' occurs before the first ':' the uri has no
334        // specified scheme, and is therefore not absolute
335        if (index != -1 && (index2 >= index || index2 == -1)
336                && (index3 >= index || index3 == -1)) {
337            // the characters up to the first ':' comprise the scheme
338            absolute = true;
339            scheme = temp.substring(0, index);
340            if (scheme.length() == 0) {
341                throw new URISyntaxException(uri, "Scheme expected", index);
342            }
343            validateScheme(uri, scheme, 0);
344            schemeSpecificPart = temp.substring(index + 1);
345            if (schemeSpecificPart.length() == 0) {
346                throw new URISyntaxException(uri, "Scheme-specific part expected", index + 1);
347            }
348        } else {
349            absolute = false;
350            schemeSpecificPart = temp;
351        }
352
353        if (scheme == null || schemeSpecificPart.length() > 0
354                && schemeSpecificPart.charAt(0) == '/') {
355            opaque = false;
356            // the URI is hierarchical
357
358            // Query
359            temp = schemeSpecificPart;
360            index = temp.indexOf('?');
361            if (index != -1) {
362                query = temp.substring(index + 1);
363                temp = temp.substring(0, index);
364                validateQuery(uri, query, index2 + 1 + index);
365            }
366
367            // Authority and Path
368            if (temp.startsWith("//")) {
369                index = temp.indexOf('/', 2);
370                if (index != -1) {
371                    authority = temp.substring(2, index);
372                    path = temp.substring(index);
373                } else {
374                    authority = temp.substring(2);
375                    if (authority.length() == 0 && query == null
376                            && fragment == null) {
377                        throw new URISyntaxException(uri, "Authority expected", uri.length());
378                    }
379
380                    path = "";
381                    // nothing left, so path is empty (not null, path should
382                    // never be null)
383                }
384
385                if (authority.length() == 0) {
386                    authority = null;
387                } else {
388                    validateAuthority(uri, authority, index1 + 3);
389                }
390            } else { // no authority specified
391                path = temp;
392            }
393
394            int pathIndex = 0;
395            if (index2 > -1) {
396                pathIndex += index2;
397            }
398            if (index > -1) {
399                pathIndex += index;
400            }
401            validatePath(uri, path, pathIndex);
402        } else { // if not hierarchical, URI is opaque
403            opaque = true;
404            validateSsp(uri, schemeSpecificPart, index2 + 2 + index);
405        }
406
407        parseAuthority(forceServer);
408    }
409
410    private void validateScheme(String uri, String scheme, int index)
411            throws URISyntaxException {
412        // first char needs to be an alpha char
413        char ch = scheme.charAt(0);
414        if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'))) {
415            throw new URISyntaxException(uri, "Illegal character in scheme", 0);
416        }
417
418        try {
419            UriCodec.validateSimple(scheme, "+-.");
420        } catch (URISyntaxException e) {
421            throw new URISyntaxException(uri, "Illegal character in scheme", index + e.getIndex());
422        }
423    }
424
425    private void validateSsp(String uri, String ssp, int index)
426            throws URISyntaxException {
427        try {
428            ALL_LEGAL_ENCODER.validate(ssp);
429        } catch (URISyntaxException e) {
430            throw new URISyntaxException(uri,
431                    e.getReason() + " in schemeSpecificPart", index + e.getIndex());
432        }
433    }
434
435    private void validateAuthority(String uri, String authority, int index)
436            throws URISyntaxException {
437        try {
438            AUTHORITY_ENCODER.validate(authority);
439        } catch (URISyntaxException e) {
440            throw new URISyntaxException(uri, e.getReason() + " in authority", index + e.getIndex());
441        }
442    }
443
444    private void validatePath(String uri, String path, int index)
445            throws URISyntaxException {
446        try {
447            PATH_ENCODER.validate(path);
448        } catch (URISyntaxException e) {
449            throw new URISyntaxException(uri, e.getReason() + " in path", index + e.getIndex());
450        }
451    }
452
453    private void validateQuery(String uri, String query, int index)
454            throws URISyntaxException {
455        try {
456            ALL_LEGAL_ENCODER.validate(query);
457        } catch (URISyntaxException e) {
458            throw new URISyntaxException(uri, e.getReason() + " in query", index + e.getIndex());
459
460        }
461    }
462
463    private void validateFragment(String uri, String fragment, int index)
464            throws URISyntaxException {
465        try {
466            ALL_LEGAL_ENCODER.validate(fragment);
467        } catch (URISyntaxException e) {
468            throw new URISyntaxException(uri, e.getReason() + " in fragment", index + e.getIndex());
469        }
470    }
471
472    /**
473     * Parse the authority string into its component parts: user info,
474     * host, and port. This operation doesn't apply to registry URIs, and
475     * calling it on such <i>may</i> result in a syntax exception.
476     *
477     * @param forceServer true to always throw if the authority cannot be
478     *     parsed. If false, this method may still throw for some kinds of
479     *     errors; this unpredictable behavior is consistent with the RI.
480     */
481    private void parseAuthority(boolean forceServer) throws URISyntaxException {
482        if (authority == null) {
483            return;
484        }
485
486        String tempUserInfo = null;
487        String temp = authority;
488        int index = temp.indexOf('@');
489        int hostIndex = 0;
490        if (index != -1) {
491            // remove user info
492            tempUserInfo = temp.substring(0, index);
493            validateUserInfo(authority, tempUserInfo, 0);
494            temp = temp.substring(index + 1); // host[:port] is left
495            hostIndex = index + 1;
496        }
497
498        index = temp.lastIndexOf(':');
499        int endIndex = temp.indexOf(']');
500
501        String tempHost;
502        int tempPort = -1;
503        if (index != -1 && endIndex < index) {
504            // determine port and host
505            tempHost = temp.substring(0, index);
506
507            if (index < (temp.length() - 1)) { // port part is not empty
508                try {
509                    tempPort = Integer.parseInt(temp.substring(index + 1));
510                    if (tempPort < 0) {
511                        if (forceServer) {
512                            throw new URISyntaxException(authority,
513                                    "Invalid port number", hostIndex + index + 1);
514                        }
515                        return;
516                    }
517                } catch (NumberFormatException e) {
518                    if (forceServer) {
519                        throw new URISyntaxException(authority,
520                                "Invalid port number", hostIndex + index + 1);
521                    }
522                    return;
523                }
524            }
525        } else {
526            tempHost = temp;
527        }
528
529        if (tempHost.isEmpty()) {
530            if (forceServer) {
531                throw new URISyntaxException(authority, "Expected host", hostIndex);
532            }
533            return;
534        }
535
536        if (!isValidHost(forceServer, tempHost)) {
537            return;
538        }
539
540        // this is a server based uri,
541        // fill in the userInfo, host and port fields
542        userInfo = tempUserInfo;
543        host = tempHost;
544        port = tempPort;
545        serverAuthority = true;
546    }
547
548    private void validateUserInfo(String uri, String userInfo, int index)
549            throws URISyntaxException {
550        for (int i = 0; i < userInfo.length(); i++) {
551            char ch = userInfo.charAt(i);
552            if (ch == ']' || ch == '[') {
553                throw new URISyntaxException(uri, "Illegal character in userInfo", index + i);
554            }
555        }
556    }
557
558    /**
559     * Returns true if {@code host} is a well-formed host name or IP address.
560     *
561     * @param forceServer true to always throw if the host cannot be parsed. If
562     *     false, this method may still throw for some kinds of errors; this
563     *     unpredictable behavior is consistent with the RI.
564     */
565    private boolean isValidHost(boolean forceServer, String host) throws URISyntaxException {
566        if (host.startsWith("[")) {
567            // IPv6 address
568            if (!host.endsWith("]")) {
569                throw new URISyntaxException(host,
570                        "Expected a closing square bracket for IPv6 address", 0);
571            }
572            byte[] bytes = InetAddress.ipStringToByteArray(host);
573            /*
574             * The native IP parser may return 4 bytes for addresses like
575             * "[::FFFF:127.0.0.1]". This is allowed, but we must not accept
576             * IPv4-formatted addresses in square braces like "[127.0.0.1]".
577             */
578            if (bytes != null && (bytes.length == 16 || bytes.length == 4 && host.contains(":"))) {
579                return true;
580            }
581            throw new URISyntaxException(host, "Malformed IPv6 address");
582        }
583
584        // '[' and ']' can only be the first char and last char
585        // of the host name
586        if (host.indexOf('[') != -1 || host.indexOf(']') != -1) {
587            throw new URISyntaxException(host, "Illegal character in host name", 0);
588        }
589
590        int index = host.lastIndexOf('.');
591        if (index < 0 || index == host.length() - 1
592                || !Character.isDigit(host.charAt(index + 1))) {
593            // domain name
594            if (isValidDomainName(host)) {
595                return true;
596            }
597            if (forceServer) {
598                throw new URISyntaxException(host, "Illegal character in host name", 0);
599            }
600            return false;
601        }
602
603        // IPv4 address
604        byte[] bytes = InetAddress.ipStringToByteArray(host);
605        if (bytes != null && bytes.length == 4) {
606            return true;
607        }
608
609        if (forceServer) {
610            throw new URISyntaxException(host, "Malformed IPv4 address", 0);
611        }
612        return false;
613    }
614
615    private boolean isValidDomainName(String host) {
616        try {
617            UriCodec.validateSimple(host, "-.");
618        } catch (URISyntaxException e) {
619            return false;
620        }
621
622        String lastLabel = null;
623        for (String token : host.split("\\.")) {
624            lastLabel = token;
625            if (lastLabel.startsWith("-") || lastLabel.endsWith("-")) {
626                return false;
627            }
628        }
629
630        if (lastLabel == null) {
631            return false;
632        }
633
634        if (!lastLabel.equals(host)) {
635            char ch = lastLabel.charAt(0);
636            if (ch >= '0' && ch <= '9') {
637                return false;
638            }
639        }
640        return true;
641    }
642
643    /**
644     * Compares this URI with the given argument {@code uri}. This method will
645     * return a negative value if this URI instance is less than the given
646     * argument and a positive value if this URI instance is greater than the
647     * given argument. The return value {@code 0} indicates that the two
648     * instances represent the same URI. To define the order the single parts of
649     * the URI are compared with each other. String components will be ordered
650     * in the natural case-sensitive way. A hierarchical URI is less than an
651     * opaque URI and if one part is {@code null} the URI with the undefined
652     * part is less than the other one.
653     *
654     * @param uri
655     *            the URI this instance has to compare with.
656     * @return the value representing the order of the two instances.
657     */
658    public int compareTo(URI uri) {
659        int ret;
660
661        // compare schemes
662        if (scheme == null && uri.scheme != null) {
663            return -1;
664        } else if (scheme != null && uri.scheme == null) {
665            return 1;
666        } else if (scheme != null && uri.scheme != null) {
667            ret = scheme.compareToIgnoreCase(uri.scheme);
668            if (ret != 0) {
669                return ret;
670            }
671        }
672
673        // compare opacities
674        if (!opaque && uri.opaque) {
675            return -1;
676        } else if (opaque && !uri.opaque) {
677            return 1;
678        } else if (opaque && uri.opaque) {
679            ret = schemeSpecificPart.compareTo(uri.schemeSpecificPart);
680            if (ret != 0) {
681                return ret;
682            }
683        } else {
684
685            // otherwise both must be hierarchical
686
687            // compare authorities
688            if (authority != null && uri.authority == null) {
689                return 1;
690            } else if (authority == null && uri.authority != null) {
691                return -1;
692            } else if (authority != null && uri.authority != null) {
693                if (host != null && uri.host != null) {
694                    // both are server based, so compare userInfo, host, port
695                    if (userInfo != null && uri.userInfo == null) {
696                        return 1;
697                    } else if (userInfo == null && uri.userInfo != null) {
698                        return -1;
699                    } else if (userInfo != null && uri.userInfo != null) {
700                        ret = userInfo.compareTo(uri.userInfo);
701                        if (ret != 0) {
702                            return ret;
703                        }
704                    }
705
706                    // userInfo's are the same, compare hostname
707                    ret = host.compareToIgnoreCase(uri.host);
708                    if (ret != 0) {
709                        return ret;
710                    }
711
712                    // compare port
713                    if (port != uri.port) {
714                        return port - uri.port;
715                    }
716                } else { // one or both are registry based, compare the whole
717                    // authority
718                    ret = authority.compareTo(uri.authority);
719                    if (ret != 0) {
720                        return ret;
721                    }
722                }
723            }
724
725            // authorities are the same
726            // compare paths
727            ret = path.compareTo(uri.path);
728            if (ret != 0) {
729                return ret;
730            }
731
732            // compare queries
733
734            if (query != null && uri.query == null) {
735                return 1;
736            } else if (query == null && uri.query != null) {
737                return -1;
738            } else if (query != null && uri.query != null) {
739                ret = query.compareTo(uri.query);
740                if (ret != 0) {
741                    return ret;
742                }
743            }
744        }
745
746        // everything else is identical, so compare fragments
747        if (fragment != null && uri.fragment == null) {
748            return 1;
749        } else if (fragment == null && uri.fragment != null) {
750            return -1;
751        } else if (fragment != null && uri.fragment != null) {
752            ret = fragment.compareTo(uri.fragment);
753            if (ret != 0) {
754                return ret;
755            }
756        }
757
758        // identical
759        return 0;
760    }
761
762    /**
763     * Returns the URI formed by parsing {@code uri}. This method behaves
764     * identically to the string constructor but throws a different exception
765     * on failure. The constructor fails with a checked {@link
766     * URISyntaxException}; this method fails with an unchecked {@link
767     * IllegalArgumentException}.
768     */
769    public static URI create(String uri) {
770        try {
771            return new URI(uri);
772        } catch (URISyntaxException e) {
773            throw new IllegalArgumentException(e.getMessage());
774        }
775    }
776
777    private URI duplicate() {
778        URI clone = new URI();
779        clone.absolute = absolute;
780        clone.authority = authority;
781        clone.fragment = fragment;
782        clone.host = host;
783        clone.opaque = opaque;
784        clone.path = path;
785        clone.port = port;
786        clone.query = query;
787        clone.scheme = scheme;
788        clone.schemeSpecificPart = schemeSpecificPart;
789        clone.userInfo = userInfo;
790        clone.serverAuthority = serverAuthority;
791        return clone;
792    }
793
794    /*
795     * Takes a string that may contain hex sequences like %F1 or %2b and
796     * converts the hex values following the '%' to lowercase
797     */
798    private String convertHexToLowerCase(String s) {
799        StringBuilder result = new StringBuilder("");
800        if (s.indexOf('%') == -1) {
801            return s;
802        }
803
804        int index, prevIndex = 0;
805        while ((index = s.indexOf('%', prevIndex)) != -1) {
806            result.append(s.substring(prevIndex, index + 1));
807            result.append(s.substring(index + 1, index + 3).toLowerCase(Locale.US));
808            index += 3;
809            prevIndex = index;
810        }
811        return result.toString();
812    }
813
814    /**
815     * Returns true if {@code first} and {@code second} are equal after
816     * unescaping hex sequences like %F1 and %2b.
817     */
818    private boolean escapedEquals(String first, String second) {
819        if (first.indexOf('%') != second.indexOf('%')) {
820            return first.equals(second);
821        }
822
823        int index, prevIndex = 0;
824        while ((index = first.indexOf('%', prevIndex)) != -1
825                && second.indexOf('%', prevIndex) == index) {
826            boolean match = first.substring(prevIndex, index).equals(
827                    second.substring(prevIndex, index));
828            if (!match) {
829                return false;
830            }
831
832            match = first.substring(index + 1, index + 3).equalsIgnoreCase(
833                    second.substring(index + 1, index + 3));
834            if (!match) {
835                return false;
836            }
837
838            index += 3;
839            prevIndex = index;
840        }
841        return first.substring(prevIndex).equals(second.substring(prevIndex));
842    }
843
844    /**
845     * Compares this URI instance with the given argument {@code o} and
846     * determines if both are equal. Two URI instances are equal if all single
847     * parts are identical in their meaning.
848     *
849     * @param o
850     *            the URI this instance has to be compared with.
851     * @return {@code true} if both URI instances point to the same resource,
852     *         {@code false} otherwise.
853     */
854    @Override
855    public boolean equals(Object o) {
856        if (!(o instanceof URI)) {
857            return false;
858        }
859        URI uri = (URI) o;
860
861        if (uri.fragment == null && fragment != null || uri.fragment != null
862                && fragment == null) {
863            return false;
864        } else if (uri.fragment != null && fragment != null) {
865            if (!escapedEquals(uri.fragment, fragment)) {
866                return false;
867            }
868        }
869
870        if (uri.scheme == null && scheme != null || uri.scheme != null
871                && scheme == null) {
872            return false;
873        } else if (uri.scheme != null && scheme != null) {
874            if (!uri.scheme.equalsIgnoreCase(scheme)) {
875                return false;
876            }
877        }
878
879        if (uri.opaque && opaque) {
880            return escapedEquals(uri.schemeSpecificPart,
881                    schemeSpecificPart);
882        } else if (!uri.opaque && !opaque) {
883            if (!escapedEquals(path, uri.path)) {
884                return false;
885            }
886
887            if (uri.query != null && query == null || uri.query == null
888                    && query != null) {
889                return false;
890            } else if (uri.query != null && query != null) {
891                if (!escapedEquals(uri.query, query)) {
892                    return false;
893                }
894            }
895
896            if (uri.authority != null && authority == null
897                    || uri.authority == null && authority != null) {
898                return false;
899            } else if (uri.authority != null && authority != null) {
900                if (uri.host != null && host == null || uri.host == null
901                        && host != null) {
902                    return false;
903                } else if (uri.host == null && host == null) {
904                    // both are registry based, so compare the whole authority
905                    return escapedEquals(uri.authority, authority);
906                } else { // uri.host != null && host != null, so server-based
907                    if (!host.equalsIgnoreCase(uri.host)) {
908                        return false;
909                    }
910
911                    if (port != uri.port) {
912                        return false;
913                    }
914
915                    if (uri.userInfo != null && userInfo == null
916                            || uri.userInfo == null && userInfo != null) {
917                        return false;
918                    } else if (uri.userInfo != null && userInfo != null) {
919                        return escapedEquals(userInfo, uri.userInfo);
920                    } else {
921                        return true;
922                    }
923                }
924            } else {
925                // no authority
926                return true;
927            }
928
929        } else {
930            // one is opaque, the other hierarchical
931            return false;
932        }
933    }
934
935    /**
936     * Gets the decoded authority part of this URI.
937     *
938     * @return the decoded authority part or {@code null} if undefined.
939     */
940    public String getAuthority() {
941        return decode(authority);
942    }
943
944    /**
945     * Gets the decoded fragment part of this URI.
946     *
947     * @return the decoded fragment part or {@code null} if undefined.
948     */
949    public String getFragment() {
950        return decode(fragment);
951    }
952
953    /**
954     * Gets the host part of this URI.
955     *
956     * @return the host part or {@code null} if undefined.
957     */
958    public String getHost() {
959        return host;
960    }
961
962    /**
963     * Gets the decoded path part of this URI.
964     *
965     * @return the decoded path part or {@code null} if undefined.
966     */
967    public String getPath() {
968        return decode(path);
969    }
970
971    /**
972     * Gets the port number of this URI.
973     *
974     * @return the port number or {@code -1} if undefined.
975     */
976    public int getPort() {
977        return port;
978    }
979
980    /** @hide */
981    public int getEffectivePort() {
982        return getEffectivePort(scheme, port);
983    }
984
985    /**
986     * Returns the port to use for {@code scheme} connections will use when
987     * {@link #getPort} returns {@code specifiedPort}.
988     *
989     * @hide
990     */
991    public static int getEffectivePort(String scheme, int specifiedPort) {
992        if (specifiedPort != -1) {
993            return specifiedPort;
994        }
995
996        if ("http".equalsIgnoreCase(scheme)) {
997            return 80;
998        } else if ("https".equalsIgnoreCase(scheme)) {
999            return 443;
1000        } else {
1001            return -1;
1002        }
1003    }
1004
1005    /**
1006     * Gets the decoded query part of this URI.
1007     *
1008     * @return the decoded query part or {@code null} if undefined.
1009     */
1010    public String getQuery() {
1011        return decode(query);
1012    }
1013
1014    /**
1015     * Gets the authority part of this URI in raw form.
1016     *
1017     * @return the encoded authority part or {@code null} if undefined.
1018     */
1019    public String getRawAuthority() {
1020        return authority;
1021    }
1022
1023    /**
1024     * Gets the fragment part of this URI in raw form.
1025     *
1026     * @return the encoded fragment part or {@code null} if undefined.
1027     */
1028    public String getRawFragment() {
1029        return fragment;
1030    }
1031
1032    /**
1033     * Gets the path part of this URI in raw form.
1034     *
1035     * @return the encoded path part or {@code null} if undefined.
1036     */
1037    public String getRawPath() {
1038        return path;
1039    }
1040
1041    /**
1042     * Gets the query part of this URI in raw form.
1043     *
1044     * @return the encoded query part or {@code null} if undefined.
1045     */
1046    public String getRawQuery() {
1047        return query;
1048    }
1049
1050    /**
1051     * Gets the scheme-specific part of this URI in raw form.
1052     *
1053     * @return the encoded scheme-specific part or {@code null} if undefined.
1054     */
1055    public String getRawSchemeSpecificPart() {
1056        return schemeSpecificPart;
1057    }
1058
1059    /**
1060     * Gets the user-info part of this URI in raw form.
1061     *
1062     * @return the encoded user-info part or {@code null} if undefined.
1063     */
1064    public String getRawUserInfo() {
1065        return userInfo;
1066    }
1067
1068    /**
1069     * Gets the scheme part of this URI.
1070     *
1071     * @return the scheme part or {@code null} if undefined.
1072     */
1073    public String getScheme() {
1074        return scheme;
1075    }
1076
1077    /**
1078     * Gets the decoded scheme-specific part of this URI.
1079     *
1080     * @return the decoded scheme-specific part or {@code null} if undefined.
1081     */
1082    public String getSchemeSpecificPart() {
1083        return decode(schemeSpecificPart);
1084    }
1085
1086    /**
1087     * Gets the decoded user-info part of this URI.
1088     *
1089     * @return the decoded user-info part or {@code null} if undefined.
1090     */
1091    public String getUserInfo() {
1092        return decode(userInfo);
1093    }
1094
1095    /**
1096     * Gets the hashcode value of this URI instance.
1097     *
1098     * @return the appropriate hashcode value.
1099     */
1100    @Override
1101    public int hashCode() {
1102        if (hash == -1) {
1103            hash = getHashString().hashCode();
1104        }
1105        return hash;
1106    }
1107
1108    /**
1109     * Indicates whether this URI is absolute, which means that a scheme part is
1110     * defined in this URI.
1111     *
1112     * @return {@code true} if this URI is absolute, {@code false} otherwise.
1113     */
1114    public boolean isAbsolute() {
1115        return absolute;
1116    }
1117
1118    /**
1119     * Indicates whether this URI is opaque or not. An opaque URI is absolute
1120     * and has a scheme-specific part which does not start with a slash
1121     * character. All parts except scheme, scheme-specific and fragment are
1122     * undefined.
1123     *
1124     * @return {@code true} if the URI is opaque, {@code false} otherwise.
1125     */
1126    public boolean isOpaque() {
1127        return opaque;
1128    }
1129
1130    /*
1131     * normalize path, and return the resulting string
1132     */
1133    private String normalize(String path) {
1134        // count the number of '/'s, to determine number of segments
1135        int index = -1;
1136        int pathLength = path.length();
1137        int size = 0;
1138        if (pathLength > 0 && path.charAt(0) != '/') {
1139            size++;
1140        }
1141        while ((index = path.indexOf('/', index + 1)) != -1) {
1142            if (index + 1 < pathLength && path.charAt(index + 1) != '/') {
1143                size++;
1144            }
1145        }
1146
1147        String[] segList = new String[size];
1148        boolean[] include = new boolean[size];
1149
1150        // break the path into segments and store in the list
1151        int current = 0;
1152        int index2;
1153        index = (pathLength > 0 && path.charAt(0) == '/') ? 1 : 0;
1154        while ((index2 = path.indexOf('/', index + 1)) != -1) {
1155            segList[current++] = path.substring(index, index2);
1156            index = index2 + 1;
1157        }
1158
1159        // if current==size, then the last character was a slash
1160        // and there are no more segments
1161        if (current < size) {
1162            segList[current] = path.substring(index);
1163        }
1164
1165        // determine which segments get included in the normalized path
1166        for (int i = 0; i < size; i++) {
1167            include[i] = true;
1168            if (segList[i].equals("..")) {
1169                int remove = i - 1;
1170                // search back to find a segment to remove, if possible
1171                while (remove > -1 && !include[remove]) {
1172                    remove--;
1173                }
1174                // if we find a segment to remove, remove it and the ".."
1175                // segment
1176                if (remove > -1 && !segList[remove].equals("..")) {
1177                    include[remove] = false;
1178                    include[i] = false;
1179                }
1180            } else if (segList[i].equals(".")) {
1181                include[i] = false;
1182            }
1183        }
1184
1185        // put the path back together
1186        StringBuilder newPath = new StringBuilder();
1187        if (path.startsWith("/")) {
1188            newPath.append('/');
1189        }
1190
1191        for (int i = 0; i < segList.length; i++) {
1192            if (include[i]) {
1193                newPath.append(segList[i]);
1194                newPath.append('/');
1195            }
1196        }
1197
1198        // if we used at least one segment and the path previously ended with
1199        // a slash and the last segment is still used, then delete the extra
1200        // trailing '/'
1201        if (!path.endsWith("/") && segList.length > 0
1202                && include[segList.length - 1]) {
1203            newPath.deleteCharAt(newPath.length() - 1);
1204        }
1205
1206        String result = newPath.toString();
1207
1208        // check for a ':' in the first segment if one exists,
1209        // prepend "./" to normalize
1210        index = result.indexOf(':');
1211        index2 = result.indexOf('/');
1212        if (index != -1 && (index < index2 || index2 == -1)) {
1213            newPath.insert(0, "./");
1214            result = newPath.toString();
1215        }
1216        return result;
1217    }
1218
1219    /**
1220     * Normalizes the path part of this URI.
1221     *
1222     * @return an URI object which represents this instance with a normalized
1223     *         path.
1224     */
1225    public URI normalize() {
1226        if (opaque) {
1227            return this;
1228        }
1229        String normalizedPath = normalize(path);
1230        // if the path is already normalized, return this
1231        if (path.equals(normalizedPath)) {
1232            return this;
1233        }
1234        // get an exact copy of the URI re-calculate the scheme specific part
1235        // since the path of the normalized URI is different from this URI.
1236        URI result = duplicate();
1237        result.path = normalizedPath;
1238        result.setSchemeSpecificPart();
1239        return result;
1240    }
1241
1242    /**
1243     * Tries to parse the authority component of this URI to divide it into the
1244     * host, port, and user-info. If this URI is already determined as a
1245     * ServerAuthority this instance will be returned without changes.
1246     *
1247     * @return this instance with the components of the parsed server authority.
1248     * @throws URISyntaxException
1249     *             if the authority part could not be parsed as a server-based
1250     *             authority.
1251     */
1252    public URI parseServerAuthority() throws URISyntaxException {
1253        if (!serverAuthority) {
1254            parseAuthority(true);
1255        }
1256        return this;
1257    }
1258
1259    /**
1260     * Makes the given URI {@code relative} to a relative URI against the URI
1261     * represented by this instance.
1262     *
1263     * @param relative
1264     *            the URI which has to be relativized against this URI.
1265     * @return the relative URI.
1266     */
1267    public URI relativize(URI relative) {
1268        if (relative.opaque || opaque) {
1269            return relative;
1270        }
1271
1272        if (scheme == null ? relative.scheme != null : !scheme
1273                .equals(relative.scheme)) {
1274            return relative;
1275        }
1276
1277        if (authority == null ? relative.authority != null : !authority
1278                .equals(relative.authority)) {
1279            return relative;
1280        }
1281
1282        // normalize both paths
1283        String thisPath = normalize(path);
1284        String relativePath = normalize(relative.path);
1285
1286        /*
1287         * if the paths aren't equal, then we need to determine if this URI's
1288         * path is a parent path (begins with) the relative URI's path
1289         */
1290        if (!thisPath.equals(relativePath)) {
1291            // if this URI's path doesn't end in a '/', add one
1292            if (!thisPath.endsWith("/")) {
1293                thisPath = thisPath + '/';
1294            }
1295            /*
1296             * if the relative URI's path doesn't start with this URI's path,
1297             * then just return the relative URI; the URIs have nothing in
1298             * common
1299             */
1300            if (!relativePath.startsWith(thisPath)) {
1301                return relative;
1302            }
1303        }
1304
1305        URI result = new URI();
1306        result.fragment = relative.fragment;
1307        result.query = relative.query;
1308        // the result URI is the remainder of the relative URI's path
1309        result.path = relativePath.substring(thisPath.length());
1310        result.setSchemeSpecificPart();
1311        return result;
1312    }
1313
1314    /**
1315     * Resolves the given URI {@code relative} against the URI represented by
1316     * this instance.
1317     *
1318     * @param relative
1319     *            the URI which has to be resolved against this URI.
1320     * @return the resolved URI.
1321     */
1322    public URI resolve(URI relative) {
1323        if (relative.absolute || opaque) {
1324            return relative;
1325        }
1326
1327        URI result;
1328        if (relative.path.isEmpty() && relative.scheme == null
1329                && relative.authority == null && relative.query == null
1330                && relative.fragment != null) {
1331            // if the relative URI only consists of fragment,
1332            // the resolved URI is very similar to this URI,
1333            // except that it has the fragment from the relative URI.
1334            result = duplicate();
1335            result.fragment = relative.fragment;
1336            // no need to re-calculate the scheme specific part,
1337            // since fragment is not part of scheme specific part.
1338            return result;
1339        }
1340
1341        if (relative.authority != null) {
1342            // if the relative URI has authority,
1343            // the resolved URI is almost the same as the relative URI,
1344            // except that it has the scheme of this URI.
1345            result = relative.duplicate();
1346            result.scheme = scheme;
1347            result.absolute = absolute;
1348        } else {
1349            // since relative URI has no authority,
1350            // the resolved URI is very similar to this URI,
1351            // except that it has the query and fragment of the relative URI,
1352            // and the path is different.
1353            result = duplicate();
1354            result.fragment = relative.fragment;
1355            result.query = relative.query;
1356            if (relative.path.startsWith("/")) {
1357                result.path = relative.path;
1358            } else {
1359                // resolve a relative reference
1360                int endIndex = path.lastIndexOf('/') + 1;
1361                result.path = normalize(path.substring(0, endIndex)
1362                        + relative.path);
1363            }
1364            // re-calculate the scheme specific part since
1365            // query and path of the resolved URI is different from this URI.
1366            result.setSchemeSpecificPart();
1367        }
1368        return result;
1369    }
1370
1371    /**
1372     * Helper method used to re-calculate the scheme specific part of the
1373     * resolved or normalized URIs
1374     */
1375    private void setSchemeSpecificPart() {
1376        // ssp = [//authority][path][?query]
1377        StringBuilder ssp = new StringBuilder();
1378        if (authority != null) {
1379            ssp.append("//" + authority);
1380        }
1381        if (path != null) {
1382            ssp.append(path);
1383        }
1384        if (query != null) {
1385            ssp.append("?" + query);
1386        }
1387        schemeSpecificPart = ssp.toString();
1388        // reset string, so that it can be re-calculated correctly when asked.
1389        string = null;
1390    }
1391
1392    /**
1393     * Creates a new URI instance by parsing the given string {@code relative}
1394     * and resolves the created URI against the URI represented by this
1395     * instance.
1396     *
1397     * @param relative
1398     *            the given string to create the new URI instance which has to
1399     *            be resolved later on.
1400     * @return the created and resolved URI.
1401     */
1402    public URI resolve(String relative) {
1403        return resolve(create(relative));
1404    }
1405
1406    private String decode(String s) {
1407        return s != null ? UriCodec.decode(s) : null;
1408    }
1409
1410    /**
1411     * Returns the textual string representation of this URI instance using the
1412     * US-ASCII encoding.
1413     *
1414     * @return the US-ASCII string representation of this URI.
1415     */
1416    public String toASCIIString() {
1417        StringBuilder result = new StringBuilder();
1418        ASCII_ONLY.appendEncoded(result, toString());
1419        return result.toString();
1420    }
1421
1422    /**
1423     * Returns the textual string representation of this URI instance.
1424     *
1425     * @return the textual string representation of this URI.
1426     */
1427    @Override
1428    public String toString() {
1429        if (string == null) {
1430            StringBuilder result = new StringBuilder();
1431            if (scheme != null) {
1432                result.append(scheme);
1433                result.append(':');
1434            }
1435            if (opaque) {
1436                result.append(schemeSpecificPart);
1437            } else {
1438                if (authority != null) {
1439                    result.append("//");
1440                    result.append(authority);
1441                }
1442
1443                if (path != null) {
1444                    result.append(path);
1445                }
1446
1447                if (query != null) {
1448                    result.append('?');
1449                    result.append(query);
1450                }
1451            }
1452
1453            if (fragment != null) {
1454                result.append('#');
1455                result.append(fragment);
1456            }
1457
1458            string = result.toString();
1459        }
1460        return string;
1461    }
1462
1463    /*
1464     * Form a string from the components of this URI, similarly to the
1465     * toString() method. But this method converts scheme and host to lowercase,
1466     * and converts escaped octets to lowercase.
1467     */
1468    private String getHashString() {
1469        StringBuilder result = new StringBuilder();
1470        if (scheme != null) {
1471            result.append(scheme.toLowerCase(Locale.US));
1472            result.append(':');
1473        }
1474        if (opaque) {
1475            result.append(schemeSpecificPart);
1476        } else {
1477            if (authority != null) {
1478                result.append("//");
1479                if (host == null) {
1480                    result.append(authority);
1481                } else {
1482                    if (userInfo != null) {
1483                        result.append(userInfo + "@");
1484                    }
1485                    result.append(host.toLowerCase(Locale.US));
1486                    if (port != -1) {
1487                        result.append(":" + port);
1488                    }
1489                }
1490            }
1491
1492            if (path != null) {
1493                result.append(path);
1494            }
1495
1496            if (query != null) {
1497                result.append('?');
1498                result.append(query);
1499            }
1500        }
1501
1502        if (fragment != null) {
1503            result.append('#');
1504            result.append(fragment);
1505        }
1506
1507        return convertHexToLowerCase(result.toString());
1508    }
1509
1510    /**
1511     * Converts this URI instance to a URL.
1512     *
1513     * @return the created URL representing the same resource as this URI.
1514     * @throws MalformedURLException
1515     *             if an error occurs while creating the URL or no protocol
1516     *             handler could be found.
1517     */
1518    public URL toURL() throws MalformedURLException {
1519        if (!absolute) {
1520            throw new IllegalArgumentException("URI is not absolute: " + toString());
1521        }
1522        return new URL(toString());
1523    }
1524
1525    private void readObject(ObjectInputStream in) throws IOException,
1526            ClassNotFoundException {
1527        in.defaultReadObject();
1528        try {
1529            parseURI(string, false);
1530        } catch (URISyntaxException e) {
1531            throw new IOException(e.toString());
1532        }
1533    }
1534
1535    private void writeObject(ObjectOutputStream out) throws IOException,
1536            ClassNotFoundException {
1537        // call toString() to ensure the value of string field is calculated
1538        toString();
1539        out.defaultWriteObject();
1540    }
1541}
1542