URI.java revision 5501a3d4b3d7657c183ed5446fe67fa011fbf70b
1/*
2 *  Licensed to the Apache Software Foundation (ASF) under one or more
3 *  contributor license agreements.  See the NOTICE file distributed with
4 *  this work for additional information regarding copyright ownership.
5 *  The ASF licenses this file to You under the Apache License, Version 2.0
6 *  (the "License"); you may not use this file except in compliance with
7 *  the License.  You may obtain a copy of the License at
8 *
9 *     http://www.apache.org/licenses/LICENSE-2.0
10 *
11 *  Unless required by applicable law or agreed to in writing, software
12 *  distributed under the License is distributed on an "AS IS" BASIS,
13 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 *  See the License for the specific language governing permissions and
15 *  limitations under the License.
16 */
17
18package java.net;
19
20import java.io.IOException;
21import java.io.ObjectInputStream;
22import java.io.ObjectOutputStream;
23import java.io.Serializable;
24import java.io.UnsupportedEncodingException;
25import java.util.StringTokenizer;
26import org.apache.harmony.luni.platform.INetworkSystem;
27import org.apache.harmony.luni.platform.Platform;
28
29/**
30 * This class represents an instance of a URI as defined by RFC 2396.
31 */
32public final class URI implements Comparable<URI>, Serializable {
33
    // Serialization version identifier for this class.
    private static final long serialVersionUID = -6052424284110960213l;

    // Character sets handed to the quoting/validation helpers as the "legal"
    // characters of a component (in addition to whatever those helpers accept
    // by default).
    static final String UNRESERVED = "_-!.~\'()*";
    static final String PUNCTUATION = ",;:$&+=";
    static final String RESERVED = PUNCTUATION + "?/[]@";
    static final String SOME_LEGAL = UNRESERVED + PUNCTUATION;
    static final String ALL_LEGAL = UNRESERVED + RESERVED;

    // The textual form of this URI; the only non-transient instance field.
    // parseURI() assigns the input string to it before parsing.
    private String string;
    // Components filled in by parseURI()/parseAuthority(); null when absent.
    private transient String scheme;
    private transient String schemeSpecificPart;
    private transient String authority;
    private transient String userInfo;
    private transient String host;
    // -1 means that no port is defined.
    private transient int port = -1;
    private transient String path;
    private transient String query;
    private transient String fragment;
    // true when a scheme is present and the scheme-specific part does not
    // start with '/'.
    private transient boolean opaque;
    // true when a scheme was parsed.
    private transient boolean absolute;
    // true once parseAuthority() has split the authority into
    // userInfo/host/port successfully.
    private transient boolean serverAuthority = false;

    // NOTE(review): presumably a cached hash code with -1 meaning "not yet
    // computed" -- confirm against hashCode(), which is outside this view.
    private transient int hash = -1;

    // Creates an empty instance; duplicate() uses it and then copies the
    // parsed components over field by field.
    private URI() {}
59
    /**
     * Creates a new URI instance by parsing the given string {@code uri}.
     * The string is stored as the textual form of this URI and then split
     * into its components. The authority, if any, is parsed leniently: an
     * authority that cannot be split into user-info, host and port does not
     * necessarily cause a failure.
     *
     * @param uri
     *            the textual URI representation to be parsed into a URI object.
     * @throws URISyntaxException
     *             if the given string {@code uri} doesn't fit to the
     *             specification RFC2396 or could not be parsed correctly.
     */
    public URI(String uri) throws URISyntaxException {
        parseURI(uri, false);
    }
72
73    /**
74     * Creates a new URI instance using the given arguments. This constructor
75     * first creates a temporary URI string from the given components. This
76     * string will be parsed later on to create the URI instance.
77     * <p>
78     * {@code [scheme:]scheme-specific-part[#fragment]}
79     *
80     * @param scheme
81     *            the scheme part of the URI.
82     * @param ssp
83     *            the scheme-specific-part of the URI.
84     * @param frag
85     *            the fragment part of the URI.
86     * @throws URISyntaxException
87     *             if the temporary created string doesn't fit to the
88     *             specification RFC2396 or could not be parsed correctly.
89     */
90    public URI(String scheme, String ssp, String frag)
91            throws URISyntaxException {
92        StringBuilder uri = new StringBuilder();
93        if (scheme != null) {
94            uri.append(scheme);
95            uri.append(':');
96        }
97        if (ssp != null) {
98            // QUOTE ILLEGAL CHARACTERS
99            uri.append(quoteComponent(ssp, ALL_LEGAL));
100        }
101        if (frag != null) {
102            uri.append('#');
103            // QUOTE ILLEGAL CHARACTERS
104            uri.append(quoteComponent(frag, ALL_LEGAL));
105        }
106
107        parseURI(uri.toString(), false);
108    }
109
110    /**
111     * Creates a new URI instance using the given arguments. This constructor
112     * first creates a temporary URI string from the given components. This
113     * string will be parsed later on to create the URI instance.
114     * <p>
115     * {@code [scheme:][user-info@]host[:port][path][?query][#fragment]}
116     *
117     * @param scheme
118     *            the scheme part of the URI.
119     * @param userInfo
120     *            the user information of the URI for authentication and
121     *            authorization.
122     * @param host
123     *            the host name of the URI.
124     * @param port
125     *            the port number of the URI.
126     * @param path
127     *            the path to the resource on the host.
128     * @param query
129     *            the query part of the URI to specify parameters for the
130     *            resource.
131     * @param fragment
132     *            the fragment part of the URI.
133     * @throws URISyntaxException
134     *             if the temporary created string doesn't fit to the
135     *             specification RFC2396 or could not be parsed correctly.
136     */
137    public URI(String scheme, String userInfo, String host, int port,
138            String path, String query, String fragment)
139            throws URISyntaxException {
140
141        if (scheme == null && userInfo == null && host == null && path == null
142                && query == null && fragment == null) {
143            this.path = "";
144            return;
145        }
146
147        if (scheme != null && path != null && path.length() > 0
148                && path.charAt(0) != '/') {
149            throw new URISyntaxException(path, "Relative path");
150        }
151
152        StringBuilder uri = new StringBuilder();
153        if (scheme != null) {
154            uri.append(scheme);
155            uri.append(':');
156        }
157
158        if (userInfo != null || host != null || port != -1) {
159            uri.append("//");
160        }
161
162        if (userInfo != null) {
163            // QUOTE ILLEGAL CHARACTERS in userInfo
164            uri.append(quoteComponent(userInfo, SOME_LEGAL));
165            uri.append('@');
166        }
167
168        if (host != null) {
169            // check for IPv6 addresses that hasn't been enclosed
170            // in square brackets
171            if (host.indexOf(':') != -1 && host.indexOf(']') == -1
172                    && host.indexOf('[') == -1) {
173                host = "[" + host + "]";
174            }
175            uri.append(host);
176        }
177
178        if (port != -1) {
179            uri.append(':');
180            uri.append(port);
181        }
182
183        if (path != null) {
184            // QUOTE ILLEGAL CHARS
185            uri.append(quoteComponent(path, "/@" + SOME_LEGAL));
186        }
187
188        if (query != null) {
189            uri.append('?');
190            // QUOTE ILLEGAL CHARS
191            uri.append(quoteComponent(query, ALL_LEGAL));
192        }
193
194        if (fragment != null) {
195            // QUOTE ILLEGAL CHARS
196            uri.append('#');
197            uri.append(quoteComponent(fragment, ALL_LEGAL));
198        }
199
200        parseURI(uri.toString(), true);
201    }
202
    /**
     * Creates a new URI instance using the given arguments. This is a
     * convenience constructor that delegates to the seven-argument
     * constructor with no user-info, no port ({@code -1}) and no query, so
     * the temporary URI string that gets parsed has the form
     * <p>
     * {@code [scheme:][//host][path][#fragment]}
     *
     * @param scheme
     *            the scheme part of the URI.
     * @param host
     *            the host name of the URI.
     * @param path
     *            the path to the resource on the host.
     * @param fragment
     *            the fragment part of the URI.
     * @throws URISyntaxException
     *             if the temporary created string doesn't fit to the
     *             specification RFC2396 or could not be parsed correctly.
     */
    public URI(String scheme, String host, String path, String fragment)
            throws URISyntaxException {
        this(scheme, null, host, -1, path, null, fragment);
    }
226
227    /**
228     * Creates a new URI instance using the given arguments. This constructor
229     * first creates a temporary URI string from the given components. This
230     * string will be parsed later on to create the URI instance.
231     * <p>
232     * {@code [scheme:][//authority][path][?query][#fragment]}
233     *
234     * @param scheme
235     *            the scheme part of the URI.
236     * @param authority
237     *            the authority part of the URI.
238     * @param path
239     *            the path to the resource on the host.
240     * @param query
241     *            the query part of the URI to specify parameters for the
242     *            resource.
243     * @param fragment
244     *            the fragment part of the URI.
245     * @throws URISyntaxException
246     *             if the temporary created string doesn't fit to the
247     *             specification RFC2396 or could not be parsed correctly.
248     */
249    public URI(String scheme, String authority, String path, String query,
250            String fragment) throws URISyntaxException {
251        if (scheme != null && path != null && path.length() > 0
252                && path.charAt(0) != '/') {
253            throw new URISyntaxException(path, "Relative path");
254        }
255
256        StringBuilder uri = new StringBuilder();
257        if (scheme != null) {
258            uri.append(scheme);
259            uri.append(':');
260        }
261        if (authority != null) {
262            uri.append("//");
263            // QUOTE ILLEGAL CHARS
264            uri.append(quoteComponent(authority, "@[]" + SOME_LEGAL));
265        }
266
267        if (path != null) {
268            // QUOTE ILLEGAL CHARS
269            uri.append(quoteComponent(path, "/@" + SOME_LEGAL));
270        }
271        if (query != null) {
272            // QUOTE ILLEGAL CHARS
273            uri.append('?');
274            uri.append(quoteComponent(query, ALL_LEGAL));
275        }
276        if (fragment != null) {
277            // QUOTE ILLEGAL CHARS
278            uri.append('#');
279            uri.append(quoteComponent(fragment, ALL_LEGAL));
280        }
281
282        parseURI(uri.toString(), false);
283    }
284
285    private void parseURI(String uri, boolean forceServer) throws URISyntaxException {
286        String temp = uri;
287        // assign uri string to the input value per spec
288        string = uri;
289        int index, index1, index2, index3;
290        // parse into Fragment, Scheme, and SchemeSpecificPart
291        // then parse SchemeSpecificPart if necessary
292
293        // Fragment
294        index = temp.indexOf('#');
295        if (index != -1) {
296            // remove the fragment from the end
297            fragment = temp.substring(index + 1);
298            validateFragment(uri, fragment, index + 1);
299            temp = temp.substring(0, index);
300        }
301
302        // Scheme and SchemeSpecificPart
303        index = index1 = temp.indexOf(':');
304        index2 = temp.indexOf('/');
305        index3 = temp.indexOf('?');
306
307        // if a '/' or '?' occurs before the first ':' the uri has no
308        // specified scheme, and is therefore not absolute
309        if (index != -1 && (index2 >= index || index2 == -1)
310                && (index3 >= index || index3 == -1)) {
311            // the characters up to the first ':' comprise the scheme
312            absolute = true;
313            scheme = temp.substring(0, index);
314            if (scheme.length() == 0) {
315                throw new URISyntaxException(uri, "Scheme expected", index);
316            }
317            validateScheme(uri, scheme, 0);
318            schemeSpecificPart = temp.substring(index + 1);
319            if (schemeSpecificPart.length() == 0) {
320                throw new URISyntaxException(uri, "Scheme-specific part expected", index + 1);
321            }
322        } else {
323            absolute = false;
324            schemeSpecificPart = temp;
325        }
326
327        if (scheme == null || schemeSpecificPart.length() > 0
328                && schemeSpecificPart.charAt(0) == '/') {
329            opaque = false;
330            // the URI is hierarchical
331
332            // Query
333            temp = schemeSpecificPart;
334            index = temp.indexOf('?');
335            if (index != -1) {
336                query = temp.substring(index + 1);
337                temp = temp.substring(0, index);
338                validateQuery(uri, query, index2 + 1 + index);
339            }
340
341            // Authority and Path
342            if (temp.startsWith("//")) {
343                index = temp.indexOf('/', 2);
344                if (index != -1) {
345                    authority = temp.substring(2, index);
346                    path = temp.substring(index);
347                } else {
348                    authority = temp.substring(2);
349                    if (authority.length() == 0 && query == null
350                            && fragment == null) {
351                        throw new URISyntaxException(uri, "Authority expected", uri.length());
352                    }
353
354                    path = "";
355                    // nothing left, so path is empty (not null, path should
356                    // never be null)
357                }
358
359                if (authority.length() == 0) {
360                    authority = null;
361                } else {
362                    validateAuthority(uri, authority, index1 + 3);
363                }
364            } else { // no authority specified
365                path = temp;
366            }
367
368            int pathIndex = 0;
369            if (index2 > -1) {
370                pathIndex += index2;
371            }
372            if (index > -1) {
373                pathIndex += index;
374            }
375            validatePath(uri, path, pathIndex);
376        } else { // if not hierarchical, URI is opaque
377            opaque = true;
378            validateSsp(uri, schemeSpecificPart, index2 + 2 + index);
379        }
380
381        parseAuthority(forceServer);
382    }
383
384    private void validateScheme(String uri, String scheme, int index)
385            throws URISyntaxException {
386        // first char needs to be an alpha char
387        char ch = scheme.charAt(0);
388        if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'))) {
389            throw new URISyntaxException(uri, "Illegal character in scheme", 0);
390        }
391
392        try {
393            URIEncoderDecoder.validateSimple(scheme, "+-.");
394        } catch (URISyntaxException e) {
395            throw new URISyntaxException(uri, "Illegal character in scheme", index + e.getIndex());
396        }
397    }
398
399    private void validateSsp(String uri, String ssp, int index)
400            throws URISyntaxException {
401        try {
402            URIEncoderDecoder.validate(ssp, ALL_LEGAL);
403        } catch (URISyntaxException e) {
404            throw new URISyntaxException(uri,
405                    e.getReason() + " in schemeSpecificPart", index + e.getIndex());
406        }
407    }
408
409    private void validateAuthority(String uri, String authority, int index)
410            throws URISyntaxException {
411        try {
412            URIEncoderDecoder.validate(authority, "@[]" + SOME_LEGAL);
413        } catch (URISyntaxException e) {
414            throw new URISyntaxException(uri, e.getReason() + " in authority", index + e.getIndex());
415        }
416    }
417
418    private void validatePath(String uri, String path, int index)
419            throws URISyntaxException {
420        try {
421            URIEncoderDecoder.validate(path, "/@" + SOME_LEGAL);
422        } catch (URISyntaxException e) {
423            throw new URISyntaxException(uri, e.getReason() + " in path", index + e.getIndex());
424        }
425    }
426
427    private void validateQuery(String uri, String query, int index)
428            throws URISyntaxException {
429        try {
430            URIEncoderDecoder.validate(query, ALL_LEGAL);
431        } catch (URISyntaxException e) {
432            throw new URISyntaxException(uri, e.getReason() + " in query", index + e.getIndex());
433
434        }
435    }
436
437    private void validateFragment(String uri, String fragment, int index)
438            throws URISyntaxException {
439        try {
440            URIEncoderDecoder.validate(fragment, ALL_LEGAL);
441        } catch (URISyntaxException e) {
442            throw new URISyntaxException(uri, e.getReason() + " in fragment", index + e.getIndex());
443        }
444    }
445
446    /**
447     * Parse the authority string into its component parts: user info,
448     * host, and port. This operation doesn't apply to registry URIs, and
449     * calling it on such <i>may</i> result in a syntax exception.
450     *
451     * @param forceServer true to always throw if the authority cannot be
452     *     parsed. If false, this method may still throw for some kinds of
453     *     errors; this unpredictable behavior is consistent with the RI.
454     */
455    private void parseAuthority(boolean forceServer) throws URISyntaxException {
456        if (authority == null) {
457            return;
458        }
459
460        String tempUserInfo = null;
461        String temp = authority;
462        int index = temp.indexOf('@');
463        int hostIndex = 0;
464        if (index != -1) {
465            // remove user info
466            tempUserInfo = temp.substring(0, index);
467            validateUserInfo(authority, tempUserInfo, 0);
468            temp = temp.substring(index + 1); // host[:port] is left
469            hostIndex = index + 1;
470        }
471
472        index = temp.lastIndexOf(':');
473        int endIndex = temp.indexOf(']');
474
475        String tempHost;
476        int tempPort = -1;
477        if (index != -1 && endIndex < index) {
478            // determine port and host
479            tempHost = temp.substring(0, index);
480
481            if (index < (temp.length() - 1)) { // port part is not empty
482                try {
483                    tempPort = Integer.parseInt(temp.substring(index + 1));
484                    if (tempPort < 0) {
485                        if (forceServer) {
486                            throw new URISyntaxException(authority,
487                                    "Invalid port number", hostIndex + index + 1);
488                        }
489                        return;
490                    }
491                } catch (NumberFormatException e) {
492                    if (forceServer) {
493                        throw new URISyntaxException(authority,
494                                "Invalid port number", hostIndex + index + 1);
495                    }
496                    return;
497                }
498            }
499        } else {
500            tempHost = temp;
501        }
502
503        if (tempHost.isEmpty()) {
504            if (forceServer) {
505                throw new URISyntaxException(authority, "Expected host", hostIndex);
506            }
507            return;
508        }
509
510        if (!isValidHost(forceServer, tempHost)) {
511            return;
512        }
513
514        // this is a server based uri,
515        // fill in the userInfo, host and port fields
516        userInfo = tempUserInfo;
517        host = tempHost;
518        port = tempPort;
519        serverAuthority = true;
520    }
521
522    private void validateUserInfo(String uri, String userInfo, int index)
523            throws URISyntaxException {
524        for (int i = 0; i < userInfo.length(); i++) {
525            char ch = userInfo.charAt(i);
526            if (ch == ']' || ch == '[') {
527                throw new URISyntaxException(uri, "Illegal character in userInfo", index + i);
528            }
529        }
530    }
531
532    /**
533     * Returns true if {@code host} is a well-formed host name or IP address.
534     *
535     * @param forceServer true to always throw if the host cannot be parsed. If
536     *     false, this method may still throw for some kinds of errors; this
537     *     unpredictable behavior is consistent with the RI.
538     */
539    private boolean isValidHost(boolean forceServer, String host) throws URISyntaxException {
540        if (host.startsWith("[")) {
541            // IPv6 address
542            if (!host.endsWith("]")) {
543                throw new URISyntaxException(host,
544                        "Expected a closing square bracket for IPv6 address", 0);
545            }
546            byte[] bytes = InetAddress.ipStringToByteArray(host);
547            /*
548             * The native IP parser may return 4 bytes for addresses like
549             * "[::FFFF:127.0.0.1]". This is allowed, but we must not accept
550             * IPv4-formatted addresses in square braces like "[127.0.0.1]".
551             */
552            if (bytes != null && (bytes.length == 16 || bytes.length == 4 && host.contains(":"))) {
553                return true;
554            }
555            throw new URISyntaxException(host, "Malformed IPv6 address");
556        }
557
558        // '[' and ']' can only be the first char and last char
559        // of the host name
560        if (host.indexOf('[') != -1 || host.indexOf(']') != -1) {
561            throw new URISyntaxException(host, "Illegal character in host name", 0);
562        }
563
564        int index = host.lastIndexOf('.');
565        if (index < 0 || index == host.length() - 1
566                || !Character.isDigit(host.charAt(index + 1))) {
567            // domain name
568            if (isValidDomainName(host)) {
569                return true;
570            }
571            if (forceServer) {
572                throw new URISyntaxException(host, "Illegal character in host name", 0);
573            }
574            return false;
575        }
576
577        // IPv4 address
578        byte[] bytes = InetAddress.ipStringToByteArray(host);
579        if (bytes != null && bytes.length == 4) {
580            return true;
581        }
582
583        if (forceServer) {
584            throw new URISyntaxException(host, "Malformed IPv4 address", 0);
585        }
586        return false;
587    }
588
589    private boolean isValidDomainName(String host) {
590        try {
591            URIEncoderDecoder.validateSimple(host, "-.");
592        } catch (URISyntaxException e) {
593            return false;
594        }
595
596        String lastLabel = null;
597        StringTokenizer st = new StringTokenizer(host, ".");
598        while (st.hasMoreTokens()) {
599            lastLabel = st.nextToken();
600            if (lastLabel.startsWith("-") || lastLabel.endsWith("-")) {
601                return false;
602            }
603        }
604
605        if (lastLabel == null) {
606            return false;
607        }
608
609        if (!lastLabel.equals(host)) {
610            char ch = lastLabel.charAt(0);
611            if (ch >= '0' && ch <= '9') {
612                return false;
613            }
614        }
615        return true;
616    }
617
618    /**
619     * Quote illegal chars for each component, but not the others
620     *
621     * @param component java.lang.String the component to be converted
622     * @param legalSet the legal character set allowed in the component
623     * @return java.lang.String the converted string
624     */
625    private String quoteComponent(String component, String legalSet) {
626        try {
627            /*
628             * Use a different encoder than URLEncoder since: 1. chars like "/",
629             * "#", "@" etc needs to be preserved instead of being encoded, 2.
630             * UTF-8 char set needs to be used for encoding instead of default
631             * platform one
632             */
633            return URIEncoderDecoder.quoteIllegal(component, legalSet);
634        } catch (UnsupportedEncodingException e) {
635            throw new RuntimeException(e.toString());
636        }
637    }
638
639    /**
640     * Compares this URI with the given argument {@code uri}. This method will
641     * return a negative value if this URI instance is less than the given
642     * argument and a positive value if this URI instance is greater than the
643     * given argument. The return value {@code 0} indicates that the two
644     * instances represent the same URI. To define the order the single parts of
645     * the URI are compared with each other. String components will be ordered
646     * in the natural case-sensitive way. A hierarchical URI is less than an
647     * opaque URI and if one part is {@code null} the URI with the undefined
648     * part is less than the other one.
649     *
650     * @param uri
651     *            the URI this instance has to compare with.
652     * @return the value representing the order of the two instances.
653     */
654    public int compareTo(URI uri) {
655        int ret;
656
657        // compare schemes
658        if (scheme == null && uri.scheme != null) {
659            return -1;
660        } else if (scheme != null && uri.scheme == null) {
661            return 1;
662        } else if (scheme != null && uri.scheme != null) {
663            ret = scheme.compareToIgnoreCase(uri.scheme);
664            if (ret != 0) {
665                return ret;
666            }
667        }
668
669        // compare opacities
670        if (!opaque && uri.opaque) {
671            return -1;
672        } else if (opaque && !uri.opaque) {
673            return 1;
674        } else if (opaque && uri.opaque) {
675            ret = schemeSpecificPart.compareTo(uri.schemeSpecificPart);
676            if (ret != 0) {
677                return ret;
678            }
679        } else {
680
681            // otherwise both must be hierarchical
682
683            // compare authorities
684            if (authority != null && uri.authority == null) {
685                return 1;
686            } else if (authority == null && uri.authority != null) {
687                return -1;
688            } else if (authority != null && uri.authority != null) {
689                if (host != null && uri.host != null) {
690                    // both are server based, so compare userInfo, host, port
691                    if (userInfo != null && uri.userInfo == null) {
692                        return 1;
693                    } else if (userInfo == null && uri.userInfo != null) {
694                        return -1;
695                    } else if (userInfo != null && uri.userInfo != null) {
696                        ret = userInfo.compareTo(uri.userInfo);
697                        if (ret != 0) {
698                            return ret;
699                        }
700                    }
701
702                    // userInfo's are the same, compare hostname
703                    ret = host.compareToIgnoreCase(uri.host);
704                    if (ret != 0) {
705                        return ret;
706                    }
707
708                    // compare port
709                    if (port != uri.port) {
710                        return port - uri.port;
711                    }
712                } else { // one or both are registry based, compare the whole
713                    // authority
714                    ret = authority.compareTo(uri.authority);
715                    if (ret != 0) {
716                        return ret;
717                    }
718                }
719            }
720
721            // authorities are the same
722            // compare paths
723            ret = path.compareTo(uri.path);
724            if (ret != 0) {
725                return ret;
726            }
727
728            // compare queries
729
730            if (query != null && uri.query == null) {
731                return 1;
732            } else if (query == null && uri.query != null) {
733                return -1;
734            } else if (query != null && uri.query != null) {
735                ret = query.compareTo(uri.query);
736                if (ret != 0) {
737                    return ret;
738                }
739            }
740        }
741
742        // everything else is identical, so compare fragments
743        if (fragment != null && uri.fragment == null) {
744            return 1;
745        } else if (fragment == null && uri.fragment != null) {
746            return -1;
747        } else if (fragment != null && uri.fragment != null) {
748            ret = fragment.compareTo(uri.fragment);
749            if (ret != 0) {
750                return ret;
751            }
752        }
753
754        // identical
755        return 0;
756    }
757
758    /**
759     * Returns the URI formed by parsing {@code uri}. This method behaves
760     * identically to the string constructor but throws a different exception
761     * on failure. The constructor fails with a checked {@link
762     * URISyntaxException}; this method fails with an unchecked {@link
763     * IllegalArgumentException}.
764     */
765    public static URI create(String uri) {
766        try {
767            return new URI(uri);
768        } catch (URISyntaxException e) {
769            throw new IllegalArgumentException(e.getMessage());
770        }
771    }
772
773    private URI duplicate() {
774        URI clone = new URI();
775        clone.absolute = absolute;
776        clone.authority = authority;
777        clone.fragment = fragment;
778        clone.host = host;
779        clone.opaque = opaque;
780        clone.path = path;
781        clone.port = port;
782        clone.query = query;
783        clone.scheme = scheme;
784        clone.schemeSpecificPart = schemeSpecificPart;
785        clone.userInfo = userInfo;
786        clone.serverAuthority = serverAuthority;
787        return clone;
788    }
789
790    /*
791     * Takes a string that may contain hex sequences like %F1 or %2b and
792     * converts the hex values following the '%' to lowercase
793     */
794    private String convertHexToLowerCase(String s) {
795        StringBuilder result = new StringBuilder("");
796        if (s.indexOf('%') == -1) {
797            return s;
798        }
799
800        int index, prevIndex = 0;
801        while ((index = s.indexOf('%', prevIndex)) != -1) {
802            result.append(s.substring(prevIndex, index + 1));
803            result.append(s.substring(index + 1, index + 3).toLowerCase());
804            index += 3;
805            prevIndex = index;
806        }
807        return result.toString();
808    }
809
810    /**
811     * Returns true if {@code first} and {@code second} are equal after
812     * unescaping hex sequences like %F1 and %2b.
813     */
814    private boolean escapedEquals(String first, String second) {
815        if (first.indexOf('%') != second.indexOf('%')) {
816            return first.equals(second);
817        }
818
819        int index, prevIndex = 0;
820        while ((index = first.indexOf('%', prevIndex)) != -1
821                && second.indexOf('%', prevIndex) == index) {
822            boolean match = first.substring(prevIndex, index).equals(
823                    second.substring(prevIndex, index));
824            if (!match) {
825                return false;
826            }
827
828            match = first.substring(index + 1, index + 3).equalsIgnoreCase(
829                    second.substring(index + 1, index + 3));
830            if (!match) {
831                return false;
832            }
833
834            index += 3;
835            prevIndex = index;
836        }
837        return first.substring(prevIndex).equals(second.substring(prevIndex));
838    }
839
840    /**
841     * Compares this URI instance with the given argument {@code o} and
842     * determines if both are equal. Two URI instances are equal if all single
843     * parts are identical in their meaning.
844     *
845     * @param o
846     *            the URI this instance has to be compared with.
847     * @return {@code true} if both URI instances point to the same resource,
848     *         {@code false} otherwise.
849     */
850    @Override
851    public boolean equals(Object o) {
852        if (!(o instanceof URI)) {
853            return false;
854        }
855        URI uri = (URI) o;
856
857        if (uri.fragment == null && fragment != null || uri.fragment != null
858                && fragment == null) {
859            return false;
860        } else if (uri.fragment != null && fragment != null) {
861            if (!escapedEquals(uri.fragment, fragment)) {
862                return false;
863            }
864        }
865
866        if (uri.scheme == null && scheme != null || uri.scheme != null
867                && scheme == null) {
868            return false;
869        } else if (uri.scheme != null && scheme != null) {
870            if (!uri.scheme.equalsIgnoreCase(scheme)) {
871                return false;
872            }
873        }
874
875        if (uri.opaque && opaque) {
876            return escapedEquals(uri.schemeSpecificPart,
877                    schemeSpecificPart);
878        } else if (!uri.opaque && !opaque) {
879            if (!escapedEquals(path, uri.path)) {
880                return false;
881            }
882
883            if (uri.query != null && query == null || uri.query == null
884                    && query != null) {
885                return false;
886            } else if (uri.query != null && query != null) {
887                if (!escapedEquals(uri.query, query)) {
888                    return false;
889                }
890            }
891
892            if (uri.authority != null && authority == null
893                    || uri.authority == null && authority != null) {
894                return false;
895            } else if (uri.authority != null && authority != null) {
896                if (uri.host != null && host == null || uri.host == null
897                        && host != null) {
898                    return false;
899                } else if (uri.host == null && host == null) {
900                    // both are registry based, so compare the whole authority
901                    return escapedEquals(uri.authority, authority);
902                } else { // uri.host != null && host != null, so server-based
903                    if (!host.equalsIgnoreCase(uri.host)) {
904                        return false;
905                    }
906
907                    if (port != uri.port) {
908                        return false;
909                    }
910
911                    if (uri.userInfo != null && userInfo == null
912                            || uri.userInfo == null && userInfo != null) {
913                        return false;
914                    } else if (uri.userInfo != null && userInfo != null) {
915                        return escapedEquals(userInfo, uri.userInfo);
916                    } else {
917                        return true;
918                    }
919                }
920            } else {
921                // no authority
922                return true;
923            }
924
925        } else {
926            // one is opaque, the other hierarchical
927            return false;
928        }
929    }
930
931    /**
932     * Gets the decoded authority part of this URI.
933     *
934     * @return the decoded authority part or {@code null} if undefined.
935     */
936    public String getAuthority() {
937        return decode(authority);
938    }
939
940    /**
941     * Gets the decoded fragment part of this URI.
942     *
943     * @return the decoded fragment part or {@code null} if undefined.
944     */
945    public String getFragment() {
946        return decode(fragment);
947    }
948
949    /**
950     * Gets the host part of this URI.
951     *
952     * @return the host part or {@code null} if undefined.
953     */
954    public String getHost() {
955        return host;
956    }
957
958    /**
959     * Gets the decoded path part of this URI.
960     *
961     * @return the decoded path part or {@code null} if undefined.
962     */
963    public String getPath() {
964        return decode(path);
965    }
966
967    /**
968     * Gets the port number of this URI.
969     *
970     * @return the port number or {@code -1} if undefined.
971     */
972    public int getPort() {
973        return port;
974    }
975
976    /** @hide */
977    public int getEffectivePort() {
978        return getEffectivePort(scheme, port);
979    }
980
981    /**
982     * Returns the port to use for {@code scheme} connections will use when
983     * {@link #getPort} returns {@code specifiedPort}.
984     *
985     * @hide
986     */
987    public static int getEffectivePort(String scheme, int specifiedPort) {
988        if (specifiedPort != -1) {
989            return specifiedPort;
990        }
991
992        if ("http".equalsIgnoreCase(scheme)) {
993            return 80;
994        } else if ("https".equalsIgnoreCase(scheme)) {
995            return 443;
996        } else {
997            return -1;
998        }
999    }
1000
1001    /**
1002     * Gets the decoded query part of this URI.
1003     *
1004     * @return the decoded query part or {@code null} if undefined.
1005     */
1006    public String getQuery() {
1007        return decode(query);
1008    }
1009
1010    /**
1011     * Gets the authority part of this URI in raw form.
1012     *
1013     * @return the encoded authority part or {@code null} if undefined.
1014     */
1015    public String getRawAuthority() {
1016        return authority;
1017    }
1018
1019    /**
1020     * Gets the fragment part of this URI in raw form.
1021     *
1022     * @return the encoded fragment part or {@code null} if undefined.
1023     */
1024    public String getRawFragment() {
1025        return fragment;
1026    }
1027
1028    /**
1029     * Gets the path part of this URI in raw form.
1030     *
1031     * @return the encoded path part or {@code null} if undefined.
1032     */
1033    public String getRawPath() {
1034        return path;
1035    }
1036
1037    /**
1038     * Gets the query part of this URI in raw form.
1039     *
1040     * @return the encoded query part or {@code null} if undefined.
1041     */
1042    public String getRawQuery() {
1043        return query;
1044    }
1045
1046    /**
1047     * Gets the scheme-specific part of this URI in raw form.
1048     *
1049     * @return the encoded scheme-specific part or {@code null} if undefined.
1050     */
1051    public String getRawSchemeSpecificPart() {
1052        return schemeSpecificPart;
1053    }
1054
1055    /**
1056     * Gets the user-info part of this URI in raw form.
1057     *
1058     * @return the encoded user-info part or {@code null} if undefined.
1059     */
1060    public String getRawUserInfo() {
1061        return userInfo;
1062    }
1063
1064    /**
1065     * Gets the scheme part of this URI.
1066     *
1067     * @return the scheme part or {@code null} if undefined.
1068     */
1069    public String getScheme() {
1070        return scheme;
1071    }
1072
1073    /**
1074     * Gets the decoded scheme-specific part of this URI.
1075     *
1076     * @return the decoded scheme-specific part or {@code null} if undefined.
1077     */
1078    public String getSchemeSpecificPart() {
1079        return decode(schemeSpecificPart);
1080    }
1081
1082    /**
1083     * Gets the decoded user-info part of this URI.
1084     *
1085     * @return the decoded user-info part or {@code null} if undefined.
1086     */
1087    public String getUserInfo() {
1088        return decode(userInfo);
1089    }
1090
1091    /**
1092     * Gets the hashcode value of this URI instance.
1093     *
1094     * @return the appropriate hashcode value.
1095     */
1096    @Override
1097    public int hashCode() {
1098        if (hash == -1) {
1099            hash = getHashString().hashCode();
1100        }
1101        return hash;
1102    }
1103
1104    /**
1105     * Indicates whether this URI is absolute, which means that a scheme part is
1106     * defined in this URI.
1107     *
1108     * @return {@code true} if this URI is absolute, {@code false} otherwise.
1109     */
1110    public boolean isAbsolute() {
1111        return absolute;
1112    }
1113
1114    /**
1115     * Indicates whether this URI is opaque or not. An opaque URI is absolute
1116     * and has a scheme-specific part which does not start with a slash
1117     * character. All parts except scheme, scheme-specific and fragment are
1118     * undefined.
1119     *
1120     * @return {@code true} if the URI is opaque, {@code false} otherwise.
1121     */
1122    public boolean isOpaque() {
1123        return opaque;
1124    }
1125
1126    /*
1127     * normalize path, and return the resulting string
1128     */
1129    private String normalize(String path) {
1130        // count the number of '/'s, to determine number of segments
1131        int index = -1;
1132        int pathLength = path.length();
1133        int size = 0;
1134        if (pathLength > 0 && path.charAt(0) != '/') {
1135            size++;
1136        }
1137        while ((index = path.indexOf('/', index + 1)) != -1) {
1138            if (index + 1 < pathLength && path.charAt(index + 1) != '/') {
1139                size++;
1140            }
1141        }
1142
1143        String[] segList = new String[size];
1144        boolean[] include = new boolean[size];
1145
1146        // break the path into segments and store in the list
1147        int current = 0;
1148        int index2;
1149        index = (pathLength > 0 && path.charAt(0) == '/') ? 1 : 0;
1150        while ((index2 = path.indexOf('/', index + 1)) != -1) {
1151            segList[current++] = path.substring(index, index2);
1152            index = index2 + 1;
1153        }
1154
1155        // if current==size, then the last character was a slash
1156        // and there are no more segments
1157        if (current < size) {
1158            segList[current] = path.substring(index);
1159        }
1160
1161        // determine which segments get included in the normalized path
1162        for (int i = 0; i < size; i++) {
1163            include[i] = true;
1164            if (segList[i].equals("..")) {
1165                int remove = i - 1;
1166                // search back to find a segment to remove, if possible
1167                while (remove > -1 && !include[remove]) {
1168                    remove--;
1169                }
1170                // if we find a segment to remove, remove it and the ".."
1171                // segment
1172                if (remove > -1 && !segList[remove].equals("..")) {
1173                    include[remove] = false;
1174                    include[i] = false;
1175                }
1176            } else if (segList[i].equals(".")) {
1177                include[i] = false;
1178            }
1179        }
1180
1181        // put the path back together
1182        StringBuilder newPath = new StringBuilder();
1183        if (path.startsWith("/")) {
1184            newPath.append('/');
1185        }
1186
1187        for (int i = 0; i < segList.length; i++) {
1188            if (include[i]) {
1189                newPath.append(segList[i]);
1190                newPath.append('/');
1191            }
1192        }
1193
1194        // if we used at least one segment and the path previously ended with
1195        // a slash and the last segment is still used, then delete the extra
1196        // trailing '/'
1197        if (!path.endsWith("/") && segList.length > 0
1198                && include[segList.length - 1]) {
1199            newPath.deleteCharAt(newPath.length() - 1);
1200        }
1201
1202        String result = newPath.toString();
1203
1204        // check for a ':' in the first segment if one exists,
1205        // prepend "./" to normalize
1206        index = result.indexOf(':');
1207        index2 = result.indexOf('/');
1208        if (index != -1 && (index < index2 || index2 == -1)) {
1209            newPath.insert(0, "./");
1210            result = newPath.toString();
1211        }
1212        return result;
1213    }
1214
1215    /**
1216     * Normalizes the path part of this URI.
1217     *
1218     * @return an URI object which represents this instance with a normalized
1219     *         path.
1220     */
1221    public URI normalize() {
1222        if (opaque) {
1223            return this;
1224        }
1225        String normalizedPath = normalize(path);
1226        // if the path is already normalized, return this
1227        if (path.equals(normalizedPath)) {
1228            return this;
1229        }
1230        // get an exact copy of the URI re-calculate the scheme specific part
1231        // since the path of the normalized URI is different from this URI.
1232        URI result = duplicate();
1233        result.path = normalizedPath;
1234        result.setSchemeSpecificPart();
1235        return result;
1236    }
1237
1238    /**
1239     * Tries to parse the authority component of this URI to divide it into the
1240     * host, port, and user-info. If this URI is already determined as a
1241     * ServerAuthority this instance will be returned without changes.
1242     *
1243     * @return this instance with the components of the parsed server authority.
1244     * @throws URISyntaxException
1245     *             if the authority part could not be parsed as a server-based
1246     *             authority.
1247     */
1248    public URI parseServerAuthority() throws URISyntaxException {
1249        if (!serverAuthority) {
1250            parseAuthority(true);
1251        }
1252        return this;
1253    }
1254
1255    /**
1256     * Makes the given URI {@code relative} to a relative URI against the URI
1257     * represented by this instance.
1258     *
1259     * @param relative
1260     *            the URI which has to be relativized against this URI.
1261     * @return the relative URI.
1262     */
1263    public URI relativize(URI relative) {
1264        if (relative.opaque || opaque) {
1265            return relative;
1266        }
1267
1268        if (scheme == null ? relative.scheme != null : !scheme
1269                .equals(relative.scheme)) {
1270            return relative;
1271        }
1272
1273        if (authority == null ? relative.authority != null : !authority
1274                .equals(relative.authority)) {
1275            return relative;
1276        }
1277
1278        // normalize both paths
1279        String thisPath = normalize(path);
1280        String relativePath = normalize(relative.path);
1281
1282        /*
1283         * if the paths aren't equal, then we need to determine if this URI's
1284         * path is a parent path (begins with) the relative URI's path
1285         */
1286        if (!thisPath.equals(relativePath)) {
1287            // if this URI's path doesn't end in a '/', add one
1288            if (!thisPath.endsWith("/")) {
1289                thisPath = thisPath + '/';
1290            }
1291            /*
1292             * if the relative URI's path doesn't start with this URI's path,
1293             * then just return the relative URI; the URIs have nothing in
1294             * common
1295             */
1296            if (!relativePath.startsWith(thisPath)) {
1297                return relative;
1298            }
1299        }
1300
1301        URI result = new URI();
1302        result.fragment = relative.fragment;
1303        result.query = relative.query;
1304        // the result URI is the remainder of the relative URI's path
1305        result.path = relativePath.substring(thisPath.length());
1306        result.setSchemeSpecificPart();
1307        return result;
1308    }
1309
1310    /**
1311     * Resolves the given URI {@code relative} against the URI represented by
1312     * this instance.
1313     *
1314     * @param relative
1315     *            the URI which has to be resolved against this URI.
1316     * @return the resolved URI.
1317     */
1318    public URI resolve(URI relative) {
1319        if (relative.absolute || opaque) {
1320            return relative;
1321        }
1322
1323        URI result;
1324        if (relative.path.isEmpty() && relative.scheme == null
1325                && relative.authority == null && relative.query == null
1326                && relative.fragment != null) {
1327            // if the relative URI only consists of fragment,
1328            // the resolved URI is very similar to this URI,
1329            // except that it has the fragment from the relative URI.
1330            result = duplicate();
1331            result.fragment = relative.fragment;
1332            // no need to re-calculate the scheme specific part,
1333            // since fragment is not part of scheme specific part.
1334            return result;
1335        }
1336
1337        if (relative.authority != null) {
1338            // if the relative URI has authority,
1339            // the resolved URI is almost the same as the relative URI,
1340            // except that it has the scheme of this URI.
1341            result = relative.duplicate();
1342            result.scheme = scheme;
1343            result.absolute = absolute;
1344        } else {
1345            // since relative URI has no authority,
1346            // the resolved URI is very similar to this URI,
1347            // except that it has the query and fragment of the relative URI,
1348            // and the path is different.
1349            result = duplicate();
1350            result.fragment = relative.fragment;
1351            result.query = relative.query;
1352            if (relative.path.startsWith("/")) {
1353                result.path = relative.path;
1354            } else {
1355                // resolve a relative reference
1356                int endIndex = path.lastIndexOf('/') + 1;
1357                result.path = normalize(path.substring(0, endIndex)
1358                        + relative.path);
1359            }
1360            // re-calculate the scheme specific part since
1361            // query and path of the resolved URI is different from this URI.
1362            result.setSchemeSpecificPart();
1363        }
1364        return result;
1365    }
1366
1367    /**
1368     * Helper method used to re-calculate the scheme specific part of the
1369     * resolved or normalized URIs
1370     */
1371    private void setSchemeSpecificPart() {
1372        // ssp = [//authority][path][?query]
1373        StringBuilder ssp = new StringBuilder();
1374        if (authority != null) {
1375            ssp.append("//" + authority);
1376        }
1377        if (path != null) {
1378            ssp.append(path);
1379        }
1380        if (query != null) {
1381            ssp.append("?" + query);
1382        }
1383        schemeSpecificPart = ssp.toString();
1384        // reset string, so that it can be re-calculated correctly when asked.
1385        string = null;
1386    }
1387
1388    /**
1389     * Creates a new URI instance by parsing the given string {@code relative}
1390     * and resolves the created URI against the URI represented by this
1391     * instance.
1392     *
1393     * @param relative
1394     *            the given string to create the new URI instance which has to
1395     *            be resolved later on.
1396     * @return the created and resolved URI.
1397     */
1398    public URI resolve(String relative) {
1399        return resolve(create(relative));
1400    }
1401
1402    /**
1403     * Encode unicode chars that are not part of US-ASCII char set into the
1404     * escaped form
1405     *
1406     * i.e. The Euro currency symbol is encoded as "%E2%82%AC".
1407     */
1408    private String encodeNonAscii(String s) {
1409        try {
1410            /*
1411             * Use a different encoder than URLEncoder since: 1. chars like "/",
1412             * "#", "@" etc needs to be preserved instead of being encoded, 2.
1413             * UTF-8 char set needs to be used for encoding instead of default
1414             * platform one 3. Only other chars need to be converted
1415             */
1416            return URIEncoderDecoder.encodeOthers(s);
1417        } catch (UnsupportedEncodingException e) {
1418            throw new RuntimeException(e.toString());
1419        }
1420    }
1421
1422    private String decode(String s) {
1423        if (s == null) {
1424            return s;
1425        }
1426
1427        try {
1428            return URIEncoderDecoder.decode(s);
1429        } catch (UnsupportedEncodingException e) {
1430            throw new RuntimeException(e.toString());
1431        }
1432    }
1433
1434    /**
1435     * Returns the textual string representation of this URI instance using the
1436     * US-ASCII encoding.
1437     *
1438     * @return the US-ASCII string representation of this URI.
1439     */
1440    public String toASCIIString() {
1441        return encodeNonAscii(toString());
1442    }
1443
1444    /**
1445     * Returns the textual string representation of this URI instance.
1446     *
1447     * @return the textual string representation of this URI.
1448     */
1449    @Override
1450    public String toString() {
1451        if (string == null) {
1452            StringBuilder result = new StringBuilder();
1453            if (scheme != null) {
1454                result.append(scheme);
1455                result.append(':');
1456            }
1457            if (opaque) {
1458                result.append(schemeSpecificPart);
1459            } else {
1460                if (authority != null) {
1461                    result.append("//");
1462                    result.append(authority);
1463                }
1464
1465                if (path != null) {
1466                    result.append(path);
1467                }
1468
1469                if (query != null) {
1470                    result.append('?');
1471                    result.append(query);
1472                }
1473            }
1474
1475            if (fragment != null) {
1476                result.append('#');
1477                result.append(fragment);
1478            }
1479
1480            string = result.toString();
1481        }
1482        return string;
1483    }
1484
1485    /*
1486     * Form a string from the components of this URI, similarly to the
1487     * toString() method. But this method converts scheme and host to lowercase,
1488     * and converts escaped octets to lowercase.
1489     */
1490    private String getHashString() {
1491        StringBuilder result = new StringBuilder();
1492        if (scheme != null) {
1493            result.append(scheme.toLowerCase());
1494            result.append(':');
1495        }
1496        if (opaque) {
1497            result.append(schemeSpecificPart);
1498        } else {
1499            if (authority != null) {
1500                result.append("//");
1501                if (host == null) {
1502                    result.append(authority);
1503                } else {
1504                    if (userInfo != null) {
1505                        result.append(userInfo + "@");
1506                    }
1507                    result.append(host.toLowerCase());
1508                    if (port != -1) {
1509                        result.append(":" + port);
1510                    }
1511                }
1512            }
1513
1514            if (path != null) {
1515                result.append(path);
1516            }
1517
1518            if (query != null) {
1519                result.append('?');
1520                result.append(query);
1521            }
1522        }
1523
1524        if (fragment != null) {
1525            result.append('#');
1526            result.append(fragment);
1527        }
1528
1529        return convertHexToLowerCase(result.toString());
1530    }
1531
1532    /**
1533     * Converts this URI instance to a URL.
1534     *
1535     * @return the created URL representing the same resource as this URI.
1536     * @throws MalformedURLException
1537     *             if an error occurs while creating the URL or no protocol
1538     *             handler could be found.
1539     */
1540    public URL toURL() throws MalformedURLException {
1541        if (!absolute) {
1542            throw new IllegalArgumentException("URI is not absolute: " + toString());
1543        }
1544        return new URL(toString());
1545    }
1546
1547    private void readObject(ObjectInputStream in) throws IOException,
1548            ClassNotFoundException {
1549        in.defaultReadObject();
1550        try {
1551            parseURI(string, false);
1552        } catch (URISyntaxException e) {
1553            throw new IOException(e.toString());
1554        }
1555    }
1556
1557    private void writeObject(ObjectOutputStream out) throws IOException,
1558            ClassNotFoundException {
1559        // call toString() to ensure the value of string field is calculated
1560        toString();
1561        out.defaultWriteObject();
1562    }
1563}
1564