URI.java revision 118abc3050371812703e4fabf03f4399d01fb28c
1/*
2 *  Licensed to the Apache Software Foundation (ASF) under one or more
3 *  contributor license agreements.  See the NOTICE file distributed with
4 *  this work for additional information regarding copyright ownership.
5 *  The ASF licenses this file to You under the Apache License, Version 2.0
6 *  (the "License"); you may not use this file except in compliance with
7 *  the License.  You may obtain a copy of the License at
8 *
9 *     http://www.apache.org/licenses/LICENSE-2.0
10 *
11 *  Unless required by applicable law or agreed to in writing, software
12 *  distributed under the License is distributed on an "AS IS" BASIS,
13 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 *  See the License for the specific language governing permissions and
15 *  limitations under the License.
16 */
17
18package java.net;
19
20import java.io.IOException;
21import java.io.ObjectInputStream;
22import java.io.ObjectOutputStream;
23import java.io.Serializable;
24import java.io.UnsupportedEncodingException;
25import java.util.Locale;
26import org.apache.harmony.luni.platform.INetworkSystem;
27import org.apache.harmony.luni.platform.Platform;
28
29/**
30 * This class represents an instance of a URI as defined by RFC 2396.
31 */
32public final class URI implements Comparable<URI>, Serializable {
33
34    private static final long serialVersionUID = -6052424284110960213l;
35
36    static final String UNRESERVED = "_-!.~\'()*";
37    static final String PUNCTUATION = ",;:$&+=";
38    static final String RESERVED = PUNCTUATION + "?/[]@";
39    static final String SOME_LEGAL = UNRESERVED + PUNCTUATION;
40    static final String ALL_LEGAL = UNRESERVED + RESERVED;
41
42    private String string;
43    private transient String scheme;
44    private transient String schemeSpecificPart;
45    private transient String authority;
46    private transient String userInfo;
47    private transient String host;
48    private transient int port = -1;
49    private transient String path;
50    private transient String query;
51    private transient String fragment;
52    private transient boolean opaque;
53    private transient boolean absolute;
54    private transient boolean serverAuthority = false;
55
56    private transient int hash = -1;
57
/**
 * Creates an instance without parsing anything; the caller assigns the
 * component fields itself (see {@code duplicate()}).
 */
private URI() {
}
59
60    /**
61     * Creates a new URI instance according to the given string {@code uri}.
62     *
63     * @param uri
64     *            the textual URI representation to be parsed into a URI object.
65     * @throws URISyntaxException
66     *             if the given string {@code uri} doesn't fit to the
67     *             specification RFC2396 or could not be parsed correctly.
68     */
69    public URI(String uri) throws URISyntaxException {
70        parseURI(uri, false);
71    }
72
/**
 * Creates a new URI instance from a scheme, a scheme-specific part and a
 * fragment. The components are first assembled into a temporary string,
 * quoting illegal characters in the scheme-specific part and the fragment,
 * and that string is then parsed.
 * <p>
 * {@code [scheme:]scheme-specific-part[#fragment]}
 *
 * @param scheme
 *            the scheme part of the URI, or {@code null} for none.
 * @param ssp
 *            the scheme-specific-part of the URI, or {@code null} for none.
 * @param frag
 *            the fragment part of the URI, or {@code null} for none.
 * @throws URISyntaxException
 *             if the assembled string does not conform to RFC 2396 or
 *             could not be parsed correctly.
 */
public URI(String scheme, String ssp, String frag)
        throws URISyntaxException {
    StringBuilder text = new StringBuilder();
    if (scheme != null) {
        text.append(scheme).append(':');
    }
    if (ssp != null) {
        // illegal characters are quoted, legal ones kept
        text.append(quoteComponent(ssp, ALL_LEGAL));
    }
    if (frag != null) {
        // illegal characters are quoted, legal ones kept
        text.append('#').append(quoteComponent(frag, ALL_LEGAL));
    }

    parseURI(text.toString(), false);
}
109
/**
 * Creates a new URI instance from its server-based components. The
 * components are first assembled into a temporary string, quoting illegal
 * characters in the user info, path, query and fragment, and that string
 * is then parsed with server-based authority parsing enforced.
 * <p>
 * {@code [scheme:][//[user-info@]host[:port]][path][?query][#fragment]}
 * <p>
 * If every string argument is {@code null} nothing is parsed and the
 * instance only gets an empty path.
 *
 * @param scheme
 *            the scheme part of the URI.
 * @param userInfo
 *            the user information of the URI for authentication and
 *            authorization.
 * @param host
 *            the host name of the URI; an IPv6 literal without square
 *            brackets is enclosed in brackets automatically.
 * @param port
 *            the port number of the URI, or {@code -1} for none.
 * @param path
 *            the path to the resource on the host.
 * @param query
 *            the query part of the URI to specify parameters for the
 *            resource.
 * @param fragment
 *            the fragment part of the URI.
 * @throws URISyntaxException
 *             if a scheme is given together with a relative path, or if
 *             the assembled string does not conform to RFC 2396 or could
 *             not be parsed correctly.
 */
public URI(String scheme, String userInfo, String host, int port,
        String path, String query, String fragment)
        throws URISyntaxException {

    boolean nothingGiven = scheme == null && userInfo == null
            && host == null && path == null && query == null
            && fragment == null;
    if (nothingGiven) {
        this.path = "";
        return;
    }

    boolean relativePath = path != null && path.length() > 0
            && path.charAt(0) != '/';
    if (scheme != null && relativePath) {
        throw new URISyntaxException(path, "Relative path");
    }

    StringBuilder text = new StringBuilder();
    if (scheme != null) {
        text.append(scheme).append(':');
    }

    boolean hasAuthorityPart = userInfo != null || host != null || port != -1;
    if (hasAuthorityPart) {
        text.append("//");
    }

    if (userInfo != null) {
        // illegal characters in the user info are quoted
        text.append(quoteComponent(userInfo, SOME_LEGAL)).append('@');
    }

    if (host != null) {
        // an IPv6 address that has not been enclosed in square brackets
        boolean bareIpv6 = host.indexOf(':') != -1
                && host.indexOf(']') == -1 && host.indexOf('[') == -1;
        text.append(bareIpv6 ? "[" + host + "]" : host);
    }

    if (port != -1) {
        text.append(':').append(port);
    }

    if (path != null) {
        // illegal characters in the path are quoted
        text.append(quoteComponent(path, "/@" + SOME_LEGAL));
    }

    if (query != null) {
        // illegal characters in the query are quoted
        text.append('?').append(quoteComponent(query, ALL_LEGAL));
    }

    if (fragment != null) {
        // illegal characters in the fragment are quoted
        text.append('#').append(quoteComponent(fragment, ALL_LEGAL));
    }

    parseURI(text.toString(), true);
}
202
/**
 * Creates a new URI instance from a scheme, host, path and fragment by
 * delegating to the seven-argument constructor with no user info, no
 * port and no query.
 * <p>
 * {@code [scheme:][//host][path][#fragment]}
 *
 * @param scheme
 *            the scheme part of the URI.
 * @param host
 *            the host name of the URI.
 * @param path
 *            the path to the resource on the host.
 * @param fragment
 *            the fragment part of the URI.
 * @throws URISyntaxException
 *             if the assembled string does not conform to RFC 2396 or
 *             could not be parsed correctly.
 */
public URI(String scheme, String host, String path, String fragment)
        throws URISyntaxException {
    this(scheme,
            null, // no user info
            host,
            -1, // no port
            path,
            null, // no query
            fragment);
}
226
/**
 * Creates a new URI instance from a scheme, authority, path, query and
 * fragment. The components are first assembled into a temporary string,
 * quoting illegal characters in the authority, path, query and fragment,
 * and that string is then parsed.
 * <p>
 * {@code [scheme:][//authority][path][?query][#fragment]}
 *
 * @param scheme
 *            the scheme part of the URI.
 * @param authority
 *            the authority part of the URI.
 * @param path
 *            the path to the resource on the host.
 * @param query
 *            the query part of the URI to specify parameters for the
 *            resource.
 * @param fragment
 *            the fragment part of the URI.
 * @throws URISyntaxException
 *             if a scheme is given together with a relative path, or if
 *             the assembled string does not conform to RFC 2396 or could
 *             not be parsed correctly.
 */
public URI(String scheme, String authority, String path, String query,
        String fragment) throws URISyntaxException {
    boolean relativePath = path != null && path.length() > 0
            && path.charAt(0) != '/';
    if (scheme != null && relativePath) {
        throw new URISyntaxException(path, "Relative path");
    }

    StringBuilder text = new StringBuilder();
    if (scheme != null) {
        text.append(scheme).append(':');
    }
    if (authority != null) {
        // illegal characters in the authority are quoted
        text.append("//").append(quoteComponent(authority, "@[]" + SOME_LEGAL));
    }
    if (path != null) {
        // illegal characters in the path are quoted
        text.append(quoteComponent(path, "/@" + SOME_LEGAL));
    }
    if (query != null) {
        // illegal characters in the query are quoted
        text.append('?').append(quoteComponent(query, ALL_LEGAL));
    }
    if (fragment != null) {
        // illegal characters in the fragment are quoted
        text.append('#').append(quoteComponent(fragment, ALL_LEGAL));
    }

    parseURI(text.toString(), false);
}
284
285    private void parseURI(String uri, boolean forceServer) throws URISyntaxException {
286        String temp = uri;
287        // assign uri string to the input value per spec
288        string = uri;
289        int index, index1, index2, index3;
290        // parse into Fragment, Scheme, and SchemeSpecificPart
291        // then parse SchemeSpecificPart if necessary
292
293        // Fragment
294        index = temp.indexOf('#');
295        if (index != -1) {
296            // remove the fragment from the end
297            fragment = temp.substring(index + 1);
298            validateFragment(uri, fragment, index + 1);
299            temp = temp.substring(0, index);
300        }
301
302        // Scheme and SchemeSpecificPart
303        index = index1 = temp.indexOf(':');
304        index2 = temp.indexOf('/');
305        index3 = temp.indexOf('?');
306
307        // if a '/' or '?' occurs before the first ':' the uri has no
308        // specified scheme, and is therefore not absolute
309        if (index != -1 && (index2 >= index || index2 == -1)
310                && (index3 >= index || index3 == -1)) {
311            // the characters up to the first ':' comprise the scheme
312            absolute = true;
313            scheme = temp.substring(0, index);
314            if (scheme.length() == 0) {
315                throw new URISyntaxException(uri, "Scheme expected", index);
316            }
317            validateScheme(uri, scheme, 0);
318            schemeSpecificPart = temp.substring(index + 1);
319            if (schemeSpecificPart.length() == 0) {
320                throw new URISyntaxException(uri, "Scheme-specific part expected", index + 1);
321            }
322        } else {
323            absolute = false;
324            schemeSpecificPart = temp;
325        }
326
327        if (scheme == null || schemeSpecificPart.length() > 0
328                && schemeSpecificPart.charAt(0) == '/') {
329            opaque = false;
330            // the URI is hierarchical
331
332            // Query
333            temp = schemeSpecificPart;
334            index = temp.indexOf('?');
335            if (index != -1) {
336                query = temp.substring(index + 1);
337                temp = temp.substring(0, index);
338                validateQuery(uri, query, index2 + 1 + index);
339            }
340
341            // Authority and Path
342            if (temp.startsWith("//")) {
343                index = temp.indexOf('/', 2);
344                if (index != -1) {
345                    authority = temp.substring(2, index);
346                    path = temp.substring(index);
347                } else {
348                    authority = temp.substring(2);
349                    if (authority.length() == 0 && query == null
350                            && fragment == null) {
351                        throw new URISyntaxException(uri, "Authority expected", uri.length());
352                    }
353
354                    path = "";
355                    // nothing left, so path is empty (not null, path should
356                    // never be null)
357                }
358
359                if (authority.length() == 0) {
360                    authority = null;
361                } else {
362                    validateAuthority(uri, authority, index1 + 3);
363                }
364            } else { // no authority specified
365                path = temp;
366            }
367
368            int pathIndex = 0;
369            if (index2 > -1) {
370                pathIndex += index2;
371            }
372            if (index > -1) {
373                pathIndex += index;
374            }
375            validatePath(uri, path, pathIndex);
376        } else { // if not hierarchical, URI is opaque
377            opaque = true;
378            validateSsp(uri, schemeSpecificPart, index2 + 2 + index);
379        }
380
381        parseAuthority(forceServer);
382    }
383
384    private void validateScheme(String uri, String scheme, int index)
385            throws URISyntaxException {
386        // first char needs to be an alpha char
387        char ch = scheme.charAt(0);
388        if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'))) {
389            throw new URISyntaxException(uri, "Illegal character in scheme", 0);
390        }
391
392        try {
393            URIEncoderDecoder.validateSimple(scheme, "+-.");
394        } catch (URISyntaxException e) {
395            throw new URISyntaxException(uri, "Illegal character in scheme", index + e.getIndex());
396        }
397    }
398
399    private void validateSsp(String uri, String ssp, int index)
400            throws URISyntaxException {
401        try {
402            URIEncoderDecoder.validate(ssp, ALL_LEGAL);
403        } catch (URISyntaxException e) {
404            throw new URISyntaxException(uri,
405                    e.getReason() + " in schemeSpecificPart", index + e.getIndex());
406        }
407    }
408
409    private void validateAuthority(String uri, String authority, int index)
410            throws URISyntaxException {
411        try {
412            URIEncoderDecoder.validate(authority, "@[]" + SOME_LEGAL);
413        } catch (URISyntaxException e) {
414            throw new URISyntaxException(uri, e.getReason() + " in authority", index + e.getIndex());
415        }
416    }
417
418    private void validatePath(String uri, String path, int index)
419            throws URISyntaxException {
420        try {
421            URIEncoderDecoder.validate(path, "/@" + SOME_LEGAL);
422        } catch (URISyntaxException e) {
423            throw new URISyntaxException(uri, e.getReason() + " in path", index + e.getIndex());
424        }
425    }
426
427    private void validateQuery(String uri, String query, int index)
428            throws URISyntaxException {
429        try {
430            URIEncoderDecoder.validate(query, ALL_LEGAL);
431        } catch (URISyntaxException e) {
432            throw new URISyntaxException(uri, e.getReason() + " in query", index + e.getIndex());
433
434        }
435    }
436
437    private void validateFragment(String uri, String fragment, int index)
438            throws URISyntaxException {
439        try {
440            URIEncoderDecoder.validate(fragment, ALL_LEGAL);
441        } catch (URISyntaxException e) {
442            throw new URISyntaxException(uri, e.getReason() + " in fragment", index + e.getIndex());
443        }
444    }
445
446    /**
447     * Parse the authority string into its component parts: user info,
448     * host, and port. This operation doesn't apply to registry URIs, and
449     * calling it on such <i>may</i> result in a syntax exception.
450     *
451     * @param forceServer true to always throw if the authority cannot be
452     *     parsed. If false, this method may still throw for some kinds of
453     *     errors; this unpredictable behavior is consistent with the RI.
454     */
455    private void parseAuthority(boolean forceServer) throws URISyntaxException {
456        if (authority == null) {
457            return;
458        }
459
460        String tempUserInfo = null;
461        String temp = authority;
462        int index = temp.indexOf('@');
463        int hostIndex = 0;
464        if (index != -1) {
465            // remove user info
466            tempUserInfo = temp.substring(0, index);
467            validateUserInfo(authority, tempUserInfo, 0);
468            temp = temp.substring(index + 1); // host[:port] is left
469            hostIndex = index + 1;
470        }
471
472        index = temp.lastIndexOf(':');
473        int endIndex = temp.indexOf(']');
474
475        String tempHost;
476        int tempPort = -1;
477        if (index != -1 && endIndex < index) {
478            // determine port and host
479            tempHost = temp.substring(0, index);
480
481            if (index < (temp.length() - 1)) { // port part is not empty
482                try {
483                    tempPort = Integer.parseInt(temp.substring(index + 1));
484                    if (tempPort < 0) {
485                        if (forceServer) {
486                            throw new URISyntaxException(authority,
487                                    "Invalid port number", hostIndex + index + 1);
488                        }
489                        return;
490                    }
491                } catch (NumberFormatException e) {
492                    if (forceServer) {
493                        throw new URISyntaxException(authority,
494                                "Invalid port number", hostIndex + index + 1);
495                    }
496                    return;
497                }
498            }
499        } else {
500            tempHost = temp;
501        }
502
503        if (tempHost.isEmpty()) {
504            if (forceServer) {
505                throw new URISyntaxException(authority, "Expected host", hostIndex);
506            }
507            return;
508        }
509
510        if (!isValidHost(forceServer, tempHost)) {
511            return;
512        }
513
514        // this is a server based uri,
515        // fill in the userInfo, host and port fields
516        userInfo = tempUserInfo;
517        host = tempHost;
518        port = tempPort;
519        serverAuthority = true;
520    }
521
522    private void validateUserInfo(String uri, String userInfo, int index)
523            throws URISyntaxException {
524        for (int i = 0; i < userInfo.length(); i++) {
525            char ch = userInfo.charAt(i);
526            if (ch == ']' || ch == '[') {
527                throw new URISyntaxException(uri, "Illegal character in userInfo", index + i);
528            }
529        }
530    }
531
532    /**
533     * Returns true if {@code host} is a well-formed host name or IP address.
534     *
535     * @param forceServer true to always throw if the host cannot be parsed. If
536     *     false, this method may still throw for some kinds of errors; this
537     *     unpredictable behavior is consistent with the RI.
538     */
539    private boolean isValidHost(boolean forceServer, String host) throws URISyntaxException {
540        if (host.startsWith("[")) {
541            // IPv6 address
542            if (!host.endsWith("]")) {
543                throw new URISyntaxException(host,
544                        "Expected a closing square bracket for IPv6 address", 0);
545            }
546            byte[] bytes = InetAddress.ipStringToByteArray(host);
547            /*
548             * The native IP parser may return 4 bytes for addresses like
549             * "[::FFFF:127.0.0.1]". This is allowed, but we must not accept
550             * IPv4-formatted addresses in square braces like "[127.0.0.1]".
551             */
552            if (bytes != null && (bytes.length == 16 || bytes.length == 4 && host.contains(":"))) {
553                return true;
554            }
555            throw new URISyntaxException(host, "Malformed IPv6 address");
556        }
557
558        // '[' and ']' can only be the first char and last char
559        // of the host name
560        if (host.indexOf('[') != -1 || host.indexOf(']') != -1) {
561            throw new URISyntaxException(host, "Illegal character in host name", 0);
562        }
563
564        int index = host.lastIndexOf('.');
565        if (index < 0 || index == host.length() - 1
566                || !Character.isDigit(host.charAt(index + 1))) {
567            // domain name
568            if (isValidDomainName(host)) {
569                return true;
570            }
571            if (forceServer) {
572                throw new URISyntaxException(host, "Illegal character in host name", 0);
573            }
574            return false;
575        }
576
577        // IPv4 address
578        byte[] bytes = InetAddress.ipStringToByteArray(host);
579        if (bytes != null && bytes.length == 4) {
580            return true;
581        }
582
583        if (forceServer) {
584            throw new URISyntaxException(host, "Malformed IPv4 address", 0);
585        }
586        return false;
587    }
588
589    private boolean isValidDomainName(String host) {
590        try {
591            URIEncoderDecoder.validateSimple(host, "-.");
592        } catch (URISyntaxException e) {
593            return false;
594        }
595
596        String lastLabel = null;
597        for (String token : host.split("\\.")) {
598            lastLabel = token;
599            if (lastLabel.startsWith("-") || lastLabel.endsWith("-")) {
600                return false;
601            }
602        }
603
604        if (lastLabel == null) {
605            return false;
606        }
607
608        if (!lastLabel.equals(host)) {
609            char ch = lastLabel.charAt(0);
610            if (ch >= '0' && ch <= '9') {
611                return false;
612            }
613        }
614        return true;
615    }
616
617    /**
618     * Quote illegal chars for each component, but not the others
619     *
620     * @param component java.lang.String the component to be converted
621     * @param legalSet the legal character set allowed in the component
622     * @return java.lang.String the converted string
623     */
624    private String quoteComponent(String component, String legalSet) {
625        try {
626            /*
627             * Use a different encoder than URLEncoder since: 1. chars like "/",
628             * "#", "@" etc needs to be preserved instead of being encoded, 2.
629             * UTF-8 char set needs to be used for encoding instead of default
630             * platform one
631             */
632            return URIEncoderDecoder.quoteIllegal(component, legalSet);
633        } catch (UnsupportedEncodingException e) {
634            throw new RuntimeException(e.toString());
635        }
636    }
637
638    /**
639     * Compares this URI with the given argument {@code uri}. This method will
640     * return a negative value if this URI instance is less than the given
641     * argument and a positive value if this URI instance is greater than the
642     * given argument. The return value {@code 0} indicates that the two
643     * instances represent the same URI. To define the order the single parts of
644     * the URI are compared with each other. String components will be ordered
645     * in the natural case-sensitive way. A hierarchical URI is less than an
646     * opaque URI and if one part is {@code null} the URI with the undefined
647     * part is less than the other one.
648     *
649     * @param uri
650     *            the URI this instance has to compare with.
651     * @return the value representing the order of the two instances.
652     */
653    public int compareTo(URI uri) {
654        int ret;
655
656        // compare schemes
657        if (scheme == null && uri.scheme != null) {
658            return -1;
659        } else if (scheme != null && uri.scheme == null) {
660            return 1;
661        } else if (scheme != null && uri.scheme != null) {
662            ret = scheme.compareToIgnoreCase(uri.scheme);
663            if (ret != 0) {
664                return ret;
665            }
666        }
667
668        // compare opacities
669        if (!opaque && uri.opaque) {
670            return -1;
671        } else if (opaque && !uri.opaque) {
672            return 1;
673        } else if (opaque && uri.opaque) {
674            ret = schemeSpecificPart.compareTo(uri.schemeSpecificPart);
675            if (ret != 0) {
676                return ret;
677            }
678        } else {
679
680            // otherwise both must be hierarchical
681
682            // compare authorities
683            if (authority != null && uri.authority == null) {
684                return 1;
685            } else if (authority == null && uri.authority != null) {
686                return -1;
687            } else if (authority != null && uri.authority != null) {
688                if (host != null && uri.host != null) {
689                    // both are server based, so compare userInfo, host, port
690                    if (userInfo != null && uri.userInfo == null) {
691                        return 1;
692                    } else if (userInfo == null && uri.userInfo != null) {
693                        return -1;
694                    } else if (userInfo != null && uri.userInfo != null) {
695                        ret = userInfo.compareTo(uri.userInfo);
696                        if (ret != 0) {
697                            return ret;
698                        }
699                    }
700
701                    // userInfo's are the same, compare hostname
702                    ret = host.compareToIgnoreCase(uri.host);
703                    if (ret != 0) {
704                        return ret;
705                    }
706
707                    // compare port
708                    if (port != uri.port) {
709                        return port - uri.port;
710                    }
711                } else { // one or both are registry based, compare the whole
712                    // authority
713                    ret = authority.compareTo(uri.authority);
714                    if (ret != 0) {
715                        return ret;
716                    }
717                }
718            }
719
720            // authorities are the same
721            // compare paths
722            ret = path.compareTo(uri.path);
723            if (ret != 0) {
724                return ret;
725            }
726
727            // compare queries
728
729            if (query != null && uri.query == null) {
730                return 1;
731            } else if (query == null && uri.query != null) {
732                return -1;
733            } else if (query != null && uri.query != null) {
734                ret = query.compareTo(uri.query);
735                if (ret != 0) {
736                    return ret;
737                }
738            }
739        }
740
741        // everything else is identical, so compare fragments
742        if (fragment != null && uri.fragment == null) {
743            return 1;
744        } else if (fragment == null && uri.fragment != null) {
745            return -1;
746        } else if (fragment != null && uri.fragment != null) {
747            ret = fragment.compareTo(uri.fragment);
748            if (ret != 0) {
749                return ret;
750            }
751        }
752
753        // identical
754        return 0;
755    }
756
757    /**
758     * Returns the URI formed by parsing {@code uri}. This method behaves
759     * identically to the string constructor but throws a different exception
760     * on failure. The constructor fails with a checked {@link
761     * URISyntaxException}; this method fails with an unchecked {@link
762     * IllegalArgumentException}.
763     */
764    public static URI create(String uri) {
765        try {
766            return new URI(uri);
767        } catch (URISyntaxException e) {
768            throw new IllegalArgumentException(e.getMessage());
769        }
770    }
771
772    private URI duplicate() {
773        URI clone = new URI();
774        clone.absolute = absolute;
775        clone.authority = authority;
776        clone.fragment = fragment;
777        clone.host = host;
778        clone.opaque = opaque;
779        clone.path = path;
780        clone.port = port;
781        clone.query = query;
782        clone.scheme = scheme;
783        clone.schemeSpecificPart = schemeSpecificPart;
784        clone.userInfo = userInfo;
785        clone.serverAuthority = serverAuthority;
786        return clone;
787    }
788
789    /*
790     * Takes a string that may contain hex sequences like %F1 or %2b and
791     * converts the hex values following the '%' to lowercase
792     */
793    private String convertHexToLowerCase(String s) {
794        StringBuilder result = new StringBuilder("");
795        if (s.indexOf('%') == -1) {
796            return s;
797        }
798
799        int index, prevIndex = 0;
800        while ((index = s.indexOf('%', prevIndex)) != -1) {
801            result.append(s.substring(prevIndex, index + 1));
802            result.append(s.substring(index + 1, index + 3).toLowerCase(Locale.US));
803            index += 3;
804            prevIndex = index;
805        }
806        return result.toString();
807    }
808
809    /**
810     * Returns true if {@code first} and {@code second} are equal after
811     * unescaping hex sequences like %F1 and %2b.
812     */
813    private boolean escapedEquals(String first, String second) {
814        if (first.indexOf('%') != second.indexOf('%')) {
815            return first.equals(second);
816        }
817
818        int index, prevIndex = 0;
819        while ((index = first.indexOf('%', prevIndex)) != -1
820                && second.indexOf('%', prevIndex) == index) {
821            boolean match = first.substring(prevIndex, index).equals(
822                    second.substring(prevIndex, index));
823            if (!match) {
824                return false;
825            }
826
827            match = first.substring(index + 1, index + 3).equalsIgnoreCase(
828                    second.substring(index + 1, index + 3));
829            if (!match) {
830                return false;
831            }
832
833            index += 3;
834            prevIndex = index;
835        }
836        return first.substring(prevIndex).equals(second.substring(prevIndex));
837    }
838
839    /**
840     * Compares this URI instance with the given argument {@code o} and
841     * determines if both are equal. Two URI instances are equal if all single
842     * parts are identical in their meaning.
843     *
844     * @param o
845     *            the URI this instance has to be compared with.
846     * @return {@code true} if both URI instances point to the same resource,
847     *         {@code false} otherwise.
848     */
849    @Override
850    public boolean equals(Object o) {
851        if (!(o instanceof URI)) {
852            return false;
853        }
854        URI uri = (URI) o;
855
856        if (uri.fragment == null && fragment != null || uri.fragment != null
857                && fragment == null) {
858            return false;
859        } else if (uri.fragment != null && fragment != null) {
860            if (!escapedEquals(uri.fragment, fragment)) {
861                return false;
862            }
863        }
864
865        if (uri.scheme == null && scheme != null || uri.scheme != null
866                && scheme == null) {
867            return false;
868        } else if (uri.scheme != null && scheme != null) {
869            if (!uri.scheme.equalsIgnoreCase(scheme)) {
870                return false;
871            }
872        }
873
874        if (uri.opaque && opaque) {
875            return escapedEquals(uri.schemeSpecificPart,
876                    schemeSpecificPart);
877        } else if (!uri.opaque && !opaque) {
878            if (!escapedEquals(path, uri.path)) {
879                return false;
880            }
881
882            if (uri.query != null && query == null || uri.query == null
883                    && query != null) {
884                return false;
885            } else if (uri.query != null && query != null) {
886                if (!escapedEquals(uri.query, query)) {
887                    return false;
888                }
889            }
890
891            if (uri.authority != null && authority == null
892                    || uri.authority == null && authority != null) {
893                return false;
894            } else if (uri.authority != null && authority != null) {
895                if (uri.host != null && host == null || uri.host == null
896                        && host != null) {
897                    return false;
898                } else if (uri.host == null && host == null) {
899                    // both are registry based, so compare the whole authority
900                    return escapedEquals(uri.authority, authority);
901                } else { // uri.host != null && host != null, so server-based
902                    if (!host.equalsIgnoreCase(uri.host)) {
903                        return false;
904                    }
905
906                    if (port != uri.port) {
907                        return false;
908                    }
909
910                    if (uri.userInfo != null && userInfo == null
911                            || uri.userInfo == null && userInfo != null) {
912                        return false;
913                    } else if (uri.userInfo != null && userInfo != null) {
914                        return escapedEquals(userInfo, uri.userInfo);
915                    } else {
916                        return true;
917                    }
918                }
919            } else {
920                // no authority
921                return true;
922            }
923
924        } else {
925            // one is opaque, the other hierarchical
926            return false;
927        }
928    }
929
930    /**
931     * Gets the decoded authority part of this URI.
932     *
933     * @return the decoded authority part or {@code null} if undefined.
934     */
935    public String getAuthority() {
936        return decode(authority);
937    }
938
939    /**
940     * Gets the decoded fragment part of this URI.
941     *
942     * @return the decoded fragment part or {@code null} if undefined.
943     */
944    public String getFragment() {
945        return decode(fragment);
946    }
947
948    /**
949     * Gets the host part of this URI.
950     *
951     * @return the host part or {@code null} if undefined.
952     */
953    public String getHost() {
954        return host;
955    }
956
957    /**
958     * Gets the decoded path part of this URI.
959     *
960     * @return the decoded path part or {@code null} if undefined.
961     */
962    public String getPath() {
963        return decode(path);
964    }
965
966    /**
967     * Gets the port number of this URI.
968     *
969     * @return the port number or {@code -1} if undefined.
970     */
971    public int getPort() {
972        return port;
973    }
974
975    /** @hide */
976    public int getEffectivePort() {
977        return getEffectivePort(scheme, port);
978    }
979
980    /**
981     * Returns the port to use for {@code scheme} connections will use when
982     * {@link #getPort} returns {@code specifiedPort}.
983     *
984     * @hide
985     */
986    public static int getEffectivePort(String scheme, int specifiedPort) {
987        if (specifiedPort != -1) {
988            return specifiedPort;
989        }
990
991        if ("http".equalsIgnoreCase(scheme)) {
992            return 80;
993        } else if ("https".equalsIgnoreCase(scheme)) {
994            return 443;
995        } else {
996            return -1;
997        }
998    }
999
1000    /**
1001     * Gets the decoded query part of this URI.
1002     *
1003     * @return the decoded query part or {@code null} if undefined.
1004     */
1005    public String getQuery() {
1006        return decode(query);
1007    }
1008
1009    /**
1010     * Gets the authority part of this URI in raw form.
1011     *
1012     * @return the encoded authority part or {@code null} if undefined.
1013     */
1014    public String getRawAuthority() {
1015        return authority;
1016    }
1017
1018    /**
1019     * Gets the fragment part of this URI in raw form.
1020     *
1021     * @return the encoded fragment part or {@code null} if undefined.
1022     */
1023    public String getRawFragment() {
1024        return fragment;
1025    }
1026
1027    /**
1028     * Gets the path part of this URI in raw form.
1029     *
1030     * @return the encoded path part or {@code null} if undefined.
1031     */
1032    public String getRawPath() {
1033        return path;
1034    }
1035
1036    /**
1037     * Gets the query part of this URI in raw form.
1038     *
1039     * @return the encoded query part or {@code null} if undefined.
1040     */
1041    public String getRawQuery() {
1042        return query;
1043    }
1044
1045    /**
1046     * Gets the scheme-specific part of this URI in raw form.
1047     *
1048     * @return the encoded scheme-specific part or {@code null} if undefined.
1049     */
1050    public String getRawSchemeSpecificPart() {
1051        return schemeSpecificPart;
1052    }
1053
1054    /**
1055     * Gets the user-info part of this URI in raw form.
1056     *
1057     * @return the encoded user-info part or {@code null} if undefined.
1058     */
1059    public String getRawUserInfo() {
1060        return userInfo;
1061    }
1062
1063    /**
1064     * Gets the scheme part of this URI.
1065     *
1066     * @return the scheme part or {@code null} if undefined.
1067     */
1068    public String getScheme() {
1069        return scheme;
1070    }
1071
1072    /**
1073     * Gets the decoded scheme-specific part of this URI.
1074     *
1075     * @return the decoded scheme-specific part or {@code null} if undefined.
1076     */
1077    public String getSchemeSpecificPart() {
1078        return decode(schemeSpecificPart);
1079    }
1080
1081    /**
1082     * Gets the decoded user-info part of this URI.
1083     *
1084     * @return the decoded user-info part or {@code null} if undefined.
1085     */
1086    public String getUserInfo() {
1087        return decode(userInfo);
1088    }
1089
1090    /**
1091     * Gets the hashcode value of this URI instance.
1092     *
1093     * @return the appropriate hashcode value.
1094     */
1095    @Override
1096    public int hashCode() {
1097        if (hash == -1) {
1098            hash = getHashString().hashCode();
1099        }
1100        return hash;
1101    }
1102
1103    /**
1104     * Indicates whether this URI is absolute, which means that a scheme part is
1105     * defined in this URI.
1106     *
1107     * @return {@code true} if this URI is absolute, {@code false} otherwise.
1108     */
1109    public boolean isAbsolute() {
1110        return absolute;
1111    }
1112
1113    /**
1114     * Indicates whether this URI is opaque or not. An opaque URI is absolute
1115     * and has a scheme-specific part which does not start with a slash
1116     * character. All parts except scheme, scheme-specific and fragment are
1117     * undefined.
1118     *
1119     * @return {@code true} if the URI is opaque, {@code false} otherwise.
1120     */
1121    public boolean isOpaque() {
1122        return opaque;
1123    }
1124
1125    /*
1126     * normalize path, and return the resulting string
1127     */
1128    private String normalize(String path) {
1129        // count the number of '/'s, to determine number of segments
1130        int index = -1;
1131        int pathLength = path.length();
1132        int size = 0;
1133        if (pathLength > 0 && path.charAt(0) != '/') {
1134            size++;
1135        }
1136        while ((index = path.indexOf('/', index + 1)) != -1) {
1137            if (index + 1 < pathLength && path.charAt(index + 1) != '/') {
1138                size++;
1139            }
1140        }
1141
1142        String[] segList = new String[size];
1143        boolean[] include = new boolean[size];
1144
1145        // break the path into segments and store in the list
1146        int current = 0;
1147        int index2;
1148        index = (pathLength > 0 && path.charAt(0) == '/') ? 1 : 0;
1149        while ((index2 = path.indexOf('/', index + 1)) != -1) {
1150            segList[current++] = path.substring(index, index2);
1151            index = index2 + 1;
1152        }
1153
1154        // if current==size, then the last character was a slash
1155        // and there are no more segments
1156        if (current < size) {
1157            segList[current] = path.substring(index);
1158        }
1159
1160        // determine which segments get included in the normalized path
1161        for (int i = 0; i < size; i++) {
1162            include[i] = true;
1163            if (segList[i].equals("..")) {
1164                int remove = i - 1;
1165                // search back to find a segment to remove, if possible
1166                while (remove > -1 && !include[remove]) {
1167                    remove--;
1168                }
1169                // if we find a segment to remove, remove it and the ".."
1170                // segment
1171                if (remove > -1 && !segList[remove].equals("..")) {
1172                    include[remove] = false;
1173                    include[i] = false;
1174                }
1175            } else if (segList[i].equals(".")) {
1176                include[i] = false;
1177            }
1178        }
1179
1180        // put the path back together
1181        StringBuilder newPath = new StringBuilder();
1182        if (path.startsWith("/")) {
1183            newPath.append('/');
1184        }
1185
1186        for (int i = 0; i < segList.length; i++) {
1187            if (include[i]) {
1188                newPath.append(segList[i]);
1189                newPath.append('/');
1190            }
1191        }
1192
1193        // if we used at least one segment and the path previously ended with
1194        // a slash and the last segment is still used, then delete the extra
1195        // trailing '/'
1196        if (!path.endsWith("/") && segList.length > 0
1197                && include[segList.length - 1]) {
1198            newPath.deleteCharAt(newPath.length() - 1);
1199        }
1200
1201        String result = newPath.toString();
1202
1203        // check for a ':' in the first segment if one exists,
1204        // prepend "./" to normalize
1205        index = result.indexOf(':');
1206        index2 = result.indexOf('/');
1207        if (index != -1 && (index < index2 || index2 == -1)) {
1208            newPath.insert(0, "./");
1209            result = newPath.toString();
1210        }
1211        return result;
1212    }
1213
1214    /**
1215     * Normalizes the path part of this URI.
1216     *
1217     * @return an URI object which represents this instance with a normalized
1218     *         path.
1219     */
1220    public URI normalize() {
1221        if (opaque) {
1222            return this;
1223        }
1224        String normalizedPath = normalize(path);
1225        // if the path is already normalized, return this
1226        if (path.equals(normalizedPath)) {
1227            return this;
1228        }
1229        // get an exact copy of the URI re-calculate the scheme specific part
1230        // since the path of the normalized URI is different from this URI.
1231        URI result = duplicate();
1232        result.path = normalizedPath;
1233        result.setSchemeSpecificPart();
1234        return result;
1235    }
1236
1237    /**
1238     * Tries to parse the authority component of this URI to divide it into the
1239     * host, port, and user-info. If this URI is already determined as a
1240     * ServerAuthority this instance will be returned without changes.
1241     *
1242     * @return this instance with the components of the parsed server authority.
1243     * @throws URISyntaxException
1244     *             if the authority part could not be parsed as a server-based
1245     *             authority.
1246     */
1247    public URI parseServerAuthority() throws URISyntaxException {
1248        if (!serverAuthority) {
1249            parseAuthority(true);
1250        }
1251        return this;
1252    }
1253
1254    /**
1255     * Makes the given URI {@code relative} to a relative URI against the URI
1256     * represented by this instance.
1257     *
1258     * @param relative
1259     *            the URI which has to be relativized against this URI.
1260     * @return the relative URI.
1261     */
1262    public URI relativize(URI relative) {
1263        if (relative.opaque || opaque) {
1264            return relative;
1265        }
1266
1267        if (scheme == null ? relative.scheme != null : !scheme
1268                .equals(relative.scheme)) {
1269            return relative;
1270        }
1271
1272        if (authority == null ? relative.authority != null : !authority
1273                .equals(relative.authority)) {
1274            return relative;
1275        }
1276
1277        // normalize both paths
1278        String thisPath = normalize(path);
1279        String relativePath = normalize(relative.path);
1280
1281        /*
1282         * if the paths aren't equal, then we need to determine if this URI's
1283         * path is a parent path (begins with) the relative URI's path
1284         */
1285        if (!thisPath.equals(relativePath)) {
1286            // if this URI's path doesn't end in a '/', add one
1287            if (!thisPath.endsWith("/")) {
1288                thisPath = thisPath + '/';
1289            }
1290            /*
1291             * if the relative URI's path doesn't start with this URI's path,
1292             * then just return the relative URI; the URIs have nothing in
1293             * common
1294             */
1295            if (!relativePath.startsWith(thisPath)) {
1296                return relative;
1297            }
1298        }
1299
1300        URI result = new URI();
1301        result.fragment = relative.fragment;
1302        result.query = relative.query;
1303        // the result URI is the remainder of the relative URI's path
1304        result.path = relativePath.substring(thisPath.length());
1305        result.setSchemeSpecificPart();
1306        return result;
1307    }
1308
1309    /**
1310     * Resolves the given URI {@code relative} against the URI represented by
1311     * this instance.
1312     *
1313     * @param relative
1314     *            the URI which has to be resolved against this URI.
1315     * @return the resolved URI.
1316     */
1317    public URI resolve(URI relative) {
1318        if (relative.absolute || opaque) {
1319            return relative;
1320        }
1321
1322        URI result;
1323        if (relative.path.isEmpty() && relative.scheme == null
1324                && relative.authority == null && relative.query == null
1325                && relative.fragment != null) {
1326            // if the relative URI only consists of fragment,
1327            // the resolved URI is very similar to this URI,
1328            // except that it has the fragment from the relative URI.
1329            result = duplicate();
1330            result.fragment = relative.fragment;
1331            // no need to re-calculate the scheme specific part,
1332            // since fragment is not part of scheme specific part.
1333            return result;
1334        }
1335
1336        if (relative.authority != null) {
1337            // if the relative URI has authority,
1338            // the resolved URI is almost the same as the relative URI,
1339            // except that it has the scheme of this URI.
1340            result = relative.duplicate();
1341            result.scheme = scheme;
1342            result.absolute = absolute;
1343        } else {
1344            // since relative URI has no authority,
1345            // the resolved URI is very similar to this URI,
1346            // except that it has the query and fragment of the relative URI,
1347            // and the path is different.
1348            result = duplicate();
1349            result.fragment = relative.fragment;
1350            result.query = relative.query;
1351            if (relative.path.startsWith("/")) {
1352                result.path = relative.path;
1353            } else {
1354                // resolve a relative reference
1355                int endIndex = path.lastIndexOf('/') + 1;
1356                result.path = normalize(path.substring(0, endIndex)
1357                        + relative.path);
1358            }
1359            // re-calculate the scheme specific part since
1360            // query and path of the resolved URI is different from this URI.
1361            result.setSchemeSpecificPart();
1362        }
1363        return result;
1364    }
1365
1366    /**
1367     * Helper method used to re-calculate the scheme specific part of the
1368     * resolved or normalized URIs
1369     */
1370    private void setSchemeSpecificPart() {
1371        // ssp = [//authority][path][?query]
1372        StringBuilder ssp = new StringBuilder();
1373        if (authority != null) {
1374            ssp.append("//" + authority);
1375        }
1376        if (path != null) {
1377            ssp.append(path);
1378        }
1379        if (query != null) {
1380            ssp.append("?" + query);
1381        }
1382        schemeSpecificPart = ssp.toString();
1383        // reset string, so that it can be re-calculated correctly when asked.
1384        string = null;
1385    }
1386
1387    /**
1388     * Creates a new URI instance by parsing the given string {@code relative}
1389     * and resolves the created URI against the URI represented by this
1390     * instance.
1391     *
1392     * @param relative
1393     *            the given string to create the new URI instance which has to
1394     *            be resolved later on.
1395     * @return the created and resolved URI.
1396     */
1397    public URI resolve(String relative) {
1398        return resolve(create(relative));
1399    }
1400
1401    /**
1402     * Encode unicode chars that are not part of US-ASCII char set into the
1403     * escaped form
1404     *
1405     * i.e. The Euro currency symbol is encoded as "%E2%82%AC".
1406     */
1407    private String encodeNonAscii(String s) {
1408        try {
1409            /*
1410             * Use a different encoder than URLEncoder since: 1. chars like "/",
1411             * "#", "@" etc needs to be preserved instead of being encoded, 2.
1412             * UTF-8 char set needs to be used for encoding instead of default
1413             * platform one 3. Only other chars need to be converted
1414             */
1415            return URIEncoderDecoder.encodeOthers(s);
1416        } catch (UnsupportedEncodingException e) {
1417            throw new RuntimeException(e.toString());
1418        }
1419    }
1420
1421    private String decode(String s) {
1422        if (s == null) {
1423            return s;
1424        }
1425
1426        try {
1427            return URIEncoderDecoder.decode(s);
1428        } catch (UnsupportedEncodingException e) {
1429            throw new RuntimeException(e.toString());
1430        }
1431    }
1432
1433    /**
1434     * Returns the textual string representation of this URI instance using the
1435     * US-ASCII encoding.
1436     *
1437     * @return the US-ASCII string representation of this URI.
1438     */
1439    public String toASCIIString() {
1440        return encodeNonAscii(toString());
1441    }
1442
1443    /**
1444     * Returns the textual string representation of this URI instance.
1445     *
1446     * @return the textual string representation of this URI.
1447     */
1448    @Override
1449    public String toString() {
1450        if (string == null) {
1451            StringBuilder result = new StringBuilder();
1452            if (scheme != null) {
1453                result.append(scheme);
1454                result.append(':');
1455            }
1456            if (opaque) {
1457                result.append(schemeSpecificPart);
1458            } else {
1459                if (authority != null) {
1460                    result.append("//");
1461                    result.append(authority);
1462                }
1463
1464                if (path != null) {
1465                    result.append(path);
1466                }
1467
1468                if (query != null) {
1469                    result.append('?');
1470                    result.append(query);
1471                }
1472            }
1473
1474            if (fragment != null) {
1475                result.append('#');
1476                result.append(fragment);
1477            }
1478
1479            string = result.toString();
1480        }
1481        return string;
1482    }
1483
1484    /*
1485     * Form a string from the components of this URI, similarly to the
1486     * toString() method. But this method converts scheme and host to lowercase,
1487     * and converts escaped octets to lowercase.
1488     */
1489    private String getHashString() {
1490        StringBuilder result = new StringBuilder();
1491        if (scheme != null) {
1492            result.append(scheme.toLowerCase(Locale.US));
1493            result.append(':');
1494        }
1495        if (opaque) {
1496            result.append(schemeSpecificPart);
1497        } else {
1498            if (authority != null) {
1499                result.append("//");
1500                if (host == null) {
1501                    result.append(authority);
1502                } else {
1503                    if (userInfo != null) {
1504                        result.append(userInfo + "@");
1505                    }
1506                    result.append(host.toLowerCase(Locale.US));
1507                    if (port != -1) {
1508                        result.append(":" + port);
1509                    }
1510                }
1511            }
1512
1513            if (path != null) {
1514                result.append(path);
1515            }
1516
1517            if (query != null) {
1518                result.append('?');
1519                result.append(query);
1520            }
1521        }
1522
1523        if (fragment != null) {
1524            result.append('#');
1525            result.append(fragment);
1526        }
1527
1528        return convertHexToLowerCase(result.toString());
1529    }
1530
1531    /**
1532     * Converts this URI instance to a URL.
1533     *
1534     * @return the created URL representing the same resource as this URI.
1535     * @throws MalformedURLException
1536     *             if an error occurs while creating the URL or no protocol
1537     *             handler could be found.
1538     */
1539    public URL toURL() throws MalformedURLException {
1540        if (!absolute) {
1541            throw new IllegalArgumentException("URI is not absolute: " + toString());
1542        }
1543        return new URL(toString());
1544    }
1545
1546    private void readObject(ObjectInputStream in) throws IOException,
1547            ClassNotFoundException {
1548        in.defaultReadObject();
1549        try {
1550            parseURI(string, false);
1551        } catch (URISyntaxException e) {
1552            throw new IOException(e.toString());
1553        }
1554    }
1555
1556    private void writeObject(ObjectOutputStream out) throws IOException,
1557            ClassNotFoundException {
1558        // call toString() to ensure the value of string field is calculated
1559        toString();
1560        out.defaultWriteObject();
1561    }
1562}
1563