URI.java revision 753dcd862b31e85766225590d90ba0b9f481176f
1/*
2 *  Licensed to the Apache Software Foundation (ASF) under one or more
3 *  contributor license agreements.  See the NOTICE file distributed with
4 *  this work for additional information regarding copyright ownership.
5 *  The ASF licenses this file to You under the Apache License, Version 2.0
6 *  (the "License"); you may not use this file except in compliance with
7 *  the License.  You may obtain a copy of the License at
8 *
9 *     http://www.apache.org/licenses/LICENSE-2.0
10 *
11 *  Unless required by applicable law or agreed to in writing, software
12 *  distributed under the License is distributed on an "AS IS" BASIS,
13 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 *  See the License for the specific language governing permissions and
15 *  limitations under the License.
16 */
17
18package java.net;
19
20import java.io.IOException;
21import java.io.ObjectInputStream;
22import java.io.ObjectOutputStream;
23import java.io.Serializable;
24import java.io.UnsupportedEncodingException;
25import java.util.StringTokenizer;
26import org.apache.harmony.luni.platform.INetworkSystem;
27import org.apache.harmony.luni.platform.Platform;
28
29/**
30 * This class represents an instance of a URI as defined by RFC 2396.
31 */
32public final class URI implements Comparable<URI>, Serializable {
33
34    private final static INetworkSystem NETWORK_SYSTEM = Platform.getNetworkSystem();
35
36    private static final long serialVersionUID = -6052424284110960213l;
37
38    static final String UNRESERVED = "_-!.~\'()*";
39    static final String PUNCTUATION = ",;:$&+=";
40    static final String RESERVED = PUNCTUATION + "?/[]@";
41    static final String SOME_LEGAL = UNRESERVED + PUNCTUATION;
42    static final String ALL_LEGAL = UNRESERVED + RESERVED;
43
44    private String string;
45    private transient String scheme;
46    private transient String schemeSpecificPart;
47    private transient String authority;
48    private transient String userInfo;
49    private transient String host;
50    private transient int port = -1;
51    private transient String path;
52    private transient String query;
53    private transient String fragment;
54    private transient boolean opaque;
55    private transient boolean absolute;
56    private transient boolean serverAuthority = false;
57
58    private transient int hash = -1;
59
60    private URI() {}
61
62    /**
63     * Creates a new URI instance according to the given string {@code uri}.
64     *
65     * @param uri
66     *            the textual URI representation to be parsed into a URI object.
67     * @throws URISyntaxException
68     *             if the given string {@code uri} doesn't fit to the
69     *             specification RFC2396 or could not be parsed correctly.
70     */
71    public URI(String uri) throws URISyntaxException {
72        parseURI(uri, false);
73    }
74
75    /**
76     * Creates a new URI instance using the given arguments. This constructor
77     * first creates a temporary URI string from the given components. This
78     * string will be parsed later on to create the URI instance.
79     * <p>
80     * {@code [scheme:]scheme-specific-part[#fragment]}
81     *
82     * @param scheme
83     *            the scheme part of the URI.
84     * @param ssp
85     *            the scheme-specific-part of the URI.
86     * @param frag
87     *            the fragment part of the URI.
88     * @throws URISyntaxException
89     *             if the temporary created string doesn't fit to the
90     *             specification RFC2396 or could not be parsed correctly.
91     */
92    public URI(String scheme, String ssp, String frag)
93            throws URISyntaxException {
94        StringBuilder uri = new StringBuilder();
95        if (scheme != null) {
96            uri.append(scheme);
97            uri.append(':');
98        }
99        if (ssp != null) {
100            // QUOTE ILLEGAL CHARACTERS
101            uri.append(quoteComponent(ssp, ALL_LEGAL));
102        }
103        if (frag != null) {
104            uri.append('#');
105            // QUOTE ILLEGAL CHARACTERS
106            uri.append(quoteComponent(frag, ALL_LEGAL));
107        }
108
109        parseURI(uri.toString(), false);
110    }
111
112    /**
113     * Creates a new URI instance using the given arguments. This constructor
114     * first creates a temporary URI string from the given components. This
115     * string will be parsed later on to create the URI instance.
116     * <p>
117     * {@code [scheme:][user-info@]host[:port][path][?query][#fragment]}
118     *
119     * @param scheme
120     *            the scheme part of the URI.
121     * @param userInfo
122     *            the user information of the URI for authentication and
123     *            authorization.
124     * @param host
125     *            the host name of the URI.
126     * @param port
127     *            the port number of the URI.
128     * @param path
129     *            the path to the resource on the host.
130     * @param query
131     *            the query part of the URI to specify parameters for the
132     *            resource.
133     * @param fragment
134     *            the fragment part of the URI.
135     * @throws URISyntaxException
136     *             if the temporary created string doesn't fit to the
137     *             specification RFC2396 or could not be parsed correctly.
138     */
139    public URI(String scheme, String userInfo, String host, int port,
140            String path, String query, String fragment)
141            throws URISyntaxException {
142
143        if (scheme == null && userInfo == null && host == null && path == null
144                && query == null && fragment == null) {
145            this.path = "";
146            return;
147        }
148
149        if (scheme != null && path != null && path.length() > 0
150                && path.charAt(0) != '/') {
151            throw new URISyntaxException(path, "Relative path");
152        }
153
154        StringBuilder uri = new StringBuilder();
155        if (scheme != null) {
156            uri.append(scheme);
157            uri.append(':');
158        }
159
160        if (userInfo != null || host != null || port != -1) {
161            uri.append("//");
162        }
163
164        if (userInfo != null) {
165            // QUOTE ILLEGAL CHARACTERS in userInfo
166            uri.append(quoteComponent(userInfo, SOME_LEGAL));
167            uri.append('@');
168        }
169
170        if (host != null) {
171            // check for IPv6 addresses that hasn't been enclosed
172            // in square brackets
173            if (host.indexOf(':') != -1 && host.indexOf(']') == -1
174                    && host.indexOf('[') == -1) {
175                host = "[" + host + "]";
176            }
177            uri.append(host);
178        }
179
180        if (port != -1) {
181            uri.append(':');
182            uri.append(port);
183        }
184
185        if (path != null) {
186            // QUOTE ILLEGAL CHARS
187            uri.append(quoteComponent(path, "/@" + SOME_LEGAL));
188        }
189
190        if (query != null) {
191            uri.append('?');
192            // QUOTE ILLEGAL CHARS
193            uri.append(quoteComponent(query, ALL_LEGAL));
194        }
195
196        if (fragment != null) {
197            // QUOTE ILLEGAL CHARS
198            uri.append('#');
199            uri.append(quoteComponent(fragment, ALL_LEGAL));
200        }
201
202        parseURI(uri.toString(), true);
203    }
204
205    /**
206     * Creates a new URI instance using the given arguments. This constructor
207     * first creates a temporary URI string from the given components. This
208     * string will be parsed later on to create the URI instance.
209     * <p>
210     * {@code [scheme:]host[path][#fragment]}
211     *
212     * @param scheme
213     *            the scheme part of the URI.
214     * @param host
215     *            the host name of the URI.
216     * @param path
217     *            the path to the resource on the host.
218     * @param fragment
219     *            the fragment part of the URI.
220     * @throws URISyntaxException
221     *             if the temporary created string doesn't fit to the
222     *             specification RFC2396 or could not be parsed correctly.
223     */
224    public URI(String scheme, String host, String path, String fragment)
225            throws URISyntaxException {
226        this(scheme, null, host, -1, path, null, fragment);
227    }
228
229    /**
230     * Creates a new URI instance using the given arguments. This constructor
231     * first creates a temporary URI string from the given components. This
232     * string will be parsed later on to create the URI instance.
233     * <p>
234     * {@code [scheme:][//authority][path][?query][#fragment]}
235     *
236     * @param scheme
237     *            the scheme part of the URI.
238     * @param authority
239     *            the authority part of the URI.
240     * @param path
241     *            the path to the resource on the host.
242     * @param query
243     *            the query part of the URI to specify parameters for the
244     *            resource.
245     * @param fragment
246     *            the fragment part of the URI.
247     * @throws URISyntaxException
248     *             if the temporary created string doesn't fit to the
249     *             specification RFC2396 or could not be parsed correctly.
250     */
251    public URI(String scheme, String authority, String path, String query,
252            String fragment) throws URISyntaxException {
253        if (scheme != null && path != null && path.length() > 0
254                && path.charAt(0) != '/') {
255            throw new URISyntaxException(path, "Relative path");
256        }
257
258        StringBuilder uri = new StringBuilder();
259        if (scheme != null) {
260            uri.append(scheme);
261            uri.append(':');
262        }
263        if (authority != null) {
264            uri.append("//");
265            // QUOTE ILLEGAL CHARS
266            uri.append(quoteComponent(authority, "@[]" + SOME_LEGAL));
267        }
268
269        if (path != null) {
270            // QUOTE ILLEGAL CHARS
271            uri.append(quoteComponent(path, "/@" + SOME_LEGAL));
272        }
273        if (query != null) {
274            // QUOTE ILLEGAL CHARS
275            uri.append('?');
276            uri.append(quoteComponent(query, ALL_LEGAL));
277        }
278        if (fragment != null) {
279            // QUOTE ILLEGAL CHARS
280            uri.append('#');
281            uri.append(quoteComponent(fragment, ALL_LEGAL));
282        }
283
284        parseURI(uri.toString(), false);
285    }
286
287    private void parseURI(String uri, boolean forceServer) throws URISyntaxException {
288        String temp = uri;
289        // assign uri string to the input value per spec
290        string = uri;
291        int index, index1, index2, index3;
292        // parse into Fragment, Scheme, and SchemeSpecificPart
293        // then parse SchemeSpecificPart if necessary
294
295        // Fragment
296        index = temp.indexOf('#');
297        if (index != -1) {
298            // remove the fragment from the end
299            fragment = temp.substring(index + 1);
300            validateFragment(uri, fragment, index + 1);
301            temp = temp.substring(0, index);
302        }
303
304        // Scheme and SchemeSpecificPart
305        index = index1 = temp.indexOf(':');
306        index2 = temp.indexOf('/');
307        index3 = temp.indexOf('?');
308
309        // if a '/' or '?' occurs before the first ':' the uri has no
310        // specified scheme, and is therefore not absolute
311        if (index != -1 && (index2 >= index || index2 == -1)
312                && (index3 >= index || index3 == -1)) {
313            // the characters up to the first ':' comprise the scheme
314            absolute = true;
315            scheme = temp.substring(0, index);
316            if (scheme.length() == 0) {
317                throw new URISyntaxException(uri, "Scheme expected", index);
318            }
319            validateScheme(uri, scheme, 0);
320            schemeSpecificPart = temp.substring(index + 1);
321            if (schemeSpecificPart.length() == 0) {
322                throw new URISyntaxException(uri, "Scheme-specific part expected", index + 1);
323            }
324        } else {
325            absolute = false;
326            schemeSpecificPart = temp;
327        }
328
329        if (scheme == null || schemeSpecificPart.length() > 0
330                && schemeSpecificPart.charAt(0) == '/') {
331            opaque = false;
332            // the URI is hierarchical
333
334            // Query
335            temp = schemeSpecificPart;
336            index = temp.indexOf('?');
337            if (index != -1) {
338                query = temp.substring(index + 1);
339                temp = temp.substring(0, index);
340                validateQuery(uri, query, index2 + 1 + index);
341            }
342
343            // Authority and Path
344            if (temp.startsWith("//")) {
345                index = temp.indexOf('/', 2);
346                if (index != -1) {
347                    authority = temp.substring(2, index);
348                    path = temp.substring(index);
349                } else {
350                    authority = temp.substring(2);
351                    if (authority.length() == 0 && query == null
352                            && fragment == null) {
353                        throw new URISyntaxException(uri, "Authority expected", uri.length());
354                    }
355
356                    path = "";
357                    // nothing left, so path is empty (not null, path should
358                    // never be null)
359                }
360
361                if (authority.length() == 0) {
362                    authority = null;
363                } else {
364                    validateAuthority(uri, authority, index1 + 3);
365                }
366            } else { // no authority specified
367                path = temp;
368            }
369
370            int pathIndex = 0;
371            if (index2 > -1) {
372                pathIndex += index2;
373            }
374            if (index > -1) {
375                pathIndex += index;
376            }
377            validatePath(uri, path, pathIndex);
378        } else { // if not hierarchical, URI is opaque
379            opaque = true;
380            validateSsp(uri, schemeSpecificPart, index2 + 2 + index);
381        }
382
383        parseAuthority(forceServer);
384    }
385
386    private void validateScheme(String uri, String scheme, int index)
387            throws URISyntaxException {
388        // first char needs to be an alpha char
389        char ch = scheme.charAt(0);
390        if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'))) {
391            throw new URISyntaxException(uri, "Illegal character in scheme", 0);
392        }
393
394        try {
395            URIEncoderDecoder.validateSimple(scheme, "+-.");
396        } catch (URISyntaxException e) {
397            throw new URISyntaxException(uri, "Illegal character in scheme", index + e.getIndex());
398        }
399    }
400
401    private void validateSsp(String uri, String ssp, int index)
402            throws URISyntaxException {
403        try {
404            URIEncoderDecoder.validate(ssp, ALL_LEGAL);
405        } catch (URISyntaxException e) {
406            throw new URISyntaxException(uri,
407                    e.getReason() + " in schemeSpecificPart", index + e.getIndex());
408        }
409    }
410
411    private void validateAuthority(String uri, String authority, int index)
412            throws URISyntaxException {
413        try {
414            URIEncoderDecoder.validate(authority, "@[]" + SOME_LEGAL);
415        } catch (URISyntaxException e) {
416            throw new URISyntaxException(uri, e.getReason() + " in authority", index + e.getIndex());
417        }
418    }
419
420    private void validatePath(String uri, String path, int index)
421            throws URISyntaxException {
422        try {
423            URIEncoderDecoder.validate(path, "/@" + SOME_LEGAL);
424        } catch (URISyntaxException e) {
425            throw new URISyntaxException(uri, e.getReason() + " in path", index + e.getIndex());
426        }
427    }
428
429    private void validateQuery(String uri, String query, int index)
430            throws URISyntaxException {
431        try {
432            URIEncoderDecoder.validate(query, ALL_LEGAL);
433        } catch (URISyntaxException e) {
434            throw new URISyntaxException(uri, e.getReason() + " in query", index + e.getIndex());
435
436        }
437    }
438
439    private void validateFragment(String uri, String fragment, int index)
440            throws URISyntaxException {
441        try {
442            URIEncoderDecoder.validate(fragment, ALL_LEGAL);
443        } catch (URISyntaxException e) {
444            throw new URISyntaxException(uri, e.getReason() + " in fragment", index + e.getIndex());
445        }
446    }
447
448    /**
449     * Parse the authority string into its component parts: user info,
450     * host, and port. This operation doesn't apply to registry URIs, and
451     * calling it on such <i>may</i> result in a syntax exception.
452     *
453     * @param forceServer true to always throw if the authority cannot be
454     *     parsed. If false, this method may still throw for some kinds of
455     *     errors; this unpredictable behaviour is consistent with the RI.
456     */
457    private void parseAuthority(boolean forceServer) throws URISyntaxException {
458        if (authority == null) {
459            return;
460        }
461
462        String tempUserInfo = null;
463        String temp = authority;
464        int index = temp.indexOf('@');
465        int hostIndex = 0;
466        if (index != -1) {
467            // remove user info
468            tempUserInfo = temp.substring(0, index);
469            validateUserInfo(authority, tempUserInfo, 0);
470            temp = temp.substring(index + 1); // host[:port] is left
471            hostIndex = index + 1;
472        }
473
474        index = temp.lastIndexOf(':');
475        int endIndex = temp.indexOf(']');
476
477        String tempHost;
478        int tempPort = -1;
479        if (index != -1 && endIndex < index) {
480            // determine port and host
481            tempHost = temp.substring(0, index);
482
483            if (index < (temp.length() - 1)) { // port part is not empty
484                try {
485                    tempPort = Integer.parseInt(temp.substring(index + 1));
486                    if (tempPort < 0) {
487                        if (forceServer) {
488                            throw new URISyntaxException(authority,
489                                    "Invalid port number", hostIndex + index + 1);
490                        }
491                        return;
492                    }
493                } catch (NumberFormatException e) {
494                    if (forceServer) {
495                        throw new URISyntaxException(authority,
496                                "Invalid port number", hostIndex + index + 1);
497                    }
498                    return;
499                }
500            }
501        } else {
502            tempHost = temp;
503        }
504
505        if (tempHost.isEmpty()) {
506            if (forceServer) {
507                throw new URISyntaxException(authority, "Expected host", hostIndex);
508            }
509            return;
510        }
511
512        if (!isValidHost(forceServer, tempHost)) {
513            return;
514        }
515
516        // this is a server based uri,
517        // fill in the userInfo, host and port fields
518        userInfo = tempUserInfo;
519        host = tempHost;
520        port = tempPort;
521        serverAuthority = true;
522    }
523
524    private void validateUserInfo(String uri, String userInfo, int index)
525            throws URISyntaxException {
526        for (int i = 0; i < userInfo.length(); i++) {
527            char ch = userInfo.charAt(i);
528            if (ch == ']' || ch == '[') {
529                throw new URISyntaxException(uri, "Illegal character in userInfo", index + i);
530            }
531        }
532    }
533
534    /**
535     * Returns true if {@code host} is a well-formed host name or IP address.
536     *
537     * @param forceServer true to always throw if the host cannot be parsed. If
538     *     false, this method may still throw for some kinds of errors; this
539     *     unpredictable behaviour is consistent with the RI.
540     */
541    private boolean isValidHost(boolean forceServer, String host) throws URISyntaxException {
542        if (host.startsWith("[")) {
543            // IPv6 address
544            if (!host.endsWith("]")) {
545                throw new URISyntaxException(host,
546                        "Expected a closing square bracket for IPv6 address", 0);
547            }
548            try {
549                byte[] bytes = InetAddress.ipStringToByteArray(host);
550                /*
551                 * The native IP parser may return 4 bytes for addresses like
552                 * "[::FFFF:127.0.0.1]". This is allowed, but we must not accept
553                 * IPv4-formatted addresses in square braces like "[127.0.0.1]".
554                 */
555                if (bytes.length == 16 || bytes.length == 4 && host.contains(":")) {
556                    return true;
557                }
558            } catch (UnknownHostException e) {
559            }
560            throw new URISyntaxException(host, "Malformed IPv6 address");
561        }
562
563        // '[' and ']' can only be the first char and last char
564        // of the host name
565        if (host.indexOf('[') != -1 || host.indexOf(']') != -1) {
566            throw new URISyntaxException(host, "Illegal character in host name", 0);
567        }
568
569        int index = host.lastIndexOf('.');
570        if (index < 0 || index == host.length() - 1
571                || !Character.isDigit(host.charAt(index + 1))) {
572            // domain name
573            if (isValidDomainName(host)) {
574                return true;
575            }
576            if (forceServer) {
577                throw new URISyntaxException(host, "Illegal character in host name", 0);
578            }
579            return false;
580        }
581
582        // IPv4 address
583        try {
584            if (InetAddress.ipStringToByteArray(host).length == 4) {
585                return true;
586            }
587        } catch (UnknownHostException e) {
588        }
589
590        if (forceServer) {
591            throw new URISyntaxException(host, "Malformed IPv4 address", 0);
592        }
593        return false;
594    }
595
596    private boolean isValidDomainName(String host) {
597        try {
598            URIEncoderDecoder.validateSimple(host, "-.");
599        } catch (URISyntaxException e) {
600            return false;
601        }
602
603        String lastLabel = null;
604        StringTokenizer st = new StringTokenizer(host, ".");
605        while (st.hasMoreTokens()) {
606            lastLabel = st.nextToken();
607            if (lastLabel.startsWith("-") || lastLabel.endsWith("-")) {
608                return false;
609            }
610        }
611
612        if (lastLabel == null) {
613            return false;
614        }
615
616        if (!lastLabel.equals(host)) {
617            char ch = lastLabel.charAt(0);
618            if (ch >= '0' && ch <= '9') {
619                return false;
620            }
621        }
622        return true;
623    }
624
625    /**
626     * Quote illegal chars for each component, but not the others
627     *
628     * @param component java.lang.String the component to be converted
629     * @param legalSet the legal character set allowed in the component
630     * @return java.lang.String the converted string
631     */
632    private String quoteComponent(String component, String legalSet) {
633        try {
634            /*
635             * Use a different encoder than URLEncoder since: 1. chars like "/",
636             * "#", "@" etc needs to be preserved instead of being encoded, 2.
637             * UTF-8 char set needs to be used for encoding instead of default
638             * platform one
639             */
640            return URIEncoderDecoder.quoteIllegal(component, legalSet);
641        } catch (UnsupportedEncodingException e) {
642            throw new RuntimeException(e.toString());
643        }
644    }
645
646    /**
647     * Compares this URI with the given argument {@code uri}. This method will
648     * return a negative value if this URI instance is less than the given
649     * argument and a positive value if this URI instance is greater than the
650     * given argument. The return value {@code 0} indicates that the two
651     * instances represent the same URI. To define the order the single parts of
652     * the URI are compared with each other. String components will be ordered
653     * in the natural case-sensitive way. A hierarchical URI is less than an
654     * opaque URI and if one part is {@code null} the URI with the undefined
655     * part is less than the other one.
656     *
657     * @param uri
658     *            the URI this instance has to compare with.
659     * @return the value representing the order of the two instances.
660     */
661    public int compareTo(URI uri) {
662        int ret;
663
664        // compare schemes
665        if (scheme == null && uri.scheme != null) {
666            return -1;
667        } else if (scheme != null && uri.scheme == null) {
668            return 1;
669        } else if (scheme != null && uri.scheme != null) {
670            ret = scheme.compareToIgnoreCase(uri.scheme);
671            if (ret != 0) {
672                return ret;
673            }
674        }
675
676        // compare opacities
677        if (!opaque && uri.opaque) {
678            return -1;
679        } else if (opaque && !uri.opaque) {
680            return 1;
681        } else if (opaque && uri.opaque) {
682            ret = schemeSpecificPart.compareTo(uri.schemeSpecificPart);
683            if (ret != 0) {
684                return ret;
685            }
686        } else {
687
688            // otherwise both must be hierarchical
689
690            // compare authorities
691            if (authority != null && uri.authority == null) {
692                return 1;
693            } else if (authority == null && uri.authority != null) {
694                return -1;
695            } else if (authority != null && uri.authority != null) {
696                if (host != null && uri.host != null) {
697                    // both are server based, so compare userInfo, host, port
698                    if (userInfo != null && uri.userInfo == null) {
699                        return 1;
700                    } else if (userInfo == null && uri.userInfo != null) {
701                        return -1;
702                    } else if (userInfo != null && uri.userInfo != null) {
703                        ret = userInfo.compareTo(uri.userInfo);
704                        if (ret != 0) {
705                            return ret;
706                        }
707                    }
708
709                    // userInfo's are the same, compare hostname
710                    ret = host.compareToIgnoreCase(uri.host);
711                    if (ret != 0) {
712                        return ret;
713                    }
714
715                    // compare port
716                    if (port != uri.port) {
717                        return port - uri.port;
718                    }
719                } else { // one or both are registry based, compare the whole
720                    // authority
721                    ret = authority.compareTo(uri.authority);
722                    if (ret != 0) {
723                        return ret;
724                    }
725                }
726            }
727
728            // authorities are the same
729            // compare paths
730            ret = path.compareTo(uri.path);
731            if (ret != 0) {
732                return ret;
733            }
734
735            // compare queries
736
737            if (query != null && uri.query == null) {
738                return 1;
739            } else if (query == null && uri.query != null) {
740                return -1;
741            } else if (query != null && uri.query != null) {
742                ret = query.compareTo(uri.query);
743                if (ret != 0) {
744                    return ret;
745                }
746            }
747        }
748
749        // everything else is identical, so compare fragments
750        if (fragment != null && uri.fragment == null) {
751            return 1;
752        } else if (fragment == null && uri.fragment != null) {
753            return -1;
754        } else if (fragment != null && uri.fragment != null) {
755            ret = fragment.compareTo(uri.fragment);
756            if (ret != 0) {
757                return ret;
758            }
759        }
760
761        // identical
762        return 0;
763    }
764
765    /**
766     * Returns the URI formed by parsing {@code uri}. This method behaves
767     * identically to the string constructor but throws a different exception
768     * on failure. The constructor fails with a checked {@link
769     * URISyntaxException}; this method fails with an unchecked {@link
770     * IllegalArgumentException}.
771     */
772    public static URI create(String uri) {
773        try {
774            return new URI(uri);
775        } catch (URISyntaxException e) {
776            throw new IllegalArgumentException(e.getMessage());
777        }
778    }
779
780    private URI duplicate() {
781        URI clone = new URI();
782        clone.absolute = absolute;
783        clone.authority = authority;
784        clone.fragment = fragment;
785        clone.host = host;
786        clone.opaque = opaque;
787        clone.path = path;
788        clone.port = port;
789        clone.query = query;
790        clone.scheme = scheme;
791        clone.schemeSpecificPart = schemeSpecificPart;
792        clone.userInfo = userInfo;
793        clone.serverAuthority = serverAuthority;
794        return clone;
795    }
796
797    /*
798     * Takes a string that may contain hex sequences like %F1 or %2b and
799     * converts the hex values following the '%' to lowercase
800     */
801    private String convertHexToLowerCase(String s) {
802        StringBuilder result = new StringBuilder("");
803        if (s.indexOf('%') == -1) {
804            return s;
805        }
806
807        int index, prevIndex = 0;
808        while ((index = s.indexOf('%', prevIndex)) != -1) {
809            result.append(s.substring(prevIndex, index + 1));
810            result.append(s.substring(index + 1, index + 3).toLowerCase());
811            index += 3;
812            prevIndex = index;
813        }
814        return result.toString();
815    }
816
817    /**
818     * Returns true if {@code first} and {@code second} are equal after
819     * unescaping hex sequences like %F1 and %2b.
820     */
821    private boolean escapedEquals(String first, String second) {
822        if (first.indexOf('%') != second.indexOf('%')) {
823            return first.equals(second);
824        }
825
826        int index, prevIndex = 0;
827        while ((index = first.indexOf('%', prevIndex)) != -1
828                && second.indexOf('%', prevIndex) == index) {
829            boolean match = first.substring(prevIndex, index).equals(
830                    second.substring(prevIndex, index));
831            if (!match) {
832                return false;
833            }
834
835            match = first.substring(index + 1, index + 3).equalsIgnoreCase(
836                    second.substring(index + 1, index + 3));
837            if (!match) {
838                return false;
839            }
840
841            index += 3;
842            prevIndex = index;
843        }
844        return first.substring(prevIndex).equals(second.substring(prevIndex));
845    }
846
847    /**
848     * Compares this URI instance with the given argument {@code o} and
849     * determines if both are equal. Two URI instances are equal if all single
850     * parts are identical in their meaning.
851     *
852     * @param o
853     *            the URI this instance has to be compared with.
854     * @return {@code true} if both URI instances point to the same resource,
855     *         {@code false} otherwise.
856     */
857    @Override
858    public boolean equals(Object o) {
859        if (!(o instanceof URI)) {
860            return false;
861        }
862        URI uri = (URI) o;
863
864        if (uri.fragment == null && fragment != null || uri.fragment != null
865                && fragment == null) {
866            return false;
867        } else if (uri.fragment != null && fragment != null) {
868            if (!escapedEquals(uri.fragment, fragment)) {
869                return false;
870            }
871        }
872
873        if (uri.scheme == null && scheme != null || uri.scheme != null
874                && scheme == null) {
875            return false;
876        } else if (uri.scheme != null && scheme != null) {
877            if (!uri.scheme.equalsIgnoreCase(scheme)) {
878                return false;
879            }
880        }
881
882        if (uri.opaque && opaque) {
883            return escapedEquals(uri.schemeSpecificPart,
884                    schemeSpecificPart);
885        } else if (!uri.opaque && !opaque) {
886            if (!escapedEquals(path, uri.path)) {
887                return false;
888            }
889
890            if (uri.query != null && query == null || uri.query == null
891                    && query != null) {
892                return false;
893            } else if (uri.query != null && query != null) {
894                if (!escapedEquals(uri.query, query)) {
895                    return false;
896                }
897            }
898
899            if (uri.authority != null && authority == null
900                    || uri.authority == null && authority != null) {
901                return false;
902            } else if (uri.authority != null && authority != null) {
903                if (uri.host != null && host == null || uri.host == null
904                        && host != null) {
905                    return false;
906                } else if (uri.host == null && host == null) {
907                    // both are registry based, so compare the whole authority
908                    return escapedEquals(uri.authority, authority);
909                } else { // uri.host != null && host != null, so server-based
910                    if (!host.equalsIgnoreCase(uri.host)) {
911                        return false;
912                    }
913
914                    if (port != uri.port) {
915                        return false;
916                    }
917
918                    if (uri.userInfo != null && userInfo == null
919                            || uri.userInfo == null && userInfo != null) {
920                        return false;
921                    } else if (uri.userInfo != null && userInfo != null) {
922                        return escapedEquals(userInfo, uri.userInfo);
923                    } else {
924                        return true;
925                    }
926                }
927            } else {
928                // no authority
929                return true;
930            }
931
932        } else {
933            // one is opaque, the other hierarchical
934            return false;
935        }
936    }
937
938    /**
939     * Gets the decoded authority part of this URI.
940     *
941     * @return the decoded authority part or {@code null} if undefined.
942     */
943    public String getAuthority() {
944        return decode(authority);
945    }
946
947    /**
948     * Gets the decoded fragment part of this URI.
949     *
950     * @return the decoded fragment part or {@code null} if undefined.
951     */
952    public String getFragment() {
953        return decode(fragment);
954    }
955
956    /**
957     * Gets the host part of this URI.
958     *
959     * @return the host part or {@code null} if undefined.
960     */
961    public String getHost() {
962        return host;
963    }
964
965    /**
966     * Gets the decoded path part of this URI.
967     *
968     * @return the decoded path part or {@code null} if undefined.
969     */
970    public String getPath() {
971        return decode(path);
972    }
973
974    /**
975     * Gets the port number of this URI.
976     *
977     * @return the port number or {@code -1} if undefined.
978     */
979    public int getPort() {
980        return port;
981    }
982
983    /**
984     * Returns the port of {@code host} that requests to this URI shall use.
985     * Unlike {@code getPort}, this returns the default port (80 or 443) for
986     * built-in protocols when known.
987     *
988     * @hide
989     */
990    public int getEffectivePort() {
991        if (port != -1) {
992            return port;
993        }
994
995        if ("http".equalsIgnoreCase(scheme)) {
996            return 80;
997        } else if ("https".equalsIgnoreCase(scheme)) {
998            return 443;
999        } else {
1000            return -1;
1001        }
1002    }
1003
1004    /**
1005     * Gets the decoded query part of this URI.
1006     *
1007     * @return the decoded query part or {@code null} if undefined.
1008     */
1009    public String getQuery() {
1010        return decode(query);
1011    }
1012
1013    /**
1014     * Gets the authority part of this URI in raw form.
1015     *
1016     * @return the encoded authority part or {@code null} if undefined.
1017     */
1018    public String getRawAuthority() {
1019        return authority;
1020    }
1021
1022    /**
1023     * Gets the fragment part of this URI in raw form.
1024     *
1025     * @return the encoded fragment part or {@code null} if undefined.
1026     */
1027    public String getRawFragment() {
1028        return fragment;
1029    }
1030
1031    /**
1032     * Gets the path part of this URI in raw form.
1033     *
1034     * @return the encoded path part or {@code null} if undefined.
1035     */
1036    public String getRawPath() {
1037        return path;
1038    }
1039
1040    /**
1041     * Gets the query part of this URI in raw form.
1042     *
1043     * @return the encoded query part or {@code null} if undefined.
1044     */
1045    public String getRawQuery() {
1046        return query;
1047    }
1048
1049    /**
1050     * Gets the scheme-specific part of this URI in raw form.
1051     *
1052     * @return the encoded scheme-specific part or {@code null} if undefined.
1053     */
1054    public String getRawSchemeSpecificPart() {
1055        return schemeSpecificPart;
1056    }
1057
1058    /**
1059     * Gets the user-info part of this URI in raw form.
1060     *
1061     * @return the encoded user-info part or {@code null} if undefined.
1062     */
1063    public String getRawUserInfo() {
1064        return userInfo;
1065    }
1066
1067    /**
1068     * Gets the scheme part of this URI.
1069     *
1070     * @return the scheme part or {@code null} if undefined.
1071     */
1072    public String getScheme() {
1073        return scheme;
1074    }
1075
1076    /**
1077     * Gets the decoded scheme-specific part of this URI.
1078     *
1079     * @return the decoded scheme-specific part or {@code null} if undefined.
1080     */
1081    public String getSchemeSpecificPart() {
1082        return decode(schemeSpecificPart);
1083    }
1084
1085    /**
1086     * Gets the decoded user-info part of this URI.
1087     *
1088     * @return the decoded user-info part or {@code null} if undefined.
1089     */
1090    public String getUserInfo() {
1091        return decode(userInfo);
1092    }
1093
1094    /**
1095     * Gets the hashcode value of this URI instance.
1096     *
1097     * @return the appropriate hashcode value.
1098     */
1099    @Override
1100    public int hashCode() {
1101        if (hash == -1) {
1102            hash = getHashString().hashCode();
1103        }
1104        return hash;
1105    }
1106
1107    /**
1108     * Indicates whether this URI is absolute, which means that a scheme part is
1109     * defined in this URI.
1110     *
1111     * @return {@code true} if this URI is absolute, {@code false} otherwise.
1112     */
1113    public boolean isAbsolute() {
1114        return absolute;
1115    }
1116
1117    /**
1118     * Indicates whether this URI is opaque or not. An opaque URI is absolute
1119     * and has a scheme-specific part which does not start with a slash
1120     * character. All parts except scheme, scheme-specific and fragment are
1121     * undefined.
1122     *
1123     * @return {@code true} if the URI is opaque, {@code false} otherwise.
1124     */
1125    public boolean isOpaque() {
1126        return opaque;
1127    }
1128
1129    /*
1130     * normalize path, and return the resulting string
1131     */
1132    private String normalize(String path) {
1133        // count the number of '/'s, to determine number of segments
1134        int index = -1;
1135        int pathLength = path.length();
1136        int size = 0;
1137        if (pathLength > 0 && path.charAt(0) != '/') {
1138            size++;
1139        }
1140        while ((index = path.indexOf('/', index + 1)) != -1) {
1141            if (index + 1 < pathLength && path.charAt(index + 1) != '/') {
1142                size++;
1143            }
1144        }
1145
1146        String[] segList = new String[size];
1147        boolean[] include = new boolean[size];
1148
1149        // break the path into segments and store in the list
1150        int current = 0;
1151        int index2;
1152        index = (pathLength > 0 && path.charAt(0) == '/') ? 1 : 0;
1153        while ((index2 = path.indexOf('/', index + 1)) != -1) {
1154            segList[current++] = path.substring(index, index2);
1155            index = index2 + 1;
1156        }
1157
1158        // if current==size, then the last character was a slash
1159        // and there are no more segments
1160        if (current < size) {
1161            segList[current] = path.substring(index);
1162        }
1163
1164        // determine which segments get included in the normalized path
1165        for (int i = 0; i < size; i++) {
1166            include[i] = true;
1167            if (segList[i].equals("..")) {
1168                int remove = i - 1;
1169                // search back to find a segment to remove, if possible
1170                while (remove > -1 && !include[remove]) {
1171                    remove--;
1172                }
1173                // if we find a segment to remove, remove it and the ".."
1174                // segment
1175                if (remove > -1 && !segList[remove].equals("..")) {
1176                    include[remove] = false;
1177                    include[i] = false;
1178                }
1179            } else if (segList[i].equals(".")) {
1180                include[i] = false;
1181            }
1182        }
1183
1184        // put the path back together
1185        StringBuilder newPath = new StringBuilder();
1186        if (path.startsWith("/")) {
1187            newPath.append('/');
1188        }
1189
1190        for (int i = 0; i < segList.length; i++) {
1191            if (include[i]) {
1192                newPath.append(segList[i]);
1193                newPath.append('/');
1194            }
1195        }
1196
1197        // if we used at least one segment and the path previously ended with
1198        // a slash and the last segment is still used, then delete the extra
1199        // trailing '/'
1200        if (!path.endsWith("/") && segList.length > 0
1201                && include[segList.length - 1]) {
1202            newPath.deleteCharAt(newPath.length() - 1);
1203        }
1204
1205        String result = newPath.toString();
1206
1207        // check for a ':' in the first segment if one exists,
1208        // prepend "./" to normalize
1209        index = result.indexOf(':');
1210        index2 = result.indexOf('/');
1211        if (index != -1 && (index < index2 || index2 == -1)) {
1212            newPath.insert(0, "./");
1213            result = newPath.toString();
1214        }
1215        return result;
1216    }
1217
1218    /**
1219     * Normalizes the path part of this URI.
1220     *
1221     * @return an URI object which represents this instance with a normalized
1222     *         path.
1223     */
1224    public URI normalize() {
1225        if (opaque) {
1226            return this;
1227        }
1228        String normalizedPath = normalize(path);
1229        // if the path is already normalized, return this
1230        if (path.equals(normalizedPath)) {
1231            return this;
1232        }
1233        // get an exact copy of the URI re-calculate the scheme specific part
1234        // since the path of the normalized URI is different from this URI.
1235        URI result = duplicate();
1236        result.path = normalizedPath;
1237        result.setSchemeSpecificPart();
1238        return result;
1239    }
1240
1241    /**
1242     * Tries to parse the authority component of this URI to divide it into the
1243     * host, port, and user-info. If this URI is already determined as a
1244     * ServerAuthority this instance will be returned without changes.
1245     *
1246     * @return this instance with the components of the parsed server authority.
1247     * @throws URISyntaxException
1248     *             if the authority part could not be parsed as a server-based
1249     *             authority.
1250     */
1251    public URI parseServerAuthority() throws URISyntaxException {
1252        if (!serverAuthority) {
1253            parseAuthority(true);
1254        }
1255        return this;
1256    }
1257
1258    /**
1259     * Makes the given URI {@code relative} to a relative URI against the URI
1260     * represented by this instance.
1261     *
1262     * @param relative
1263     *            the URI which has to be relativized against this URI.
1264     * @return the relative URI.
1265     */
1266    public URI relativize(URI relative) {
1267        if (relative.opaque || opaque) {
1268            return relative;
1269        }
1270
1271        if (scheme == null ? relative.scheme != null : !scheme
1272                .equals(relative.scheme)) {
1273            return relative;
1274        }
1275
1276        if (authority == null ? relative.authority != null : !authority
1277                .equals(relative.authority)) {
1278            return relative;
1279        }
1280
1281        // normalize both paths
1282        String thisPath = normalize(path);
1283        String relativePath = normalize(relative.path);
1284
1285        /*
1286         * if the paths aren't equal, then we need to determine if this URI's
1287         * path is a parent path (begins with) the relative URI's path
1288         */
1289        if (!thisPath.equals(relativePath)) {
1290            // if this URI's path doesn't end in a '/', add one
1291            if (!thisPath.endsWith("/")) {
1292                thisPath = thisPath + '/';
1293            }
1294            /*
1295             * if the relative URI's path doesn't start with this URI's path,
1296             * then just return the relative URI; the URIs have nothing in
1297             * common
1298             */
1299            if (!relativePath.startsWith(thisPath)) {
1300                return relative;
1301            }
1302        }
1303
1304        URI result = new URI();
1305        result.fragment = relative.fragment;
1306        result.query = relative.query;
1307        // the result URI is the remainder of the relative URI's path
1308        result.path = relativePath.substring(thisPath.length());
1309        result.setSchemeSpecificPart();
1310        return result;
1311    }
1312
1313    /**
1314     * Resolves the given URI {@code relative} against the URI represented by
1315     * this instance.
1316     *
1317     * @param relative
1318     *            the URI which has to be resolved against this URI.
1319     * @return the resolved URI.
1320     */
1321    public URI resolve(URI relative) {
1322        if (relative.absolute || opaque) {
1323            return relative;
1324        }
1325
1326        URI result;
1327        if (relative.path.isEmpty() && relative.scheme == null
1328                && relative.authority == null && relative.query == null
1329                && relative.fragment != null) {
1330            // if the relative URI only consists of fragment,
1331            // the resolved URI is very similar to this URI,
1332            // except that it has the fragment from the relative URI.
1333            result = duplicate();
1334            result.fragment = relative.fragment;
1335            // no need to re-calculate the scheme specific part,
1336            // since fragment is not part of scheme specific part.
1337            return result;
1338        }
1339
1340        if (relative.authority != null) {
1341            // if the relative URI has authority,
1342            // the resolved URI is almost the same as the relative URI,
1343            // except that it has the scheme of this URI.
1344            result = relative.duplicate();
1345            result.scheme = scheme;
1346            result.absolute = absolute;
1347        } else {
1348            // since relative URI has no authority,
1349            // the resolved URI is very similar to this URI,
1350            // except that it has the query and fragment of the relative URI,
1351            // and the path is different.
1352            result = duplicate();
1353            result.fragment = relative.fragment;
1354            result.query = relative.query;
1355            if (relative.path.startsWith("/")) {
1356                result.path = relative.path;
1357            } else {
1358                // resolve a relative reference
1359                int endIndex = path.lastIndexOf('/') + 1;
1360                result.path = normalize(path.substring(0, endIndex)
1361                        + relative.path);
1362            }
1363            // re-calculate the scheme specific part since
1364            // query and path of the resolved URI is different from this URI.
1365            result.setSchemeSpecificPart();
1366        }
1367        return result;
1368    }
1369
1370    /**
1371     * Helper method used to re-calculate the scheme specific part of the
1372     * resolved or normalized URIs
1373     */
1374    private void setSchemeSpecificPart() {
1375        // ssp = [//authority][path][?query]
1376        StringBuilder ssp = new StringBuilder();
1377        if (authority != null) {
1378            ssp.append("//" + authority);
1379        }
1380        if (path != null) {
1381            ssp.append(path);
1382        }
1383        if (query != null) {
1384            ssp.append("?" + query);
1385        }
1386        schemeSpecificPart = ssp.toString();
1387        // reset string, so that it can be re-calculated correctly when asked.
1388        string = null;
1389    }
1390
1391    /**
1392     * Creates a new URI instance by parsing the given string {@code relative}
1393     * and resolves the created URI against the URI represented by this
1394     * instance.
1395     *
1396     * @param relative
1397     *            the given string to create the new URI instance which has to
1398     *            be resolved later on.
1399     * @return the created and resolved URI.
1400     */
1401    public URI resolve(String relative) {
1402        return resolve(create(relative));
1403    }
1404
1405    /**
1406     * Encode unicode chars that are not part of US-ASCII char set into the
1407     * escaped form
1408     *
1409     * i.e. The Euro currency symbol is encoded as "%E2%82%AC".
1410     */
1411    private String encodeNonAscii(String s) {
1412        try {
1413            /*
1414             * Use a different encoder than URLEncoder since: 1. chars like "/",
1415             * "#", "@" etc needs to be preserved instead of being encoded, 2.
1416             * UTF-8 char set needs to be used for encoding instead of default
1417             * platform one 3. Only other chars need to be converted
1418             */
1419            return URIEncoderDecoder.encodeOthers(s);
1420        } catch (UnsupportedEncodingException e) {
1421            throw new RuntimeException(e.toString());
1422        }
1423    }
1424
1425    private String decode(String s) {
1426        if (s == null) {
1427            return s;
1428        }
1429
1430        try {
1431            return URIEncoderDecoder.decode(s);
1432        } catch (UnsupportedEncodingException e) {
1433            throw new RuntimeException(e.toString());
1434        }
1435    }
1436
1437    /**
1438     * Returns the textual string representation of this URI instance using the
1439     * US-ASCII encoding.
1440     *
1441     * @return the US-ASCII string representation of this URI.
1442     */
1443    public String toASCIIString() {
1444        return encodeNonAscii(toString());
1445    }
1446
1447    /**
1448     * Returns the textual string representation of this URI instance.
1449     *
1450     * @return the textual string representation of this URI.
1451     */
1452    @Override
1453    public String toString() {
1454        if (string == null) {
1455            StringBuilder result = new StringBuilder();
1456            if (scheme != null) {
1457                result.append(scheme);
1458                result.append(':');
1459            }
1460            if (opaque) {
1461                result.append(schemeSpecificPart);
1462            } else {
1463                if (authority != null) {
1464                    result.append("//");
1465                    result.append(authority);
1466                }
1467
1468                if (path != null) {
1469                    result.append(path);
1470                }
1471
1472                if (query != null) {
1473                    result.append('?');
1474                    result.append(query);
1475                }
1476            }
1477
1478            if (fragment != null) {
1479                result.append('#');
1480                result.append(fragment);
1481            }
1482
1483            string = result.toString();
1484        }
1485        return string;
1486    }
1487
1488    /*
1489     * Form a string from the components of this URI, similarly to the
1490     * toString() method. But this method converts scheme and host to lowercase,
1491     * and converts escaped octets to lowercase.
1492     */
1493    private String getHashString() {
1494        StringBuilder result = new StringBuilder();
1495        if (scheme != null) {
1496            result.append(scheme.toLowerCase());
1497            result.append(':');
1498        }
1499        if (opaque) {
1500            result.append(schemeSpecificPart);
1501        } else {
1502            if (authority != null) {
1503                result.append("//");
1504                if (host == null) {
1505                    result.append(authority);
1506                } else {
1507                    if (userInfo != null) {
1508                        result.append(userInfo + "@");
1509                    }
1510                    result.append(host.toLowerCase());
1511                    if (port != -1) {
1512                        result.append(":" + port);
1513                    }
1514                }
1515            }
1516
1517            if (path != null) {
1518                result.append(path);
1519            }
1520
1521            if (query != null) {
1522                result.append('?');
1523                result.append(query);
1524            }
1525        }
1526
1527        if (fragment != null) {
1528            result.append('#');
1529            result.append(fragment);
1530        }
1531
1532        return convertHexToLowerCase(result.toString());
1533    }
1534
1535    /**
1536     * Converts this URI instance to a URL.
1537     *
1538     * @return the created URL representing the same resource as this URI.
1539     * @throws MalformedURLException
1540     *             if an error occurs while creating the URL or no protocol
1541     *             handler could be found.
1542     */
1543    public URL toURL() throws MalformedURLException {
1544        if (!absolute) {
1545            throw new IllegalArgumentException("URI is not absolute: " + toString());
1546        }
1547        return new URL(toString());
1548    }
1549
1550    private void readObject(ObjectInputStream in) throws IOException,
1551            ClassNotFoundException {
1552        in.defaultReadObject();
1553        try {
1554            parseURI(string, false);
1555        } catch (URISyntaxException e) {
1556            throw new IOException(e.toString());
1557        }
1558    }
1559
1560    private void writeObject(ObjectOutputStream out) throws IOException,
1561            ClassNotFoundException {
1562        // call toString() to ensure the value of string field is calculated
1563        toString();
1564        out.defaultWriteObject();
1565    }
1566}
1567