URI.java revision 38607710cdc82cb1a0e81c2fc5c78278b435e4fc
1/*
2 *  Licensed to the Apache Software Foundation (ASF) under one or more
3 *  contributor license agreements.  See the NOTICE file distributed with
4 *  this work for additional information regarding copyright ownership.
5 *  The ASF licenses this file to You under the Apache License, Version 2.0
6 *  (the "License"); you may not use this file except in compliance with
7 *  the License.  You may obtain a copy of the License at
8 *
9 *     http://www.apache.org/licenses/LICENSE-2.0
10 *
11 *  Unless required by applicable law or agreed to in writing, software
12 *  distributed under the License is distributed on an "AS IS" BASIS,
13 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 *  See the License for the specific language governing permissions and
15 *  limitations under the License.
16 */
17
18package java.net;
19
20import java.io.IOException;
21import java.io.ObjectInputStream;
22import java.io.ObjectOutputStream;
23import java.io.Serializable;
24import java.io.UnsupportedEncodingException;
25import java.util.StringTokenizer;
26import org.apache.harmony.luni.platform.INetworkSystem;
27import org.apache.harmony.luni.platform.Platform;
28
29/**
30 * This class represents an instance of a URI as defined by RFC 2396.
31 */
32public final class URI implements Comparable<URI>, Serializable {
33
34    private final static INetworkSystem NETWORK_SYSTEM = Platform.getNetworkSystem();
35
36    private static final long serialVersionUID = -6052424284110960213l;
37
38    static final String UNRESERVED = "_-!.~\'()*";
39    static final String PUNCTUATION = ",;:$&+=";
40    static final String RESERVED = PUNCTUATION + "?/[]@";
41    static final String SOME_LEGAL = UNRESERVED + PUNCTUATION;
42    static final String ALL_LEGAL = UNRESERVED + RESERVED;
43
44    private String string;
45    private transient String scheme;
46    private transient String schemeSpecificPart;
47    private transient String authority;
48    private transient String userInfo;
49    private transient String host;
50    private transient int port = -1;
51    private transient String path;
52    private transient String query;
53    private transient String fragment;
54    private transient boolean opaque;
55    private transient boolean absolute;
56    private transient boolean serverAuthority = false;
57
58    private transient int hash = -1;
59
60    private URI() {}
61
62    /**
63     * Creates a new URI instance according to the given string {@code uri}.
64     *
65     * @param uri
66     *            the textual URI representation to be parsed into a URI object.
67     * @throws URISyntaxException
68     *             if the given string {@code uri} doesn't fit to the
69     *             specification RFC2396 or could not be parsed correctly.
70     */
71    public URI(String uri) throws URISyntaxException {
72        parseURI(uri, false);
73    }
74
75    /**
76     * Creates a new URI instance using the given arguments. This constructor
77     * first creates a temporary URI string from the given components. This
78     * string will be parsed later on to create the URI instance.
79     * <p>
80     * {@code [scheme:]scheme-specific-part[#fragment]}
81     *
82     * @param scheme
83     *            the scheme part of the URI.
84     * @param ssp
85     *            the scheme-specific-part of the URI.
86     * @param frag
87     *            the fragment part of the URI.
88     * @throws URISyntaxException
89     *             if the temporary created string doesn't fit to the
90     *             specification RFC2396 or could not be parsed correctly.
91     */
92    public URI(String scheme, String ssp, String frag)
93            throws URISyntaxException {
94        StringBuilder uri = new StringBuilder();
95        if (scheme != null) {
96            uri.append(scheme);
97            uri.append(':');
98        }
99        if (ssp != null) {
100            // QUOTE ILLEGAL CHARACTERS
101            uri.append(quoteComponent(ssp, ALL_LEGAL));
102        }
103        if (frag != null) {
104            uri.append('#');
105            // QUOTE ILLEGAL CHARACTERS
106            uri.append(quoteComponent(frag, ALL_LEGAL));
107        }
108
109        parseURI(uri.toString(), false);
110    }
111
112    /**
113     * Creates a new URI instance using the given arguments. This constructor
114     * first creates a temporary URI string from the given components. This
115     * string will be parsed later on to create the URI instance.
116     * <p>
117     * {@code [scheme:][user-info@]host[:port][path][?query][#fragment]}
118     *
119     * @param scheme
120     *            the scheme part of the URI.
121     * @param userInfo
122     *            the user information of the URI for authentication and
123     *            authorization.
124     * @param host
125     *            the host name of the URI.
126     * @param port
127     *            the port number of the URI.
128     * @param path
129     *            the path to the resource on the host.
130     * @param query
131     *            the query part of the URI to specify parameters for the
132     *            resource.
133     * @param fragment
134     *            the fragment part of the URI.
135     * @throws URISyntaxException
136     *             if the temporary created string doesn't fit to the
137     *             specification RFC2396 or could not be parsed correctly.
138     */
139    public URI(String scheme, String userInfo, String host, int port,
140            String path, String query, String fragment)
141            throws URISyntaxException {
142
143        if (scheme == null && userInfo == null && host == null && path == null
144                && query == null && fragment == null) {
145            this.path = "";
146            return;
147        }
148
149        if (scheme != null && path != null && path.length() > 0
150                && path.charAt(0) != '/') {
151            throw new URISyntaxException(path, "Relative path");
152        }
153
154        StringBuilder uri = new StringBuilder();
155        if (scheme != null) {
156            uri.append(scheme);
157            uri.append(':');
158        }
159
160        if (userInfo != null || host != null || port != -1) {
161            uri.append("//");
162        }
163
164        if (userInfo != null) {
165            // QUOTE ILLEGAL CHARACTERS in userInfo
166            uri.append(quoteComponent(userInfo, SOME_LEGAL));
167            uri.append('@');
168        }
169
170        if (host != null) {
171            // check for IPv6 addresses that hasn't been enclosed
172            // in square brackets
173            if (host.indexOf(':') != -1 && host.indexOf(']') == -1
174                    && host.indexOf('[') == -1) {
175                host = "[" + host + "]";
176            }
177            uri.append(host);
178        }
179
180        if (port != -1) {
181            uri.append(':');
182            uri.append(port);
183        }
184
185        if (path != null) {
186            // QUOTE ILLEGAL CHARS
187            uri.append(quoteComponent(path, "/@" + SOME_LEGAL));
188        }
189
190        if (query != null) {
191            uri.append('?');
192            // QUOTE ILLEGAL CHARS
193            uri.append(quoteComponent(query, ALL_LEGAL));
194        }
195
196        if (fragment != null) {
197            // QUOTE ILLEGAL CHARS
198            uri.append('#');
199            uri.append(quoteComponent(fragment, ALL_LEGAL));
200        }
201
202        parseURI(uri.toString(), true);
203    }
204
205    /**
206     * Creates a new URI instance using the given arguments. This constructor
207     * first creates a temporary URI string from the given components. This
208     * string will be parsed later on to create the URI instance.
209     * <p>
210     * {@code [scheme:]host[path][#fragment]}
211     *
212     * @param scheme
213     *            the scheme part of the URI.
214     * @param host
215     *            the host name of the URI.
216     * @param path
217     *            the path to the resource on the host.
218     * @param fragment
219     *            the fragment part of the URI.
220     * @throws URISyntaxException
221     *             if the temporary created string doesn't fit to the
222     *             specification RFC2396 or could not be parsed correctly.
223     */
224    public URI(String scheme, String host, String path, String fragment)
225            throws URISyntaxException {
226        this(scheme, null, host, -1, path, null, fragment);
227    }
228
229    /**
230     * Creates a new URI instance using the given arguments. This constructor
231     * first creates a temporary URI string from the given components. This
232     * string will be parsed later on to create the URI instance.
233     * <p>
234     * {@code [scheme:][//authority][path][?query][#fragment]}
235     *
236     * @param scheme
237     *            the scheme part of the URI.
238     * @param authority
239     *            the authority part of the URI.
240     * @param path
241     *            the path to the resource on the host.
242     * @param query
243     *            the query part of the URI to specify parameters for the
244     *            resource.
245     * @param fragment
246     *            the fragment part of the URI.
247     * @throws URISyntaxException
248     *             if the temporary created string doesn't fit to the
249     *             specification RFC2396 or could not be parsed correctly.
250     */
251    public URI(String scheme, String authority, String path, String query,
252            String fragment) throws URISyntaxException {
253        if (scheme != null && path != null && path.length() > 0
254                && path.charAt(0) != '/') {
255            throw new URISyntaxException(path, "Relative path");
256        }
257
258        StringBuilder uri = new StringBuilder();
259        if (scheme != null) {
260            uri.append(scheme);
261            uri.append(':');
262        }
263        if (authority != null) {
264            uri.append("//");
265            // QUOTE ILLEGAL CHARS
266            uri.append(quoteComponent(authority, "@[]" + SOME_LEGAL));
267        }
268
269        if (path != null) {
270            // QUOTE ILLEGAL CHARS
271            uri.append(quoteComponent(path, "/@" + SOME_LEGAL));
272        }
273        if (query != null) {
274            // QUOTE ILLEGAL CHARS
275            uri.append('?');
276            uri.append(quoteComponent(query, ALL_LEGAL));
277        }
278        if (fragment != null) {
279            // QUOTE ILLEGAL CHARS
280            uri.append('#');
281            uri.append(quoteComponent(fragment, ALL_LEGAL));
282        }
283
284        parseURI(uri.toString(), false);
285    }
286
287    private void parseURI(String uri, boolean forceServer) throws URISyntaxException {
288        String temp = uri;
289        // assign uri string to the input value per spec
290        string = uri;
291        int index, index1, index2, index3;
292        // parse into Fragment, Scheme, and SchemeSpecificPart
293        // then parse SchemeSpecificPart if necessary
294
295        // Fragment
296        index = temp.indexOf('#');
297        if (index != -1) {
298            // remove the fragment from the end
299            fragment = temp.substring(index + 1);
300            validateFragment(uri, fragment, index + 1);
301            temp = temp.substring(0, index);
302        }
303
304        // Scheme and SchemeSpecificPart
305        index = index1 = temp.indexOf(':');
306        index2 = temp.indexOf('/');
307        index3 = temp.indexOf('?');
308
309        // if a '/' or '?' occurs before the first ':' the uri has no
310        // specified scheme, and is therefore not absolute
311        if (index != -1 && (index2 >= index || index2 == -1)
312                && (index3 >= index || index3 == -1)) {
313            // the characters up to the first ':' comprise the scheme
314            absolute = true;
315            scheme = temp.substring(0, index);
316            if (scheme.length() == 0) {
317                throw new URISyntaxException(uri, "Scheme expected", index);
318            }
319            validateScheme(uri, scheme, 0);
320            schemeSpecificPart = temp.substring(index + 1);
321            if (schemeSpecificPart.length() == 0) {
322                throw new URISyntaxException(uri, "Scheme-specific part expected", index + 1);
323            }
324        } else {
325            absolute = false;
326            schemeSpecificPart = temp;
327        }
328
329        if (scheme == null || schemeSpecificPart.length() > 0
330                && schemeSpecificPart.charAt(0) == '/') {
331            opaque = false;
332            // the URI is hierarchical
333
334            // Query
335            temp = schemeSpecificPart;
336            index = temp.indexOf('?');
337            if (index != -1) {
338                query = temp.substring(index + 1);
339                temp = temp.substring(0, index);
340                validateQuery(uri, query, index2 + 1 + index);
341            }
342
343            // Authority and Path
344            if (temp.startsWith("//")) {
345                index = temp.indexOf('/', 2);
346                if (index != -1) {
347                    authority = temp.substring(2, index);
348                    path = temp.substring(index);
349                } else {
350                    authority = temp.substring(2);
351                    if (authority.length() == 0 && query == null
352                            && fragment == null) {
353                        throw new URISyntaxException(uri, "Authority expected", uri.length());
354                    }
355
356                    path = "";
357                    // nothing left, so path is empty (not null, path should
358                    // never be null)
359                }
360
361                if (authority.length() == 0) {
362                    authority = null;
363                } else {
364                    validateAuthority(uri, authority, index1 + 3);
365                }
366            } else { // no authority specified
367                path = temp;
368            }
369
370            int pathIndex = 0;
371            if (index2 > -1) {
372                pathIndex += index2;
373            }
374            if (index > -1) {
375                pathIndex += index;
376            }
377            validatePath(uri, path, pathIndex);
378        } else { // if not hierarchical, URI is opaque
379            opaque = true;
380            validateSsp(uri, schemeSpecificPart, index2 + 2 + index);
381        }
382
383        parseAuthority(forceServer);
384    }
385
386    private void validateScheme(String uri, String scheme, int index)
387            throws URISyntaxException {
388        // first char needs to be an alpha char
389        char ch = scheme.charAt(0);
390        if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'))) {
391            throw new URISyntaxException(uri, "Illegal character in scheme", 0);
392        }
393
394        try {
395            URIEncoderDecoder.validateSimple(scheme, "+-.");
396        } catch (URISyntaxException e) {
397            throw new URISyntaxException(uri, "Illegal character in scheme", index + e.getIndex());
398        }
399    }
400
401    private void validateSsp(String uri, String ssp, int index)
402            throws URISyntaxException {
403        try {
404            URIEncoderDecoder.validate(ssp, ALL_LEGAL);
405        } catch (URISyntaxException e) {
406            throw new URISyntaxException(uri,
407                    e.getReason() + " in schemeSpecificPart", index + e.getIndex());
408        }
409    }
410
411    private void validateAuthority(String uri, String authority, int index)
412            throws URISyntaxException {
413        try {
414            URIEncoderDecoder.validate(authority, "@[]" + SOME_LEGAL);
415        } catch (URISyntaxException e) {
416            throw new URISyntaxException(uri, e.getReason() + " in authority", index + e.getIndex());
417        }
418    }
419
420    private void validatePath(String uri, String path, int index)
421            throws URISyntaxException {
422        try {
423            URIEncoderDecoder.validate(path, "/@" + SOME_LEGAL);
424        } catch (URISyntaxException e) {
425            throw new URISyntaxException(uri, e.getReason() + " in path", index + e.getIndex());
426        }
427    }
428
429    private void validateQuery(String uri, String query, int index)
430            throws URISyntaxException {
431        try {
432            URIEncoderDecoder.validate(query, ALL_LEGAL);
433        } catch (URISyntaxException e) {
434            throw new URISyntaxException(uri, e.getReason() + " in query", index + e.getIndex());
435
436        }
437    }
438
439    private void validateFragment(String uri, String fragment, int index)
440            throws URISyntaxException {
441        try {
442            URIEncoderDecoder.validate(fragment, ALL_LEGAL);
443        } catch (URISyntaxException e) {
444            throw new URISyntaxException(uri, e.getReason() + " in fragment", index + e.getIndex());
445        }
446    }
447
448    /**
449     * Parse the authority string into its component parts: user info,
450     * host, and port. This operation doesn't apply to registry URIs, and
451     * calling it on such <i>may</i> result in a syntax exception.
452     *
453     * @param forceServer true to always throw if the authority cannot be
454     *     parsed. If false, this method may still throw for some kinds of
455     *     errors; this unpredictable behaviour is consistent with the RI.
456     */
457    private void parseAuthority(boolean forceServer) throws URISyntaxException {
458        if (authority == null) {
459            return;
460        }
461
462        String tempUserInfo = null;
463        String temp = authority;
464        int index = temp.indexOf('@');
465        int hostIndex = 0;
466        if (index != -1) {
467            // remove user info
468            tempUserInfo = temp.substring(0, index);
469            validateUserInfo(authority, tempUserInfo, 0);
470            temp = temp.substring(index + 1); // host[:port] is left
471            hostIndex = index + 1;
472        }
473
474        index = temp.lastIndexOf(':');
475        int endIndex = temp.indexOf(']');
476
477        String tempHost;
478        int tempPort = -1;
479        if (index != -1 && endIndex < index) {
480            // determine port and host
481            tempHost = temp.substring(0, index);
482
483            if (index < (temp.length() - 1)) { // port part is not empty
484                try {
485                    tempPort = Integer.parseInt(temp.substring(index + 1));
486                    if (tempPort < 0) {
487                        if (forceServer) {
488                            throw new URISyntaxException(authority,
489                                    "Invalid port number", hostIndex + index + 1);
490                        }
491                        return;
492                    }
493                } catch (NumberFormatException e) {
494                    if (forceServer) {
495                        throw new URISyntaxException(authority,
496                                "Invalid port number", hostIndex + index + 1);
497                    }
498                    return;
499                }
500            }
501        } else {
502            tempHost = temp;
503        }
504
505        if (tempHost.equals("")) {
506            if (forceServer) {
507                throw new URISyntaxException(authority, "Expected host", hostIndex);
508            }
509            return;
510        }
511
512        if (!isValidHost(forceServer, tempHost)) {
513            return;
514        }
515
516        // this is a server based uri,
517        // fill in the userInfo, host and port fields
518        userInfo = tempUserInfo;
519        host = tempHost;
520        port = tempPort;
521        serverAuthority = true;
522    }
523
524    private void validateUserInfo(String uri, String userInfo, int index)
525            throws URISyntaxException {
526        for (int i = 0; i < userInfo.length(); i++) {
527            char ch = userInfo.charAt(i);
528            if (ch == ']' || ch == '[') {
529                throw new URISyntaxException(uri, "Illegal character in userInfo", index + i);
530            }
531        }
532    }
533
534    /**
535     * Returns true if {@code host} is a well-formed host name or IP address.
536     *
537     * @param forceServer true to always throw if the host cannot be parsed. If
538     *     false, this method may still throw for some kinds of errors; this
539     *     unpredictable behaviour is consistent with the RI.
540     */
541    private boolean isValidHost(boolean forceServer, String host) throws URISyntaxException {
542        if (host.startsWith("[")) {
543            // IPv6 address
544            if (!host.endsWith("]")) {
545                throw new URISyntaxException(host,
546                        "Expected a closing square bracket for IPv6 address", 0);
547            }
548            try {
549                byte[] bytes = NETWORK_SYSTEM.ipStringToByteArray(host);
550                /*
551                 * The native IP parser may return 4 bytes for addresses like
552                 * "[::FFFF:127.0.0.1]". This is allowed, but we must not accept
553                 * IPv4-formatted addresses in square braces like "[127.0.0.1]".
554                 */
555                if (bytes.length == 16 || bytes.length == 4 && host.contains(":")) {
556                    return true;
557                }
558            } catch (UnknownHostException e) {
559            }
560            throw new URISyntaxException(host, "Malformed IPv6 address");
561        }
562
563        // '[' and ']' can only be the first char and last char
564        // of the host name
565        if (host.indexOf('[') != -1 || host.indexOf(']') != -1) {
566            throw new URISyntaxException(host, "Illegal character in host name", 0);
567        }
568
569        int index = host.lastIndexOf('.');
570        if (index < 0 || index == host.length() - 1
571                || !Character.isDigit(host.charAt(index + 1))) {
572            // domain name
573            if (isValidDomainName(host)) {
574                return true;
575            }
576            if (forceServer) {
577                throw new URISyntaxException(host, "Illegal character in host name", 0);
578            }
579            return false;
580        }
581
582        // IPv4 address
583        try {
584            if (NETWORK_SYSTEM.ipStringToByteArray(host).length == 4) {
585                return true;
586            }
587        } catch (UnknownHostException e) {
588        }
589
590        if (forceServer) {
591            throw new URISyntaxException(host, "Malformed IPv4 address", 0);
592        }
593        return false;
594    }
595
596    private boolean isValidDomainName(String host) {
597        try {
598            URIEncoderDecoder.validateSimple(host, "-.");
599        } catch (URISyntaxException e) {
600            return false;
601        }
602
603        String lastLabel = null;
604        StringTokenizer st = new StringTokenizer(host, ".");
605        while (st.hasMoreTokens()) {
606            lastLabel = st.nextToken();
607            if (lastLabel.startsWith("-") || lastLabel.endsWith("-")) {
608                return false;
609            }
610        }
611
612        if (lastLabel == null) {
613            return false;
614        }
615
616        if (!lastLabel.equals(host)) {
617            char ch = lastLabel.charAt(0);
618            if (ch >= '0' && ch <= '9') {
619                return false;
620            }
621        }
622        return true;
623    }
624
625    /**
626     * Quote illegal chars for each component, but not the others
627     *
628     * @param component java.lang.String the component to be converted
629     * @param legalSet the legal character set allowed in the component
630     * @return java.lang.String the converted string
631     */
632    private String quoteComponent(String component, String legalSet) {
633        try {
634            /*
635             * Use a different encoder than URLEncoder since: 1. chars like "/",
636             * "#", "@" etc needs to be preserved instead of being encoded, 2.
637             * UTF-8 char set needs to be used for encoding instead of default
638             * platform one
639             */
640            return URIEncoderDecoder.quoteIllegal(component, legalSet);
641        } catch (UnsupportedEncodingException e) {
642            throw new RuntimeException(e.toString());
643        }
644    }
645
646    /**
647     * Compares this URI with the given argument {@code uri}. This method will
648     * return a negative value if this URI instance is less than the given
649     * argument and a positive value if this URI instance is greater than the
650     * given argument. The return value {@code 0} indicates that the two
651     * instances represent the same URI. To define the order the single parts of
652     * the URI are compared with each other. String components will be ordered
653     * in the natural case-sensitive way. A hierarchical URI is less than an
654     * opaque URI and if one part is {@code null} the URI with the undefined
655     * part is less than the other one.
656     *
657     * @param uri
658     *            the URI this instance has to compare with.
659     * @return the value representing the order of the two instances.
660     */
661    public int compareTo(URI uri) {
662        int ret;
663
664        // compare schemes
665        if (scheme == null && uri.scheme != null) {
666            return -1;
667        } else if (scheme != null && uri.scheme == null) {
668            return 1;
669        } else if (scheme != null && uri.scheme != null) {
670            ret = scheme.compareToIgnoreCase(uri.scheme);
671            if (ret != 0) {
672                return ret;
673            }
674        }
675
676        // compare opacities
677        if (!opaque && uri.opaque) {
678            return -1;
679        } else if (opaque && !uri.opaque) {
680            return 1;
681        } else if (opaque && uri.opaque) {
682            ret = schemeSpecificPart.compareTo(uri.schemeSpecificPart);
683            if (ret != 0) {
684                return ret;
685            }
686        } else {
687
688            // otherwise both must be hierarchical
689
690            // compare authorities
691            if (authority != null && uri.authority == null) {
692                return 1;
693            } else if (authority == null && uri.authority != null) {
694                return -1;
695            } else if (authority != null && uri.authority != null) {
696                if (host != null && uri.host != null) {
697                    // both are server based, so compare userInfo, host, port
698                    if (userInfo != null && uri.userInfo == null) {
699                        return 1;
700                    } else if (userInfo == null && uri.userInfo != null) {
701                        return -1;
702                    } else if (userInfo != null && uri.userInfo != null) {
703                        ret = userInfo.compareTo(uri.userInfo);
704                        if (ret != 0) {
705                            return ret;
706                        }
707                    }
708
709                    // userInfo's are the same, compare hostname
710                    ret = host.compareToIgnoreCase(uri.host);
711                    if (ret != 0) {
712                        return ret;
713                    }
714
715                    // compare port
716                    if (port != uri.port) {
717                        return port - uri.port;
718                    }
719                } else { // one or both are registry based, compare the whole
720                    // authority
721                    ret = authority.compareTo(uri.authority);
722                    if (ret != 0) {
723                        return ret;
724                    }
725                }
726            }
727
728            // authorities are the same
729            // compare paths
730            ret = path.compareTo(uri.path);
731            if (ret != 0) {
732                return ret;
733            }
734
735            // compare queries
736
737            if (query != null && uri.query == null) {
738                return 1;
739            } else if (query == null && uri.query != null) {
740                return -1;
741            } else if (query != null && uri.query != null) {
742                ret = query.compareTo(uri.query);
743                if (ret != 0) {
744                    return ret;
745                }
746            }
747        }
748
749        // everything else is identical, so compare fragments
750        if (fragment != null && uri.fragment == null) {
751            return 1;
752        } else if (fragment == null && uri.fragment != null) {
753            return -1;
754        } else if (fragment != null && uri.fragment != null) {
755            ret = fragment.compareTo(uri.fragment);
756            if (ret != 0) {
757                return ret;
758            }
759        }
760
761        // identical
762        return 0;
763    }
764
765    /**
766     * Returns the URI formed by parsing {@code uri}. This method behaves
767     * identically to the string constructor but throws a different exception
768     * on failure. The constructor fails with a checked {@link
769     * URISyntaxException}; this method fails with an unchecked {@link
770     * IllegalArgumentException}.
771     */
772    public static URI create(String uri) {
773        try {
774            return new URI(uri);
775        } catch (URISyntaxException e) {
776            throw new IllegalArgumentException(e.getMessage());
777        }
778    }
779
780    private URI duplicate() {
781        URI clone = new URI();
782        clone.absolute = absolute;
783        clone.authority = authority;
784        clone.fragment = fragment;
785        clone.host = host;
786        clone.opaque = opaque;
787        clone.path = path;
788        clone.port = port;
789        clone.query = query;
790        clone.scheme = scheme;
791        clone.schemeSpecificPart = schemeSpecificPart;
792        clone.userInfo = userInfo;
793        clone.serverAuthority = serverAuthority;
794        return clone;
795    }
796
797    /*
798     * Takes a string that may contain hex sequences like %F1 or %2b and
799     * converts the hex values following the '%' to lowercase
800     */
801    private String convertHexToLowerCase(String s) {
802        StringBuilder result = new StringBuilder("");
803        if (s.indexOf('%') == -1) {
804            return s;
805        }
806
807        int index, prevIndex = 0;
808        while ((index = s.indexOf('%', prevIndex)) != -1) {
809            result.append(s.substring(prevIndex, index + 1));
810            result.append(s.substring(index + 1, index + 3).toLowerCase());
811            index += 3;
812            prevIndex = index;
813        }
814        return result.toString();
815    }
816
817    /**
818     * Returns true if {@code first} and {@code second} are equal after
819     * unescaping hex sequences like %F1 and %2b.
820     */
821    private boolean escapedEquals(String first, String second) {
822        if (first.indexOf('%') != second.indexOf('%')) {
823            return first.equals(second);
824        }
825
826        int index, prevIndex = 0;
827        while ((index = first.indexOf('%', prevIndex)) != -1
828                && second.indexOf('%', prevIndex) == index) {
829            boolean match = first.substring(prevIndex, index).equals(
830                    second.substring(prevIndex, index));
831            if (!match) {
832                return false;
833            }
834
835            match = first.substring(index + 1, index + 3).equalsIgnoreCase(
836                    second.substring(index + 1, index + 3));
837            if (!match) {
838                return false;
839            }
840
841            index += 3;
842            prevIndex = index;
843        }
844        return first.substring(prevIndex).equals(second.substring(prevIndex));
845    }
846
847    /**
848     * Compares this URI instance with the given argument {@code o} and
849     * determines if both are equal. Two URI instances are equal if all single
850     * parts are identical in their meaning.
851     *
852     * @param o
853     *            the URI this instance has to be compared with.
854     * @return {@code true} if both URI instances point to the same resource,
855     *         {@code false} otherwise.
856     */
857    @Override
858    public boolean equals(Object o) {
859        if (!(o instanceof URI)) {
860            return false;
861        }
862        URI uri = (URI) o;
863
864        if (uri.fragment == null && fragment != null || uri.fragment != null
865                && fragment == null) {
866            return false;
867        } else if (uri.fragment != null && fragment != null) {
868            if (!escapedEquals(uri.fragment, fragment)) {
869                return false;
870            }
871        }
872
873        if (uri.scheme == null && scheme != null || uri.scheme != null
874                && scheme == null) {
875            return false;
876        } else if (uri.scheme != null && scheme != null) {
877            if (!uri.scheme.equalsIgnoreCase(scheme)) {
878                return false;
879            }
880        }
881
882        if (uri.opaque && opaque) {
883            return escapedEquals(uri.schemeSpecificPart,
884                    schemeSpecificPart);
885        } else if (!uri.opaque && !opaque) {
886            if (!escapedEquals(path, uri.path)) {
887                return false;
888            }
889
890            if (uri.query != null && query == null || uri.query == null
891                    && query != null) {
892                return false;
893            } else if (uri.query != null && query != null) {
894                if (!escapedEquals(uri.query, query)) {
895                    return false;
896                }
897            }
898
899            if (uri.authority != null && authority == null
900                    || uri.authority == null && authority != null) {
901                return false;
902            } else if (uri.authority != null && authority != null) {
903                if (uri.host != null && host == null || uri.host == null
904                        && host != null) {
905                    return false;
906                } else if (uri.host == null && host == null) {
907                    // both are registry based, so compare the whole authority
908                    return escapedEquals(uri.authority, authority);
909                } else { // uri.host != null && host != null, so server-based
910                    if (!host.equalsIgnoreCase(uri.host)) {
911                        return false;
912                    }
913
914                    if (port != uri.port) {
915                        return false;
916                    }
917
918                    if (uri.userInfo != null && userInfo == null
919                            || uri.userInfo == null && userInfo != null) {
920                        return false;
921                    } else if (uri.userInfo != null && userInfo != null) {
922                        return escapedEquals(userInfo, uri.userInfo);
923                    } else {
924                        return true;
925                    }
926                }
927            } else {
928                // no authority
929                return true;
930            }
931
932        } else {
933            // one is opaque, the other hierarchical
934            return false;
935        }
936    }
937
938    /**
939     * Gets the decoded authority part of this URI.
940     *
941     * @return the decoded authority part or {@code null} if undefined.
942     */
943    public String getAuthority() {
944        return decode(authority);
945    }
946
947    /**
948     * Gets the decoded fragment part of this URI.
949     *
950     * @return the decoded fragment part or {@code null} if undefined.
951     */
952    public String getFragment() {
953        return decode(fragment);
954    }
955
956    /**
957     * Gets the host part of this URI.
958     *
959     * @return the host part or {@code null} if undefined.
960     */
961    public String getHost() {
962        return host;
963    }
964
965    /**
966     * Gets the decoded path part of this URI.
967     *
968     * @return the decoded path part or {@code null} if undefined.
969     */
970    public String getPath() {
971        return decode(path);
972    }
973
974    /**
975     * Gets the port number of this URI.
976     *
977     * @return the port number or {@code -1} if undefined.
978     */
979    public int getPort() {
980        return port;
981    }
982
983    /**
984     * Gets the decoded query part of this URI.
985     *
986     * @return the decoded query part or {@code null} if undefined.
987     */
988    public String getQuery() {
989        return decode(query);
990    }
991
992    /**
993     * Gets the authority part of this URI in raw form.
994     *
995     * @return the encoded authority part or {@code null} if undefined.
996     */
997    public String getRawAuthority() {
998        return authority;
999    }
1000
1001    /**
1002     * Gets the fragment part of this URI in raw form.
1003     *
1004     * @return the encoded fragment part or {@code null} if undefined.
1005     */
1006    public String getRawFragment() {
1007        return fragment;
1008    }
1009
1010    /**
1011     * Gets the path part of this URI in raw form.
1012     *
1013     * @return the encoded path part or {@code null} if undefined.
1014     */
1015    public String getRawPath() {
1016        return path;
1017    }
1018
1019    /**
1020     * Gets the query part of this URI in raw form.
1021     *
1022     * @return the encoded query part or {@code null} if undefined.
1023     */
1024    public String getRawQuery() {
1025        return query;
1026    }
1027
1028    /**
1029     * Gets the scheme-specific part of this URI in raw form.
1030     *
1031     * @return the encoded scheme-specific part or {@code null} if undefined.
1032     */
1033    public String getRawSchemeSpecificPart() {
1034        return schemeSpecificPart;
1035    }
1036
1037    /**
1038     * Gets the user-info part of this URI in raw form.
1039     *
1040     * @return the encoded user-info part or {@code null} if undefined.
1041     */
1042    public String getRawUserInfo() {
1043        return userInfo;
1044    }
1045
1046    /**
1047     * Gets the scheme part of this URI.
1048     *
1049     * @return the scheme part or {@code null} if undefined.
1050     */
1051    public String getScheme() {
1052        return scheme;
1053    }
1054
1055    /**
1056     * Gets the decoded scheme-specific part of this URI.
1057     *
1058     * @return the decoded scheme-specific part or {@code null} if undefined.
1059     */
1060    public String getSchemeSpecificPart() {
1061        return decode(schemeSpecificPart);
1062    }
1063
1064    /**
1065     * Gets the decoded user-info part of this URI.
1066     *
1067     * @return the decoded user-info part or {@code null} if undefined.
1068     */
1069    public String getUserInfo() {
1070        return decode(userInfo);
1071    }
1072
1073    /**
1074     * Gets the hashcode value of this URI instance.
1075     *
1076     * @return the appropriate hashcode value.
1077     */
1078    @Override
1079    public int hashCode() {
1080        if (hash == -1) {
1081            hash = getHashString().hashCode();
1082        }
1083        return hash;
1084    }
1085
1086    /**
1087     * Indicates whether this URI is absolute, which means that a scheme part is
1088     * defined in this URI.
1089     *
1090     * @return {@code true} if this URI is absolute, {@code false} otherwise.
1091     */
1092    public boolean isAbsolute() {
1093        return absolute;
1094    }
1095
1096    /**
1097     * Indicates whether this URI is opaque or not. An opaque URI is absolute
1098     * and has a scheme-specific part which does not start with a slash
1099     * character. All parts except scheme, scheme-specific and fragment are
1100     * undefined.
1101     *
1102     * @return {@code true} if the URI is opaque, {@code false} otherwise.
1103     */
1104    public boolean isOpaque() {
1105        return opaque;
1106    }
1107
1108    /*
1109     * normalize path, and return the resulting string
1110     */
1111    private String normalize(String path) {
1112        // count the number of '/'s, to determine number of segments
1113        int index = -1;
1114        int pathLength = path.length();
1115        int size = 0;
1116        if (pathLength > 0 && path.charAt(0) != '/') {
1117            size++;
1118        }
1119        while ((index = path.indexOf('/', index + 1)) != -1) {
1120            if (index + 1 < pathLength && path.charAt(index + 1) != '/') {
1121                size++;
1122            }
1123        }
1124
1125        String[] segList = new String[size];
1126        boolean[] include = new boolean[size];
1127
1128        // break the path into segments and store in the list
1129        int current = 0;
1130        int index2;
1131        index = (pathLength > 0 && path.charAt(0) == '/') ? 1 : 0;
1132        while ((index2 = path.indexOf('/', index + 1)) != -1) {
1133            segList[current++] = path.substring(index, index2);
1134            index = index2 + 1;
1135        }
1136
1137        // if current==size, then the last character was a slash
1138        // and there are no more segments
1139        if (current < size) {
1140            segList[current] = path.substring(index);
1141        }
1142
1143        // determine which segments get included in the normalized path
1144        for (int i = 0; i < size; i++) {
1145            include[i] = true;
1146            if (segList[i].equals("..")) {
1147                int remove = i - 1;
1148                // search back to find a segment to remove, if possible
1149                while (remove > -1 && !include[remove]) {
1150                    remove--;
1151                }
1152                // if we find a segment to remove, remove it and the ".."
1153                // segment
1154                if (remove > -1 && !segList[remove].equals("..")) {
1155                    include[remove] = false;
1156                    include[i] = false;
1157                }
1158            } else if (segList[i].equals(".")) {
1159                include[i] = false;
1160            }
1161        }
1162
1163        // put the path back together
1164        StringBuilder newPath = new StringBuilder();
1165        if (path.startsWith("/")) {
1166            newPath.append('/');
1167        }
1168
1169        for (int i = 0; i < segList.length; i++) {
1170            if (include[i]) {
1171                newPath.append(segList[i]);
1172                newPath.append('/');
1173            }
1174        }
1175
1176        // if we used at least one segment and the path previously ended with
1177        // a slash and the last segment is still used, then delete the extra
1178        // trailing '/'
1179        if (!path.endsWith("/") && segList.length > 0
1180                && include[segList.length - 1]) {
1181            newPath.deleteCharAt(newPath.length() - 1);
1182        }
1183
1184        String result = newPath.toString();
1185
1186        // check for a ':' in the first segment if one exists,
1187        // prepend "./" to normalize
1188        index = result.indexOf(':');
1189        index2 = result.indexOf('/');
1190        if (index != -1 && (index < index2 || index2 == -1)) {
1191            newPath.insert(0, "./");
1192            result = newPath.toString();
1193        }
1194        return result;
1195    }
1196
1197    /**
1198     * Normalizes the path part of this URI.
1199     *
1200     * @return an URI object which represents this instance with a normalized
1201     *         path.
1202     */
1203    public URI normalize() {
1204        if (opaque) {
1205            return this;
1206        }
1207        String normalizedPath = normalize(path);
1208        // if the path is already normalized, return this
1209        if (path.equals(normalizedPath)) {
1210            return this;
1211        }
1212        // get an exact copy of the URI re-calculate the scheme specific part
1213        // since the path of the normalized URI is different from this URI.
1214        URI result = duplicate();
1215        result.path = normalizedPath;
1216        result.setSchemeSpecificPart();
1217        return result;
1218    }
1219
1220    /**
1221     * Tries to parse the authority component of this URI to divide it into the
1222     * host, port, and user-info. If this URI is already determined as a
1223     * ServerAuthority this instance will be returned without changes.
1224     *
1225     * @return this instance with the components of the parsed server authority.
1226     * @throws URISyntaxException
1227     *             if the authority part could not be parsed as a server-based
1228     *             authority.
1229     */
1230    public URI parseServerAuthority() throws URISyntaxException {
1231        if (!serverAuthority) {
1232            parseAuthority(true);
1233        }
1234        return this;
1235    }
1236
1237    /**
1238     * Makes the given URI {@code relative} to a relative URI against the URI
1239     * represented by this instance.
1240     *
1241     * @param relative
1242     *            the URI which has to be relativized against this URI.
1243     * @return the relative URI.
1244     */
1245    public URI relativize(URI relative) {
1246        if (relative.opaque || opaque) {
1247            return relative;
1248        }
1249
1250        if (scheme == null ? relative.scheme != null : !scheme
1251                .equals(relative.scheme)) {
1252            return relative;
1253        }
1254
1255        if (authority == null ? relative.authority != null : !authority
1256                .equals(relative.authority)) {
1257            return relative;
1258        }
1259
1260        // normalize both paths
1261        String thisPath = normalize(path);
1262        String relativePath = normalize(relative.path);
1263
1264        /*
1265         * if the paths aren't equal, then we need to determine if this URI's
1266         * path is a parent path (begins with) the relative URI's path
1267         */
1268        if (!thisPath.equals(relativePath)) {
1269            // if this URI's path doesn't end in a '/', add one
1270            if (!thisPath.endsWith("/")) {
1271                thisPath = thisPath + '/';
1272            }
1273            /*
1274             * if the relative URI's path doesn't start with this URI's path,
1275             * then just return the relative URI; the URIs have nothing in
1276             * common
1277             */
1278            if (!relativePath.startsWith(thisPath)) {
1279                return relative;
1280            }
1281        }
1282
1283        URI result = new URI();
1284        result.fragment = relative.fragment;
1285        result.query = relative.query;
1286        // the result URI is the remainder of the relative URI's path
1287        result.path = relativePath.substring(thisPath.length());
1288        result.setSchemeSpecificPart();
1289        return result;
1290    }
1291
1292    /**
1293     * Resolves the given URI {@code relative} against the URI represented by
1294     * this instance.
1295     *
1296     * @param relative
1297     *            the URI which has to be resolved against this URI.
1298     * @return the resolved URI.
1299     */
1300    public URI resolve(URI relative) {
1301        if (relative.absolute || opaque) {
1302            return relative;
1303        }
1304
1305        URI result;
1306        if (relative.path.equals("") && relative.scheme == null
1307                && relative.authority == null && relative.query == null
1308                && relative.fragment != null) {
1309            // if the relative URI only consists of fragment,
1310            // the resolved URI is very similar to this URI,
1311            // except that it has the fragment from the relative URI.
1312            result = duplicate();
1313            result.fragment = relative.fragment;
1314            // no need to re-calculate the scheme specific part,
1315            // since fragment is not part of scheme specific part.
1316            return result;
1317        }
1318
1319        if (relative.authority != null) {
1320            // if the relative URI has authority,
1321            // the resolved URI is almost the same as the relative URI,
1322            // except that it has the scheme of this URI.
1323            result = relative.duplicate();
1324            result.scheme = scheme;
1325            result.absolute = absolute;
1326        } else {
1327            // since relative URI has no authority,
1328            // the resolved URI is very similar to this URI,
1329            // except that it has the query and fragment of the relative URI,
1330            // and the path is different.
1331            result = duplicate();
1332            result.fragment = relative.fragment;
1333            result.query = relative.query;
1334            if (relative.path.startsWith("/")) {
1335                result.path = relative.path;
1336            } else {
1337                // resolve a relative reference
1338                int endIndex = path.lastIndexOf('/') + 1;
1339                result.path = normalize(path.substring(0, endIndex)
1340                        + relative.path);
1341            }
1342            // re-calculate the scheme specific part since
1343            // query and path of the resolved URI is different from this URI.
1344            result.setSchemeSpecificPart();
1345        }
1346        return result;
1347    }
1348
1349    /**
1350     * Helper method used to re-calculate the scheme specific part of the
1351     * resolved or normalized URIs
1352     */
1353    private void setSchemeSpecificPart() {
1354        // ssp = [//authority][path][?query]
1355        StringBuilder ssp = new StringBuilder();
1356        if (authority != null) {
1357            ssp.append("//" + authority);
1358        }
1359        if (path != null) {
1360            ssp.append(path);
1361        }
1362        if (query != null) {
1363            ssp.append("?" + query);
1364        }
1365        schemeSpecificPart = ssp.toString();
1366        // reset string, so that it can be re-calculated correctly when asked.
1367        string = null;
1368    }
1369
1370    /**
1371     * Creates a new URI instance by parsing the given string {@code relative}
1372     * and resolves the created URI against the URI represented by this
1373     * instance.
1374     *
1375     * @param relative
1376     *            the given string to create the new URI instance which has to
1377     *            be resolved later on.
1378     * @return the created and resolved URI.
1379     */
1380    public URI resolve(String relative) {
1381        return resolve(create(relative));
1382    }
1383
1384    /**
1385     * Encode unicode chars that are not part of US-ASCII char set into the
1386     * escaped form
1387     *
1388     * i.e. The Euro currency symbol is encoded as "%E2%82%AC".
1389     */
1390    private String encodeNonAscii(String s) {
1391        try {
1392            /*
1393             * Use a different encoder than URLEncoder since: 1. chars like "/",
1394             * "#", "@" etc needs to be preserved instead of being encoded, 2.
1395             * UTF-8 char set needs to be used for encoding instead of default
1396             * platform one 3. Only other chars need to be converted
1397             */
1398            return URIEncoderDecoder.encodeOthers(s);
1399        } catch (UnsupportedEncodingException e) {
1400            throw new RuntimeException(e.toString());
1401        }
1402    }
1403
1404    private String decode(String s) {
1405        if (s == null) {
1406            return s;
1407        }
1408
1409        try {
1410            return URIEncoderDecoder.decode(s);
1411        } catch (UnsupportedEncodingException e) {
1412            throw new RuntimeException(e.toString());
1413        }
1414    }
1415
1416    /**
1417     * Returns the textual string representation of this URI instance using the
1418     * US-ASCII encoding.
1419     *
1420     * @return the US-ASCII string representation of this URI.
1421     */
1422    public String toASCIIString() {
1423        return encodeNonAscii(toString());
1424    }
1425
1426    /**
1427     * Returns the textual string representation of this URI instance.
1428     *
1429     * @return the textual string representation of this URI.
1430     */
1431    @Override
1432    public String toString() {
1433        if (string == null) {
1434            StringBuilder result = new StringBuilder();
1435            if (scheme != null) {
1436                result.append(scheme);
1437                result.append(':');
1438            }
1439            if (opaque) {
1440                result.append(schemeSpecificPart);
1441            } else {
1442                if (authority != null) {
1443                    result.append("//");
1444                    result.append(authority);
1445                }
1446
1447                if (path != null) {
1448                    result.append(path);
1449                }
1450
1451                if (query != null) {
1452                    result.append('?');
1453                    result.append(query);
1454                }
1455            }
1456
1457            if (fragment != null) {
1458                result.append('#');
1459                result.append(fragment);
1460            }
1461
1462            string = result.toString();
1463        }
1464        return string;
1465    }
1466
1467    /*
1468     * Form a string from the components of this URI, similarly to the
1469     * toString() method. But this method converts scheme and host to lowercase,
1470     * and converts escaped octets to lowercase.
1471     */
1472    private String getHashString() {
1473        StringBuilder result = new StringBuilder();
1474        if (scheme != null) {
1475            result.append(scheme.toLowerCase());
1476            result.append(':');
1477        }
1478        if (opaque) {
1479            result.append(schemeSpecificPart);
1480        } else {
1481            if (authority != null) {
1482                result.append("//");
1483                if (host == null) {
1484                    result.append(authority);
1485                } else {
1486                    if (userInfo != null) {
1487                        result.append(userInfo + "@");
1488                    }
1489                    result.append(host.toLowerCase());
1490                    if (port != -1) {
1491                        result.append(":" + port);
1492                    }
1493                }
1494            }
1495
1496            if (path != null) {
1497                result.append(path);
1498            }
1499
1500            if (query != null) {
1501                result.append('?');
1502                result.append(query);
1503            }
1504        }
1505
1506        if (fragment != null) {
1507            result.append('#');
1508            result.append(fragment);
1509        }
1510
1511        return convertHexToLowerCase(result.toString());
1512    }
1513
1514    /**
1515     * Converts this URI instance to a URL.
1516     *
1517     * @return the created URL representing the same resource as this URI.
1518     * @throws MalformedURLException
1519     *             if an error occurs while creating the URL or no protocol
1520     *             handler could be found.
1521     */
1522    public URL toURL() throws MalformedURLException {
1523        if (!absolute) {
1524            throw new IllegalArgumentException("URI is not absolute: " + toString());
1525        }
1526        return new URL(toString());
1527    }
1528
1529    private void readObject(ObjectInputStream in) throws IOException,
1530            ClassNotFoundException {
1531        in.defaultReadObject();
1532        try {
1533            parseURI(string, false);
1534        } catch (URISyntaxException e) {
1535            throw new IOException(e.toString());
1536        }
1537    }
1538
1539    private void writeObject(ObjectOutputStream out) throws IOException,
1540            ClassNotFoundException {
1541        // call toString() to ensure the value of string field is calculated
1542        toString();
1543        out.defaultWriteObject();
1544    }
1545}
1546