URI.java revision ecf950e56b55f95a960f151268286576159c6530
1/*
2 *  Licensed to the Apache Software Foundation (ASF) under one or more
3 *  contributor license agreements.  See the NOTICE file distributed with
4 *  this work for additional information regarding copyright ownership.
5 *  The ASF licenses this file to You under the Apache License, Version 2.0
6 *  (the "License"); you may not use this file except in compliance with
7 *  the License.  You may obtain a copy of the License at
8 *
9 *     http://www.apache.org/licenses/LICENSE-2.0
10 *
11 *  Unless required by applicable law or agreed to in writing, software
12 *  distributed under the License is distributed on an "AS IS" BASIS,
13 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 *  See the License for the specific language governing permissions and
15 *  limitations under the License.
16 */
17
18package java.net;
19
20import java.io.IOException;
21import java.io.ObjectInputStream;
22import java.io.ObjectOutputStream;
23import java.io.Serializable;
24import java.util.Locale;
25import libcore.net.UriCodec;
26import libcore.net.url.UrlUtils;
27
28/**
29 * A Uniform Resource Identifier that identifies an abstract or physical
30 * resource, as specified by <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC
31 * 2396</a>.
32 *
33 * <h3>Parts of a URI</h3>
34 * A URI is composed of many parts. This class can both parse URI strings into
35 * parts and compose URI strings from parts. For example, consider the parts of
36 * this URI:
37 * {@code http://username:password@host:8080/directory/file?query#fragment}
38 * <table>
39 * <tr><th>Component                                            </th><th>Example value                                                      </th><th>Also known as</th></tr>
40 * <tr><td>{@link #getScheme() Scheme}                          </td><td>{@code http}                                                       </td><td>protocol</td></tr>
41 * <tr><td>{@link #getSchemeSpecificPart() Scheme-specific part}</td><td>{@code //username:password@host:8080/directory/file?query#fragment}</td><td></td></tr>
42 * <tr><td>{@link #getAuthority() Authority}                    </td><td>{@code username:password@host:8080}                                </td><td></td></tr>
43 * <tr><td>{@link #getUserInfo() User Info}                     </td><td>{@code username:password}                                          </td><td></td></tr>
44 * <tr><td>{@link #getHost() Host}                              </td><td>{@code host}                                                       </td><td></td></tr>
45 * <tr><td>{@link #getPort() Port}                              </td><td>{@code 8080}                                                       </td><td></td></tr>
46 * <tr><td>{@link #getPath() Path}                              </td><td>{@code /directory/file}                                            </td><td></td></tr>
47 * <tr><td>{@link #getQuery() Query}                            </td><td>{@code query}                                                      </td><td></td></tr>
48 * <tr><td>{@link #getFragment() Fragment}                      </td><td>{@code fragment}                                                   </td><td>ref</td></tr>
49 * </table>
50 *
51 * <h3>Absolute vs. Relative URIs</h3>
52 * URIs are either {@link #isAbsolute() absolute or relative}.
53 * <ul>
54 *     <li><strong>Absolute:</strong> {@code http://android.com/robots.txt}
55 *     <li><strong>Relative:</strong> {@code robots.txt}
56 * </ul>
57 *
58 * <p>Absolute URIs always have a scheme. If its scheme is supported by {@link
59 * URL}, you can use {@link #toURL} to convert an absolute URI to a URL.
60 *
61 * <p>Relative URIs do not have a scheme and cannot be converted to URLs. If you
62 * have the absolute URI that a relative URI is relative to, you can use {@link
63 * #resolve} to compute the referenced absolute URI. Symmetrically, you can use
64 * {@link #relativize} to compute the relative URI from one URI to another.
65 * <pre>   {@code
66 *   URI absolute = new URI("http://android.com/");
67 *   URI relative = new URI("robots.txt");
68 *   URI resolved = new URI("http://android.com/robots.txt");
69 *
70 *   // print "http://android.com/robots.txt"
71 *   System.out.println(absolute.resolve(relative));
72 *
73 *   // print "robots.txt"
74 *   System.out.println(absolute.relativize(resolved));
75 * }</pre>
76 *
77 * <h3>Opaque vs. Hierarchical URIs</h3>
78 * Absolute URIs are either {@link #isOpaque() opaque or hierarchical}. Relative
79 * URIs are always hierarchical.
80 * <ul>
81 *     <li><strong>Hierarchical:</strong> {@code http://android.com/robots.txt}
82 *     <li><strong>Opaque:</strong> {@code mailto:robots@example.com}
83 * </ul>
84 *
85 * <p>Opaque URIs have both a scheme and a scheme-specific part that does not
 * begin with the slash character: {@code /}. The contents of the
 * scheme-specific part of an opaque URI are not parsed, so an opaque URI never
 * has an authority, user info, host, port, path or query. An opaque URI may
 * have a fragment, however. A typical opaque URI is
90 * {@code mailto:robots@example.com}.
91 * <table>
92 * <tr><th>Component           </th><th>Example value             </th></tr>
93 * <tr><td>Scheme              </td><td>{@code mailto}            </td></tr>
94 * <tr><td>Scheme-specific part</td><td>{@code robots@example.com}</td></tr>
95 * <tr><td>Fragment            </td><td>                          </td></tr>
96 * </table>
97 * <p>Hierarchical URIs may have values for any URL component. They always
98 * have a non-null path, though that path may be the empty string.
99 *
100 * <h3>Encoding and Decoding URI Components</h3>
101 * Each component of a URI permits a limited set of legal characters. Other
102 * characters must first be <i>encoded</i> before they can be embedded in a URI.
103 * To recover the original characters from a URI, they may be <i>decoded</i>.
104 * <strong>Contrary to what you might expect,</strong> this class uses the
105 * term <i>raw</i> to refer to encoded strings. The non-<i>raw</i> accessors
106 * return decoded strings. For example, consider how this URI is decoded:
107 * {@code http://user:pa55w%3Frd@host:80/doc%7Csearch?q=green%20robots#over%206%22}
108 * <table>
109 * <tr><th>Component           </th><th>Legal Characters                                                    </th><th>Other Constraints                                  </th><th>Raw Value                                                      </th><th>Value</th></tr>
110 * <tr><td>Scheme              </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code +-.}                  </td><td>First character must be in {@code a-z}, {@code A-Z}</td><td>                                                               </td><td>{@code http}</td></tr>
111 * <tr><td>Scheme-specific part</td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=?/[]@}</td><td>Non-ASCII characters okay                          </td><td>{@code //user:pa55w%3Frd@host:80/doc%7Csearch?q=green%20robots}</td><td>{@code //user:pa55w?rd@host:80/doc|search?q=green robots}</td></tr>
112 * <tr><td>Authority           </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=@[]}  </td><td>Non-ASCII characters okay                          </td><td>{@code user:pa55w%3Frd@host:80}                                </td><td>{@code user:pa55w?rd@host:80}</td></tr>
113 * <tr><td>User Info           </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=}     </td><td>Non-ASCII characters okay                          </td><td>{@code user:pa55w%3Frd}                                        </td><td>{@code user:pa55w?rd}</td></tr>
114 * <tr><td>Host                </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code -.[]}                 </td><td>Domain name, IPv4 address or [IPv6 address]        </td><td>                                                               </td><td>host</td></tr>
115 * <tr><td>Port                </td><td>{@code 0-9}                                                         </td><td>                                                   </td><td>                                                               </td><td>{@code 80}</td></tr>
116 * <tr><td>Path                </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=/@}   </td><td>Non-ASCII characters okay                          </td><td>{@code /doc%7Csearch}                                          </td><td>{@code /doc|search}</td></tr>
117 * <tr><td>Query               </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=?/[]@}</td><td>Non-ASCII characters okay                          </td><td>{@code q=green%20robots}                                       </td><td>{@code q=green robots}</td></tr>
118 * <tr><td>Fragment            </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=?/[]@}</td><td>Non-ASCII characters okay                          </td><td>{@code over%206%22}                                            </td><td>{@code over 6"}</td></tr>
119 * </table>
120 * A URI's host, port and scheme are not eligible for encoding and must not
121 * contain illegal characters.
122 *
123 * <p>To encode a URI, invoke any of the multiple-parameter constructors of this
124 * class. These constructors accept your original strings and encode them into
125 * their raw form.
126 *
127 * <p>To decode a URI, invoke the single-string constructor, and then use the
128 * appropriate accessor methods to get the decoded components.
129 *
130 * <p>The {@link URL} class can be used to retrieve resources by their URI.
131 */
132public final class URI implements Comparable<URI>, Serializable {
133
134    private static final long serialVersionUID = -6052424284110960213l;
135
136    static final String UNRESERVED = "_-!.~\'()*";
137    static final String PUNCTUATION = ",;:$&+=";
138
139    static final UriCodec USER_INFO_ENCODER = new PartEncoder("");
140    static final UriCodec PATH_ENCODER = new PartEncoder("/@");
141    static final UriCodec AUTHORITY_ENCODER = new PartEncoder("@[]");
142
143    /** for java.net.URL, which foolishly combines these two parts */
144    static final UriCodec FILE_AND_QUERY_ENCODER = new PartEncoder("/@?");
145
146    /** for query, fragment, and scheme-specific part */
147    static final UriCodec ALL_LEGAL_ENCODER = new PartEncoder("?/[]@");
148
149    /** Retains all ASCII chars including delimiters. */
150    private static final UriCodec ASCII_ONLY = new UriCodec() {
151        @Override protected boolean isRetained(char c) {
152            return c <= 127;
153        }
154    };
155
156    /**
157     * Encodes the unescaped characters of {@code s} that are not permitted.
158     * Permitted characters are:
159     * <ul>
160     *   <li>Unreserved characters in <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>.
161     *   <li>{@code extraOkayChars},
162     *   <li>non-ASCII, non-control, non-whitespace characters
163     * </ul>
164     */
165    private static class PartEncoder extends UriCodec {
166        private final String extraLegalCharacters;
167
168        PartEncoder(String extraLegalCharacters) {
169            this.extraLegalCharacters = extraLegalCharacters;
170        }
171
172        @Override protected boolean isRetained(char c) {
173            return UNRESERVED.indexOf(c) != -1
174                    || PUNCTUATION.indexOf(c) != -1
175                    || extraLegalCharacters.indexOf(c) != -1
176                    || (c > 127 && !Character.isSpaceChar(c) && !Character.isISOControl(c));
177        }
178    }
179
180    private String string;
181    private transient String scheme;
182    private transient String schemeSpecificPart;
183    private transient String authority;
184    private transient String userInfo;
185    private transient String host;
186    private transient int port = -1;
187    private transient String path;
188    private transient String query;
189    private transient String fragment;
190    private transient boolean opaque;
191    private transient boolean absolute;
192    private transient boolean serverAuthority = false;
193
194    private transient int hash = -1;
195
196    private URI() {}
197
198    /**
199     * Creates a new URI instance by parsing {@code spec}.
200     *
201     * @param spec a URI whose illegal characters have all been encoded.
202     */
203    public URI(String spec) throws URISyntaxException {
204        parseURI(spec, false);
205    }
206
207    /**
208     * Creates a new URI instance of the given unencoded component parts.
209     *
210     * @param scheme the URI scheme, or null for a non-absolute URI.
211     */
212    public URI(String scheme, String schemeSpecificPart, String fragment)
213            throws URISyntaxException {
214        StringBuilder uri = new StringBuilder();
215        if (scheme != null) {
216            uri.append(scheme);
217            uri.append(':');
218        }
219        if (schemeSpecificPart != null) {
220            ALL_LEGAL_ENCODER.appendEncoded(uri, schemeSpecificPart);
221        }
222        if (fragment != null) {
223            uri.append('#');
224            ALL_LEGAL_ENCODER.appendEncoded(uri, fragment);
225        }
226
227        parseURI(uri.toString(), false);
228    }
229
230    /**
231     * Creates a new URI instance of the given unencoded component parts.
232     *
233     * @param scheme the URI scheme, or null for a non-absolute URI.
234     */
235    public URI(String scheme, String userInfo, String host, int port, String path, String query,
236            String fragment) throws URISyntaxException {
237        if (scheme == null && userInfo == null && host == null && path == null
238                && query == null && fragment == null) {
239            this.path = "";
240            return;
241        }
242
243        if (scheme != null && path != null && !path.isEmpty() && path.charAt(0) != '/') {
244            throw new URISyntaxException(path, "Relative path");
245        }
246
247        StringBuilder uri = new StringBuilder();
248        if (scheme != null) {
249            uri.append(scheme);
250            uri.append(':');
251        }
252
253        if (userInfo != null || host != null || port != -1) {
254            uri.append("//");
255        }
256
257        if (userInfo != null) {
258            USER_INFO_ENCODER.appendEncoded(uri, userInfo);
259            uri.append('@');
260        }
261
262        if (host != null) {
263            // check for IPv6 addresses that hasn't been enclosed in square brackets
264            if (host.indexOf(':') != -1 && host.indexOf(']') == -1 && host.indexOf('[') == -1) {
265                host = "[" + host + "]";
266            }
267            uri.append(host);
268        }
269
270        if (port != -1) {
271            uri.append(':');
272            uri.append(port);
273        }
274
275        if (path != null) {
276            PATH_ENCODER.appendEncoded(uri, path);
277        }
278
279        if (query != null) {
280            uri.append('?');
281            ALL_LEGAL_ENCODER.appendEncoded(uri, query);
282        }
283
284        if (fragment != null) {
285            uri.append('#');
286            ALL_LEGAL_ENCODER.appendEncoded(uri, fragment);
287        }
288
289        parseURI(uri.toString(), true);
290    }
291
292    /**
293     * Creates a new URI instance of the given unencoded component parts.
294     *
295     * @param scheme the URI scheme, or null for a non-absolute URI.
296     */
297    public URI(String scheme, String host, String path, String fragment) throws URISyntaxException {
298        this(scheme, null, host, -1, path, null, fragment);
299    }
300
301    /**
302     * Creates a new URI instance of the given unencoded component parts.
303     *
304     * @param scheme the URI scheme, or null for a non-absolute URI.
305     */
306    public URI(String scheme, String authority, String path, String query,
307            String fragment) throws URISyntaxException {
308        if (scheme != null && path != null && !path.isEmpty() && path.charAt(0) != '/') {
309            throw new URISyntaxException(path, "Relative path");
310        }
311
312        StringBuilder uri = new StringBuilder();
313        if (scheme != null) {
314            uri.append(scheme);
315            uri.append(':');
316        }
317        if (authority != null) {
318            uri.append("//");
319            AUTHORITY_ENCODER.appendEncoded(uri, authority);
320        }
321
322        if (path != null) {
323            PATH_ENCODER.appendEncoded(uri, path);
324        }
325        if (query != null) {
326            uri.append('?');
327            ALL_LEGAL_ENCODER.appendEncoded(uri, query);
328        }
329        if (fragment != null) {
330            uri.append('#');
331            ALL_LEGAL_ENCODER.appendEncoded(uri, fragment);
332        }
333
334        parseURI(uri.toString(), false);
335    }
336
337    /**
338     * Breaks uri into its component parts. This first splits URI into scheme,
339     * scheme-specific part and fragment:
340     *   [scheme:][scheme-specific part][#fragment]
341     *
342     * Then it breaks the scheme-specific part into authority, path and query:
343     *   [//authority][path][?query]
344     *
345     * Finally it delegates to parseAuthority to break the authority into user
346     * info, host and port:
347     *   [user-info@][host][:port]
348     */
349    private void parseURI(String uri, boolean forceServer) throws URISyntaxException {
350        string = uri;
351
352        // "#fragment"
353        int fragmentStart = UrlUtils.findFirstOf(uri, "#", 0, uri.length());
354        if (fragmentStart < uri.length()) {
355            fragment = ALL_LEGAL_ENCODER.validate(uri, fragmentStart + 1, uri.length(), "fragment");
356        }
357
358        // scheme:
359        int start;
360        int colon = UrlUtils.findFirstOf(uri, ":", 0, fragmentStart);
361        if (colon < UrlUtils.findFirstOf(uri, "/?#", 0, fragmentStart)) {
362            absolute = true;
363            scheme = validateScheme(uri, colon);
364            start = colon + 1;
365
366            if (start == fragmentStart) {
367                throw new URISyntaxException(uri, "Scheme-specific part expected", start);
368            }
369
370            // URIs with schemes followed by a non-/ char are opaque and need no further parsing.
371            if (!uri.regionMatches(start, "/", 0, 1)) {
372                opaque = true;
373                schemeSpecificPart = ALL_LEGAL_ENCODER.validate(
374                        uri, start, fragmentStart, "scheme specific part");
375                return;
376            }
377        } else {
378            absolute = false;
379            start = 0;
380        }
381
382        opaque = false;
383        schemeSpecificPart = uri.substring(start, fragmentStart);
384
385        // "//authority"
386        int fileStart;
387        if (uri.regionMatches(start, "//", 0, 2)) {
388            int authorityStart = start + 2;
389            fileStart = UrlUtils.findFirstOf(uri, "/?", authorityStart, fragmentStart);
390            if (authorityStart == uri.length()) {
391                throw new URISyntaxException(uri, "Authority expected", uri.length());
392            }
393            if (authorityStart < fileStart) {
394                authority = AUTHORITY_ENCODER.validate(uri, authorityStart, fileStart, "authority");
395            }
396        } else {
397            fileStart = start;
398        }
399
400        // "path"
401        int queryStart = UrlUtils.findFirstOf(uri, "?", fileStart, fragmentStart);
402        path = PATH_ENCODER.validate(uri, fileStart, queryStart, "path");
403
404        // "?query"
405        if (queryStart < fragmentStart) {
406            query = ALL_LEGAL_ENCODER.validate(uri, queryStart + 1, fragmentStart, "query");
407        }
408
409        parseAuthority(forceServer);
410    }
411
412    private String validateScheme(String uri, int end) throws URISyntaxException {
413        if (end == 0) {
414            throw new URISyntaxException(uri, "Scheme expected", 0);
415        }
416
417        for (int i = 0; i < end; i++) {
418            if (!UrlUtils.isValidSchemeChar(i, uri.charAt(i))) {
419                throw new URISyntaxException(uri, "Illegal character in scheme", 0);
420            }
421        }
422
423        return uri.substring(0, end);
424    }
425
426    /**
427     * Breaks this URI's authority into user info, host and port parts.
428     *   [user-info@][host][:port]
429     * If any part of this fails this method will give up and potentially leave
430     * these fields with their default values.
431     *
432     * @param forceServer true to always throw if the authority cannot be
433     *     parsed. If false, this method may still throw for some kinds of
434     *     errors; this unpredictable behavior is consistent with the RI.
435     */
436    private void parseAuthority(boolean forceServer) throws URISyntaxException {
437        if (authority == null) {
438            return;
439        }
440
441        String tempUserInfo = null;
442        String temp = authority;
443        int index = temp.indexOf('@');
444        int hostIndex = 0;
445        if (index != -1) {
446            // remove user info
447            tempUserInfo = temp.substring(0, index);
448            validateUserInfo(authority, tempUserInfo, 0);
449            temp = temp.substring(index + 1); // host[:port] is left
450            hostIndex = index + 1;
451        }
452
453        index = temp.lastIndexOf(':');
454        int endIndex = temp.indexOf(']');
455
456        String tempHost;
457        int tempPort = -1;
458        if (index != -1 && endIndex < index) {
459            // determine port and host
460            tempHost = temp.substring(0, index);
461
462            if (index < (temp.length() - 1)) { // port part is not empty
463                try {
464                    tempPort = Integer.parseInt(temp.substring(index + 1));
465                    if (tempPort < 0) {
466                        if (forceServer) {
467                            throw new URISyntaxException(authority,
468                                    "Invalid port number", hostIndex + index + 1);
469                        }
470                        return;
471                    }
472                } catch (NumberFormatException e) {
473                    if (forceServer) {
474                        throw new URISyntaxException(authority,
475                                "Invalid port number", hostIndex + index + 1);
476                    }
477                    return;
478                }
479            }
480        } else {
481            tempHost = temp;
482        }
483
484        if (tempHost.isEmpty()) {
485            if (forceServer) {
486                throw new URISyntaxException(authority, "Expected host", hostIndex);
487            }
488            return;
489        }
490
491        if (!isValidHost(forceServer, tempHost)) {
492            return;
493        }
494
495        // this is a server based uri,
496        // fill in the userInfo, host and port fields
497        userInfo = tempUserInfo;
498        host = tempHost;
499        port = tempPort;
500        serverAuthority = true;
501    }
502
503    private void validateUserInfo(String uri, String userInfo, int index)
504            throws URISyntaxException {
505        for (int i = 0; i < userInfo.length(); i++) {
506            char ch = userInfo.charAt(i);
507            if (ch == ']' || ch == '[') {
508                throw new URISyntaxException(uri, "Illegal character in userInfo", index + i);
509            }
510        }
511    }
512
513    /**
514     * Returns true if {@code host} is a well-formed host name or IP address.
515     *
516     * @param forceServer true to always throw if the host cannot be parsed. If
517     *     false, this method may still throw for some kinds of errors; this
518     *     unpredictable behavior is consistent with the RI.
519     */
520    private boolean isValidHost(boolean forceServer, String host) throws URISyntaxException {
521        if (host.startsWith("[")) {
522            // IPv6 address
523            if (!host.endsWith("]")) {
524                throw new URISyntaxException(host,
525                        "Expected a closing square bracket for IPv6 address", 0);
526            }
527            if (InetAddress.isNumeric(host)) {
528                // If it's numeric, the presence of square brackets guarantees
529                // that it's a numeric IPv6 address.
530                return true;
531            }
532            throw new URISyntaxException(host, "Malformed IPv6 address");
533        }
534
535        // '[' and ']' can only be the first char and last char
536        // of the host name
537        if (host.indexOf('[') != -1 || host.indexOf(']') != -1) {
538            throw new URISyntaxException(host, "Illegal character in host name", 0);
539        }
540
541        int index = host.lastIndexOf('.');
542        if (index < 0 || index == host.length() - 1
543                || !Character.isDigit(host.charAt(index + 1))) {
544            // domain name
545            if (isValidDomainName(host)) {
546                return true;
547            }
548            if (forceServer) {
549                throw new URISyntaxException(host, "Illegal character in host name", 0);
550            }
551            return false;
552        }
553
554        // IPv4 address?
555        try {
556            InetAddress ia = InetAddress.parseNumericAddress(host);
557            if (ia instanceof Inet4Address) {
558                return true;
559            }
560        } catch (IllegalArgumentException ignored) {
561        }
562
563        if (forceServer) {
564            throw new URISyntaxException(host, "Malformed IPv4 address", 0);
565        }
566        return false;
567    }
568
569    private boolean isValidDomainName(String host) {
570        try {
571            UriCodec.validateSimple(host, "-.");
572        } catch (URISyntaxException e) {
573            return false;
574        }
575
576        String lastLabel = null;
577        for (String token : host.split("\\.")) {
578            lastLabel = token;
579            if (lastLabel.startsWith("-") || lastLabel.endsWith("-")) {
580                return false;
581            }
582        }
583
584        if (lastLabel == null) {
585            return false;
586        }
587
588        if (!lastLabel.equals(host)) {
589            char ch = lastLabel.charAt(0);
590            if (ch >= '0' && ch <= '9') {
591                return false;
592            }
593        }
594        return true;
595    }
596
597    /**
598     * Compares this URI with the given argument {@code uri}. This method will
599     * return a negative value if this URI instance is less than the given
600     * argument and a positive value if this URI instance is greater than the
601     * given argument. The return value {@code 0} indicates that the two
602     * instances represent the same URI. To define the order the single parts of
603     * the URI are compared with each other. String components will be ordered
604     * in the natural case-sensitive way. A hierarchical URI is less than an
605     * opaque URI and if one part is {@code null} the URI with the undefined
606     * part is less than the other one.
607     *
608     * @param uri
609     *            the URI this instance has to compare with.
610     * @return the value representing the order of the two instances.
611     */
612    public int compareTo(URI uri) {
613        int ret;
614
615        // compare schemes
616        if (scheme == null && uri.scheme != null) {
617            return -1;
618        } else if (scheme != null && uri.scheme == null) {
619            return 1;
620        } else if (scheme != null && uri.scheme != null) {
621            ret = scheme.compareToIgnoreCase(uri.scheme);
622            if (ret != 0) {
623                return ret;
624            }
625        }
626
627        // compare opacities
628        if (!opaque && uri.opaque) {
629            return -1;
630        } else if (opaque && !uri.opaque) {
631            return 1;
632        } else if (opaque && uri.opaque) {
633            ret = schemeSpecificPart.compareTo(uri.schemeSpecificPart);
634            if (ret != 0) {
635                return ret;
636            }
637        } else {
638
639            // otherwise both must be hierarchical
640
641            // compare authorities
642            if (authority != null && uri.authority == null) {
643                return 1;
644            } else if (authority == null && uri.authority != null) {
645                return -1;
646            } else if (authority != null && uri.authority != null) {
647                if (host != null && uri.host != null) {
648                    // both are server based, so compare userInfo, host, port
649                    if (userInfo != null && uri.userInfo == null) {
650                        return 1;
651                    } else if (userInfo == null && uri.userInfo != null) {
652                        return -1;
653                    } else if (userInfo != null && uri.userInfo != null) {
654                        ret = userInfo.compareTo(uri.userInfo);
655                        if (ret != 0) {
656                            return ret;
657                        }
658                    }
659
660                    // userInfo's are the same, compare hostname
661                    ret = host.compareToIgnoreCase(uri.host);
662                    if (ret != 0) {
663                        return ret;
664                    }
665
666                    // compare port
667                    if (port != uri.port) {
668                        return port - uri.port;
669                    }
670                } else { // one or both are registry based, compare the whole
671                    // authority
672                    ret = authority.compareTo(uri.authority);
673                    if (ret != 0) {
674                        return ret;
675                    }
676                }
677            }
678
679            // authorities are the same
680            // compare paths
681            ret = path.compareTo(uri.path);
682            if (ret != 0) {
683                return ret;
684            }
685
686            // compare queries
687
688            if (query != null && uri.query == null) {
689                return 1;
690            } else if (query == null && uri.query != null) {
691                return -1;
692            } else if (query != null && uri.query != null) {
693                ret = query.compareTo(uri.query);
694                if (ret != 0) {
695                    return ret;
696                }
697            }
698        }
699
700        // everything else is identical, so compare fragments
701        if (fragment != null && uri.fragment == null) {
702            return 1;
703        } else if (fragment == null && uri.fragment != null) {
704            return -1;
705        } else if (fragment != null && uri.fragment != null) {
706            ret = fragment.compareTo(uri.fragment);
707            if (ret != 0) {
708                return ret;
709            }
710        }
711
712        // identical
713        return 0;
714    }
715
716    /**
717     * Returns the URI formed by parsing {@code uri}. This method behaves
718     * identically to the string constructor but throws a different exception
719     * on failure. The constructor fails with a checked {@link
720     * URISyntaxException}; this method fails with an unchecked {@link
721     * IllegalArgumentException}.
722     */
723    public static URI create(String uri) {
724        try {
725            return new URI(uri);
726        } catch (URISyntaxException e) {
727            throw new IllegalArgumentException(e.getMessage());
728        }
729    }
730
731    private URI duplicate() {
732        URI clone = new URI();
733        clone.absolute = absolute;
734        clone.authority = authority;
735        clone.fragment = fragment;
736        clone.host = host;
737        clone.opaque = opaque;
738        clone.path = path;
739        clone.port = port;
740        clone.query = query;
741        clone.scheme = scheme;
742        clone.schemeSpecificPart = schemeSpecificPart;
743        clone.userInfo = userInfo;
744        clone.serverAuthority = serverAuthority;
745        return clone;
746    }
747
748    /*
749     * Takes a string that may contain hex sequences like %F1 or %2b and
750     * converts the hex values following the '%' to lowercase
751     */
752    private String convertHexToLowerCase(String s) {
753        StringBuilder result = new StringBuilder("");
754        if (s.indexOf('%') == -1) {
755            return s;
756        }
757
758        int index, prevIndex = 0;
759        while ((index = s.indexOf('%', prevIndex)) != -1) {
760            result.append(s.substring(prevIndex, index + 1));
761            result.append(s.substring(index + 1, index + 3).toLowerCase(Locale.US));
762            index += 3;
763            prevIndex = index;
764        }
765        return result.toString();
766    }
767
768    /**
769     * Returns true if {@code first} and {@code second} are equal after
770     * unescaping hex sequences like %F1 and %2b.
771     */
772    private boolean escapedEquals(String first, String second) {
773        if (first.indexOf('%') != second.indexOf('%')) {
774            return first.equals(second);
775        }
776
777        int index, prevIndex = 0;
778        while ((index = first.indexOf('%', prevIndex)) != -1
779                && second.indexOf('%', prevIndex) == index) {
780            boolean match = first.substring(prevIndex, index).equals(
781                    second.substring(prevIndex, index));
782            if (!match) {
783                return false;
784            }
785
786            match = first.substring(index + 1, index + 3).equalsIgnoreCase(
787                    second.substring(index + 1, index + 3));
788            if (!match) {
789                return false;
790            }
791
792            index += 3;
793            prevIndex = index;
794        }
795        return first.substring(prevIndex).equals(second.substring(prevIndex));
796    }
797
798    @Override public boolean equals(Object o) {
799        if (!(o instanceof URI)) {
800            return false;
801        }
802        URI uri = (URI) o;
803
804        if (uri.fragment == null && fragment != null || uri.fragment != null
805                && fragment == null) {
806            return false;
807        } else if (uri.fragment != null && fragment != null) {
808            if (!escapedEquals(uri.fragment, fragment)) {
809                return false;
810            }
811        }
812
813        if (uri.scheme == null && scheme != null || uri.scheme != null
814                && scheme == null) {
815            return false;
816        } else if (uri.scheme != null && scheme != null) {
817            if (!uri.scheme.equalsIgnoreCase(scheme)) {
818                return false;
819            }
820        }
821
822        if (uri.opaque && opaque) {
823            return escapedEquals(uri.schemeSpecificPart,
824                    schemeSpecificPart);
825        } else if (!uri.opaque && !opaque) {
826            if (!escapedEquals(path, uri.path)) {
827                return false;
828            }
829
830            if (uri.query != null && query == null || uri.query == null
831                    && query != null) {
832                return false;
833            } else if (uri.query != null && query != null) {
834                if (!escapedEquals(uri.query, query)) {
835                    return false;
836                }
837            }
838
839            if (uri.authority != null && authority == null
840                    || uri.authority == null && authority != null) {
841                return false;
842            } else if (uri.authority != null && authority != null) {
843                if (uri.host != null && host == null || uri.host == null
844                        && host != null) {
845                    return false;
846                } else if (uri.host == null && host == null) {
847                    // both are registry based, so compare the whole authority
848                    return escapedEquals(uri.authority, authority);
849                } else { // uri.host != null && host != null, so server-based
850                    if (!host.equalsIgnoreCase(uri.host)) {
851                        return false;
852                    }
853
854                    if (port != uri.port) {
855                        return false;
856                    }
857
858                    if (uri.userInfo != null && userInfo == null
859                            || uri.userInfo == null && userInfo != null) {
860                        return false;
861                    } else if (uri.userInfo != null && userInfo != null) {
862                        return escapedEquals(userInfo, uri.userInfo);
863                    } else {
864                        return true;
865                    }
866                }
867            } else {
868                // no authority
869                return true;
870            }
871
872        } else {
873            // one is opaque, the other hierarchical
874            return false;
875        }
876    }
877
878    /**
879     * Returns the scheme of this URI, or null if this URI has no scheme. This
880     * is also known as the protocol.
881     */
882    public String getScheme() {
883        return scheme;
884    }
885
886    /**
887     * Returns the decoded scheme-specific part of this URI, or null if this URI
888     * has no scheme-specific part.
889     */
890    public String getSchemeSpecificPart() {
891        return decode(schemeSpecificPart);
892    }
893
894    /**
895     * Returns the encoded scheme-specific part of this URI, or null if this URI
896     * has no scheme-specific part.
897     */
898    public String getRawSchemeSpecificPart() {
899        return schemeSpecificPart;
900    }
901
902    /**
903     * Returns the decoded authority part of this URI, or null if this URI has
904     * no authority.
905     */
906    public String getAuthority() {
907        return decode(authority);
908    }
909
910    /**
911     * Returns the encoded authority of this URI, or null if this URI has no
912     * authority.
913     */
914    public String getRawAuthority() {
915        return authority;
916    }
917
918    /**
919     * Returns the decoded user info of this URI, or null if this URI has no
920     * user info.
921     */
922    public String getUserInfo() {
923        return decode(userInfo);
924    }
925
926    /**
927     * Returns the encoded user info of this URI, or null if this URI has no
928     * user info.
929     */
930    public String getRawUserInfo() {
931        return userInfo;
932    }
933
934    /**
935     * Returns the host of this URI, or null if this URI has no host.
936     */
937    public String getHost() {
938        return host;
939    }
940
941    /**
942     * Returns the port number of this URI, or {@code -1} if this URI has no
943     * explicit port.
944     */
945    public int getPort() {
946        return port;
947    }
948
949    /** @hide */
950    public int getEffectivePort() {
951        return getEffectivePort(scheme, port);
952    }
953
954    /**
955     * Returns the port to use for {@code scheme} connections will use when
956     * {@link #getPort} returns {@code specifiedPort}.
957     *
958     * @hide
959     */
960    public static int getEffectivePort(String scheme, int specifiedPort) {
961        if (specifiedPort != -1) {
962            return specifiedPort;
963        }
964
965        if ("http".equalsIgnoreCase(scheme)) {
966            return 80;
967        } else if ("https".equalsIgnoreCase(scheme)) {
968            return 443;
969        } else {
970            return -1;
971        }
972    }
973
974    /**
975     * Returns the decoded path of this URI, or null if this URI has no path.
976     */
977    public String getPath() {
978        return decode(path);
979    }
980
981    /**
982     * Returns the encoded path of this URI, or null if this URI has no path.
983     */
984    public String getRawPath() {
985        return path;
986    }
987
988    /**
989     * Returns the decoded query of this URI, or null if this URI has no query.
990     */
991    public String getQuery() {
992        return decode(query);
993    }
994
995    /**
996     * Returns the encoded query of this URI, or null if this URI has no query.
997     */
998    public String getRawQuery() {
999        return query;
1000    }
1001
1002    /**
1003     * Returns the decoded fragment of this URI, or null if this URI has no
1004     * fragment.
1005     */
1006    public String getFragment() {
1007        return decode(fragment);
1008    }
1009
1010    /**
1011     * Gets the encoded fragment of this URI, or null if this URI has no
1012     * fragment.
1013     */
1014    public String getRawFragment() {
1015        return fragment;
1016    }
1017
1018    @Override public int hashCode() {
1019        if (hash == -1) {
1020            hash = getHashString().hashCode();
1021        }
1022        return hash;
1023    }
1024
1025    /**
1026     * Returns true if this URI is absolute, which means that a scheme is
1027     * defined.
1028     */
1029    public boolean isAbsolute() {
1030        // TODO: simplify to 'scheme != null' ?
1031        return absolute;
1032    }
1033
1034    /**
1035     * Returns true if this URI is opaque. Opaque URIs are absolute and have a
1036     * scheme-specific part that does not start with a slash character. All
1037     * parts except scheme, scheme-specific and fragment are undefined.
1038     */
1039    public boolean isOpaque() {
1040        return opaque;
1041    }
1042
1043    /**
1044     * Returns the normalized path.
1045     */
1046    private String normalize(String path, boolean discardRelativePrefix) {
1047        path = UrlUtils.canonicalizePath(path, discardRelativePrefix);
1048
1049        /*
1050         * If the path contains a colon before the first colon, prepend
1051         * "./" to differentiate the path from a scheme prefix.
1052         */
1053        int colon = path.indexOf(':');
1054        if (colon != -1) {
1055            int slash = path.indexOf('/');
1056            if (slash == -1 || colon < slash) {
1057                path = "./" + path;
1058            }
1059        }
1060
1061        return path;
1062    }
1063
1064    /**
1065     * Normalizes the path part of this URI.
1066     *
1067     * @return an URI object which represents this instance with a normalized
1068     *         path.
1069     */
1070    public URI normalize() {
1071        if (opaque) {
1072            return this;
1073        }
1074        String normalizedPath = normalize(path, false);
1075        // if the path is already normalized, return this
1076        if (path.equals(normalizedPath)) {
1077            return this;
1078        }
1079        // get an exact copy of the URI re-calculate the scheme specific part
1080        // since the path of the normalized URI is different from this URI.
1081        URI result = duplicate();
1082        result.path = normalizedPath;
1083        result.setSchemeSpecificPart();
1084        return result;
1085    }
1086
1087    /**
1088     * Tries to parse the authority component of this URI to divide it into the
1089     * host, port, and user-info. If this URI is already determined as a
1090     * ServerAuthority this instance will be returned without changes.
1091     *
1092     * @return this instance with the components of the parsed server authority.
1093     * @throws URISyntaxException
1094     *             if the authority part could not be parsed as a server-based
1095     *             authority.
1096     */
1097    public URI parseServerAuthority() throws URISyntaxException {
1098        if (!serverAuthority) {
1099            parseAuthority(true);
1100        }
1101        return this;
1102    }
1103
1104    /**
1105     * Makes the given URI {@code relative} to a relative URI against the URI
1106     * represented by this instance.
1107     *
1108     * @param relative
1109     *            the URI which has to be relativized against this URI.
1110     * @return the relative URI.
1111     */
1112    public URI relativize(URI relative) {
1113        if (relative.opaque || opaque) {
1114            return relative;
1115        }
1116
1117        if (scheme == null ? relative.scheme != null : !scheme
1118                .equals(relative.scheme)) {
1119            return relative;
1120        }
1121
1122        if (authority == null ? relative.authority != null : !authority
1123                .equals(relative.authority)) {
1124            return relative;
1125        }
1126
1127        // normalize both paths
1128        String thisPath = normalize(path, false);
1129        String relativePath = normalize(relative.path, false);
1130
1131        /*
1132         * if the paths aren't equal, then we need to determine if this URI's
1133         * path is a parent path (begins with) the relative URI's path
1134         */
1135        if (!thisPath.equals(relativePath)) {
1136            // drop everything after the last slash in this path
1137            thisPath = thisPath.substring(0, thisPath.lastIndexOf('/') + 1);
1138
1139            /*
1140             * if the relative URI's path doesn't start with this URI's path,
1141             * then just return the relative URI; the URIs have nothing in
1142             * common
1143             */
1144            if (!relativePath.startsWith(thisPath)) {
1145                return relative;
1146            }
1147        }
1148
1149        URI result = new URI();
1150        result.fragment = relative.fragment;
1151        result.query = relative.query;
1152        // the result URI is the remainder of the relative URI's path
1153        result.path = relativePath.substring(thisPath.length());
1154        result.setSchemeSpecificPart();
1155        return result;
1156    }
1157
1158    /**
1159     * Resolves the given URI {@code relative} against the URI represented by
1160     * this instance.
1161     *
1162     * @param relative
1163     *            the URI which has to be resolved against this URI.
1164     * @return the resolved URI.
1165     */
1166    public URI resolve(URI relative) {
1167        if (relative.absolute || opaque) {
1168            return relative;
1169        }
1170
1171        if (relative.authority != null) {
1172            // If the relative URI has an authority, the result is the relative
1173            // with this URI's scheme.
1174            URI result = relative.duplicate();
1175            result.scheme = scheme;
1176            result.absolute = absolute;
1177            return result;
1178        }
1179
1180        if (relative.path.isEmpty() && relative.scheme == null && relative.query == null) {
1181            // if the relative URI only consists of at most a fragment,
1182            URI result = duplicate();
1183            result.fragment = relative.fragment;
1184            return result;
1185        }
1186
1187        URI result = duplicate();
1188        result.fragment = relative.fragment;
1189        result.query = relative.query;
1190        String resolvedPath;
1191        if (relative.path.startsWith("/")) {
1192            // The relative URI has an absolute path; use it.
1193            resolvedPath = relative.path;
1194        } else if (relative.path.isEmpty()) {
1195            // The relative URI has no path; use the base path.
1196            resolvedPath = path;
1197        } else {
1198            // The relative URI has a relative path; combine the paths.
1199            int endIndex = path.lastIndexOf('/') + 1;
1200            resolvedPath = path.substring(0, endIndex) + relative.path;
1201        }
1202        result.path = UrlUtils.authoritySafePath(result.authority, normalize(resolvedPath, true));
1203        result.setSchemeSpecificPart();
1204        return result;
1205    }
1206
1207    /**
1208     * Helper method used to re-calculate the scheme specific part of the
1209     * resolved or normalized URIs
1210     */
1211    private void setSchemeSpecificPart() {
1212        // ssp = [//authority][path][?query]
1213        StringBuilder ssp = new StringBuilder();
1214        if (authority != null) {
1215            ssp.append("//" + authority);
1216        }
1217        if (path != null) {
1218            ssp.append(path);
1219        }
1220        if (query != null) {
1221            ssp.append("?" + query);
1222        }
1223        schemeSpecificPart = ssp.toString();
1224        // reset string, so that it can be re-calculated correctly when asked.
1225        string = null;
1226    }
1227
1228    /**
1229     * Creates a new URI instance by parsing the given string {@code relative}
1230     * and resolves the created URI against the URI represented by this
1231     * instance.
1232     *
1233     * @param relative
1234     *            the given string to create the new URI instance which has to
1235     *            be resolved later on.
1236     * @return the created and resolved URI.
1237     */
1238    public URI resolve(String relative) {
1239        return resolve(create(relative));
1240    }
1241
1242    private String decode(String s) {
1243        return s != null ? UriCodec.decode(s) : null;
1244    }
1245
1246    /**
1247     * Returns the textual string representation of this URI instance using the
1248     * US-ASCII encoding.
1249     *
1250     * @return the US-ASCII string representation of this URI.
1251     */
1252    public String toASCIIString() {
1253        StringBuilder result = new StringBuilder();
1254        ASCII_ONLY.appendEncoded(result, toString());
1255        return result.toString();
1256    }
1257
1258    /**
1259     * Returns the encoded URI.
1260     */
1261    @Override public String toString() {
1262        if (string != null) {
1263            return string;
1264        }
1265
1266        StringBuilder result = new StringBuilder();
1267        if (scheme != null) {
1268            result.append(scheme);
1269            result.append(':');
1270        }
1271        if (opaque) {
1272            result.append(schemeSpecificPart);
1273        } else {
1274            if (authority != null) {
1275                result.append("//");
1276                result.append(authority);
1277            }
1278
1279            if (path != null) {
1280                result.append(path);
1281            }
1282
1283            if (query != null) {
1284                result.append('?');
1285                result.append(query);
1286            }
1287        }
1288
1289        if (fragment != null) {
1290            result.append('#');
1291            result.append(fragment);
1292        }
1293
1294        string = result.toString();
1295        return string;
1296    }
1297
1298    /*
1299     * Form a string from the components of this URI, similarly to the
1300     * toString() method. But this method converts scheme and host to lowercase,
1301     * and converts escaped octets to lowercase.
1302     */
1303    private String getHashString() {
1304        StringBuilder result = new StringBuilder();
1305        if (scheme != null) {
1306            result.append(scheme.toLowerCase(Locale.US));
1307            result.append(':');
1308        }
1309        if (opaque) {
1310            result.append(schemeSpecificPart);
1311        } else {
1312            if (authority != null) {
1313                result.append("//");
1314                if (host == null) {
1315                    result.append(authority);
1316                } else {
1317                    if (userInfo != null) {
1318                        result.append(userInfo + "@");
1319                    }
1320                    result.append(host.toLowerCase(Locale.US));
1321                    if (port != -1) {
1322                        result.append(":" + port);
1323                    }
1324                }
1325            }
1326
1327            if (path != null) {
1328                result.append(path);
1329            }
1330
1331            if (query != null) {
1332                result.append('?');
1333                result.append(query);
1334            }
1335        }
1336
1337        if (fragment != null) {
1338            result.append('#');
1339            result.append(fragment);
1340        }
1341
1342        return convertHexToLowerCase(result.toString());
1343    }
1344
1345    /**
1346     * Converts this URI instance to a URL.
1347     *
1348     * @return the created URL representing the same resource as this URI.
1349     * @throws MalformedURLException
1350     *             if an error occurs while creating the URL or no protocol
1351     *             handler could be found.
1352     */
1353    public URL toURL() throws MalformedURLException {
1354        if (!absolute) {
1355            throw new IllegalArgumentException("URI is not absolute: " + toString());
1356        }
1357        return new URL(toString());
1358    }
1359
1360    private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
1361        in.defaultReadObject();
1362        try {
1363            parseURI(string, false);
1364        } catch (URISyntaxException e) {
1365            throw new IOException(e.toString());
1366        }
1367    }
1368
1369    private void writeObject(ObjectOutputStream out) throws IOException, ClassNotFoundException {
1370        // call toString() to ensure the value of string field is calculated
1371        toString();
1372        out.defaultWriteObject();
1373    }
1374}
1375