URI.java revision 30fac583a89ef577ed74629c41c1bea1ffbee4e8
1/*
2 *  Licensed to the Apache Software Foundation (ASF) under one or more
3 *  contributor license agreements.  See the NOTICE file distributed with
4 *  this work for additional information regarding copyright ownership.
5 *  The ASF licenses this file to You under the Apache License, Version 2.0
6 *  (the "License"); you may not use this file except in compliance with
7 *  the License.  You may obtain a copy of the License at
8 *
9 *     http://www.apache.org/licenses/LICENSE-2.0
10 *
11 *  Unless required by applicable law or agreed to in writing, software
12 *  distributed under the License is distributed on an "AS IS" BASIS,
13 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 *  See the License for the specific language governing permissions and
15 *  limitations under the License.
16 */
17
18package java.net;
19
20import java.io.IOException;
21import java.io.ObjectInputStream;
22import java.io.ObjectOutputStream;
23import java.io.Serializable;
24import java.util.Locale;
25import libcore.net.UriCodec;
26import libcore.net.url.UrlUtils;
27
28/**
29 * A Uniform Resource Identifier that identifies an abstract or physical
30 * resource, as specified by <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC
31 * 2396</a>.
32 *
33 * <h3>Parts of a URI</h3>
34 * A URI is composed of many parts. This class can both parse URI strings into
35 * parts and compose URI strings from parts. For example, consider the parts of
36 * this URI:
37 * {@code http://username:password@host:8080/directory/file?query#fragment}
38 * <table>
39 * <tr><th>Component                                            </th><th>Example value                                                      </th><th>Also known as</th></tr>
40 * <tr><td>{@link #getScheme() Scheme}                          </td><td>{@code http}                                                       </td><td>protocol</td></tr>
41 * <tr><td>{@link #getSchemeSpecificPart() Scheme-specific part}</td><td>{@code //username:password@host:8080/directory/file?query#fragment}</td><td></td></tr>
42 * <tr><td>{@link #getAuthority() Authority}                    </td><td>{@code username:password@host:8080}                                </td><td></td></tr>
43 * <tr><td>{@link #getUserInfo() User Info}                     </td><td>{@code username:password}                                          </td><td></td></tr>
44 * <tr><td>{@link #getHost() Host}                              </td><td>{@code host}                                                       </td><td></td></tr>
45 * <tr><td>{@link #getPort() Port}                              </td><td>{@code 8080}                                                       </td><td></td></tr>
46 * <tr><td>{@link #getPath() Path}                              </td><td>{@code /directory/file}                                            </td><td></td></tr>
47 * <tr><td>{@link #getQuery() Query}                            </td><td>{@code query}                                                      </td><td></td></tr>
48 * <tr><td>{@link #getFragment() Fragment}                      </td><td>{@code fragment}                                                   </td><td>ref</td></tr>
49 * </table>
50 *
51 * <h3>Absolute vs. Relative URIs</h3>
52 * URIs are either {@link #isAbsolute() absolute or relative}.
53 * <ul>
54 *     <li><strong>Absolute:</strong> {@code http://android.com/robots.txt}
55 *     <li><strong>Relative:</strong> {@code robots.txt}
56 * </ul>
57 *
58 * <p>Absolute URIs always have a scheme. If its scheme is supported by {@link
59 * URL}, you can use {@link #toURL} to convert an absolute URI to a URL.
60 *
61 * <p>Relative URIs do not have a scheme and cannot be converted to URLs. If you
62 * have the absolute URI that a relative URI is relative to, you can use {@link
63 * #resolve} to compute the referenced absolute URI. Symmetrically, you can use
64 * {@link #relativize} to compute the relative URI from one URI to another.
65 * <pre>   {@code
66 *   URI absolute = new URI("http://android.com/");
67 *   URI relative = new URI("robots.txt");
68 *   URI resolved = new URI("http://android.com/robots.txt");
69 *
70 *   // print "http://android.com/robots.txt"
71 *   System.out.println(absolute.resolve(relative));
72 *
73 *   // print "robots.txt"
74 *   System.out.println(absolute.relativize(resolved));
75 * }</pre>
76 *
77 * <h3>Opaque vs. Hierarchical URIs</h3>
78 * Absolute URIs are either {@link #isOpaque() opaque or hierarchical}. Relative
79 * URIs are always hierarchical.
80 * <ul>
81 *     <li><strong>Hierarchical:</strong> {@code http://android.com/robots.txt}
82 *     <li><strong>Opaque:</strong> {@code mailto:robots@example.com}
83 * </ul>
84 *
85 * <p>Opaque URIs have both a scheme and a scheme-specific part that does not
86 * begin with the slash character: {@code /}. The contents of the
87 * scheme-specific part of an opaque URI are not parsed so an opaque URI never
88 * has an authority, user info, host, port, path or query. An opaque URI may
89 * have a fragment, however. A typical opaque URI is
90 * {@code mailto:robots@example.com}.
91 * <table>
92 * <tr><th>Component           </th><th>Example value             </th></tr>
93 * <tr><td>Scheme              </td><td>{@code mailto}            </td></tr>
94 * <tr><td>Scheme-specific part</td><td>{@code robots@example.com}</td></tr>
95 * <tr><td>Fragment            </td><td>                          </td></tr>
96 * </table>
97 * <p>Hierarchical URIs may have values for any URL component. They always
98 * have a non-null path, though that path may be the empty string.
99 *
100 * <h3>Encoding and Decoding URI Components</h3>
101 * Each component of a URI permits a limited set of legal characters. Other
102 * characters must first be <i>encoded</i> before they can be embedded in a URI.
103 * To recover the original characters from a URI, they may be <i>decoded</i>.
104 * <strong>Contrary to what you might expect,</strong> this class uses the
105 * term <i>raw</i> to refer to encoded strings. The non-<i>raw</i> accessors
106 * return decoded strings. For example, consider how this URI is decoded:
107 * {@code http://user:pa55w%3Frd@host:80/doc%7Csearch?q=green%20robots#over%206%22}
108 * <table>
109 * <tr><th>Component           </th><th>Legal Characters                                                    </th><th>Other Constraints                                  </th><th>Raw Value                                                      </th><th>Value</th></tr>
110 * <tr><td>Scheme              </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code +-.}                  </td><td>First character must be in {@code a-z}, {@code A-Z}</td><td>                                                               </td><td>{@code http}</td></tr>
111 * <tr><td>Scheme-specific part</td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=?/[]@}</td><td>Non-ASCII characters okay                          </td><td>{@code //user:pa55w%3Frd@host:80/doc%7Csearch?q=green%20robots}</td><td>{@code //user:pa55w?rd@host:80/doc|search?q=green robots}</td></tr>
112 * <tr><td>Authority           </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=@[]}  </td><td>Non-ASCII characters okay                          </td><td>{@code user:pa55w%3Frd@host:80}                                </td><td>{@code user:pa55w?rd@host:80}</td></tr>
113 * <tr><td>User Info           </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=}     </td><td>Non-ASCII characters okay                          </td><td>{@code user:pa55w%3Frd}                                        </td><td>{@code user:pa55w?rd}</td></tr>
114 * <tr><td>Host                </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code -.[]}                 </td><td>Domain name, IPv4 address or [IPv6 address]        </td><td>                                                               </td><td>host</td></tr>
115 * <tr><td>Port                </td><td>{@code 0-9}                                                         </td><td>                                                   </td><td>                                                               </td><td>{@code 80}</td></tr>
116 * <tr><td>Path                </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=/@}   </td><td>Non-ASCII characters okay                          </td><td>{@code /doc%7Csearch}                                          </td><td>{@code /doc|search}</td></tr>
117 * <tr><td>Query               </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=?/[]@}</td><td>Non-ASCII characters okay                          </td><td>{@code q=green%20robots}                                       </td><td>{@code q=green robots}</td></tr>
118 * <tr><td>Fragment            </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=?/[]@}</td><td>Non-ASCII characters okay                          </td><td>{@code over%206%22}                                            </td><td>{@code over 6"}</td></tr>
119 * </table>
120 * A URI's host, port and scheme are not eligible for encoding and must not
121 * contain illegal characters.
122 *
123 * <p>To encode a URI, invoke any of the multiple-parameter constructors of this
124 * class. These constructors accept your original strings and encode them into
125 * their raw form.
126 *
127 * <p>To decode a URI, invoke the single-string constructor, and then use the
128 * appropriate accessor methods to get the decoded components.
129 *
130 * <p>The {@link URL} class can be used to retrieve resources by their URI.
131 */
132public final class URI implements Comparable<URI>, Serializable {
133
134    private static final long serialVersionUID = -6052424284110960213l;
135
136    static final String UNRESERVED = "_-!.~\'()*";
137    static final String PUNCTUATION = ",;:$&+=";
138
139    static final UriCodec USER_INFO_ENCODER = new PartEncoder("");
140    static final UriCodec PATH_ENCODER = new PartEncoder("/@");
141    static final UriCodec AUTHORITY_ENCODER = new PartEncoder("@[]");
142
143    /** for java.net.URL, which foolishly combines these two parts */
144    static final UriCodec FILE_AND_QUERY_ENCODER = new PartEncoder("/@?");
145
146    /** for query, fragment, and scheme-specific part */
147    static final UriCodec ALL_LEGAL_ENCODER = new PartEncoder("?/[]@");
148
149    /** Retains all ASCII chars including delimiters. */
150    private static final UriCodec ASCII_ONLY = new UriCodec() {
151        @Override protected boolean isRetained(char c) {
152            return c <= 127;
153        }
154    };
155
156    /**
157     * Encodes the unescaped characters of {@code s} that are not permitted.
158     * Permitted characters are:
159     * <ul>
160     *   <li>Unreserved characters in <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>.
161     *   <li>{@code extraOkayChars},
162     *   <li>non-ASCII, non-control, non-whitespace characters
163     * </ul>
164     */
165    private static class PartEncoder extends UriCodec {
166        private final String extraLegalCharacters;
167
168        PartEncoder(String extraLegalCharacters) {
169            this.extraLegalCharacters = extraLegalCharacters;
170        }
171
172        @Override protected boolean isRetained(char c) {
173            return UNRESERVED.indexOf(c) != -1
174                    || PUNCTUATION.indexOf(c) != -1
175                    || extraLegalCharacters.indexOf(c) != -1
176                    || (c > 127 && !Character.isSpaceChar(c) && !Character.isISOControl(c));
177        }
178    }
179
180    private String string;
181    private transient String scheme;
182    private transient String schemeSpecificPart;
183    private transient String authority;
184    private transient String userInfo;
185    private transient String host;
186    private transient int port = -1;
187    private transient String path;
188    private transient String query;
189    private transient String fragment;
190    private transient boolean opaque;
191    private transient boolean absolute;
192    private transient boolean serverAuthority = false;
193
194    private transient int hash = -1;
195
196    private URI() {}
197
198    /**
199     * Creates a new URI instance by parsing {@code spec}.
200     *
201     * @param spec a URI whose illegal characters have all been encoded.
202     */
203    public URI(String spec) throws URISyntaxException {
204        parseURI(spec, false);
205    }
206
207    /**
208     * Creates a new URI instance of the given unencoded component parts.
209     *
210     * @param scheme the URI scheme, or null for a non-absolute URI.
211     */
212    public URI(String scheme, String schemeSpecificPart, String fragment)
213            throws URISyntaxException {
214        StringBuilder uri = new StringBuilder();
215        if (scheme != null) {
216            uri.append(scheme);
217            uri.append(':');
218        }
219        if (schemeSpecificPart != null) {
220            ALL_LEGAL_ENCODER.appendEncoded(uri, schemeSpecificPart);
221        }
222        if (fragment != null) {
223            uri.append('#');
224            ALL_LEGAL_ENCODER.appendEncoded(uri, fragment);
225        }
226
227        parseURI(uri.toString(), false);
228    }
229
230    /**
231     * Creates a new URI instance of the given unencoded component parts.
232     *
233     * @param scheme the URI scheme, or null for a non-absolute URI.
234     */
235    public URI(String scheme, String userInfo, String host, int port, String path, String query,
236            String fragment) throws URISyntaxException {
237        if (scheme == null && userInfo == null && host == null && path == null
238                && query == null && fragment == null) {
239            this.path = "";
240            return;
241        }
242
243        if (scheme != null && path != null && !path.isEmpty() && path.charAt(0) != '/') {
244            throw new URISyntaxException(path, "Relative path");
245        }
246
247        StringBuilder uri = new StringBuilder();
248        if (scheme != null) {
249            uri.append(scheme);
250            uri.append(':');
251        }
252
253        if (userInfo != null || host != null || port != -1) {
254            uri.append("//");
255        }
256
257        if (userInfo != null) {
258            USER_INFO_ENCODER.appendEncoded(uri, userInfo);
259            uri.append('@');
260        }
261
262        if (host != null) {
263            // check for IPv6 addresses that hasn't been enclosed in square brackets
264            if (host.indexOf(':') != -1 && host.indexOf(']') == -1 && host.indexOf('[') == -1) {
265                host = "[" + host + "]";
266            }
267            uri.append(host);
268        }
269
270        if (port != -1) {
271            uri.append(':');
272            uri.append(port);
273        }
274
275        if (path != null) {
276            PATH_ENCODER.appendEncoded(uri, path);
277        }
278
279        if (query != null) {
280            uri.append('?');
281            ALL_LEGAL_ENCODER.appendEncoded(uri, query);
282        }
283
284        if (fragment != null) {
285            uri.append('#');
286            ALL_LEGAL_ENCODER.appendEncoded(uri, fragment);
287        }
288
289        parseURI(uri.toString(), true);
290    }
291
292    /**
293     * Creates a new URI instance of the given unencoded component parts.
294     *
295     * @param scheme the URI scheme, or null for a non-absolute URI.
296     */
297    public URI(String scheme, String host, String path, String fragment) throws URISyntaxException {
298        this(scheme, null, host, -1, path, null, fragment);
299    }
300
301    /**
302     * Creates a new URI instance of the given unencoded component parts.
303     *
304     * @param scheme the URI scheme, or null for a non-absolute URI.
305     */
306    public URI(String scheme, String authority, String path, String query,
307            String fragment) throws URISyntaxException {
308        if (scheme != null && path != null && !path.isEmpty() && path.charAt(0) != '/') {
309            throw new URISyntaxException(path, "Relative path");
310        }
311
312        StringBuilder uri = new StringBuilder();
313        if (scheme != null) {
314            uri.append(scheme);
315            uri.append(':');
316        }
317        if (authority != null) {
318            uri.append("//");
319            AUTHORITY_ENCODER.appendEncoded(uri, authority);
320        }
321
322        if (path != null) {
323            PATH_ENCODER.appendEncoded(uri, path);
324        }
325        if (query != null) {
326            uri.append('?');
327            ALL_LEGAL_ENCODER.appendEncoded(uri, query);
328        }
329        if (fragment != null) {
330            uri.append('#');
331            ALL_LEGAL_ENCODER.appendEncoded(uri, fragment);
332        }
333
334        parseURI(uri.toString(), false);
335    }
336
337    /**
338     * Breaks uri into its component parts. This first splits URI into scheme,
339     * scheme-specific part and fragment:
340     *   [scheme:][scheme-specific part][#fragment]
341     *
342     * Then it breaks the scheme-specific part into authority, path and query:
343     *   [//authority][path][?query]
344     *
345     * Finally it delegates to parseAuthority to break the authority into user
346     * info, host and port:
347     *   [user-info@][host][:port]
348     */
349    private void parseURI(String uri, boolean forceServer) throws URISyntaxException {
350        string = uri;
351
352        // "#fragment"
353        int fragmentStart = UrlUtils.findFirstOf(uri, "#", 0, uri.length());
354        if (fragmentStart < uri.length()) {
355            fragment = ALL_LEGAL_ENCODER.validate(uri, fragmentStart + 1, uri.length(), "fragment");
356        }
357
358        // scheme:
359        int start;
360        int colon = UrlUtils.findFirstOf(uri, ":", 0, fragmentStart);
361        if (colon < UrlUtils.findFirstOf(uri, "/?#", 0, fragmentStart)) {
362            absolute = true;
363            scheme = validateScheme(uri, colon);
364            start = colon + 1;
365
366            if (start == fragmentStart) {
367                throw new URISyntaxException(uri, "Scheme-specific part expected", start);
368            }
369
370            // URIs with schemes followed by a non-/ char are opaque and need no further parsing.
371            if (!uri.regionMatches(start, "/", 0, 1)) {
372                opaque = true;
373                schemeSpecificPart = ALL_LEGAL_ENCODER.validate(
374                        uri, start, fragmentStart, "scheme specific part");
375                return;
376            }
377        } else {
378            absolute = false;
379            start = 0;
380        }
381
382        opaque = false;
383        schemeSpecificPart = uri.substring(start, fragmentStart);
384
385        // "//authority"
386        int fileStart;
387        if (uri.regionMatches(start, "//", 0, 2)) {
388            int authorityStart = start + 2;
389            fileStart = UrlUtils.findFirstOf(uri, "/?", authorityStart, fragmentStart);
390            if (authorityStart == uri.length()) {
391                throw new URISyntaxException(uri, "Authority expected", uri.length());
392            }
393            if (authorityStart < fileStart) {
394                authority = AUTHORITY_ENCODER.validate(uri, authorityStart, fileStart, "authority");
395            }
396        } else {
397            fileStart = start;
398        }
399
400        // "path"
401        int queryStart = UrlUtils.findFirstOf(uri, "?", fileStart, fragmentStart);
402        path = PATH_ENCODER.validate(uri, fileStart, queryStart, "path");
403
404        // "?query"
405        if (queryStart < fragmentStart) {
406            query = ALL_LEGAL_ENCODER.validate(uri, queryStart + 1, fragmentStart, "query");
407        }
408
409        parseAuthority(forceServer);
410    }
411
412    private String validateScheme(String uri, int end) throws URISyntaxException {
413        if (end == 0) {
414            throw new URISyntaxException(uri, "Scheme expected", 0);
415        }
416
417        for (int i = 0; i < end; i++) {
418            if (!UrlUtils.isValidSchemeChar(i, uri.charAt(i))) {
419                throw new URISyntaxException(uri, "Illegal character in scheme", 0);
420            }
421        }
422
423        return uri.substring(0, end);
424    }
425
426    /**
427     * Breaks this URI's authority into user info, host and port parts.
428     *   [user-info@][host][:port]
429     * If any part of this fails this method will give up and potentially leave
430     * these fields with their default values.
431     *
432     * @param forceServer true to always throw if the authority cannot be
433     *     parsed. If false, this method may still throw for some kinds of
434     *     errors; this unpredictable behavior is consistent with the RI.
435     */
436    private void parseAuthority(boolean forceServer) throws URISyntaxException {
437        if (authority == null) {
438            return;
439        }
440
441        String tempUserInfo = null;
442        String temp = authority;
443        int index = temp.indexOf('@');
444        int hostIndex = 0;
445        if (index != -1) {
446            // remove user info
447            tempUserInfo = temp.substring(0, index);
448            validateUserInfo(authority, tempUserInfo, 0);
449            temp = temp.substring(index + 1); // host[:port] is left
450            hostIndex = index + 1;
451        }
452
453        index = temp.lastIndexOf(':');
454        int endIndex = temp.indexOf(']');
455
456        String tempHost;
457        int tempPort = -1;
458        if (index != -1 && endIndex < index) {
459            // determine port and host
460            tempHost = temp.substring(0, index);
461
462            if (index < (temp.length() - 1)) { // port part is not empty
463                try {
464                    char firstPortChar = temp.charAt(index + 1);
465                    if (firstPortChar >= '0' && firstPortChar <= '9') {
466                        // allow only digits, no signs
467                        tempPort = Integer.parseInt(temp.substring(index + 1));
468                    } else {
469                        if (forceServer) {
470                            throw new URISyntaxException(authority,
471                                "Invalid port number", hostIndex + index + 1);
472                        }
473                        return;
474                    }
475                } catch (NumberFormatException e) {
476                    if (forceServer) {
477                        throw new URISyntaxException(authority,
478                                "Invalid port number", hostIndex + index + 1);
479                    }
480                    return;
481                }
482            }
483        } else {
484            tempHost = temp;
485        }
486
487        if (tempHost.isEmpty()) {
488            if (forceServer) {
489                throw new URISyntaxException(authority, "Expected host", hostIndex);
490            }
491            return;
492        }
493
494        if (!isValidHost(forceServer, tempHost)) {
495            return;
496        }
497
498        // this is a server based uri,
499        // fill in the userInfo, host and port fields
500        userInfo = tempUserInfo;
501        host = tempHost;
502        port = tempPort;
503        serverAuthority = true;
504    }
505
506    private void validateUserInfo(String uri, String userInfo, int index)
507            throws URISyntaxException {
508        for (int i = 0; i < userInfo.length(); i++) {
509            char ch = userInfo.charAt(i);
510            if (ch == ']' || ch == '[') {
511                throw new URISyntaxException(uri, "Illegal character in userInfo", index + i);
512            }
513        }
514    }
515
516    /**
517     * Returns true if {@code host} is a well-formed host name or IP address.
518     *
519     * @param forceServer true to always throw if the host cannot be parsed. If
520     *     false, this method may still throw for some kinds of errors; this
521     *     unpredictable behavior is consistent with the RI.
522     */
523    private boolean isValidHost(boolean forceServer, String host) throws URISyntaxException {
524        if (host.startsWith("[")) {
525            // IPv6 address
526            if (!host.endsWith("]")) {
527                throw new URISyntaxException(host,
528                        "Expected a closing square bracket for IPv6 address", 0);
529            }
530            if (InetAddress.isNumeric(host)) {
531                // If it's numeric, the presence of square brackets guarantees
532                // that it's a numeric IPv6 address.
533                return true;
534            }
535            throw new URISyntaxException(host, "Malformed IPv6 address");
536        }
537
538        // '[' and ']' can only be the first char and last char
539        // of the host name
540        if (host.indexOf('[') != -1 || host.indexOf(']') != -1) {
541            throw new URISyntaxException(host, "Illegal character in host name", 0);
542        }
543
544        int index = host.lastIndexOf('.');
545        if (index < 0 || index == host.length() - 1
546                || !Character.isDigit(host.charAt(index + 1))) {
547            // domain name
548            if (isValidDomainName(host)) {
549                return true;
550            }
551            if (forceServer) {
552                throw new URISyntaxException(host, "Illegal character in host name", 0);
553            }
554            return false;
555        }
556
557        // IPv4 address?
558        try {
559            InetAddress ia = InetAddress.parseNumericAddress(host);
560            if (ia instanceof Inet4Address) {
561                return true;
562            }
563        } catch (IllegalArgumentException ignored) {
564        }
565
566        if (forceServer) {
567            throw new URISyntaxException(host, "Malformed IPv4 address", 0);
568        }
569        return false;
570    }
571
572    private boolean isValidDomainName(String host) {
573        try {
574            UriCodec.validateSimple(host, "-.");
575        } catch (URISyntaxException e) {
576            return false;
577        }
578
579        String lastLabel = null;
580        for (String token : host.split("\\.")) {
581            lastLabel = token;
582            if (lastLabel.startsWith("-") || lastLabel.endsWith("-")) {
583                return false;
584            }
585        }
586
587        if (lastLabel == null) {
588            return false;
589        }
590
591        if (!lastLabel.equals(host)) {
592            char ch = lastLabel.charAt(0);
593            if (ch >= '0' && ch <= '9') {
594                return false;
595            }
596        }
597        return true;
598    }
599
600    /**
601     * Compares this URI with the given argument {@code uri}. This method will
602     * return a negative value if this URI instance is less than the given
603     * argument and a positive value if this URI instance is greater than the
604     * given argument. The return value {@code 0} indicates that the two
605     * instances represent the same URI. To define the order the single parts of
606     * the URI are compared with each other. String components will be ordered
607     * in the natural case-sensitive way. A hierarchical URI is less than an
608     * opaque URI and if one part is {@code null} the URI with the undefined
609     * part is less than the other one.
610     *
611     * @param uri
612     *            the URI this instance has to compare with.
613     * @return the value representing the order of the two instances.
614     */
615    public int compareTo(URI uri) {
616        int ret;
617
618        // compare schemes
619        if (scheme == null && uri.scheme != null) {
620            return -1;
621        } else if (scheme != null && uri.scheme == null) {
622            return 1;
623        } else if (scheme != null && uri.scheme != null) {
624            ret = scheme.compareToIgnoreCase(uri.scheme);
625            if (ret != 0) {
626                return ret;
627            }
628        }
629
630        // compare opacities
631        if (!opaque && uri.opaque) {
632            return -1;
633        } else if (opaque && !uri.opaque) {
634            return 1;
635        } else if (opaque && uri.opaque) {
636            ret = schemeSpecificPart.compareTo(uri.schemeSpecificPart);
637            if (ret != 0) {
638                return ret;
639            }
640        } else {
641
642            // otherwise both must be hierarchical
643
644            // compare authorities
645            if (authority != null && uri.authority == null) {
646                return 1;
647            } else if (authority == null && uri.authority != null) {
648                return -1;
649            } else if (authority != null && uri.authority != null) {
650                if (host != null && uri.host != null) {
651                    // both are server based, so compare userInfo, host, port
652                    if (userInfo != null && uri.userInfo == null) {
653                        return 1;
654                    } else if (userInfo == null && uri.userInfo != null) {
655                        return -1;
656                    } else if (userInfo != null && uri.userInfo != null) {
657                        ret = userInfo.compareTo(uri.userInfo);
658                        if (ret != 0) {
659                            return ret;
660                        }
661                    }
662
663                    // userInfo's are the same, compare hostname
664                    ret = host.compareToIgnoreCase(uri.host);
665                    if (ret != 0) {
666                        return ret;
667                    }
668
669                    // compare port
670                    if (port != uri.port) {
671                        return port - uri.port;
672                    }
673                } else { // one or both are registry based, compare the whole
674                    // authority
675                    ret = authority.compareTo(uri.authority);
676                    if (ret != 0) {
677                        return ret;
678                    }
679                }
680            }
681
682            // authorities are the same
683            // compare paths
684            ret = path.compareTo(uri.path);
685            if (ret != 0) {
686                return ret;
687            }
688
689            // compare queries
690
691            if (query != null && uri.query == null) {
692                return 1;
693            } else if (query == null && uri.query != null) {
694                return -1;
695            } else if (query != null && uri.query != null) {
696                ret = query.compareTo(uri.query);
697                if (ret != 0) {
698                    return ret;
699                }
700            }
701        }
702
703        // everything else is identical, so compare fragments
704        if (fragment != null && uri.fragment == null) {
705            return 1;
706        } else if (fragment == null && uri.fragment != null) {
707            return -1;
708        } else if (fragment != null && uri.fragment != null) {
709            ret = fragment.compareTo(uri.fragment);
710            if (ret != 0) {
711                return ret;
712            }
713        }
714
715        // identical
716        return 0;
717    }
718
719    /**
720     * Returns the URI formed by parsing {@code uri}. This method behaves
721     * identically to the string constructor but throws a different exception
722     * on failure. The constructor fails with a checked {@link
723     * URISyntaxException}; this method fails with an unchecked {@link
724     * IllegalArgumentException}.
725     */
726    public static URI create(String uri) {
727        try {
728            return new URI(uri);
729        } catch (URISyntaxException e) {
730            throw new IllegalArgumentException(e.getMessage());
731        }
732    }
733
734    private URI duplicate() {
735        URI clone = new URI();
736        clone.absolute = absolute;
737        clone.authority = authority;
738        clone.fragment = fragment;
739        clone.host = host;
740        clone.opaque = opaque;
741        clone.path = path;
742        clone.port = port;
743        clone.query = query;
744        clone.scheme = scheme;
745        clone.schemeSpecificPart = schemeSpecificPart;
746        clone.userInfo = userInfo;
747        clone.serverAuthority = serverAuthority;
748        return clone;
749    }
750
751    /*
752     * Takes a string that may contain hex sequences like %F1 or %2b and
753     * converts the hex values following the '%' to lowercase
754     */
755    private String convertHexToLowerCase(String s) {
756        StringBuilder result = new StringBuilder("");
757        if (s.indexOf('%') == -1) {
758            return s;
759        }
760
761        int index, prevIndex = 0;
762        while ((index = s.indexOf('%', prevIndex)) != -1) {
763            result.append(s.substring(prevIndex, index + 1));
764            result.append(s.substring(index + 1, index + 3).toLowerCase(Locale.US));
765            index += 3;
766            prevIndex = index;
767        }
768        return result.toString();
769    }
770
771    /**
772     * Returns true if the given URI escaped strings {@code first} and {@code second} are
773     * equal.
774     *
775     * TODO: This method assumes that both strings are escaped using the same escape rules
776     * yet it still performs case insensitive comparison of the escaped sequences.
777     * Why is this necessary ? We can just replace it with first.equals(second)
778     * otherwise.
779     */
780    private boolean escapedEquals(String first, String second) {
781        // This length test isn't a micro-optimization. We need it because we sometimes
782        // calculate the number of characters to match based on the length of the second
783        // string. If the second string is shorter than the first, we might attempt to match
784        // 0 chars, and regionMatches is specified to return true in that case.
785        if (first.length() != second.length()) {
786            return false;
787        }
788
789        int prevIndex = 0;
790        while (true) {
791            int index = first.indexOf('%', prevIndex);
792            int index1 = second.indexOf('%', prevIndex);
793            if (index != index1) {
794                return false;
795            }
796
797            // index == index1 from this point on.
798
799            if (index == -1) {
800                // No more escapes, match the remainder of the string
801                // normally.
802               return first.regionMatches(prevIndex, second, prevIndex,
803                       second.length() - prevIndex);
804            }
805
806            if (!first.regionMatches(prevIndex, second, prevIndex, (index - prevIndex))) {
807                return false;
808            }
809
810            if (!first.regionMatches(true /* ignore case */, index + 1, second, index + 1, 2)) {
811                return false;
812            }
813
814            index += 3;
815            prevIndex = index;
816        }
817    }
818
819    @Override public boolean equals(Object o) {
820        if (!(o instanceof URI)) {
821            return false;
822        }
823        URI uri = (URI) o;
824
825        if (uri.fragment == null && fragment != null || uri.fragment != null
826                && fragment == null) {
827            return false;
828        } else if (uri.fragment != null && fragment != null) {
829            if (!escapedEquals(uri.fragment, fragment)) {
830                return false;
831            }
832        }
833
834        if (uri.scheme == null && scheme != null || uri.scheme != null
835                && scheme == null) {
836            return false;
837        } else if (uri.scheme != null && scheme != null) {
838            if (!uri.scheme.equalsIgnoreCase(scheme)) {
839                return false;
840            }
841        }
842
843        if (uri.opaque && opaque) {
844            return escapedEquals(uri.schemeSpecificPart,
845                    schemeSpecificPart);
846        } else if (!uri.opaque && !opaque) {
847            if (!escapedEquals(path, uri.path)) {
848                return false;
849            }
850
851            if (uri.query != null && query == null || uri.query == null
852                    && query != null) {
853                return false;
854            } else if (uri.query != null && query != null) {
855                if (!escapedEquals(uri.query, query)) {
856                    return false;
857                }
858            }
859
860            if (uri.authority != null && authority == null
861                    || uri.authority == null && authority != null) {
862                return false;
863            } else if (uri.authority != null && authority != null) {
864                if (uri.host != null && host == null || uri.host == null
865                        && host != null) {
866                    return false;
867                } else if (uri.host == null && host == null) {
868                    // both are registry based, so compare the whole authority
869                    return escapedEquals(uri.authority, authority);
870                } else { // uri.host != null && host != null, so server-based
871                    if (!host.equalsIgnoreCase(uri.host)) {
872                        return false;
873                    }
874
875                    if (port != uri.port) {
876                        return false;
877                    }
878
879                    if (uri.userInfo != null && userInfo == null
880                            || uri.userInfo == null && userInfo != null) {
881                        return false;
882                    } else if (uri.userInfo != null && userInfo != null) {
883                        return escapedEquals(userInfo, uri.userInfo);
884                    } else {
885                        return true;
886                    }
887                }
888            } else {
889                // no authority
890                return true;
891            }
892
893        } else {
894            // one is opaque, the other hierarchical
895            return false;
896        }
897    }
898
899    /**
900     * Returns the scheme of this URI, or null if this URI has no scheme. This
901     * is also known as the protocol.
902     */
903    public String getScheme() {
904        return scheme;
905    }
906
907    /**
908     * Returns the decoded scheme-specific part of this URI, or null if this URI
909     * has no scheme-specific part.
910     */
911    public String getSchemeSpecificPart() {
912        return decode(schemeSpecificPart);
913    }
914
915    /**
916     * Returns the encoded scheme-specific part of this URI, or null if this URI
917     * has no scheme-specific part.
918     */
919    public String getRawSchemeSpecificPart() {
920        return schemeSpecificPart;
921    }
922
923    /**
924     * Returns the decoded authority part of this URI, or null if this URI has
925     * no authority.
926     */
927    public String getAuthority() {
928        return decode(authority);
929    }
930
931    /**
932     * Returns the encoded authority of this URI, or null if this URI has no
933     * authority.
934     */
935    public String getRawAuthority() {
936        return authority;
937    }
938
939    /**
940     * Returns the decoded user info of this URI, or null if this URI has no
941     * user info.
942     */
943    public String getUserInfo() {
944        return decode(userInfo);
945    }
946
947    /**
948     * Returns the encoded user info of this URI, or null if this URI has no
949     * user info.
950     */
951    public String getRawUserInfo() {
952        return userInfo;
953    }
954
955    /**
956     * Returns the host of this URI, or null if this URI has no host.
957     */
958    public String getHost() {
959        return host;
960    }
961
962    /**
963     * Returns the port number of this URI, or {@code -1} if this URI has no
964     * explicit port.
965     */
966    public int getPort() {
967        return port;
968    }
969
970    /** @hide */
971    public int getEffectivePort() {
972        return getEffectivePort(scheme, port);
973    }
974
975    /**
976     * Returns the port to use for {@code scheme} connections will use when
977     * {@link #getPort} returns {@code specifiedPort}.
978     *
979     * @hide
980     */
981    public static int getEffectivePort(String scheme, int specifiedPort) {
982        if (specifiedPort != -1) {
983            return specifiedPort;
984        }
985
986        if ("http".equalsIgnoreCase(scheme)) {
987            return 80;
988        } else if ("https".equalsIgnoreCase(scheme)) {
989            return 443;
990        } else {
991            return -1;
992        }
993    }
994
995    /**
996     * Returns the decoded path of this URI, or null if this URI has no path.
997     */
998    public String getPath() {
999        return decode(path);
1000    }
1001
1002    /**
1003     * Returns the encoded path of this URI, or null if this URI has no path.
1004     */
1005    public String getRawPath() {
1006        return path;
1007    }
1008
1009    /**
1010     * Returns the decoded query of this URI, or null if this URI has no query.
1011     */
1012    public String getQuery() {
1013        return decode(query);
1014    }
1015
1016    /**
1017     * Returns the encoded query of this URI, or null if this URI has no query.
1018     */
1019    public String getRawQuery() {
1020        return query;
1021    }
1022
1023    /**
1024     * Returns the decoded fragment of this URI, or null if this URI has no
1025     * fragment.
1026     */
1027    public String getFragment() {
1028        return decode(fragment);
1029    }
1030
1031    /**
1032     * Gets the encoded fragment of this URI, or null if this URI has no
1033     * fragment.
1034     */
1035    public String getRawFragment() {
1036        return fragment;
1037    }
1038
1039    @Override public int hashCode() {
1040        if (hash == -1) {
1041            hash = getHashString().hashCode();
1042        }
1043        return hash;
1044    }
1045
1046    /**
1047     * Returns true if this URI is absolute, which means that a scheme is
1048     * defined.
1049     */
1050    public boolean isAbsolute() {
1051        // TODO: simplify to 'scheme != null' ?
1052        return absolute;
1053    }
1054
1055    /**
1056     * Returns true if this URI is opaque. Opaque URIs are absolute and have a
1057     * scheme-specific part that does not start with a slash character. All
1058     * parts except scheme, scheme-specific and fragment are undefined.
1059     */
1060    public boolean isOpaque() {
1061        return opaque;
1062    }
1063
1064    /**
1065     * Returns the normalized path.
1066     */
1067    private String normalize(String path, boolean discardRelativePrefix) {
1068        path = UrlUtils.canonicalizePath(path, discardRelativePrefix);
1069
1070        /*
1071         * If the path contains a colon before the first colon, prepend
1072         * "./" to differentiate the path from a scheme prefix.
1073         */
1074        int colon = path.indexOf(':');
1075        if (colon != -1) {
1076            int slash = path.indexOf('/');
1077            if (slash == -1 || colon < slash) {
1078                path = "./" + path;
1079            }
1080        }
1081
1082        return path;
1083    }
1084
1085    /**
1086     * Normalizes the path part of this URI.
1087     *
1088     * @return an URI object which represents this instance with a normalized
1089     *         path.
1090     */
1091    public URI normalize() {
1092        if (opaque) {
1093            return this;
1094        }
1095        String normalizedPath = normalize(path, false);
1096        // if the path is already normalized, return this
1097        if (path.equals(normalizedPath)) {
1098            return this;
1099        }
1100        // get an exact copy of the URI re-calculate the scheme specific part
1101        // since the path of the normalized URI is different from this URI.
1102        URI result = duplicate();
1103        result.path = normalizedPath;
1104        result.setSchemeSpecificPart();
1105        return result;
1106    }
1107
1108    /**
1109     * Tries to parse the authority component of this URI to divide it into the
1110     * host, port, and user-info. If this URI is already determined as a
1111     * ServerAuthority this instance will be returned without changes.
1112     *
1113     * @return this instance with the components of the parsed server authority.
1114     * @throws URISyntaxException
1115     *             if the authority part could not be parsed as a server-based
1116     *             authority.
1117     */
1118    public URI parseServerAuthority() throws URISyntaxException {
1119        if (!serverAuthority) {
1120            parseAuthority(true);
1121        }
1122        return this;
1123    }
1124
1125    /**
1126     * Makes the given URI {@code relative} to a relative URI against the URI
1127     * represented by this instance.
1128     *
1129     * @param relative
1130     *            the URI which has to be relativized against this URI.
1131     * @return the relative URI.
1132     */
1133    public URI relativize(URI relative) {
1134        if (relative.opaque || opaque) {
1135            return relative;
1136        }
1137
1138        if (scheme == null ? relative.scheme != null : !scheme
1139                .equals(relative.scheme)) {
1140            return relative;
1141        }
1142
1143        if (authority == null ? relative.authority != null : !authority
1144                .equals(relative.authority)) {
1145            return relative;
1146        }
1147
1148        // normalize both paths
1149        String thisPath = normalize(path, false);
1150        String relativePath = normalize(relative.path, false);
1151
1152        /*
1153         * if the paths aren't equal, then we need to determine if this URI's
1154         * path is a parent path (begins with) the relative URI's path
1155         */
1156        if (!thisPath.equals(relativePath)) {
1157            // drop everything after the last slash in this path
1158            thisPath = thisPath.substring(0, thisPath.lastIndexOf('/') + 1);
1159
1160            /*
1161             * if the relative URI's path doesn't start with this URI's path,
1162             * then just return the relative URI; the URIs have nothing in
1163             * common
1164             */
1165            if (!relativePath.startsWith(thisPath)) {
1166                return relative;
1167            }
1168        }
1169
1170        URI result = new URI();
1171        result.fragment = relative.fragment;
1172        result.query = relative.query;
1173        // the result URI is the remainder of the relative URI's path
1174        result.path = relativePath.substring(thisPath.length());
1175        result.setSchemeSpecificPart();
1176        return result;
1177    }
1178
1179    /**
1180     * Resolves the given URI {@code relative} against the URI represented by
1181     * this instance.
1182     *
1183     * @param relative
1184     *            the URI which has to be resolved against this URI.
1185     * @return the resolved URI.
1186     */
1187    public URI resolve(URI relative) {
1188        if (relative.absolute || opaque) {
1189            return relative;
1190        }
1191
1192        if (relative.authority != null) {
1193            // If the relative URI has an authority, the result is the relative
1194            // with this URI's scheme.
1195            URI result = relative.duplicate();
1196            result.scheme = scheme;
1197            result.absolute = absolute;
1198            return result;
1199        }
1200
1201        if (relative.path.isEmpty() && relative.scheme == null && relative.query == null) {
1202            // if the relative URI only consists of at most a fragment,
1203            URI result = duplicate();
1204            result.fragment = relative.fragment;
1205            return result;
1206        }
1207
1208        URI result = duplicate();
1209        result.fragment = relative.fragment;
1210        result.query = relative.query;
1211        String resolvedPath;
1212        if (relative.path.startsWith("/")) {
1213            // The relative URI has an absolute path; use it.
1214            resolvedPath = relative.path;
1215        } else if (relative.path.isEmpty()) {
1216            // The relative URI has no path; use the base path.
1217            resolvedPath = path;
1218        } else {
1219            // The relative URI has a relative path; combine the paths.
1220            int endIndex = path.lastIndexOf('/') + 1;
1221            resolvedPath = path.substring(0, endIndex) + relative.path;
1222        }
1223        result.path = UrlUtils.authoritySafePath(result.authority, normalize(resolvedPath, true));
1224        result.setSchemeSpecificPart();
1225        return result;
1226    }
1227
1228    /**
1229     * Helper method used to re-calculate the scheme specific part of the
1230     * resolved or normalized URIs
1231     */
1232    private void setSchemeSpecificPart() {
1233        // ssp = [//authority][path][?query]
1234        StringBuilder ssp = new StringBuilder();
1235        if (authority != null) {
1236            ssp.append("//" + authority);
1237        }
1238        if (path != null) {
1239            ssp.append(path);
1240        }
1241        if (query != null) {
1242            ssp.append("?" + query);
1243        }
1244        schemeSpecificPart = ssp.toString();
1245        // reset string, so that it can be re-calculated correctly when asked.
1246        string = null;
1247    }
1248
1249    /**
1250     * Creates a new URI instance by parsing the given string {@code relative}
1251     * and resolves the created URI against the URI represented by this
1252     * instance.
1253     *
1254     * @param relative
1255     *            the given string to create the new URI instance which has to
1256     *            be resolved later on.
1257     * @return the created and resolved URI.
1258     */
1259    public URI resolve(String relative) {
1260        return resolve(create(relative));
1261    }
1262
1263    private String decode(String s) {
1264        return s != null ? UriCodec.decode(s) : null;
1265    }
1266
1267    /**
1268     * Returns the textual string representation of this URI instance using the
1269     * US-ASCII encoding.
1270     *
1271     * @return the US-ASCII string representation of this URI.
1272     */
1273    public String toASCIIString() {
1274        StringBuilder result = new StringBuilder();
1275        ASCII_ONLY.appendEncoded(result, toString());
1276        return result.toString();
1277    }
1278
1279    /**
1280     * Returns the encoded URI.
1281     */
1282    @Override public String toString() {
1283        if (string != null) {
1284            return string;
1285        }
1286
1287        StringBuilder result = new StringBuilder();
1288        if (scheme != null) {
1289            result.append(scheme);
1290            result.append(':');
1291        }
1292        if (opaque) {
1293            result.append(schemeSpecificPart);
1294        } else {
1295            if (authority != null) {
1296                result.append("//");
1297                result.append(authority);
1298            }
1299
1300            if (path != null) {
1301                result.append(path);
1302            }
1303
1304            if (query != null) {
1305                result.append('?');
1306                result.append(query);
1307            }
1308        }
1309
1310        if (fragment != null) {
1311            result.append('#');
1312            result.append(fragment);
1313        }
1314
1315        string = result.toString();
1316        return string;
1317    }
1318
1319    /*
1320     * Form a string from the components of this URI, similarly to the
1321     * toString() method. But this method converts scheme and host to lowercase,
1322     * and converts escaped octets to lowercase.
1323     */
1324    private String getHashString() {
1325        StringBuilder result = new StringBuilder();
1326        if (scheme != null) {
1327            result.append(scheme.toLowerCase(Locale.US));
1328            result.append(':');
1329        }
1330        if (opaque) {
1331            result.append(schemeSpecificPart);
1332        } else {
1333            if (authority != null) {
1334                result.append("//");
1335                if (host == null) {
1336                    result.append(authority);
1337                } else {
1338                    if (userInfo != null) {
1339                        result.append(userInfo + "@");
1340                    }
1341                    result.append(host.toLowerCase(Locale.US));
1342                    if (port != -1) {
1343                        result.append(":" + port);
1344                    }
1345                }
1346            }
1347
1348            if (path != null) {
1349                result.append(path);
1350            }
1351
1352            if (query != null) {
1353                result.append('?');
1354                result.append(query);
1355            }
1356        }
1357
1358        if (fragment != null) {
1359            result.append('#');
1360            result.append(fragment);
1361        }
1362
1363        return convertHexToLowerCase(result.toString());
1364    }
1365
1366    /**
1367     * Converts this URI instance to a URL.
1368     *
1369     * @return the created URL representing the same resource as this URI.
1370     * @throws MalformedURLException
1371     *             if an error occurs while creating the URL or no protocol
1372     *             handler could be found.
1373     */
1374    public URL toURL() throws MalformedURLException {
1375        if (!absolute) {
1376            throw new IllegalArgumentException("URI is not absolute: " + toString());
1377        }
1378        return new URL(toString());
1379    }
1380
1381    private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
1382        in.defaultReadObject();
1383        try {
1384            parseURI(string, false);
1385        } catch (URISyntaxException e) {
1386            throw new IOException(e.toString());
1387        }
1388    }
1389
1390    private void writeObject(ObjectOutputStream out) throws IOException, ClassNotFoundException {
1391        // call toString() to ensure the value of string field is calculated
1392        toString();
1393        out.defaultWriteObject();
1394    }
1395}
1396