URI.java revision 0b217ad34f025aedbba468e248303bdc8b2e5df0
1/*
2 *  Licensed to the Apache Software Foundation (ASF) under one or more
3 *  contributor license agreements.  See the NOTICE file distributed with
4 *  this work for additional information regarding copyright ownership.
5 *  The ASF licenses this file to You under the Apache License, Version 2.0
6 *  (the "License"); you may not use this file except in compliance with
7 *  the License.  You may obtain a copy of the License at
8 *
9 *     http://www.apache.org/licenses/LICENSE-2.0
10 *
11 *  Unless required by applicable law or agreed to in writing, software
12 *  distributed under the License is distributed on an "AS IS" BASIS,
13 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 *  See the License for the specific language governing permissions and
15 *  limitations under the License.
16 */
17
18package java.net;
19
20import java.io.IOException;
21import java.io.ObjectInputStream;
22import java.io.ObjectOutputStream;
23import java.io.Serializable;
24import java.util.Locale;
25import libcore.net.UriCodec;
26import libcore.net.url.UrlUtils;
27
28/**
29 * A Uniform Resource Identifier that identifies an abstract or physical
30 * resource, as specified by <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC
31 * 2396</a>.
32 *
33 * <h3>Parts of a URI</h3>
34 * A URI is composed of many parts. This class can both parse URI strings into
35 * parts and compose URI strings from parts. For example, consider the parts of
36 * this URI:
37 * {@code http://username:password@host:8080/directory/file?query#fragment}
38 * <table>
39 * <tr><th>Component                                            </th><th>Example value                                                      </th><th>Also known as</th></tr>
40 * <tr><td>{@link #getScheme() Scheme}                          </td><td>{@code http}                                                       </td><td>protocol</td></tr>
41 * <tr><td>{@link #getSchemeSpecificPart() Scheme-specific part}</td><td>{@code //username:password@host:8080/directory/file?query#fragment}</td><td></td></tr>
42 * <tr><td>{@link #getAuthority() Authority}                    </td><td>{@code username:password@host:8080}                                </td><td></td></tr>
43 * <tr><td>{@link #getUserInfo() User Info}                     </td><td>{@code username:password}                                          </td><td></td></tr>
44 * <tr><td>{@link #getHost() Host}                              </td><td>{@code host}                                                       </td><td></td></tr>
45 * <tr><td>{@link #getPort() Port}                              </td><td>{@code 8080}                                                       </td><td></td></tr>
46 * <tr><td>{@link #getPath() Path}                              </td><td>{@code /directory/file}                                            </td><td></td></tr>
47 * <tr><td>{@link #getQuery() Query}                            </td><td>{@code query}                                                      </td><td></td></tr>
48 * <tr><td>{@link #getFragment() Fragment}                      </td><td>{@code fragment}                                                   </td><td>ref</td></tr>
49 * </table>
50 *
51 * <h3>Absolute vs. Relative URIs</h3>
52 * URIs are either {@link #isAbsolute() absolute or relative}.
53 * <ul>
54 *     <li><strong>Absolute:</strong> {@code http://android.com/robots.txt}
55 *     <li><strong>Relative:</strong> {@code robots.txt}
56 * </ul>
57 *
58 * <p>Absolute URIs always have a scheme. If its scheme is supported by {@link
59 * URL}, you can use {@link #toURL} to convert an absolute URI to a URL.
60 *
61 * <p>Relative URIs do not have a scheme and cannot be converted to URLs. If you
62 * have the absolute URI that a relative URI is relative to, you can use {@link
63 * #resolve} to compute the referenced absolute URI. Symmetrically, you can use
64 * {@link #relativize} to compute the relative URI from one URI to another.
65 * <pre>   {@code
66 *   URI absolute = new URI("http://android.com/");
67 *   URI relative = new URI("robots.txt");
68 *   URI resolved = new URI("http://android.com/robots.txt");
69 *
70 *   // print "http://android.com/robots.txt"
71 *   System.out.println(absolute.resolve(relative));
72 *
73 *   // print "robots.txt"
74 *   System.out.println(absolute.relativize(resolved));
75 * }</pre>
76 *
77 * <h3>Opaque vs. Hierarchical URIs</h3>
78 * Absolute URIs are either {@link #isOpaque() opaque or hierarchical}. Relative
79 * URIs are always hierarchical.
80 * <ul>
81 *     <li><strong>Hierarchical:</strong> {@code http://android.com/robots.txt}
82 *     <li><strong>Opaque:</strong> {@code mailto:robots@example.com}
83 * </ul>
84 *
 * <p>Opaque URIs have both a scheme and a scheme-specific part that does not
 * begin with the slash character: {@code /}. The contents of the
 * scheme-specific part of an opaque URI are not parsed, so an opaque URI never
 * has an authority, user info, host, port, path or query. An opaque URI may
 * have a fragment, however. A typical opaque URI is
 * {@code mailto:robots@example.com}.
91 * <table>
92 * <tr><th>Component           </th><th>Example value             </th></tr>
93 * <tr><td>Scheme              </td><td>{@code mailto}            </td></tr>
94 * <tr><td>Scheme-specific part</td><td>{@code robots@example.com}</td></tr>
95 * <tr><td>Fragment            </td><td>                          </td></tr>
96 * </table>
 * <p>Hierarchical URIs may have values for any URI component. They always
 * have a non-null path, though that path may be the empty string.
99 *
100 * <h3>Encoding and Decoding URI Components</h3>
101 * Each component of a URI permits a limited set of legal characters. Other
102 * characters must first be <i>encoded</i> before they can be embedded in a URI.
103 * To recover the original characters from a URI, they may be <i>decoded</i>.
104 * <strong>Contrary to what you might expect,</strong> this class uses the
105 * term <i>raw</i> to refer to encoded strings. The non-<i>raw</i> accessors
106 * return decoded strings. For example, consider how this URI is decoded:
107 * {@code http://user:pa55w%3Frd@host:80/doc%7Csearch?q=green%20robots#over%206%22}
108 * <table>
109 * <tr><th>Component           </th><th>Legal Characters                                                    </th><th>Other Constraints                                  </th><th>Raw Value                                                      </th><th>Value</th></tr>
110 * <tr><td>Scheme              </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code +-.}                  </td><td>First character must be in {@code a-z}, {@code A-Z}</td><td>                                                               </td><td>{@code http}</td></tr>
111 * <tr><td>Scheme-specific part</td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=?/[]@}</td><td>Non-ASCII characters okay                          </td><td>{@code //user:pa55w%3Frd@host:80/doc%7Csearch?q=green%20robots}</td><td>{@code //user:pa55w?rd@host:80/doc|search?q=green robots}</td></tr>
112 * <tr><td>Authority           </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=@[]}  </td><td>Non-ASCII characters okay                          </td><td>{@code user:pa55w%3Frd@host:80}                                </td><td>{@code user:pa55w?rd@host:80}</td></tr>
113 * <tr><td>User Info           </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=}     </td><td>Non-ASCII characters okay                          </td><td>{@code user:pa55w%3Frd}                                        </td><td>{@code user:pa55w?rd}</td></tr>
114 * <tr><td>Host                </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code -.[]}                 </td><td>Domain name, IPv4 address or [IPv6 address]        </td><td>                                                               </td><td>host</td></tr>
115 * <tr><td>Port                </td><td>{@code 0-9}                                                         </td><td>                                                   </td><td>                                                               </td><td>{@code 80}</td></tr>
116 * <tr><td>Path                </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=/@}   </td><td>Non-ASCII characters okay                          </td><td>{@code /doc%7Csearch}                                          </td><td>{@code /doc|search}</td></tr>
117 * <tr><td>Query               </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=?/[]@}</td><td>Non-ASCII characters okay                          </td><td>{@code q=green%20robots}                                       </td><td>{@code q=green robots}</td></tr>
118 * <tr><td>Fragment            </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=?/[]@}</td><td>Non-ASCII characters okay                          </td><td>{@code over%206%22}                                            </td><td>{@code over 6"}</td></tr>
119 * </table>
120 * A URI's host, port and scheme are not eligible for encoding and must not
121 * contain illegal characters.
122 *
123 * <p>To encode a URI, invoke any of the multiple-parameter constructors of this
124 * class. These constructors accept your original strings and encode them into
125 * their raw form.
126 *
127 * <p>To decode a URI, invoke the single-string constructor, and then use the
128 * appropriate accessor methods to get the decoded components.
129 *
130 * <p>The {@link URL} class can be used to retrieve resources by their URI.
131 */
132public final class URI implements Comparable<URI>, Serializable {
133
    private static final long serialVersionUID = -6052424284110960213l;

    /** Mark characters that every {@code PartEncoder} leaves unescaped. */
    static final String UNRESERVED = "_-!.~\'()*";
    /** Punctuation characters that every {@code PartEncoder} leaves unescaped. */
    static final String PUNCTUATION = ",;:$&+=";

    /** Codec for user info: no extra legal characters beyond the common set. */
    static final UriCodec USER_INFO_ENCODER = new PartEncoder("");
    /** Codec for paths: additionally permits {@code /} and {@code @}. */
    static final UriCodec PATH_ENCODER = new PartEncoder("/@");
    /** Codec for authorities: additionally permits {@code @}, {@code [} and {@code ]}. */
    static final UriCodec AUTHORITY_ENCODER = new PartEncoder("@[]");

    /** for java.net.URL, which foolishly combines these two parts */
    static final UriCodec FILE_AND_QUERY_ENCODER = new PartEncoder("/@?");

    /** for query, fragment, and scheme-specific part */
    static final UriCodec ALL_LEGAL_ENCODER = new PartEncoder("?/[]@");

    /** Retains all ASCII chars including delimiters. */
    private static final UriCodec ASCII_ONLY = new UriCodec() {
        @Override protected boolean isRetained(char c) {
            // Code points 0..127 are kept as-is; anything above is not retained.
            return c <= 127;
        }
    };
155
156    /**
157     * Encodes the unescaped characters of {@code s} that are not permitted.
158     * Permitted characters are:
159     * <ul>
160     *   <li>Unreserved characters in <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>.
161     *   <li>{@code extraOkayChars},
162     *   <li>non-ASCII, non-control, non-whitespace characters
163     * </ul>
164     */
165    private static class PartEncoder extends UriCodec {
166        private final String extraLegalCharacters;
167
168        PartEncoder(String extraLegalCharacters) {
169            this.extraLegalCharacters = extraLegalCharacters;
170        }
171
172        @Override protected boolean isRetained(char c) {
173            return UNRESERVED.indexOf(c) != -1
174                    || PUNCTUATION.indexOf(c) != -1
175                    || extraLegalCharacters.indexOf(c) != -1
176                    || (c > 127 && !Character.isSpaceChar(c) && !Character.isISOControl(c));
177        }
178    }
179
    // The full URI text as handed to parseURI. This is the only instance
    // field that is not declared transient.
    private String string;
    // Components populated by parseURI/parseAuthority; null when absent.
    private transient String scheme;
    private transient String schemeSpecificPart;
    private transient String authority;
    private transient String userInfo;
    private transient String host;
    // -1 when no port was parsed.
    private transient int port = -1;
    private transient String path;
    private transient String query;
    private transient String fragment;
    // True when the URI has a scheme whose scheme-specific part does not start with '/'.
    private transient boolean opaque;
    // True when the URI has a scheme.
    private transient boolean absolute;
    // Set by parseAuthority once user info, host and port were parsed successfully.
    private transient boolean serverAuthority = false;

    // NOTE(review): -1 presumably marks "hash not computed yet" — confirm against hashCode().
    private transient int hash = -1;

    // Creates an instance with every component left at its default value;
    // used by duplicate() to build a copy field by field.
    private URI() {}
197
    /**
     * Creates a new URI instance by parsing {@code spec}.
     *
     * @param spec a URI whose illegal characters have all been encoded.
     * @throws URISyntaxException if {@code spec} cannot be parsed as a URI.
     */
    public URI(String spec) throws URISyntaxException {
        // forceServer is false: an authority that cannot be parsed as a
        // server authority does not necessarily make construction fail.
        parseURI(spec, false);
    }
206
207    /**
208     * Creates a new URI instance of the given unencoded component parts.
209     *
210     * @param scheme the URI scheme, or null for a non-absolute URI.
211     */
212    public URI(String scheme, String schemeSpecificPart, String fragment)
213            throws URISyntaxException {
214        StringBuilder uri = new StringBuilder();
215        if (scheme != null) {
216            uri.append(scheme);
217            uri.append(':');
218        }
219        if (schemeSpecificPart != null) {
220            ALL_LEGAL_ENCODER.appendEncoded(uri, schemeSpecificPart);
221        }
222        if (fragment != null) {
223            uri.append('#');
224            ALL_LEGAL_ENCODER.appendEncoded(uri, fragment);
225        }
226
227        parseURI(uri.toString(), false);
228    }
229
230    /**
231     * Creates a new URI instance of the given unencoded component parts.
232     *
233     * @param scheme the URI scheme, or null for a non-absolute URI.
234     */
235    public URI(String scheme, String userInfo, String host, int port, String path, String query,
236            String fragment) throws URISyntaxException {
237        if (scheme == null && userInfo == null && host == null && path == null
238                && query == null && fragment == null) {
239            this.path = "";
240            return;
241        }
242
243        if (scheme != null && path != null && !path.isEmpty() && path.charAt(0) != '/') {
244            throw new URISyntaxException(path, "Relative path");
245        }
246
247        StringBuilder uri = new StringBuilder();
248        if (scheme != null) {
249            uri.append(scheme);
250            uri.append(':');
251        }
252
253        if (userInfo != null || host != null || port != -1) {
254            uri.append("//");
255        }
256
257        if (userInfo != null) {
258            USER_INFO_ENCODER.appendEncoded(uri, userInfo);
259            uri.append('@');
260        }
261
262        if (host != null) {
263            // check for IPv6 addresses that hasn't been enclosed in square brackets
264            if (host.indexOf(':') != -1 && host.indexOf(']') == -1 && host.indexOf('[') == -1) {
265                host = "[" + host + "]";
266            }
267            uri.append(host);
268        }
269
270        if (port != -1) {
271            uri.append(':');
272            uri.append(port);
273        }
274
275        if (path != null) {
276            PATH_ENCODER.appendEncoded(uri, path);
277        }
278
279        if (query != null) {
280            uri.append('?');
281            ALL_LEGAL_ENCODER.appendEncoded(uri, query);
282        }
283
284        if (fragment != null) {
285            uri.append('#');
286            ALL_LEGAL_ENCODER.appendEncoded(uri, fragment);
287        }
288
289        parseURI(uri.toString(), true);
290    }
291
    /**
     * Creates a new URI instance of the given unencoded component parts.
     * Delegates to the seven-argument constructor with no user info, no port
     * ({@code -1}) and no query.
     *
     * @param scheme the URI scheme, or null for a non-absolute URI.
     * @param host the host, or null for none.
     * @param path the unencoded path, or null.
     * @param fragment the unencoded fragment, or null for none.
     * @throws URISyntaxException if the seven-argument constructor rejects
     *     the given parts.
     */
    public URI(String scheme, String host, String path, String fragment) throws URISyntaxException {
        this(scheme, null, host, -1, path, null, fragment);
    }
300
301    /**
302     * Creates a new URI instance of the given unencoded component parts.
303     *
304     * @param scheme the URI scheme, or null for a non-absolute URI.
305     */
306    public URI(String scheme, String authority, String path, String query,
307            String fragment) throws URISyntaxException {
308        if (scheme != null && path != null && !path.isEmpty() && path.charAt(0) != '/') {
309            throw new URISyntaxException(path, "Relative path");
310        }
311
312        StringBuilder uri = new StringBuilder();
313        if (scheme != null) {
314            uri.append(scheme);
315            uri.append(':');
316        }
317        if (authority != null) {
318            uri.append("//");
319            AUTHORITY_ENCODER.appendEncoded(uri, authority);
320        }
321
322        if (path != null) {
323            PATH_ENCODER.appendEncoded(uri, path);
324        }
325        if (query != null) {
326            uri.append('?');
327            ALL_LEGAL_ENCODER.appendEncoded(uri, query);
328        }
329        if (fragment != null) {
330            uri.append('#');
331            ALL_LEGAL_ENCODER.appendEncoded(uri, fragment);
332        }
333
334        parseURI(uri.toString(), false);
335    }
336
    /**
     * Breaks uri into its component parts. This first splits URI into scheme,
     * scheme-specific part and fragment:
     *   [scheme:][scheme-specific part][#fragment]
     *
     * Then it breaks the scheme-specific part into authority, path and query:
     *   [//authority][path][?query]
     *
     * Finally it delegates to parseAuthority to break the authority into user
     * info, host and port:
     *   [user-info@][host][:port]
     *
     * <p>Opaque URIs (a scheme followed by anything other than {@code /})
     * return early: only scheme, scheme-specific part and fragment are set.
     *
     * @param uri the complete, already encoded URI string; also stored in
     *     {@code string}.
     * @param forceServer passed through to {@link #parseAuthority}.
     * @throws URISyntaxException if a component fails validation, if a scheme
     *     is not followed by a scheme-specific part, or if {@code //} ends
     *     the string.
     */
    private void parseURI(String uri, boolean forceServer) throws URISyntaxException {
        string = uri;

        // "#fragment"
        // NOTE(review): findFirstOf appears to return the end limit when no
        // character matches; the comparisons below rely on that — confirm in UrlUtils.
        int fragmentStart = UrlUtils.findFirstOf(uri, "#", 0, uri.length());
        if (fragmentStart < uri.length()) {
            fragment = ALL_LEGAL_ENCODER.validate(uri, fragmentStart + 1, uri.length(), "fragment");
        }

        // scheme:
        int start;
        int colon = UrlUtils.findFirstOf(uri, ":", 0, fragmentStart);
        // A ':' only delimits a scheme when it comes before any '/', '?' or '#'.
        if (colon < UrlUtils.findFirstOf(uri, "/?#", 0, fragmentStart)) {
            absolute = true;
            scheme = validateScheme(uri, colon);
            start = colon + 1;

            // "scheme:" immediately followed by the fragment (or the end) has
            // no scheme-specific part.
            if (start == fragmentStart) {
                throw new URISyntaxException(uri, "Scheme-specific part expected", start);
            }

            // URIs with schemes followed by a non-/ char are opaque and need no further parsing.
            if (!uri.regionMatches(start, "/", 0, 1)) {
                opaque = true;
                schemeSpecificPart = ALL_LEGAL_ENCODER.validate(
                        uri, start, fragmentStart, "scheme specific part");
                return;
            }
        } else {
            absolute = false;
            start = 0;
        }

        opaque = false;
        // For hierarchical URIs the scheme-specific part is taken verbatim;
        // its pieces are validated individually below.
        schemeSpecificPart = uri.substring(start, fragmentStart);

        // "//authority"
        int fileStart;
        if (uri.regionMatches(start, "//", 0, 2)) {
            int authorityStart = start + 2;
            fileStart = UrlUtils.findFirstOf(uri, "/?", authorityStart, fragmentStart);
            if (authorityStart == uri.length()) {
                throw new URISyntaxException(uri, "Authority expected", uri.length());
            }
            // An empty authority (e.g. "file:///x") leaves the field null.
            if (authorityStart < fileStart) {
                authority = AUTHORITY_ENCODER.validate(uri, authorityStart, fileStart, "authority");
            }
        } else {
            fileStart = start;
        }

        // "path"
        int queryStart = UrlUtils.findFirstOf(uri, "?", fileStart, fragmentStart);
        path = PATH_ENCODER.validate(uri, fileStart, queryStart, "path");

        // "?query"
        if (queryStart < fragmentStart) {
            query = ALL_LEGAL_ENCODER.validate(uri, queryStart + 1, fragmentStart, "query");
        }

        parseAuthority(forceServer);
    }
411
412    private String validateScheme(String uri, int end) throws URISyntaxException {
413        if (end == 0) {
414            throw new URISyntaxException(uri, "Scheme expected", 0);
415        }
416
417        for (int i = 0; i < end; i++) {
418            if (!UrlUtils.isValidSchemeChar(i, uri.charAt(i))) {
419                throw new URISyntaxException(uri, "Illegal character in scheme", 0);
420            }
421        }
422
423        return uri.substring(0, end);
424    }
425
    /**
     * Breaks this URI's authority into user info, host and port parts.
     *   [user-info@][host][:port]
     * If any part of this fails this method will give up and potentially leave
     * these fields with their default values.
     *
     * <p>The {@code userInfo}, {@code host}, {@code port} and
     * {@code serverAuthority} fields are only assigned at the very end, once
     * every check has passed; every early {@code return} leaves them untouched.
     *
     * @param forceServer true to always throw if the authority cannot be
     *     parsed. If false, this method may still throw for some kinds of
     *     errors; this unpredictable behavior is consistent with the RI.
     * @throws URISyntaxException if the user info contains a square bracket,
     *     if the host is rejected by {@link #isValidHost}, or, when
     *     {@code forceServer} is true, if the port or host is missing or
     *     malformed.
     */
    private void parseAuthority(boolean forceServer) throws URISyntaxException {
        if (authority == null) {
            return;
        }

        String tempUserInfo = null;
        String temp = authority;
        int index = temp.indexOf('@');
        // Offset of the host within the authority; used for error indices.
        int hostIndex = 0;
        if (index != -1) {
            // remove user info
            tempUserInfo = temp.substring(0, index);
            validateUserInfo(authority, tempUserInfo, 0);
            temp = temp.substring(index + 1); // host[:port] is left
            hostIndex = index + 1;
        }

        index = temp.lastIndexOf(':');
        int endIndex = temp.indexOf(']');

        String tempHost;
        int tempPort = -1;
        // A ':' only separates a port when it comes after the first ']', if
        // there is one; colons before it belong to a bracketed host.
        if (index != -1 && endIndex < index) {
            // determine port and host
            tempHost = temp.substring(0, index);

            if (index < (temp.length() - 1)) { // port part is not empty
                try {
                    tempPort = Integer.parseInt(temp.substring(index + 1));
                    if (tempPort < 0) {
                        if (forceServer) {
                            throw new URISyntaxException(authority,
                                    "Invalid port number", hostIndex + index + 1);
                        }
                        return;
                    }
                } catch (NumberFormatException e) {
                    if (forceServer) {
                        throw new URISyntaxException(authority,
                                "Invalid port number", hostIndex + index + 1);
                    }
                    return;
                }
            }
        } else {
            tempHost = temp;
        }

        if (tempHost.isEmpty()) {
            if (forceServer) {
                throw new URISyntaxException(authority, "Expected host", hostIndex);
            }
            return;
        }

        if (!isValidHost(forceServer, tempHost)) {
            return;
        }

        // this is a server based uri,
        // fill in the userInfo, host and port fields
        userInfo = tempUserInfo;
        host = tempHost;
        port = tempPort;
        serverAuthority = true;
    }
502
503    private void validateUserInfo(String uri, String userInfo, int index)
504            throws URISyntaxException {
505        for (int i = 0; i < userInfo.length(); i++) {
506            char ch = userInfo.charAt(i);
507            if (ch == ']' || ch == '[') {
508                throw new URISyntaxException(uri, "Illegal character in userInfo", index + i);
509            }
510        }
511    }
512
513    /**
514     * Returns true if {@code host} is a well-formed host name or IP address.
515     *
516     * @param forceServer true to always throw if the host cannot be parsed. If
517     *     false, this method may still throw for some kinds of errors; this
518     *     unpredictable behavior is consistent with the RI.
519     */
520    private boolean isValidHost(boolean forceServer, String host) throws URISyntaxException {
521        if (host.startsWith("[")) {
522            // IPv6 address
523            if (!host.endsWith("]")) {
524                throw new URISyntaxException(host,
525                        "Expected a closing square bracket for IPv6 address", 0);
526            }
527            if (InetAddress.isNumeric(host)) {
528                // If it's numeric, the presence of square brackets guarantees
529                // that it's a numeric IPv6 address.
530                return true;
531            }
532            throw new URISyntaxException(host, "Malformed IPv6 address");
533        }
534
535        // '[' and ']' can only be the first char and last char
536        // of the host name
537        if (host.indexOf('[') != -1 || host.indexOf(']') != -1) {
538            throw new URISyntaxException(host, "Illegal character in host name", 0);
539        }
540
541        int index = host.lastIndexOf('.');
542        if (index < 0 || index == host.length() - 1
543                || !Character.isDigit(host.charAt(index + 1))) {
544            // domain name
545            if (isValidDomainName(host)) {
546                return true;
547            }
548            if (forceServer) {
549                throw new URISyntaxException(host, "Illegal character in host name", 0);
550            }
551            return false;
552        }
553
554        // IPv4 address?
555        try {
556            InetAddress ia = InetAddress.parseNumericAddress(host);
557            if (ia instanceof Inet4Address) {
558                return true;
559            }
560        } catch (IllegalArgumentException ignored) {
561        }
562
563        if (forceServer) {
564            throw new URISyntaxException(host, "Malformed IPv4 address", 0);
565        }
566        return false;
567    }
568
569    private boolean isValidDomainName(String host) {
570        try {
571            UriCodec.validateSimple(host, "-.");
572        } catch (URISyntaxException e) {
573            return false;
574        }
575
576        String lastLabel = null;
577        for (String token : host.split("\\.")) {
578            lastLabel = token;
579            if (lastLabel.startsWith("-") || lastLabel.endsWith("-")) {
580                return false;
581            }
582        }
583
584        if (lastLabel == null) {
585            return false;
586        }
587
588        if (!lastLabel.equals(host)) {
589            char ch = lastLabel.charAt(0);
590            if (ch >= '0' && ch <= '9') {
591                return false;
592            }
593        }
594        return true;
595    }
596
597    /**
598     * Compares this URI with the given argument {@code uri}. This method will
599     * return a negative value if this URI instance is less than the given
600     * argument and a positive value if this URI instance is greater than the
601     * given argument. The return value {@code 0} indicates that the two
602     * instances represent the same URI. To define the order the single parts of
603     * the URI are compared with each other. String components will be ordered
604     * in the natural case-sensitive way. A hierarchical URI is less than an
605     * opaque URI and if one part is {@code null} the URI with the undefined
606     * part is less than the other one.
607     *
608     * @param uri
609     *            the URI this instance has to compare with.
610     * @return the value representing the order of the two instances.
611     */
612    public int compareTo(URI uri) {
613        int ret;
614
615        // compare schemes
616        if (scheme == null && uri.scheme != null) {
617            return -1;
618        } else if (scheme != null && uri.scheme == null) {
619            return 1;
620        } else if (scheme != null && uri.scheme != null) {
621            ret = scheme.compareToIgnoreCase(uri.scheme);
622            if (ret != 0) {
623                return ret;
624            }
625        }
626
627        // compare opacities
628        if (!opaque && uri.opaque) {
629            return -1;
630        } else if (opaque && !uri.opaque) {
631            return 1;
632        } else if (opaque && uri.opaque) {
633            ret = schemeSpecificPart.compareTo(uri.schemeSpecificPart);
634            if (ret != 0) {
635                return ret;
636            }
637        } else {
638
639            // otherwise both must be hierarchical
640
641            // compare authorities
642            if (authority != null && uri.authority == null) {
643                return 1;
644            } else if (authority == null && uri.authority != null) {
645                return -1;
646            } else if (authority != null && uri.authority != null) {
647                if (host != null && uri.host != null) {
648                    // both are server based, so compare userInfo, host, port
649                    if (userInfo != null && uri.userInfo == null) {
650                        return 1;
651                    } else if (userInfo == null && uri.userInfo != null) {
652                        return -1;
653                    } else if (userInfo != null && uri.userInfo != null) {
654                        ret = userInfo.compareTo(uri.userInfo);
655                        if (ret != 0) {
656                            return ret;
657                        }
658                    }
659
660                    // userInfo's are the same, compare hostname
661                    ret = host.compareToIgnoreCase(uri.host);
662                    if (ret != 0) {
663                        return ret;
664                    }
665
666                    // compare port
667                    if (port != uri.port) {
668                        return port - uri.port;
669                    }
670                } else { // one or both are registry based, compare the whole
671                    // authority
672                    ret = authority.compareTo(uri.authority);
673                    if (ret != 0) {
674                        return ret;
675                    }
676                }
677            }
678
679            // authorities are the same
680            // compare paths
681            ret = path.compareTo(uri.path);
682            if (ret != 0) {
683                return ret;
684            }
685
686            // compare queries
687
688            if (query != null && uri.query == null) {
689                return 1;
690            } else if (query == null && uri.query != null) {
691                return -1;
692            } else if (query != null && uri.query != null) {
693                ret = query.compareTo(uri.query);
694                if (ret != 0) {
695                    return ret;
696                }
697            }
698        }
699
700        // everything else is identical, so compare fragments
701        if (fragment != null && uri.fragment == null) {
702            return 1;
703        } else if (fragment == null && uri.fragment != null) {
704            return -1;
705        } else if (fragment != null && uri.fragment != null) {
706            ret = fragment.compareTo(uri.fragment);
707            if (ret != 0) {
708                return ret;
709            }
710        }
711
712        // identical
713        return 0;
714    }
715
716    /**
717     * Returns the URI formed by parsing {@code uri}. This method behaves
718     * identically to the string constructor but throws a different exception
719     * on failure. The constructor fails with a checked {@link
720     * URISyntaxException}; this method fails with an unchecked {@link
721     * IllegalArgumentException}.
722     */
723    public static URI create(String uri) {
724        try {
725            return new URI(uri);
726        } catch (URISyntaxException e) {
727            throw new IllegalArgumentException(e.getMessage());
728        }
729    }
730
731    private URI duplicate() {
732        URI clone = new URI();
733        clone.absolute = absolute;
734        clone.authority = authority;
735        clone.fragment = fragment;
736        clone.host = host;
737        clone.opaque = opaque;
738        clone.path = path;
739        clone.port = port;
740        clone.query = query;
741        clone.scheme = scheme;
742        clone.schemeSpecificPart = schemeSpecificPart;
743        clone.userInfo = userInfo;
744        clone.serverAuthority = serverAuthority;
745        return clone;
746    }
747
748    /*
749     * Takes a string that may contain hex sequences like %F1 or %2b and
750     * converts the hex values following the '%' to lowercase
751     */
752    private String convertHexToLowerCase(String s) {
753        StringBuilder result = new StringBuilder("");
754        if (s.indexOf('%') == -1) {
755            return s;
756        }
757
758        int index, prevIndex = 0;
759        while ((index = s.indexOf('%', prevIndex)) != -1) {
760            result.append(s.substring(prevIndex, index + 1));
761            result.append(s.substring(index + 1, index + 3).toLowerCase(Locale.US));
762            index += 3;
763            prevIndex = index;
764        }
765        return result.toString();
766    }
767
768    /**
769     * Returns true if the given URI escaped strings {@code first} and {@code second} are
770     * equal.
771     *
772     * TODO: This method assumes that both strings are escaped using the same escape rules
773     * yet it still performs case insensitive comparison of the escaped sequences.
774     * Why is this necessary ? We can just replace it with first.equals(second)
775     * otherwise.
776     */
777    private boolean escapedEquals(String first, String second) {
778        // This length test isn't a micro-optimization. We need it because we sometimes
779        // calculate the number of characters to match based on the length of the second
780        // string. If the second string is shorter than the first, we might attempt to match
781        // 0 chars, and regionMatches is specified to return true in that case.
782        if (first.length() != second.length()) {
783            return false;
784        }
785
786        int prevIndex = 0;
787        while (true) {
788            int index = first.indexOf('%', prevIndex);
789            int index1 = second.indexOf('%', prevIndex);
790            if (index != index1) {
791                return false;
792            }
793
794            // index == index1 from this point on.
795
796            if (index == -1) {
797                // No more escapes, match the remainder of the string
798                // normally.
799               return first.regionMatches(prevIndex, second, prevIndex,
800                       second.length() - prevIndex);
801            }
802
803            if (!first.regionMatches(prevIndex, second, prevIndex, (index - prevIndex))) {
804                return false;
805            }
806
807            if (!first.regionMatches(true /* ignore case */, index + 1, second, index + 1, 2)) {
808                return false;
809            }
810
811            index += 3;
812            prevIndex = index;
813        }
814    }
815
816    @Override public boolean equals(Object o) {
817        if (!(o instanceof URI)) {
818            return false;
819        }
820        URI uri = (URI) o;
821
822        if (uri.fragment == null && fragment != null || uri.fragment != null
823                && fragment == null) {
824            return false;
825        } else if (uri.fragment != null && fragment != null) {
826            if (!escapedEquals(uri.fragment, fragment)) {
827                return false;
828            }
829        }
830
831        if (uri.scheme == null && scheme != null || uri.scheme != null
832                && scheme == null) {
833            return false;
834        } else if (uri.scheme != null && scheme != null) {
835            if (!uri.scheme.equalsIgnoreCase(scheme)) {
836                return false;
837            }
838        }
839
840        if (uri.opaque && opaque) {
841            return escapedEquals(uri.schemeSpecificPart,
842                    schemeSpecificPart);
843        } else if (!uri.opaque && !opaque) {
844            if (!escapedEquals(path, uri.path)) {
845                return false;
846            }
847
848            if (uri.query != null && query == null || uri.query == null
849                    && query != null) {
850                return false;
851            } else if (uri.query != null && query != null) {
852                if (!escapedEquals(uri.query, query)) {
853                    return false;
854                }
855            }
856
857            if (uri.authority != null && authority == null
858                    || uri.authority == null && authority != null) {
859                return false;
860            } else if (uri.authority != null && authority != null) {
861                if (uri.host != null && host == null || uri.host == null
862                        && host != null) {
863                    return false;
864                } else if (uri.host == null && host == null) {
865                    // both are registry based, so compare the whole authority
866                    return escapedEquals(uri.authority, authority);
867                } else { // uri.host != null && host != null, so server-based
868                    if (!host.equalsIgnoreCase(uri.host)) {
869                        return false;
870                    }
871
872                    if (port != uri.port) {
873                        return false;
874                    }
875
876                    if (uri.userInfo != null && userInfo == null
877                            || uri.userInfo == null && userInfo != null) {
878                        return false;
879                    } else if (uri.userInfo != null && userInfo != null) {
880                        return escapedEquals(userInfo, uri.userInfo);
881                    } else {
882                        return true;
883                    }
884                }
885            } else {
886                // no authority
887                return true;
888            }
889
890        } else {
891            // one is opaque, the other hierarchical
892            return false;
893        }
894    }
895
896    /**
897     * Returns the scheme of this URI, or null if this URI has no scheme. This
898     * is also known as the protocol.
899     */
900    public String getScheme() {
901        return scheme;
902    }
903
904    /**
905     * Returns the decoded scheme-specific part of this URI, or null if this URI
906     * has no scheme-specific part.
907     */
908    public String getSchemeSpecificPart() {
909        return decode(schemeSpecificPart);
910    }
911
912    /**
913     * Returns the encoded scheme-specific part of this URI, or null if this URI
914     * has no scheme-specific part.
915     */
916    public String getRawSchemeSpecificPart() {
917        return schemeSpecificPart;
918    }
919
920    /**
921     * Returns the decoded authority part of this URI, or null if this URI has
922     * no authority.
923     */
924    public String getAuthority() {
925        return decode(authority);
926    }
927
928    /**
929     * Returns the encoded authority of this URI, or null if this URI has no
930     * authority.
931     */
932    public String getRawAuthority() {
933        return authority;
934    }
935
936    /**
937     * Returns the decoded user info of this URI, or null if this URI has no
938     * user info.
939     */
940    public String getUserInfo() {
941        return decode(userInfo);
942    }
943
944    /**
945     * Returns the encoded user info of this URI, or null if this URI has no
946     * user info.
947     */
948    public String getRawUserInfo() {
949        return userInfo;
950    }
951
952    /**
953     * Returns the host of this URI, or null if this URI has no host.
954     */
955    public String getHost() {
956        return host;
957    }
958
959    /**
960     * Returns the port number of this URI, or {@code -1} if this URI has no
961     * explicit port.
962     */
963    public int getPort() {
964        return port;
965    }
966
967    /** @hide */
968    public int getEffectivePort() {
969        return getEffectivePort(scheme, port);
970    }
971
972    /**
973     * Returns the port to use for {@code scheme} connections will use when
974     * {@link #getPort} returns {@code specifiedPort}.
975     *
976     * @hide
977     */
978    public static int getEffectivePort(String scheme, int specifiedPort) {
979        if (specifiedPort != -1) {
980            return specifiedPort;
981        }
982
983        if ("http".equalsIgnoreCase(scheme)) {
984            return 80;
985        } else if ("https".equalsIgnoreCase(scheme)) {
986            return 443;
987        } else {
988            return -1;
989        }
990    }
991
992    /**
993     * Returns the decoded path of this URI, or null if this URI has no path.
994     */
995    public String getPath() {
996        return decode(path);
997    }
998
999    /**
1000     * Returns the encoded path of this URI, or null if this URI has no path.
1001     */
1002    public String getRawPath() {
1003        return path;
1004    }
1005
1006    /**
1007     * Returns the decoded query of this URI, or null if this URI has no query.
1008     */
1009    public String getQuery() {
1010        return decode(query);
1011    }
1012
1013    /**
1014     * Returns the encoded query of this URI, or null if this URI has no query.
1015     */
1016    public String getRawQuery() {
1017        return query;
1018    }
1019
1020    /**
1021     * Returns the decoded fragment of this URI, or null if this URI has no
1022     * fragment.
1023     */
1024    public String getFragment() {
1025        return decode(fragment);
1026    }
1027
1028    /**
1029     * Gets the encoded fragment of this URI, or null if this URI has no
1030     * fragment.
1031     */
1032    public String getRawFragment() {
1033        return fragment;
1034    }
1035
1036    @Override public int hashCode() {
1037        if (hash == -1) {
1038            hash = getHashString().hashCode();
1039        }
1040        return hash;
1041    }
1042
1043    /**
1044     * Returns true if this URI is absolute, which means that a scheme is
1045     * defined.
1046     */
1047    public boolean isAbsolute() {
1048        // TODO: simplify to 'scheme != null' ?
1049        return absolute;
1050    }
1051
1052    /**
1053     * Returns true if this URI is opaque. Opaque URIs are absolute and have a
1054     * scheme-specific part that does not start with a slash character. All
1055     * parts except scheme, scheme-specific and fragment are undefined.
1056     */
1057    public boolean isOpaque() {
1058        return opaque;
1059    }
1060
1061    /**
1062     * Returns the normalized path.
1063     */
1064    private String normalize(String path, boolean discardRelativePrefix) {
1065        path = UrlUtils.canonicalizePath(path, discardRelativePrefix);
1066
1067        /*
1068         * If the path contains a colon before the first colon, prepend
1069         * "./" to differentiate the path from a scheme prefix.
1070         */
1071        int colon = path.indexOf(':');
1072        if (colon != -1) {
1073            int slash = path.indexOf('/');
1074            if (slash == -1 || colon < slash) {
1075                path = "./" + path;
1076            }
1077        }
1078
1079        return path;
1080    }
1081
1082    /**
1083     * Normalizes the path part of this URI.
1084     *
1085     * @return an URI object which represents this instance with a normalized
1086     *         path.
1087     */
1088    public URI normalize() {
1089        if (opaque) {
1090            return this;
1091        }
1092        String normalizedPath = normalize(path, false);
1093        // if the path is already normalized, return this
1094        if (path.equals(normalizedPath)) {
1095            return this;
1096        }
1097        // get an exact copy of the URI re-calculate the scheme specific part
1098        // since the path of the normalized URI is different from this URI.
1099        URI result = duplicate();
1100        result.path = normalizedPath;
1101        result.setSchemeSpecificPart();
1102        return result;
1103    }
1104
1105    /**
1106     * Tries to parse the authority component of this URI to divide it into the
1107     * host, port, and user-info. If this URI is already determined as a
1108     * ServerAuthority this instance will be returned without changes.
1109     *
1110     * @return this instance with the components of the parsed server authority.
1111     * @throws URISyntaxException
1112     *             if the authority part could not be parsed as a server-based
1113     *             authority.
1114     */
1115    public URI parseServerAuthority() throws URISyntaxException {
1116        if (!serverAuthority) {
1117            parseAuthority(true);
1118        }
1119        return this;
1120    }
1121
1122    /**
1123     * Makes the given URI {@code relative} to a relative URI against the URI
1124     * represented by this instance.
1125     *
1126     * @param relative
1127     *            the URI which has to be relativized against this URI.
1128     * @return the relative URI.
1129     */
1130    public URI relativize(URI relative) {
1131        if (relative.opaque || opaque) {
1132            return relative;
1133        }
1134
1135        if (scheme == null ? relative.scheme != null : !scheme
1136                .equals(relative.scheme)) {
1137            return relative;
1138        }
1139
1140        if (authority == null ? relative.authority != null : !authority
1141                .equals(relative.authority)) {
1142            return relative;
1143        }
1144
1145        // normalize both paths
1146        String thisPath = normalize(path, false);
1147        String relativePath = normalize(relative.path, false);
1148
1149        /*
1150         * if the paths aren't equal, then we need to determine if this URI's
1151         * path is a parent path (begins with) the relative URI's path
1152         */
1153        if (!thisPath.equals(relativePath)) {
1154            // drop everything after the last slash in this path
1155            thisPath = thisPath.substring(0, thisPath.lastIndexOf('/') + 1);
1156
1157            /*
1158             * if the relative URI's path doesn't start with this URI's path,
1159             * then just return the relative URI; the URIs have nothing in
1160             * common
1161             */
1162            if (!relativePath.startsWith(thisPath)) {
1163                return relative;
1164            }
1165        }
1166
1167        URI result = new URI();
1168        result.fragment = relative.fragment;
1169        result.query = relative.query;
1170        // the result URI is the remainder of the relative URI's path
1171        result.path = relativePath.substring(thisPath.length());
1172        result.setSchemeSpecificPart();
1173        return result;
1174    }
1175
1176    /**
1177     * Resolves the given URI {@code relative} against the URI represented by
1178     * this instance.
1179     *
1180     * @param relative
1181     *            the URI which has to be resolved against this URI.
1182     * @return the resolved URI.
1183     */
1184    public URI resolve(URI relative) {
1185        if (relative.absolute || opaque) {
1186            return relative;
1187        }
1188
1189        if (relative.authority != null) {
1190            // If the relative URI has an authority, the result is the relative
1191            // with this URI's scheme.
1192            URI result = relative.duplicate();
1193            result.scheme = scheme;
1194            result.absolute = absolute;
1195            return result;
1196        }
1197
1198        if (relative.path.isEmpty() && relative.scheme == null && relative.query == null) {
1199            // if the relative URI only consists of at most a fragment,
1200            URI result = duplicate();
1201            result.fragment = relative.fragment;
1202            return result;
1203        }
1204
1205        URI result = duplicate();
1206        result.fragment = relative.fragment;
1207        result.query = relative.query;
1208        String resolvedPath;
1209        if (relative.path.startsWith("/")) {
1210            // The relative URI has an absolute path; use it.
1211            resolvedPath = relative.path;
1212        } else if (relative.path.isEmpty()) {
1213            // The relative URI has no path; use the base path.
1214            resolvedPath = path;
1215        } else {
1216            // The relative URI has a relative path; combine the paths.
1217            int endIndex = path.lastIndexOf('/') + 1;
1218            resolvedPath = path.substring(0, endIndex) + relative.path;
1219        }
1220        result.path = UrlUtils.authoritySafePath(result.authority, normalize(resolvedPath, true));
1221        result.setSchemeSpecificPart();
1222        return result;
1223    }
1224
1225    /**
1226     * Helper method used to re-calculate the scheme specific part of the
1227     * resolved or normalized URIs
1228     */
1229    private void setSchemeSpecificPart() {
1230        // ssp = [//authority][path][?query]
1231        StringBuilder ssp = new StringBuilder();
1232        if (authority != null) {
1233            ssp.append("//" + authority);
1234        }
1235        if (path != null) {
1236            ssp.append(path);
1237        }
1238        if (query != null) {
1239            ssp.append("?" + query);
1240        }
1241        schemeSpecificPart = ssp.toString();
1242        // reset string, so that it can be re-calculated correctly when asked.
1243        string = null;
1244    }
1245
1246    /**
1247     * Creates a new URI instance by parsing the given string {@code relative}
1248     * and resolves the created URI against the URI represented by this
1249     * instance.
1250     *
1251     * @param relative
1252     *            the given string to create the new URI instance which has to
1253     *            be resolved later on.
1254     * @return the created and resolved URI.
1255     */
1256    public URI resolve(String relative) {
1257        return resolve(create(relative));
1258    }
1259
1260    private String decode(String s) {
1261        return s != null ? UriCodec.decode(s) : null;
1262    }
1263
1264    /**
1265     * Returns the textual string representation of this URI instance using the
1266     * US-ASCII encoding.
1267     *
1268     * @return the US-ASCII string representation of this URI.
1269     */
1270    public String toASCIIString() {
1271        StringBuilder result = new StringBuilder();
1272        ASCII_ONLY.appendEncoded(result, toString());
1273        return result.toString();
1274    }
1275
1276    /**
1277     * Returns the encoded URI.
1278     */
1279    @Override public String toString() {
1280        if (string != null) {
1281            return string;
1282        }
1283
1284        StringBuilder result = new StringBuilder();
1285        if (scheme != null) {
1286            result.append(scheme);
1287            result.append(':');
1288        }
1289        if (opaque) {
1290            result.append(schemeSpecificPart);
1291        } else {
1292            if (authority != null) {
1293                result.append("//");
1294                result.append(authority);
1295            }
1296
1297            if (path != null) {
1298                result.append(path);
1299            }
1300
1301            if (query != null) {
1302                result.append('?');
1303                result.append(query);
1304            }
1305        }
1306
1307        if (fragment != null) {
1308            result.append('#');
1309            result.append(fragment);
1310        }
1311
1312        string = result.toString();
1313        return string;
1314    }
1315
1316    /*
1317     * Form a string from the components of this URI, similarly to the
1318     * toString() method. But this method converts scheme and host to lowercase,
1319     * and converts escaped octets to lowercase.
1320     */
1321    private String getHashString() {
1322        StringBuilder result = new StringBuilder();
1323        if (scheme != null) {
1324            result.append(scheme.toLowerCase(Locale.US));
1325            result.append(':');
1326        }
1327        if (opaque) {
1328            result.append(schemeSpecificPart);
1329        } else {
1330            if (authority != null) {
1331                result.append("//");
1332                if (host == null) {
1333                    result.append(authority);
1334                } else {
1335                    if (userInfo != null) {
1336                        result.append(userInfo + "@");
1337                    }
1338                    result.append(host.toLowerCase(Locale.US));
1339                    if (port != -1) {
1340                        result.append(":" + port);
1341                    }
1342                }
1343            }
1344
1345            if (path != null) {
1346                result.append(path);
1347            }
1348
1349            if (query != null) {
1350                result.append('?');
1351                result.append(query);
1352            }
1353        }
1354
1355        if (fragment != null) {
1356            result.append('#');
1357            result.append(fragment);
1358        }
1359
1360        return convertHexToLowerCase(result.toString());
1361    }
1362
1363    /**
1364     * Converts this URI instance to a URL.
1365     *
1366     * @return the created URL representing the same resource as this URI.
1367     * @throws MalformedURLException
1368     *             if an error occurs while creating the URL or no protocol
1369     *             handler could be found.
1370     */
1371    public URL toURL() throws MalformedURLException {
1372        if (!absolute) {
1373            throw new IllegalArgumentException("URI is not absolute: " + toString());
1374        }
1375        return new URL(toString());
1376    }
1377
1378    private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
1379        in.defaultReadObject();
1380        try {
1381            parseURI(string, false);
1382        } catch (URISyntaxException e) {
1383            throw new IOException(e.toString());
1384        }
1385    }
1386
1387    private void writeObject(ObjectOutputStream out) throws IOException, ClassNotFoundException {
1388        // call toString() to ensure the value of string field is calculated
1389        toString();
1390        out.defaultWriteObject();
1391    }
1392}
1393