1/*
2 *  Licensed to the Apache Software Foundation (ASF) under one or more
3 *  contributor license agreements.  See the NOTICE file distributed with
4 *  this work for additional information regarding copyright ownership.
5 *  The ASF licenses this file to You under the Apache License, Version 2.0
6 *  (the "License"); you may not use this file except in compliance with
7 *  the License.  You may obtain a copy of the License at
8 *
9 *     http://www.apache.org/licenses/LICENSE-2.0
10 *
11 *  Unless required by applicable law or agreed to in writing, software
12 *  distributed under the License is distributed on an "AS IS" BASIS,
13 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 *  See the License for the specific language governing permissions and
15 *  limitations under the License.
16 */
17
18package java.net;
19
20import java.io.IOException;
21import java.io.ObjectInputStream;
22import java.io.ObjectOutputStream;
23import java.io.Serializable;
24import java.util.Locale;
25import libcore.net.UriCodec;
26import libcore.net.url.UrlUtils;
27
28/**
29 * A Uniform Resource Identifier that identifies an abstract or physical
30 * resource, as specified by <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC
31 * 2396</a>.
32 *
33 * <h3>Parts of a URI</h3>
34 * A URI is composed of many parts. This class can both parse URI strings into
35 * parts and compose URI strings from parts. For example, consider the parts of
36 * this URI:
37 * {@code http://username:password@host:8080/directory/file?query#fragment}
38 * <table>
39 * <tr><th>Component                                            </th><th>Example value                                                      </th><th>Also known as</th></tr>
40 * <tr><td>{@link #getScheme() Scheme}                          </td><td>{@code http}                                                       </td><td>protocol</td></tr>
41 * <tr><td>{@link #getSchemeSpecificPart() Scheme-specific part}</td><td>{@code //username:password@host:8080/directory/file?query#fragment}</td><td></td></tr>
42 * <tr><td>{@link #getAuthority() Authority}                    </td><td>{@code username:password@host:8080}                                </td><td></td></tr>
43 * <tr><td>{@link #getUserInfo() User Info}                     </td><td>{@code username:password}                                          </td><td></td></tr>
44 * <tr><td>{@link #getHost() Host}                              </td><td>{@code host}                                                       </td><td></td></tr>
45 * <tr><td>{@link #getPort() Port}                              </td><td>{@code 8080}                                                       </td><td></td></tr>
46 * <tr><td>{@link #getPath() Path}                              </td><td>{@code /directory/file}                                            </td><td></td></tr>
47 * <tr><td>{@link #getQuery() Query}                            </td><td>{@code query}                                                      </td><td></td></tr>
48 * <tr><td>{@link #getFragment() Fragment}                      </td><td>{@code fragment}                                                   </td><td>ref</td></tr>
49 * </table>
50 *
51 * <h3>Absolute vs. Relative URIs</h3>
52 * URIs are either {@link #isAbsolute() absolute or relative}.
53 * <ul>
54 *     <li><strong>Absolute:</strong> {@code http://android.com/robots.txt}
55 *     <li><strong>Relative:</strong> {@code robots.txt}
56 * </ul>
57 *
58 * <p>Absolute URIs always have a scheme. If its scheme is supported by {@link
59 * URL}, you can use {@link #toURL} to convert an absolute URI to a URL.
60 *
61 * <p>Relative URIs do not have a scheme and cannot be converted to URLs. If you
62 * have the absolute URI that a relative URI is relative to, you can use {@link
63 * #resolve} to compute the referenced absolute URI. Symmetrically, you can use
64 * {@link #relativize} to compute the relative URI from one URI to another.
65 * <pre>   {@code
66 *   URI absolute = new URI("http://android.com/");
67 *   URI relative = new URI("robots.txt");
68 *   URI resolved = new URI("http://android.com/robots.txt");
69 *
70 *   // print "http://android.com/robots.txt"
71 *   System.out.println(absolute.resolve(relative));
72 *
73 *   // print "robots.txt"
74 *   System.out.println(absolute.relativize(resolved));
75 * }</pre>
76 *
77 * <h3>Opaque vs. Hierarchical URIs</h3>
78 * Absolute URIs are either {@link #isOpaque() opaque or hierarchical}. Relative
79 * URIs are always hierarchical.
80 * <ul>
81 *     <li><strong>Hierarchical:</strong> {@code http://android.com/robots.txt}
82 *     <li><strong>Opaque:</strong> {@code mailto:robots@example.com}
83 * </ul>
84 *
 * <p>Opaque URIs have both a scheme and a scheme-specific part that does not
 * begin with the slash character: {@code /}. The contents of the
 * scheme-specific part of an opaque URI are not parsed, so an opaque URI never
 * has an authority, user info, host, port, path or query. An opaque URI may
 * have a fragment, however. A typical opaque URI is
 * {@code mailto:robots@example.com}.
91 * <table>
92 * <tr><th>Component           </th><th>Example value             </th></tr>
93 * <tr><td>Scheme              </td><td>{@code mailto}            </td></tr>
94 * <tr><td>Scheme-specific part</td><td>{@code robots@example.com}</td></tr>
95 * <tr><td>Fragment            </td><td>                          </td></tr>
96 * </table>
 * <p>Hierarchical URIs may have values for any URI component. They always
 * have a non-null path, though that path may be the empty string.
99 *
100 * <h3>Encoding and Decoding URI Components</h3>
101 * Each component of a URI permits a limited set of legal characters. Other
102 * characters must first be <i>encoded</i> before they can be embedded in a URI.
103 * To recover the original characters from a URI, they may be <i>decoded</i>.
104 * <strong>Contrary to what you might expect,</strong> this class uses the
105 * term <i>raw</i> to refer to encoded strings. The non-<i>raw</i> accessors
106 * return decoded strings. For example, consider how this URI is decoded:
107 * {@code http://user:pa55w%3Frd@host:80/doc%7Csearch?q=green%20robots#over%206%22}
108 * <table>
109 * <tr><th>Component           </th><th>Legal Characters                                                    </th><th>Other Constraints                                  </th><th>Raw Value                                                      </th><th>Value</th></tr>
110 * <tr><td>Scheme              </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code +-.}                  </td><td>First character must be in {@code a-z}, {@code A-Z}</td><td>                                                               </td><td>{@code http}</td></tr>
111 * <tr><td>Scheme-specific part</td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=?/[]@}</td><td>Non-ASCII characters okay                          </td><td>{@code //user:pa55w%3Frd@host:80/doc%7Csearch?q=green%20robots}</td><td>{@code //user:pa55w?rd@host:80/doc|search?q=green robots}</td></tr>
112 * <tr><td>Authority           </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=@[]}  </td><td>Non-ASCII characters okay                          </td><td>{@code user:pa55w%3Frd@host:80}                                </td><td>{@code user:pa55w?rd@host:80}</td></tr>
113 * <tr><td>User Info           </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=}     </td><td>Non-ASCII characters okay                          </td><td>{@code user:pa55w%3Frd}                                        </td><td>{@code user:pa55w?rd}</td></tr>
114 * <tr><td>Host                </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code -.[]}                 </td><td>Domain name, IPv4 address or [IPv6 address]        </td><td>                                                               </td><td>host</td></tr>
115 * <tr><td>Port                </td><td>{@code 0-9}                                                         </td><td>                                                   </td><td>                                                               </td><td>{@code 80}</td></tr>
116 * <tr><td>Path                </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=/@}   </td><td>Non-ASCII characters okay                          </td><td>{@code /doc%7Csearch}                                          </td><td>{@code /doc|search}</td></tr>
117 * <tr><td>Query               </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=?/[]@}</td><td>Non-ASCII characters okay                          </td><td>{@code q=green%20robots}                                       </td><td>{@code q=green robots}</td></tr>
118 * <tr><td>Fragment            </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=?/[]@}</td><td>Non-ASCII characters okay                          </td><td>{@code over%206%22}                                            </td><td>{@code over 6"}</td></tr>
119 * </table>
120 * A URI's host, port and scheme are not eligible for encoding and must not
121 * contain illegal characters.
122 *
123 * <p>To encode a URI, invoke any of the multiple-parameter constructors of this
124 * class. These constructors accept your original strings and encode them into
125 * their raw form.
126 *
127 * <p>To decode a URI, invoke the single-string constructor, and then use the
128 * appropriate accessor methods to get the decoded components.
129 *
130 * <p>The {@link URL} class can be used to retrieve resources by their URI.
131 */
132public final class URI implements Comparable<URI>, Serializable {
133
134    private static final long serialVersionUID = -6052424284110960213l;
135
136    static final String UNRESERVED = "_-!.~\'()*";
137    static final String PUNCTUATION = ",;:$&+=";
138
139    static final UriCodec USER_INFO_ENCODER = new PartEncoder("");
140    static final UriCodec PATH_ENCODER = new PartEncoder("/@");
141    static final UriCodec AUTHORITY_ENCODER = new PartEncoder("@[]");
142
143    /** for java.net.URL, which foolishly combines these two parts */
144    static final UriCodec FILE_AND_QUERY_ENCODER = new PartEncoder("/@?");
145
146    /** for query, fragment, and scheme-specific part */
147    static final UriCodec ALL_LEGAL_ENCODER = new PartEncoder("?/[]@");
148
149    /** Retains all ASCII chars including delimiters. */
150    private static final UriCodec ASCII_ONLY = new UriCodec() {
151        @Override protected boolean isRetained(char c) {
152            return c <= 127;
153        }
154    };
155
156    /**
157     * Encodes the unescaped characters of {@code s} that are not permitted.
158     * Permitted characters are:
159     * <ul>
160     *   <li>Unreserved characters in <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>.
161     *   <li>{@code extraOkayChars},
162     *   <li>non-ASCII, non-control, non-whitespace characters
163     * </ul>
164     */
165    private static class PartEncoder extends UriCodec {
166        private final String extraLegalCharacters;
167
168        PartEncoder(String extraLegalCharacters) {
169            this.extraLegalCharacters = extraLegalCharacters;
170        }
171
172        @Override protected boolean isRetained(char c) {
173            return UNRESERVED.indexOf(c) != -1
174                    || PUNCTUATION.indexOf(c) != -1
175                    || extraLegalCharacters.indexOf(c) != -1
176                    || (c > 127 && !Character.isSpaceChar(c) && !Character.isISOControl(c));
177        }
178    }
179
    /**
     * The URI text handed to {@code parseURI}. This is the only instance field
     * that is not declared {@code transient}.
     */
    private String string;

    // Components filled in by parseURI(); any of them may remain null.
    private transient String scheme;
    private transient String schemeSpecificPart;
    private transient String authority;

    // Filled in by parseAuthority() only when the authority parses as
    // [user-info@]host[:port]; otherwise they keep their defaults.
    private transient String userInfo;
    private transient String host;
    /** Port number; {@code -1} is the value used throughout this class for "no port". */
    private transient int port = -1;

    private transient String path;
    private transient String query;
    private transient String fragment;

    /** Set by parseURI(): true when a scheme is followed by a non-'/' character. */
    private transient boolean opaque;
    /** Set by parseURI(): true when the URI starts with a scheme. */
    private transient boolean absolute;
    /** Set to true by parseAuthority() once user info, host and port have been assigned. */
    private transient boolean serverAuthority = false;

    // NOTE(review): presumably a cached hash code with -1 meaning "not yet
    // computed" -- confirm against hashCode(), which is not shown here.
    private transient int hash = -1;

    /** Creates an instance with every component left at its default; used by duplicate(). */
    private URI() {}
197
    /**
     * Creates a new URI instance by parsing {@code spec}.
     *
     * <p>The authority is parsed leniently ({@code forceServer} is false): an
     * authority that cannot be split into user info, host and port may simply
     * leave those components unset instead of failing.
     *
     * @param spec a URI whose illegal characters have all been encoded.
     * @throws URISyntaxException if {@code spec} cannot be parsed as a URI.
     */
    public URI(String spec) throws URISyntaxException {
        parseURI(spec, false);
    }
206
207    /**
208     * Creates a new URI instance of the given unencoded component parts.
209     *
210     * @param scheme the URI scheme, or null for a non-absolute URI.
211     */
212    public URI(String scheme, String schemeSpecificPart, String fragment)
213            throws URISyntaxException {
214        StringBuilder uri = new StringBuilder();
215        if (scheme != null) {
216            uri.append(scheme);
217            uri.append(':');
218        }
219        if (schemeSpecificPart != null) {
220            ALL_LEGAL_ENCODER.appendEncoded(uri, schemeSpecificPart);
221        }
222        if (fragment != null) {
223            uri.append('#');
224            ALL_LEGAL_ENCODER.appendEncoded(uri, fragment);
225        }
226
227        parseURI(uri.toString(), false);
228    }
229
230    /**
231     * Creates a new URI instance of the given unencoded component parts.
232     *
233     * @param scheme the URI scheme, or null for a non-absolute URI.
234     */
235    public URI(String scheme, String userInfo, String host, int port, String path, String query,
236            String fragment) throws URISyntaxException {
237        if (scheme == null && userInfo == null && host == null && path == null
238                && query == null && fragment == null) {
239            this.path = "";
240            return;
241        }
242
243        if (scheme != null && path != null && !path.isEmpty() && path.charAt(0) != '/') {
244            throw new URISyntaxException(path, "Relative path");
245        }
246
247        StringBuilder uri = new StringBuilder();
248        if (scheme != null) {
249            uri.append(scheme);
250            uri.append(':');
251        }
252
253        if (userInfo != null || host != null || port != -1) {
254            uri.append("//");
255        }
256
257        if (userInfo != null) {
258            USER_INFO_ENCODER.appendEncoded(uri, userInfo);
259            uri.append('@');
260        }
261
262        if (host != null) {
263            // check for IPv6 addresses that hasn't been enclosed in square brackets
264            if (host.indexOf(':') != -1 && host.indexOf(']') == -1 && host.indexOf('[') == -1) {
265                host = "[" + host + "]";
266            }
267            uri.append(host);
268        }
269
270        if (port != -1) {
271            uri.append(':');
272            uri.append(port);
273        }
274
275        if (path != null) {
276            PATH_ENCODER.appendEncoded(uri, path);
277        }
278
279        if (query != null) {
280            uri.append('?');
281            ALL_LEGAL_ENCODER.appendEncoded(uri, query);
282        }
283
284        if (fragment != null) {
285            uri.append('#');
286            ALL_LEGAL_ENCODER.appendEncoded(uri, fragment);
287        }
288
289        parseURI(uri.toString(), true);
290    }
291
    /**
     * Creates a new URI instance of the given unencoded component parts.
     * Delegates to the seven-argument constructor, passing no user info, no
     * port ({@code -1}) and no query.
     *
     * @param scheme the URI scheme, or null for a non-absolute URI.
     * @param host the host, or null for none.
     * @param path the unencoded path, or null for none.
     * @param fragment the unencoded fragment, or null for none.
     * @throws URISyntaxException if the seven-argument constructor rejects
     *     the given parts.
     */
    public URI(String scheme, String host, String path, String fragment) throws URISyntaxException {
        this(scheme, null, host, -1, path, null, fragment);
    }
300
301    /**
302     * Creates a new URI instance of the given unencoded component parts.
303     *
304     * @param scheme the URI scheme, or null for a non-absolute URI.
305     */
306    public URI(String scheme, String authority, String path, String query,
307            String fragment) throws URISyntaxException {
308        if (scheme != null && path != null && !path.isEmpty() && path.charAt(0) != '/') {
309            throw new URISyntaxException(path, "Relative path");
310        }
311
312        StringBuilder uri = new StringBuilder();
313        if (scheme != null) {
314            uri.append(scheme);
315            uri.append(':');
316        }
317        if (authority != null) {
318            uri.append("//");
319            AUTHORITY_ENCODER.appendEncoded(uri, authority);
320        }
321
322        if (path != null) {
323            PATH_ENCODER.appendEncoded(uri, path);
324        }
325        if (query != null) {
326            uri.append('?');
327            ALL_LEGAL_ENCODER.appendEncoded(uri, query);
328        }
329        if (fragment != null) {
330            uri.append('#');
331            ALL_LEGAL_ENCODER.appendEncoded(uri, fragment);
332        }
333
334        parseURI(uri.toString(), false);
335    }
336
337    /**
338     * Breaks uri into its component parts. This first splits URI into scheme,
339     * scheme-specific part and fragment:
340     *   [scheme:][scheme-specific part][#fragment]
341     *
342     * Then it breaks the scheme-specific part into authority, path and query:
343     *   [//authority][path][?query]
344     *
345     * Finally it delegates to parseAuthority to break the authority into user
346     * info, host and port:
347     *   [user-info@][host][:port]
348     */
349    private void parseURI(String uri, boolean forceServer) throws URISyntaxException {
350        string = uri;
351
352        // "#fragment"
353        int fragmentStart = UrlUtils.findFirstOf(uri, "#", 0, uri.length());
354        if (fragmentStart < uri.length()) {
355            fragment = ALL_LEGAL_ENCODER.validate(uri, fragmentStart + 1, uri.length(), "fragment");
356        }
357
358        // scheme:
359        int start;
360        int colon = UrlUtils.findFirstOf(uri, ":", 0, fragmentStart);
361        if (colon < UrlUtils.findFirstOf(uri, "/?#", 0, fragmentStart)) {
362            absolute = true;
363            scheme = validateScheme(uri, colon);
364            start = colon + 1;
365
366            if (start == fragmentStart) {
367                throw new URISyntaxException(uri, "Scheme-specific part expected", start);
368            }
369
370            // URIs with schemes followed by a non-/ char are opaque and need no further parsing.
371            if (!uri.regionMatches(start, "/", 0, 1)) {
372                opaque = true;
373                schemeSpecificPart = ALL_LEGAL_ENCODER.validate(
374                        uri, start, fragmentStart, "scheme specific part");
375                return;
376            }
377        } else {
378            absolute = false;
379            start = 0;
380        }
381
382        opaque = false;
383        schemeSpecificPart = uri.substring(start, fragmentStart);
384
385        // "//authority"
386        int fileStart;
387        if (uri.regionMatches(start, "//", 0, 2)) {
388            int authorityStart = start + 2;
389            fileStart = UrlUtils.findFirstOf(uri, "/?", authorityStart, fragmentStart);
390            if (authorityStart == uri.length()) {
391                throw new URISyntaxException(uri, "Authority expected", uri.length());
392            }
393            if (authorityStart < fileStart) {
394                authority = AUTHORITY_ENCODER.validate(uri, authorityStart, fileStart, "authority");
395            }
396        } else {
397            fileStart = start;
398        }
399
400        // "path"
401        int queryStart = UrlUtils.findFirstOf(uri, "?", fileStart, fragmentStart);
402        path = PATH_ENCODER.validate(uri, fileStart, queryStart, "path");
403
404        // "?query"
405        if (queryStart < fragmentStart) {
406            query = ALL_LEGAL_ENCODER.validate(uri, queryStart + 1, fragmentStart, "query");
407        }
408
409        parseAuthority(forceServer);
410    }
411
412    private String validateScheme(String uri, int end) throws URISyntaxException {
413        if (end == 0) {
414            throw new URISyntaxException(uri, "Scheme expected", 0);
415        }
416
417        for (int i = 0; i < end; i++) {
418            if (!UrlUtils.isValidSchemeChar(i, uri.charAt(i))) {
419                throw new URISyntaxException(uri, "Illegal character in scheme", 0);
420            }
421        }
422
423        return uri.substring(0, end);
424    }
425
426    /**
427     * Breaks this URI's authority into user info, host and port parts.
428     *   [user-info@][host][:port]
429     * If any part of this fails this method will give up and potentially leave
430     * these fields with their default values.
431     *
432     * @param forceServer true to always throw if the authority cannot be
433     *     parsed. If false, this method may still throw for some kinds of
434     *     errors; this unpredictable behavior is consistent with the RI.
435     */
436    private void parseAuthority(boolean forceServer) throws URISyntaxException {
437        if (authority == null) {
438            return;
439        }
440
441        String tempUserInfo = null;
442        String temp = authority;
443        int index = temp.indexOf('@');
444        int hostIndex = 0;
445        if (index != -1) {
446            // remove user info
447            tempUserInfo = temp.substring(0, index);
448            validateUserInfo(authority, tempUserInfo, 0);
449            temp = temp.substring(index + 1); // host[:port] is left
450            hostIndex = index + 1;
451        }
452
453        index = temp.lastIndexOf(':');
454        int endIndex = temp.indexOf(']');
455
456        String tempHost;
457        int tempPort = -1;
458        if (index != -1 && endIndex < index) {
459            // determine port and host
460            tempHost = temp.substring(0, index);
461
462            if (index < (temp.length() - 1)) { // port part is not empty
463                try {
464                    char firstPortChar = temp.charAt(index + 1);
465                    if (firstPortChar >= '0' && firstPortChar <= '9') {
466                        // allow only digits, no signs
467                        tempPort = Integer.parseInt(temp.substring(index + 1));
468                    } else {
469                        if (forceServer) {
470                            throw new URISyntaxException(authority,
471                                "Invalid port number", hostIndex + index + 1);
472                        }
473                        return;
474                    }
475                } catch (NumberFormatException e) {
476                    if (forceServer) {
477                        throw new URISyntaxException(authority,
478                                "Invalid port number", hostIndex + index + 1);
479                    }
480                    return;
481                }
482            }
483        } else {
484            tempHost = temp;
485        }
486
487        if (tempHost.isEmpty()) {
488            if (forceServer) {
489                throw new URISyntaxException(authority, "Expected host", hostIndex);
490            }
491            return;
492        }
493
494        if (!isValidHost(forceServer, tempHost)) {
495            return;
496        }
497
498        // this is a server based uri,
499        // fill in the userInfo, host and port fields
500        userInfo = tempUserInfo;
501        host = tempHost;
502        port = tempPort;
503        serverAuthority = true;
504    }
505
506    private void validateUserInfo(String uri, String userInfo, int index)
507            throws URISyntaxException {
508        for (int i = 0; i < userInfo.length(); i++) {
509            char ch = userInfo.charAt(i);
510            if (ch == ']' || ch == '[') {
511                throw new URISyntaxException(uri, "Illegal character in userInfo", index + i);
512            }
513        }
514    }
515
516    /**
517     * Returns true if {@code host} is a well-formed host name or IP address.
518     *
519     * @param forceServer true to always throw if the host cannot be parsed. If
520     *     false, this method may still throw for some kinds of errors; this
521     *     unpredictable behavior is consistent with the RI.
522     */
523    private boolean isValidHost(boolean forceServer, String host) throws URISyntaxException {
524        if (host.startsWith("[")) {
525            // IPv6 address
526            if (!host.endsWith("]")) {
527                throw new URISyntaxException(host,
528                        "Expected a closing square bracket for IPv6 address", 0);
529            }
530            if (InetAddress.isNumeric(host)) {
531                // If it's numeric, the presence of square brackets guarantees
532                // that it's a numeric IPv6 address.
533                return true;
534            }
535            throw new URISyntaxException(host, "Malformed IPv6 address");
536        }
537
538        // '[' and ']' can only be the first char and last char
539        // of the host name
540        if (host.indexOf('[') != -1 || host.indexOf(']') != -1) {
541            throw new URISyntaxException(host, "Illegal character in host name", 0);
542        }
543
544        int index = host.lastIndexOf('.');
545        if (index < 0 || index == host.length() - 1
546                || !Character.isDigit(host.charAt(index + 1))) {
547            // domain name
548            if (isValidDomainName(host)) {
549                return true;
550            }
551            if (forceServer) {
552                throw new URISyntaxException(host, "Illegal character in host name", 0);
553            }
554            return false;
555        }
556
557        // IPv4 address?
558        try {
559            InetAddress ia = InetAddress.parseNumericAddress(host);
560            if (ia instanceof Inet4Address) {
561                return true;
562            }
563        } catch (IllegalArgumentException ignored) {
564        }
565
566        if (forceServer) {
567            throw new URISyntaxException(host, "Malformed IPv4 address", 0);
568        }
569        return false;
570    }
571
572    private boolean isValidDomainName(String host) {
573        try {
574            // The RFCs don't permit underscores in hostnames, but URI has to because
575            // a certain large website doesn't seem to care about standards and specs.
576            // See bugs 18023709, 17579865 and 18016625.
577            UriCodec.validateSimple(host, "_-.");
578        } catch (URISyntaxException e) {
579            return false;
580        }
581
582        String lastLabel = null;
583        for (String token : host.split("\\.")) {
584            lastLabel = token;
585            if (lastLabel.startsWith("-") || lastLabel.endsWith("-")) {
586                return false;
587            }
588        }
589
590        if (lastLabel == null) {
591            return false;
592        }
593
594        if (!lastLabel.equals(host)) {
595            char ch = lastLabel.charAt(0);
596            if (ch >= '0' && ch <= '9') {
597                return false;
598            }
599        }
600        return true;
601    }
602
603    /**
604     * Compares this URI with the given argument {@code uri}. This method will
605     * return a negative value if this URI instance is less than the given
606     * argument and a positive value if this URI instance is greater than the
607     * given argument. The return value {@code 0} indicates that the two
608     * instances represent the same URI. To define the order the single parts of
609     * the URI are compared with each other. String components will be ordered
610     * in the natural case-sensitive way. A hierarchical URI is less than an
611     * opaque URI and if one part is {@code null} the URI with the undefined
612     * part is less than the other one.
613     *
614     * @param uri
615     *            the URI this instance has to compare with.
616     * @return the value representing the order of the two instances.
617     */
618    public int compareTo(URI uri) {
619        int ret;
620
621        // compare schemes
622        if (scheme == null && uri.scheme != null) {
623            return -1;
624        } else if (scheme != null && uri.scheme == null) {
625            return 1;
626        } else if (scheme != null && uri.scheme != null) {
627            ret = scheme.compareToIgnoreCase(uri.scheme);
628            if (ret != 0) {
629                return ret;
630            }
631        }
632
633        // compare opacities
634        if (!opaque && uri.opaque) {
635            return -1;
636        } else if (opaque && !uri.opaque) {
637            return 1;
638        } else if (opaque && uri.opaque) {
639            ret = schemeSpecificPart.compareTo(uri.schemeSpecificPart);
640            if (ret != 0) {
641                return ret;
642            }
643        } else {
644
645            // otherwise both must be hierarchical
646
647            // compare authorities
648            if (authority != null && uri.authority == null) {
649                return 1;
650            } else if (authority == null && uri.authority != null) {
651                return -1;
652            } else if (authority != null && uri.authority != null) {
653                if (host != null && uri.host != null) {
654                    // both are server based, so compare userInfo, host, port
655                    if (userInfo != null && uri.userInfo == null) {
656                        return 1;
657                    } else if (userInfo == null && uri.userInfo != null) {
658                        return -1;
659                    } else if (userInfo != null && uri.userInfo != null) {
660                        ret = userInfo.compareTo(uri.userInfo);
661                        if (ret != 0) {
662                            return ret;
663                        }
664                    }
665
666                    // userInfo's are the same, compare hostname
667                    ret = host.compareToIgnoreCase(uri.host);
668                    if (ret != 0) {
669                        return ret;
670                    }
671
672                    // compare port
673                    if (port != uri.port) {
674                        return port - uri.port;
675                    }
676                } else { // one or both are registry based, compare the whole
677                    // authority
678                    ret = authority.compareTo(uri.authority);
679                    if (ret != 0) {
680                        return ret;
681                    }
682                }
683            }
684
685            // authorities are the same
686            // compare paths
687            ret = path.compareTo(uri.path);
688            if (ret != 0) {
689                return ret;
690            }
691
692            // compare queries
693
694            if (query != null && uri.query == null) {
695                return 1;
696            } else if (query == null && uri.query != null) {
697                return -1;
698            } else if (query != null && uri.query != null) {
699                ret = query.compareTo(uri.query);
700                if (ret != 0) {
701                    return ret;
702                }
703            }
704        }
705
706        // everything else is identical, so compare fragments
707        if (fragment != null && uri.fragment == null) {
708            return 1;
709        } else if (fragment == null && uri.fragment != null) {
710            return -1;
711        } else if (fragment != null && uri.fragment != null) {
712            ret = fragment.compareTo(uri.fragment);
713            if (ret != 0) {
714                return ret;
715            }
716        }
717
718        // identical
719        return 0;
720    }
721
722    /**
723     * Returns the URI formed by parsing {@code uri}. This method behaves
724     * identically to the string constructor but throws a different exception
725     * on failure. The constructor fails with a checked {@link
726     * URISyntaxException}; this method fails with an unchecked {@link
727     * IllegalArgumentException}.
728     */
729    public static URI create(String uri) {
730        try {
731            return new URI(uri);
732        } catch (URISyntaxException e) {
733            throw new IllegalArgumentException(e.getMessage());
734        }
735    }
736
737    private URI duplicate() {
738        URI clone = new URI();
739        clone.absolute = absolute;
740        clone.authority = authority;
741        clone.fragment = fragment;
742        clone.host = host;
743        clone.opaque = opaque;
744        clone.path = path;
745        clone.port = port;
746        clone.query = query;
747        clone.scheme = scheme;
748        clone.schemeSpecificPart = schemeSpecificPart;
749        clone.userInfo = userInfo;
750        clone.serverAuthority = serverAuthority;
751        return clone;
752    }
753
754    /*
755     * Takes a string that may contain hex sequences like %F1 or %2b and
756     * converts the hex values following the '%' to lowercase
757     */
758    private String convertHexToLowerCase(String s) {
759        StringBuilder result = new StringBuilder("");
760        if (s.indexOf('%') == -1) {
761            return s;
762        }
763
764        int index, prevIndex = 0;
765        while ((index = s.indexOf('%', prevIndex)) != -1) {
766            result.append(s.substring(prevIndex, index + 1));
767            result.append(s.substring(index + 1, index + 3).toLowerCase(Locale.US));
768            index += 3;
769            prevIndex = index;
770        }
771        return result.toString();
772    }
773
774    /**
775     * Returns true if the given URI escaped strings {@code first} and {@code second} are
776     * equal.
777     *
778     * TODO: This method assumes that both strings are escaped using the same escape rules
779     * yet it still performs case insensitive comparison of the escaped sequences.
780     * Why is this necessary ? We can just replace it with first.equals(second)
781     * otherwise.
782     */
783    private boolean escapedEquals(String first, String second) {
784        // This length test isn't a micro-optimization. We need it because we sometimes
785        // calculate the number of characters to match based on the length of the second
786        // string. If the second string is shorter than the first, we might attempt to match
787        // 0 chars, and regionMatches is specified to return true in that case.
788        if (first.length() != second.length()) {
789            return false;
790        }
791
792        int prevIndex = 0;
793        while (true) {
794            int index = first.indexOf('%', prevIndex);
795            int index1 = second.indexOf('%', prevIndex);
796            if (index != index1) {
797                return false;
798            }
799
800            // index == index1 from this point on.
801
802            if (index == -1) {
803                // No more escapes, match the remainder of the string
804                // normally.
805               return first.regionMatches(prevIndex, second, prevIndex,
806                       second.length() - prevIndex);
807            }
808
809            if (!first.regionMatches(prevIndex, second, prevIndex, (index - prevIndex))) {
810                return false;
811            }
812
813            if (!first.regionMatches(true /* ignore case */, index + 1, second, index + 1, 2)) {
814                return false;
815            }
816
817            index += 3;
818            prevIndex = index;
819        }
820    }
821
822    @Override public boolean equals(Object o) {
823        if (!(o instanceof URI)) {
824            return false;
825        }
826        URI uri = (URI) o;
827
828        if (uri.fragment == null && fragment != null || uri.fragment != null
829                && fragment == null) {
830            return false;
831        } else if (uri.fragment != null && fragment != null) {
832            if (!escapedEquals(uri.fragment, fragment)) {
833                return false;
834            }
835        }
836
837        if (uri.scheme == null && scheme != null || uri.scheme != null
838                && scheme == null) {
839            return false;
840        } else if (uri.scheme != null && scheme != null) {
841            if (!uri.scheme.equalsIgnoreCase(scheme)) {
842                return false;
843            }
844        }
845
846        if (uri.opaque && opaque) {
847            return escapedEquals(uri.schemeSpecificPart,
848                    schemeSpecificPart);
849        } else if (!uri.opaque && !opaque) {
850            if (!escapedEquals(path, uri.path)) {
851                return false;
852            }
853
854            if (uri.query != null && query == null || uri.query == null
855                    && query != null) {
856                return false;
857            } else if (uri.query != null && query != null) {
858                if (!escapedEquals(uri.query, query)) {
859                    return false;
860                }
861            }
862
863            if (uri.authority != null && authority == null
864                    || uri.authority == null && authority != null) {
865                return false;
866            } else if (uri.authority != null && authority != null) {
867                if (uri.host != null && host == null || uri.host == null
868                        && host != null) {
869                    return false;
870                } else if (uri.host == null && host == null) {
871                    // both are registry based, so compare the whole authority
872                    return escapedEquals(uri.authority, authority);
873                } else { // uri.host != null && host != null, so server-based
874                    if (!host.equalsIgnoreCase(uri.host)) {
875                        return false;
876                    }
877
878                    if (port != uri.port) {
879                        return false;
880                    }
881
882                    if (uri.userInfo != null && userInfo == null
883                            || uri.userInfo == null && userInfo != null) {
884                        return false;
885                    } else if (uri.userInfo != null && userInfo != null) {
886                        return escapedEquals(userInfo, uri.userInfo);
887                    } else {
888                        return true;
889                    }
890                }
891            } else {
892                // no authority
893                return true;
894            }
895
896        } else {
897            // one is opaque, the other hierarchical
898            return false;
899        }
900    }
901
902    /**
903     * Returns the scheme of this URI, or null if this URI has no scheme. This
904     * is also known as the protocol.
905     */
906    public String getScheme() {
907        return scheme;
908    }
909
910    /**
911     * Returns the decoded scheme-specific part of this URI, or null if this URI
912     * has no scheme-specific part.
913     */
914    public String getSchemeSpecificPart() {
915        return decode(schemeSpecificPart);
916    }
917
918    /**
919     * Returns the encoded scheme-specific part of this URI, or null if this URI
920     * has no scheme-specific part.
921     */
922    public String getRawSchemeSpecificPart() {
923        return schemeSpecificPart;
924    }
925
926    /**
927     * Returns the decoded authority part of this URI, or null if this URI has
928     * no authority.
929     */
930    public String getAuthority() {
931        return decode(authority);
932    }
933
934    /**
935     * Returns the encoded authority of this URI, or null if this URI has no
936     * authority.
937     */
938    public String getRawAuthority() {
939        return authority;
940    }
941
942    /**
943     * Returns the decoded user info of this URI, or null if this URI has no
944     * user info.
945     */
946    public String getUserInfo() {
947        return decode(userInfo);
948    }
949
950    /**
951     * Returns the encoded user info of this URI, or null if this URI has no
952     * user info.
953     */
954    public String getRawUserInfo() {
955        return userInfo;
956    }
957
958    /**
959     * Returns the host of this URI, or null if this URI has no host.
960     */
961    public String getHost() {
962        return host;
963    }
964
965    /**
966     * Returns the port number of this URI, or {@code -1} if this URI has no
967     * explicit port.
968     */
969    public int getPort() {
970        return port;
971    }
972
973    /** @hide */
974    public int getEffectivePort() {
975        return getEffectivePort(scheme, port);
976    }
977
978    /**
979     * Returns the port to use for {@code scheme} connections will use when
980     * {@link #getPort} returns {@code specifiedPort}.
981     *
982     * @hide
983     */
984    public static int getEffectivePort(String scheme, int specifiedPort) {
985        if (specifiedPort != -1) {
986            return specifiedPort;
987        }
988
989        if ("http".equalsIgnoreCase(scheme)) {
990            return 80;
991        } else if ("https".equalsIgnoreCase(scheme)) {
992            return 443;
993        } else {
994            return -1;
995        }
996    }
997
998    /**
999     * Returns the decoded path of this URI, or null if this URI has no path.
1000     */
1001    public String getPath() {
1002        return decode(path);
1003    }
1004
1005    /**
1006     * Returns the encoded path of this URI, or null if this URI has no path.
1007     */
1008    public String getRawPath() {
1009        return path;
1010    }
1011
1012    /**
1013     * Returns the decoded query of this URI, or null if this URI has no query.
1014     */
1015    public String getQuery() {
1016        return decode(query);
1017    }
1018
1019    /**
1020     * Returns the encoded query of this URI, or null if this URI has no query.
1021     */
1022    public String getRawQuery() {
1023        return query;
1024    }
1025
1026    /**
1027     * Returns the decoded fragment of this URI, or null if this URI has no
1028     * fragment.
1029     */
1030    public String getFragment() {
1031        return decode(fragment);
1032    }
1033
1034    /**
1035     * Gets the encoded fragment of this URI, or null if this URI has no
1036     * fragment.
1037     */
1038    public String getRawFragment() {
1039        return fragment;
1040    }
1041
1042    @Override public int hashCode() {
1043        if (hash == -1) {
1044            hash = getHashString().hashCode();
1045        }
1046        return hash;
1047    }
1048
1049    /**
1050     * Returns true if this URI is absolute, which means that a scheme is
1051     * defined.
1052     */
1053    public boolean isAbsolute() {
1054        // TODO: simplify to 'scheme != null' ?
1055        return absolute;
1056    }
1057
1058    /**
1059     * Returns true if this URI is opaque. Opaque URIs are absolute and have a
1060     * scheme-specific part that does not start with a slash character. All
1061     * parts except scheme, scheme-specific and fragment are undefined.
1062     */
1063    public boolean isOpaque() {
1064        return opaque;
1065    }
1066
1067    /**
1068     * Returns the normalized path.
1069     */
1070    private String normalize(String path, boolean discardRelativePrefix) {
1071        path = UrlUtils.canonicalizePath(path, discardRelativePrefix);
1072
1073        /*
1074         * If the path contains a colon before the first colon, prepend
1075         * "./" to differentiate the path from a scheme prefix.
1076         */
1077        int colon = path.indexOf(':');
1078        if (colon != -1) {
1079            int slash = path.indexOf('/');
1080            if (slash == -1 || colon < slash) {
1081                path = "./" + path;
1082            }
1083        }
1084
1085        return path;
1086    }
1087
1088    /**
1089     * Normalizes the path part of this URI.
1090     *
1091     * @return an URI object which represents this instance with a normalized
1092     *         path.
1093     */
1094    public URI normalize() {
1095        if (opaque) {
1096            return this;
1097        }
1098        String normalizedPath = normalize(path, false);
1099        // if the path is already normalized, return this
1100        if (path.equals(normalizedPath)) {
1101            return this;
1102        }
1103        // get an exact copy of the URI re-calculate the scheme specific part
1104        // since the path of the normalized URI is different from this URI.
1105        URI result = duplicate();
1106        result.path = normalizedPath;
1107        result.setSchemeSpecificPart();
1108        return result;
1109    }
1110
1111    /**
1112     * Tries to parse the authority component of this URI to divide it into the
1113     * host, port, and user-info. If this URI is already determined as a
1114     * ServerAuthority this instance will be returned without changes.
1115     *
1116     * @return this instance with the components of the parsed server authority.
1117     * @throws URISyntaxException
1118     *             if the authority part could not be parsed as a server-based
1119     *             authority.
1120     */
1121    public URI parseServerAuthority() throws URISyntaxException {
1122        if (!serverAuthority) {
1123            parseAuthority(true);
1124        }
1125        return this;
1126    }
1127
1128    /**
1129     * Makes the given URI {@code relative} to a relative URI against the URI
1130     * represented by this instance.
1131     *
1132     * @param relative
1133     *            the URI which has to be relativized against this URI.
1134     * @return the relative URI.
1135     */
1136    public URI relativize(URI relative) {
1137        if (relative.opaque || opaque) {
1138            return relative;
1139        }
1140
1141        if (scheme == null ? relative.scheme != null : !scheme
1142                .equals(relative.scheme)) {
1143            return relative;
1144        }
1145
1146        if (authority == null ? relative.authority != null : !authority
1147                .equals(relative.authority)) {
1148            return relative;
1149        }
1150
1151        // normalize both paths
1152        String thisPath = normalize(path, false);
1153        String relativePath = normalize(relative.path, false);
1154
1155        /*
1156         * if the paths aren't equal, then we need to determine if this URI's
1157         * path is a parent path (begins with) the relative URI's path
1158         */
1159        if (!thisPath.equals(relativePath)) {
1160            // drop everything after the last slash in this path
1161            thisPath = thisPath.substring(0, thisPath.lastIndexOf('/') + 1);
1162
1163            /*
1164             * if the relative URI's path doesn't start with this URI's path,
1165             * then just return the relative URI; the URIs have nothing in
1166             * common
1167             */
1168            if (!relativePath.startsWith(thisPath)) {
1169                return relative;
1170            }
1171        }
1172
1173        URI result = new URI();
1174        result.fragment = relative.fragment;
1175        result.query = relative.query;
1176        // the result URI is the remainder of the relative URI's path
1177        result.path = relativePath.substring(thisPath.length());
1178        result.setSchemeSpecificPart();
1179        return result;
1180    }
1181
1182    /**
1183     * Resolves the given URI {@code relative} against the URI represented by
1184     * this instance.
1185     *
1186     * @param relative
1187     *            the URI which has to be resolved against this URI.
1188     * @return the resolved URI.
1189     */
1190    public URI resolve(URI relative) {
1191        if (relative.absolute || opaque) {
1192            return relative;
1193        }
1194
1195        if (relative.authority != null) {
1196            // If the relative URI has an authority, the result is the relative
1197            // with this URI's scheme.
1198            URI result = relative.duplicate();
1199            result.scheme = scheme;
1200            result.absolute = absolute;
1201            return result;
1202        }
1203
1204        if (relative.path.isEmpty() && relative.scheme == null && relative.query == null) {
1205            // if the relative URI only consists of at most a fragment,
1206            URI result = duplicate();
1207            result.fragment = relative.fragment;
1208            return result;
1209        }
1210
1211        URI result = duplicate();
1212        result.fragment = relative.fragment;
1213        result.query = relative.query;
1214        String resolvedPath;
1215        if (relative.path.startsWith("/")) {
1216            // The relative URI has an absolute path; use it.
1217            resolvedPath = relative.path;
1218        } else if (relative.path.isEmpty()) {
1219            // The relative URI has no path; use the base path.
1220            resolvedPath = path;
1221        } else {
1222            // The relative URI has a relative path; combine the paths.
1223            int endIndex = path.lastIndexOf('/') + 1;
1224            resolvedPath = path.substring(0, endIndex) + relative.path;
1225        }
1226        result.path = UrlUtils.authoritySafePath(result.authority, normalize(resolvedPath, true));
1227        result.setSchemeSpecificPart();
1228        return result;
1229    }
1230
1231    /**
1232     * Helper method used to re-calculate the scheme specific part of the
1233     * resolved or normalized URIs
1234     */
1235    private void setSchemeSpecificPart() {
1236        // ssp = [//authority][path][?query]
1237        StringBuilder ssp = new StringBuilder();
1238        if (authority != null) {
1239            ssp.append("//" + authority);
1240        }
1241        if (path != null) {
1242            ssp.append(path);
1243        }
1244        if (query != null) {
1245            ssp.append("?" + query);
1246        }
1247        schemeSpecificPart = ssp.toString();
1248        // reset string, so that it can be re-calculated correctly when asked.
1249        string = null;
1250    }
1251
1252    /**
1253     * Creates a new URI instance by parsing the given string {@code relative}
1254     * and resolves the created URI against the URI represented by this
1255     * instance.
1256     *
1257     * @param relative
1258     *            the given string to create the new URI instance which has to
1259     *            be resolved later on.
1260     * @return the created and resolved URI.
1261     */
1262    public URI resolve(String relative) {
1263        return resolve(create(relative));
1264    }
1265
1266    private String decode(String s) {
1267        return s != null ? UriCodec.decode(s) : null;
1268    }
1269
1270    /**
1271     * Returns the textual string representation of this URI instance using the
1272     * US-ASCII encoding.
1273     *
1274     * @return the US-ASCII string representation of this URI.
1275     */
1276    public String toASCIIString() {
1277        StringBuilder result = new StringBuilder();
1278        ASCII_ONLY.appendEncoded(result, toString());
1279        return result.toString();
1280    }
1281
1282    /**
1283     * Returns the encoded URI.
1284     */
1285    @Override public String toString() {
1286        if (string != null) {
1287            return string;
1288        }
1289
1290        StringBuilder result = new StringBuilder();
1291        if (scheme != null) {
1292            result.append(scheme);
1293            result.append(':');
1294        }
1295        if (opaque) {
1296            result.append(schemeSpecificPart);
1297        } else {
1298            if (authority != null) {
1299                result.append("//");
1300                result.append(authority);
1301            }
1302
1303            if (path != null) {
1304                result.append(path);
1305            }
1306
1307            if (query != null) {
1308                result.append('?');
1309                result.append(query);
1310            }
1311        }
1312
1313        if (fragment != null) {
1314            result.append('#');
1315            result.append(fragment);
1316        }
1317
1318        string = result.toString();
1319        return string;
1320    }
1321
1322    /*
1323     * Form a string from the components of this URI, similarly to the
1324     * toString() method. But this method converts scheme and host to lowercase,
1325     * and converts escaped octets to lowercase.
1326     */
1327    private String getHashString() {
1328        StringBuilder result = new StringBuilder();
1329        if (scheme != null) {
1330            result.append(scheme.toLowerCase(Locale.US));
1331            result.append(':');
1332        }
1333        if (opaque) {
1334            result.append(schemeSpecificPart);
1335        } else {
1336            if (authority != null) {
1337                result.append("//");
1338                if (host == null) {
1339                    result.append(authority);
1340                } else {
1341                    if (userInfo != null) {
1342                        result.append(userInfo + "@");
1343                    }
1344                    result.append(host.toLowerCase(Locale.US));
1345                    if (port != -1) {
1346                        result.append(":" + port);
1347                    }
1348                }
1349            }
1350
1351            if (path != null) {
1352                result.append(path);
1353            }
1354
1355            if (query != null) {
1356                result.append('?');
1357                result.append(query);
1358            }
1359        }
1360
1361        if (fragment != null) {
1362            result.append('#');
1363            result.append(fragment);
1364        }
1365
1366        return convertHexToLowerCase(result.toString());
1367    }
1368
1369    /**
1370     * Converts this URI instance to a URL.
1371     *
1372     * @return the created URL representing the same resource as this URI.
1373     * @throws MalformedURLException
1374     *             if an error occurs while creating the URL or no protocol
1375     *             handler could be found.
1376     */
1377    public URL toURL() throws MalformedURLException {
1378        if (!absolute) {
1379            throw new IllegalArgumentException("URI is not absolute: " + toString());
1380        }
1381        return new URL(toString());
1382    }
1383
1384    private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
1385        in.defaultReadObject();
1386        try {
1387            parseURI(string, false);
1388        } catch (URISyntaxException e) {
1389            throw new IOException(e.toString());
1390        }
1391    }
1392
1393    private void writeObject(ObjectOutputStream out) throws IOException, ClassNotFoundException {
1394        // call toString() to ensure the value of string field is calculated
1395        toString();
1396        out.defaultWriteObject();
1397    }
1398}
1399