URI.java revision c68609e723a5daa20888abdb640799d4353fd590
1/*
2 *  Licensed to the Apache Software Foundation (ASF) under one or more
3 *  contributor license agreements.  See the NOTICE file distributed with
4 *  this work for additional information regarding copyright ownership.
5 *  The ASF licenses this file to You under the Apache License, Version 2.0
6 *  (the "License"); you may not use this file except in compliance with
7 *  the License.  You may obtain a copy of the License at
8 *
9 *     http://www.apache.org/licenses/LICENSE-2.0
10 *
11 *  Unless required by applicable law or agreed to in writing, software
12 *  distributed under the License is distributed on an "AS IS" BASIS,
13 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 *  See the License for the specific language governing permissions and
15 *  limitations under the License.
16 */
17
18package java.net;
19
20import java.io.IOException;
21import java.io.ObjectInputStream;
22import java.io.ObjectOutputStream;
23import java.io.Serializable;
24import java.util.Locale;
25import libcore.net.UriCodec;
26import libcore.net.url.UrlUtils;
27
28/**
29 * A Uniform Resource Identifier that identifies an abstract or physical
30 * resource, as specified by <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC
31 * 2396</a>.
32 *
33 * <h3>Parts of a URI</h3>
34 * A URI is composed of many parts. This class can both parse URI strings into
35 * parts and compose URI strings from parts. For example, consider the parts of
36 * this URI:
37 * {@code http://username:password@host:8080/directory/file?query#fragment}
38 * <table>
39 * <tr><th>Component                                            </th><th>Example value                                                      </th><th>Also known as</th></tr>
40 * <tr><td>{@link #getScheme() Scheme}                          </td><td>{@code http}                                                       </td><td>protocol</td></tr>
41 * <tr><td>{@link #getSchemeSpecificPart() Scheme-specific part}</td><td>{@code //username:password@host:8080/directory/file?query#fragment}</td><td></td></tr>
42 * <tr><td>{@link #getAuthority() Authority}                    </td><td>{@code username:password@host:8080}                                </td><td></td></tr>
43 * <tr><td>{@link #getUserInfo() User Info}                     </td><td>{@code username:password}                                          </td><td></td></tr>
44 * <tr><td>{@link #getHost() Host}                              </td><td>{@code host}                                                       </td><td></td></tr>
45 * <tr><td>{@link #getPort() Port}                              </td><td>{@code 8080}                                                       </td><td></td></tr>
46 * <tr><td>{@link #getPath() Path}                              </td><td>{@code /directory/file}                                            </td><td></td></tr>
47 * <tr><td>{@link #getQuery() Query}                            </td><td>{@code query}                                                      </td><td></td></tr>
48 * <tr><td>{@link #getFragment() Fragment}                      </td><td>{@code fragment}                                                   </td><td>ref</td></tr>
49 * </table>
50 *
51 * <h3>Encoding and Decoding URI Components</h3>
52 * Each component of a URI permits a limited set of legal characters. Other
53 * characters must first be <i>encoded</i> before they can be embedded in a URI.
54 * To recover the original characters from a URI, they may be <i>decoded</i>.
55 * This class refers to encoded strings as <strong>raw</strong> strings. For
56 * example, consider how this URI is decoded:
57 * {@code http://user:pa55w%3Frd@host:80/doc%7Csearch?q=green%20robots#over%206%22}
58 * <table>
59 * <tr><th>Component           </th><th>Legal Characters                                                    </th><th>Other Constraints                                  </th><th>Raw Value                                                      </th><th>Value</th></tr>
60 * <tr><td>Scheme              </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code +-.}                  </td><td>First character must be in {@code a-z}, {@code A-Z}</td><td>                                                               </td><td>{@code http}</td></tr>
61 * <tr><td>Scheme-specific part</td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=?/[]@}</td><td>Non-ASCII characters okay                          </td><td>{@code //user:pa55w%3Frd@host:80/doc%7Csearch?q=green%20robots}</td><td>{@code //user:pa55w?rd@host:80/doc|search?q=green robots}</td></tr>
62 * <tr><td>Authority           </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=@[]}  </td><td>Non-ASCII characters okay                          </td><td>{@code user:pa55w%3Frd@host:80}                                </td><td>{@code user:pa55w?rd@host:80}</td></tr>
63 * <tr><td>User Info           </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=}     </td><td>Non-ASCII characters okay                          </td><td>{@code user:pa55w%3Frd}                                        </td><td>{@code user:pa55w?rd}</td></tr>
64 * <tr><td>Host                </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code -.[]}                 </td><td>Domain name, IPv4 address or [IPv6 address]        </td><td>                                                               </td><td>host</td></tr>
65 * <tr><td>Port                </td><td>{@code 0-9}                                                         </td><td>                                                   </td><td>                                                               </td><td>{@code 80}</td></tr>
66 * <tr><td>Path                </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=/@}   </td><td>Non-ASCII characters okay                          </td><td>{@code /doc%7Csearch}                                          </td><td>{@code /doc|search}</td></tr>
67 * <tr><td>Query               </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=?/[]@}</td><td>Non-ASCII characters okay                          </td><td>{@code q=green%20robots}                                       </td><td>{@code q=green robots}</td></tr>
68 * <tr><td>Fragment            </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=?/[]@}</td><td>Non-ASCII characters okay                          </td><td>{@code over%206%22}                                            </td><td>{@code over 6"}</td></tr>
69 * </table>
70 * A URI's host, port and scheme are not eligible for encoding and must not
71 * contain illegal characters.
72 *
73 * <p>To encode a URI, invoke any of the multiple-parameter constructors of this
74 * class. These constructors accept your original strings and encode them into
75 * their raw form.
76 *
77 * <p>To decode a URI, invoke the single-string constructor, and then the
78 * appropriate <code>get<i>Component()</i></code> methods to get the decoded
79 * components.
80 *
81 * <p>The {@link URL} class can be used to retrieve resources by their URI.
82 */
83// TODO: document relative URIs
84// TODO: document opaque URIs
85public final class URI implements Comparable<URI>, Serializable {
86
87    private static final long serialVersionUID = -6052424284110960213l;
88
89    static final String UNRESERVED = "_-!.~\'()*";
90    static final String PUNCTUATION = ",;:$&+=";
91
92    static final UriCodec USER_INFO_ENCODER = new PartEncoder("");
93    static final UriCodec PATH_ENCODER = new PartEncoder("/@");
94    static final UriCodec AUTHORITY_ENCODER = new PartEncoder("@[]");
95
96    /** for java.net.URL, which foolishly combines these two parts */
97    static final UriCodec FILE_AND_QUERY_ENCODER = new PartEncoder("/@?");
98
99    /** for query, fragment, and scheme-specific part */
100    static final UriCodec ALL_LEGAL_ENCODER = new PartEncoder("?/[]@");
101
102    /** Retains all ASCII chars including delimiters. */
103    private static final UriCodec ASCII_ONLY = new UriCodec() {
104        @Override protected boolean isRetained(char c) {
105            return c <= 127;
106        }
107    };
108
109    /**
110     * Encodes the unescaped characters of {@code s} that are not permitted.
111     * Permitted characters are:
112     * <ul>
113     *   <li>Unreserved characters in <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>.
114     *   <li>{@code extraOkayChars},
115     *   <li>non-ASCII, non-control, non-whitespace characters
116     * </ul>
117     */
118    private static class PartEncoder extends UriCodec {
119        private final String extraLegalCharacters;
120
121        PartEncoder(String extraLegalCharacters) {
122            this.extraLegalCharacters = extraLegalCharacters;
123        }
124
125        @Override protected boolean isRetained(char c) {
126            return UNRESERVED.indexOf(c) != -1
127                    || PUNCTUATION.indexOf(c) != -1
128                    || extraLegalCharacters.indexOf(c) != -1
129                    || (c > 127 && !Character.isSpaceChar(c) && !Character.isISOControl(c));
130        }
131    }
132
133    private String string;
134    private transient String scheme;
135    private transient String schemeSpecificPart;
136    private transient String authority;
137    private transient String userInfo;
138    private transient String host;
139    private transient int port = -1;
140    private transient String path;
141    private transient String query;
142    private transient String fragment;
143    private transient boolean opaque;
144    private transient boolean absolute;
145    private transient boolean serverAuthority = false;
146
147    private transient int hash = -1;
148
149    private URI() {}
150
151    /**
152     * Creates a new URI instance by parsing {@code spec}.
153     *
154     * @param spec a URI whose illegal characters have all been encoded.
155     */
156    public URI(String spec) throws URISyntaxException {
157        parseURI(spec, false);
158    }
159
160    /**
161     * Creates a new URI instance of the given unencoded component parts.
162     *
163     * @param scheme the URI scheme, or null for a non-absolute URI.
164     */
165    public URI(String scheme, String schemeSpecificPart, String fragment)
166            throws URISyntaxException {
167        StringBuilder uri = new StringBuilder();
168        if (scheme != null) {
169            uri.append(scheme);
170            uri.append(':');
171        }
172        if (schemeSpecificPart != null) {
173            ALL_LEGAL_ENCODER.appendEncoded(uri, schemeSpecificPart);
174        }
175        if (fragment != null) {
176            uri.append('#');
177            ALL_LEGAL_ENCODER.appendEncoded(uri, fragment);
178        }
179
180        parseURI(uri.toString(), false);
181    }
182
183    /**
184     * Creates a new URI instance of the given unencoded component parts.
185     *
186     * @param scheme the URI scheme, or null for a non-absolute URI.
187     */
188    public URI(String scheme, String userInfo, String host, int port, String path, String query,
189            String fragment) throws URISyntaxException {
190        if (scheme == null && userInfo == null && host == null && path == null
191                && query == null && fragment == null) {
192            this.path = "";
193            return;
194        }
195
196        if (scheme != null && path != null && !path.isEmpty() && path.charAt(0) != '/') {
197            throw new URISyntaxException(path, "Relative path");
198        }
199
200        StringBuilder uri = new StringBuilder();
201        if (scheme != null) {
202            uri.append(scheme);
203            uri.append(':');
204        }
205
206        if (userInfo != null || host != null || port != -1) {
207            uri.append("//");
208        }
209
210        if (userInfo != null) {
211            USER_INFO_ENCODER.appendEncoded(uri, userInfo);
212            uri.append('@');
213        }
214
215        if (host != null) {
216            // check for IPv6 addresses that hasn't been enclosed in square brackets
217            if (host.indexOf(':') != -1 && host.indexOf(']') == -1 && host.indexOf('[') == -1) {
218                host = "[" + host + "]";
219            }
220            uri.append(host);
221        }
222
223        if (port != -1) {
224            uri.append(':');
225            uri.append(port);
226        }
227
228        if (path != null) {
229            PATH_ENCODER.appendEncoded(uri, path);
230        }
231
232        if (query != null) {
233            uri.append('?');
234            ALL_LEGAL_ENCODER.appendEncoded(uri, query);
235        }
236
237        if (fragment != null) {
238            uri.append('#');
239            ALL_LEGAL_ENCODER.appendEncoded(uri, fragment);
240        }
241
242        parseURI(uri.toString(), true);
243    }
244
245    /**
246     * Creates a new URI instance of the given unencoded component parts.
247     *
248     * @param scheme the URI scheme, or null for a non-absolute URI.
249     */
250    public URI(String scheme, String host, String path, String fragment) throws URISyntaxException {
251        this(scheme, null, host, -1, path, null, fragment);
252    }
253
254    /**
255     * Creates a new URI instance of the given unencoded component parts.
256     *
257     * @param scheme the URI scheme, or null for a non-absolute URI.
258     */
259    public URI(String scheme, String authority, String path, String query,
260            String fragment) throws URISyntaxException {
261        if (scheme != null && path != null && !path.isEmpty() && path.charAt(0) != '/') {
262            throw new URISyntaxException(path, "Relative path");
263        }
264
265        StringBuilder uri = new StringBuilder();
266        if (scheme != null) {
267            uri.append(scheme);
268            uri.append(':');
269        }
270        if (authority != null) {
271            uri.append("//");
272            AUTHORITY_ENCODER.appendEncoded(uri, authority);
273        }
274
275        if (path != null) {
276            PATH_ENCODER.appendEncoded(uri, path);
277        }
278        if (query != null) {
279            uri.append('?');
280            ALL_LEGAL_ENCODER.appendEncoded(uri, query);
281        }
282        if (fragment != null) {
283            uri.append('#');
284            ALL_LEGAL_ENCODER.appendEncoded(uri, fragment);
285        }
286
287        parseURI(uri.toString(), false);
288    }
289
290    /**
291     * Breaks uri into its component parts. This first splits URI into scheme,
292     * scheme-specific part and fragment:
293     *   [scheme:][scheme-specific part][#fragment]
294     *
295     * Then it breaks the scheme-specific part into authority, path and query:
296     *   [//authority][path][?query]
297     *
298     * Finally it delegates to parseAuthority to break the authority into user
299     * info, host and port:
300     *   [user-info@][host][:port]
301     */
302    private void parseURI(String uri, boolean forceServer) throws URISyntaxException {
303        string = uri;
304
305        // "#fragment"
306        int fragmentStart = UrlUtils.findFirstOf(uri, "#", 0, uri.length());
307        if (fragmentStart < uri.length()) {
308            fragment = ALL_LEGAL_ENCODER.validate(uri, fragmentStart + 1, uri.length(), "fragment");
309        }
310
311        // scheme:
312        int start;
313        int colon = UrlUtils.findFirstOf(uri, ":", 0, fragmentStart);
314        if (colon < UrlUtils.findFirstOf(uri, "/?#", 0, fragmentStart)) {
315            absolute = true;
316            scheme = validateScheme(uri, colon);
317            start = colon + 1;
318
319            if (start == fragmentStart) {
320                throw new URISyntaxException(uri, "Scheme-specific part expected", start);
321            }
322
323            // URIs with schemes followed by a non-/ char are opaque and need no further parsing.
324            if (!uri.regionMatches(start, "/", 0, 1)) {
325                opaque = true;
326                schemeSpecificPart = ALL_LEGAL_ENCODER.validate(
327                        uri, start, fragmentStart, "scheme specific part");
328                return;
329            }
330        } else {
331            absolute = false;
332            start = 0;
333        }
334
335        opaque = false;
336        schemeSpecificPart = uri.substring(start, fragmentStart);
337
338        // "//authority"
339        int fileStart;
340        if (uri.regionMatches(start, "//", 0, 2)) {
341            int authorityStart = start + 2;
342            fileStart = UrlUtils.findFirstOf(uri, "/?", authorityStart, fragmentStart);
343            if (authorityStart == uri.length()) {
344                throw new URISyntaxException(uri, "Authority expected", uri.length());
345            }
346            if (authorityStart < fileStart) {
347                authority = AUTHORITY_ENCODER.validate(uri, authorityStart, fileStart, "authority");
348            }
349        } else {
350            fileStart = start;
351        }
352
353        // "path"
354        int queryStart = UrlUtils.findFirstOf(uri, "?", fileStart, fragmentStart);
355        path = PATH_ENCODER.validate(uri, fileStart, queryStart, "path");
356
357        // "?query"
358        if (queryStart < fragmentStart) {
359            query = ALL_LEGAL_ENCODER.validate(uri, queryStart + 1, fragmentStart, "query");
360        }
361
362        parseAuthority(forceServer);
363    }
364
365    private String validateScheme(String uri, int end) throws URISyntaxException {
366        if (end == 0) {
367            throw new URISyntaxException(uri, "Scheme expected", 0);
368        }
369
370        for (int i = 0; i < end; i++) {
371            if (!UrlUtils.isValidSchemeChar(i, uri.charAt(i))) {
372                throw new URISyntaxException(uri, "Illegal character in scheme", 0);
373            }
374        }
375
376        return uri.substring(0, end);
377    }
378
379    /**
380     * Breaks this URI's authority into user info, host and port parts.
381     *   [user-info@][host][:port]
382     * If any part of this fails this method will give up and potentially leave
383     * these fields with their default values.
384     *
385     * @param forceServer true to always throw if the authority cannot be
386     *     parsed. If false, this method may still throw for some kinds of
387     *     errors; this unpredictable behavior is consistent with the RI.
388     */
389    private void parseAuthority(boolean forceServer) throws URISyntaxException {
390        if (authority == null) {
391            return;
392        }
393
394        String tempUserInfo = null;
395        String temp = authority;
396        int index = temp.indexOf('@');
397        int hostIndex = 0;
398        if (index != -1) {
399            // remove user info
400            tempUserInfo = temp.substring(0, index);
401            validateUserInfo(authority, tempUserInfo, 0);
402            temp = temp.substring(index + 1); // host[:port] is left
403            hostIndex = index + 1;
404        }
405
406        index = temp.lastIndexOf(':');
407        int endIndex = temp.indexOf(']');
408
409        String tempHost;
410        int tempPort = -1;
411        if (index != -1 && endIndex < index) {
412            // determine port and host
413            tempHost = temp.substring(0, index);
414
415            if (index < (temp.length() - 1)) { // port part is not empty
416                try {
417                    tempPort = Integer.parseInt(temp.substring(index + 1));
418                    if (tempPort < 0) {
419                        if (forceServer) {
420                            throw new URISyntaxException(authority,
421                                    "Invalid port number", hostIndex + index + 1);
422                        }
423                        return;
424                    }
425                } catch (NumberFormatException e) {
426                    if (forceServer) {
427                        throw new URISyntaxException(authority,
428                                "Invalid port number", hostIndex + index + 1);
429                    }
430                    return;
431                }
432            }
433        } else {
434            tempHost = temp;
435        }
436
437        if (tempHost.isEmpty()) {
438            if (forceServer) {
439                throw new URISyntaxException(authority, "Expected host", hostIndex);
440            }
441            return;
442        }
443
444        if (!isValidHost(forceServer, tempHost)) {
445            return;
446        }
447
448        // this is a server based uri,
449        // fill in the userInfo, host and port fields
450        userInfo = tempUserInfo;
451        host = tempHost;
452        port = tempPort;
453        serverAuthority = true;
454    }
455
456    private void validateUserInfo(String uri, String userInfo, int index)
457            throws URISyntaxException {
458        for (int i = 0; i < userInfo.length(); i++) {
459            char ch = userInfo.charAt(i);
460            if (ch == ']' || ch == '[') {
461                throw new URISyntaxException(uri, "Illegal character in userInfo", index + i);
462            }
463        }
464    }
465
466    /**
467     * Returns true if {@code host} is a well-formed host name or IP address.
468     *
469     * @param forceServer true to always throw if the host cannot be parsed. If
470     *     false, this method may still throw for some kinds of errors; this
471     *     unpredictable behavior is consistent with the RI.
472     */
473    private boolean isValidHost(boolean forceServer, String host) throws URISyntaxException {
474        if (host.startsWith("[")) {
475            // IPv6 address
476            if (!host.endsWith("]")) {
477                throw new URISyntaxException(host,
478                        "Expected a closing square bracket for IPv6 address", 0);
479            }
480            if (InetAddress.isNumeric(host)) {
481                // If it's numeric, the presence of square brackets guarantees
482                // that it's a numeric IPv6 address.
483                return true;
484            }
485            throw new URISyntaxException(host, "Malformed IPv6 address");
486        }
487
488        // '[' and ']' can only be the first char and last char
489        // of the host name
490        if (host.indexOf('[') != -1 || host.indexOf(']') != -1) {
491            throw new URISyntaxException(host, "Illegal character in host name", 0);
492        }
493
494        int index = host.lastIndexOf('.');
495        if (index < 0 || index == host.length() - 1
496                || !Character.isDigit(host.charAt(index + 1))) {
497            // domain name
498            if (isValidDomainName(host)) {
499                return true;
500            }
501            if (forceServer) {
502                throw new URISyntaxException(host, "Illegal character in host name", 0);
503            }
504            return false;
505        }
506
507        // IPv4 address?
508        try {
509            InetAddress ia = InetAddress.parseNumericAddress(host);
510            if (ia instanceof Inet4Address) {
511                return true;
512            }
513        } catch (IllegalArgumentException ignored) {
514        }
515
516        if (forceServer) {
517            throw new URISyntaxException(host, "Malformed IPv4 address", 0);
518        }
519        return false;
520    }
521
522    private boolean isValidDomainName(String host) {
523        try {
524            UriCodec.validateSimple(host, "-.");
525        } catch (URISyntaxException e) {
526            return false;
527        }
528
529        String lastLabel = null;
530        for (String token : host.split("\\.")) {
531            lastLabel = token;
532            if (lastLabel.startsWith("-") || lastLabel.endsWith("-")) {
533                return false;
534            }
535        }
536
537        if (lastLabel == null) {
538            return false;
539        }
540
541        if (!lastLabel.equals(host)) {
542            char ch = lastLabel.charAt(0);
543            if (ch >= '0' && ch <= '9') {
544                return false;
545            }
546        }
547        return true;
548    }
549
550    /**
551     * Compares this URI with the given argument {@code uri}. This method will
552     * return a negative value if this URI instance is less than the given
553     * argument and a positive value if this URI instance is greater than the
554     * given argument. The return value {@code 0} indicates that the two
555     * instances represent the same URI. To define the order the single parts of
556     * the URI are compared with each other. String components will be ordered
557     * in the natural case-sensitive way. A hierarchical URI is less than an
558     * opaque URI and if one part is {@code null} the URI with the undefined
559     * part is less than the other one.
560     *
561     * @param uri
562     *            the URI this instance has to compare with.
563     * @return the value representing the order of the two instances.
564     */
565    public int compareTo(URI uri) {
566        int ret;
567
568        // compare schemes
569        if (scheme == null && uri.scheme != null) {
570            return -1;
571        } else if (scheme != null && uri.scheme == null) {
572            return 1;
573        } else if (scheme != null && uri.scheme != null) {
574            ret = scheme.compareToIgnoreCase(uri.scheme);
575            if (ret != 0) {
576                return ret;
577            }
578        }
579
580        // compare opacities
581        if (!opaque && uri.opaque) {
582            return -1;
583        } else if (opaque && !uri.opaque) {
584            return 1;
585        } else if (opaque && uri.opaque) {
586            ret = schemeSpecificPart.compareTo(uri.schemeSpecificPart);
587            if (ret != 0) {
588                return ret;
589            }
590        } else {
591
592            // otherwise both must be hierarchical
593
594            // compare authorities
595            if (authority != null && uri.authority == null) {
596                return 1;
597            } else if (authority == null && uri.authority != null) {
598                return -1;
599            } else if (authority != null && uri.authority != null) {
600                if (host != null && uri.host != null) {
601                    // both are server based, so compare userInfo, host, port
602                    if (userInfo != null && uri.userInfo == null) {
603                        return 1;
604                    } else if (userInfo == null && uri.userInfo != null) {
605                        return -1;
606                    } else if (userInfo != null && uri.userInfo != null) {
607                        ret = userInfo.compareTo(uri.userInfo);
608                        if (ret != 0) {
609                            return ret;
610                        }
611                    }
612
613                    // userInfo's are the same, compare hostname
614                    ret = host.compareToIgnoreCase(uri.host);
615                    if (ret != 0) {
616                        return ret;
617                    }
618
619                    // compare port
620                    if (port != uri.port) {
621                        return port - uri.port;
622                    }
623                } else { // one or both are registry based, compare the whole
624                    // authority
625                    ret = authority.compareTo(uri.authority);
626                    if (ret != 0) {
627                        return ret;
628                    }
629                }
630            }
631
632            // authorities are the same
633            // compare paths
634            ret = path.compareTo(uri.path);
635            if (ret != 0) {
636                return ret;
637            }
638
639            // compare queries
640
641            if (query != null && uri.query == null) {
642                return 1;
643            } else if (query == null && uri.query != null) {
644                return -1;
645            } else if (query != null && uri.query != null) {
646                ret = query.compareTo(uri.query);
647                if (ret != 0) {
648                    return ret;
649                }
650            }
651        }
652
653        // everything else is identical, so compare fragments
654        if (fragment != null && uri.fragment == null) {
655            return 1;
656        } else if (fragment == null && uri.fragment != null) {
657            return -1;
658        } else if (fragment != null && uri.fragment != null) {
659            ret = fragment.compareTo(uri.fragment);
660            if (ret != 0) {
661                return ret;
662            }
663        }
664
665        // identical
666        return 0;
667    }
668
669    /**
670     * Returns the URI formed by parsing {@code uri}. This method behaves
671     * identically to the string constructor but throws a different exception
672     * on failure. The constructor fails with a checked {@link
673     * URISyntaxException}; this method fails with an unchecked {@link
674     * IllegalArgumentException}.
675     */
676    public static URI create(String uri) {
677        try {
678            return new URI(uri);
679        } catch (URISyntaxException e) {
680            throw new IllegalArgumentException(e.getMessage());
681        }
682    }
683
684    private URI duplicate() {
685        URI clone = new URI();
686        clone.absolute = absolute;
687        clone.authority = authority;
688        clone.fragment = fragment;
689        clone.host = host;
690        clone.opaque = opaque;
691        clone.path = path;
692        clone.port = port;
693        clone.query = query;
694        clone.scheme = scheme;
695        clone.schemeSpecificPart = schemeSpecificPart;
696        clone.userInfo = userInfo;
697        clone.serverAuthority = serverAuthority;
698        return clone;
699    }
700
701    /*
702     * Takes a string that may contain hex sequences like %F1 or %2b and
703     * converts the hex values following the '%' to lowercase
704     */
705    private String convertHexToLowerCase(String s) {
706        StringBuilder result = new StringBuilder("");
707        if (s.indexOf('%') == -1) {
708            return s;
709        }
710
711        int index, prevIndex = 0;
712        while ((index = s.indexOf('%', prevIndex)) != -1) {
713            result.append(s.substring(prevIndex, index + 1));
714            result.append(s.substring(index + 1, index + 3).toLowerCase(Locale.US));
715            index += 3;
716            prevIndex = index;
717        }
718        return result.toString();
719    }
720
721    /**
722     * Returns true if {@code first} and {@code second} are equal after
723     * unescaping hex sequences like %F1 and %2b.
724     */
725    private boolean escapedEquals(String first, String second) {
726        if (first.indexOf('%') != second.indexOf('%')) {
727            return first.equals(second);
728        }
729
730        int index, prevIndex = 0;
731        while ((index = first.indexOf('%', prevIndex)) != -1
732                && second.indexOf('%', prevIndex) == index) {
733            boolean match = first.substring(prevIndex, index).equals(
734                    second.substring(prevIndex, index));
735            if (!match) {
736                return false;
737            }
738
739            match = first.substring(index + 1, index + 3).equalsIgnoreCase(
740                    second.substring(index + 1, index + 3));
741            if (!match) {
742                return false;
743            }
744
745            index += 3;
746            prevIndex = index;
747        }
748        return first.substring(prevIndex).equals(second.substring(prevIndex));
749    }
750
751    @Override public boolean equals(Object o) {
752        if (!(o instanceof URI)) {
753            return false;
754        }
755        URI uri = (URI) o;
756
757        if (uri.fragment == null && fragment != null || uri.fragment != null
758                && fragment == null) {
759            return false;
760        } else if (uri.fragment != null && fragment != null) {
761            if (!escapedEquals(uri.fragment, fragment)) {
762                return false;
763            }
764        }
765
766        if (uri.scheme == null && scheme != null || uri.scheme != null
767                && scheme == null) {
768            return false;
769        } else if (uri.scheme != null && scheme != null) {
770            if (!uri.scheme.equalsIgnoreCase(scheme)) {
771                return false;
772            }
773        }
774
775        if (uri.opaque && opaque) {
776            return escapedEquals(uri.schemeSpecificPart,
777                    schemeSpecificPart);
778        } else if (!uri.opaque && !opaque) {
779            if (!escapedEquals(path, uri.path)) {
780                return false;
781            }
782
783            if (uri.query != null && query == null || uri.query == null
784                    && query != null) {
785                return false;
786            } else if (uri.query != null && query != null) {
787                if (!escapedEquals(uri.query, query)) {
788                    return false;
789                }
790            }
791
792            if (uri.authority != null && authority == null
793                    || uri.authority == null && authority != null) {
794                return false;
795            } else if (uri.authority != null && authority != null) {
796                if (uri.host != null && host == null || uri.host == null
797                        && host != null) {
798                    return false;
799                } else if (uri.host == null && host == null) {
800                    // both are registry based, so compare the whole authority
801                    return escapedEquals(uri.authority, authority);
802                } else { // uri.host != null && host != null, so server-based
803                    if (!host.equalsIgnoreCase(uri.host)) {
804                        return false;
805                    }
806
807                    if (port != uri.port) {
808                        return false;
809                    }
810
811                    if (uri.userInfo != null && userInfo == null
812                            || uri.userInfo == null && userInfo != null) {
813                        return false;
814                    } else if (uri.userInfo != null && userInfo != null) {
815                        return escapedEquals(userInfo, uri.userInfo);
816                    } else {
817                        return true;
818                    }
819                }
820            } else {
821                // no authority
822                return true;
823            }
824
825        } else {
826            // one is opaque, the other hierarchical
827            return false;
828        }
829    }
830
831    /**
832     * Returns the scheme of this URI, or null if this URI has no scheme. This
833     * is also known as the protocol.
834     */
835    public String getScheme() {
836        return scheme;
837    }
838
839    /**
840     * Returns the decoded scheme-specific part of this URI, or null if this URI
841     * has no scheme-specific part.
842     */
843    public String getSchemeSpecificPart() {
844        return decode(schemeSpecificPart);
845    }
846
847    /**
848     * Returns the encoded scheme-specific part of this URI, or null if this URI
849     * has no scheme-specific part.
850     */
851    public String getRawSchemeSpecificPart() {
852        return schemeSpecificPart;
853    }
854
855    /**
856     * Returns the decoded authority part of this URI, or null if this URI has
857     * no authority.
858     */
859    public String getAuthority() {
860        return decode(authority);
861    }
862
863    /**
864     * Returns the encoded authority of this URI, or null if this URI has no
865     * authority.
866     */
867    public String getRawAuthority() {
868        return authority;
869    }
870
871    /**
872     * Returns the decoded user info of this URI, or null if this URI has no
873     * user info.
874     */
875    public String getUserInfo() {
876        return decode(userInfo);
877    }
878
879    /**
880     * Returns the encoded user info of this URI, or null if this URI has no
881     * user info.
882     */
883    public String getRawUserInfo() {
884        return userInfo;
885    }
886
887    /**
888     * Returns the host of this URI, or null if this URI has no host.
889     */
890    public String getHost() {
891        return host;
892    }
893
894    /**
895     * Returns the port number of this URI, or {@code -1} if this URI has no
896     * explicit port.
897     */
898    public int getPort() {
899        return port;
900    }
901
902    /** @hide */
903    public int getEffectivePort() {
904        return getEffectivePort(scheme, port);
905    }
906
907    /**
908     * Returns the port to use for {@code scheme} connections will use when
909     * {@link #getPort} returns {@code specifiedPort}.
910     *
911     * @hide
912     */
913    public static int getEffectivePort(String scheme, int specifiedPort) {
914        if (specifiedPort != -1) {
915            return specifiedPort;
916        }
917
918        if ("http".equalsIgnoreCase(scheme)) {
919            return 80;
920        } else if ("https".equalsIgnoreCase(scheme)) {
921            return 443;
922        } else {
923            return -1;
924        }
925    }
926
927    /**
928     * Returns the decoded path of this URI, or null if this URI has no path.
929     */
930    public String getPath() {
931        return decode(path);
932    }
933
934    /**
935     * Gets the encoded path of this URI, or null if this URI has no path.
936     */
937    public String getRawPath() {
938        return path;
939    }
940
941    /**
942     * Returns the decoded query of this URI, or null if this URI has no query.
943     */
944    public String getQuery() {
945        return decode(query);
946    }
947
948    /**
949     * Returns the encoded query of this URI, or null if this URI has no query.
950     */
951    public String getRawQuery() {
952        return query;
953    }
954
955    /**
956     * Returns the decoded fragment of this URI, or null if this URI has no
957     * fragment.
958     */
959    public String getFragment() {
960        return decode(fragment);
961    }
962
963    /**
964     * Gets the encoded fragment of this URI, or null if this URI has no
965     * fragment.
966     */
967    public String getRawFragment() {
968        return fragment;
969    }
970
971    @Override public int hashCode() {
972        if (hash == -1) {
973            hash = getHashString().hashCode();
974        }
975        return hash;
976    }
977
978    /**
979     * Returns true if this URI is absolute, which means that a scheme is
980     * defined.
981     */
982    public boolean isAbsolute() {
983        // TODO: simplify to 'scheme != null' ?
984        return absolute;
985    }
986
987    /**
988     * Returns true if this URI is opaque. Opaque URIs are absolute and have a
989     * scheme-specific part that does not start with a slash character. All
990     * parts except scheme, scheme-specific and fragment are undefined.
991     */
992    public boolean isOpaque() {
993        return opaque;
994    }
995
996    /**
997     * Returns the normalized path.
998     */
999    private String normalize(String path, boolean discardRelativePrefix) {
1000        path = UrlUtils.canonicalizePath(path, discardRelativePrefix);
1001
1002        /*
1003         * If the path contains a colon before the first colon, prepend
1004         * "./" to differentiate the path from a scheme prefix.
1005         */
1006        int colon = path.indexOf(':');
1007        if (colon != -1) {
1008            int slash = path.indexOf('/');
1009            if (slash == -1 || colon < slash) {
1010                path = "./" + path;
1011            }
1012        }
1013
1014        return path;
1015    }
1016
1017    /**
1018     * Normalizes the path part of this URI.
1019     *
1020     * @return an URI object which represents this instance with a normalized
1021     *         path.
1022     */
1023    public URI normalize() {
1024        if (opaque) {
1025            return this;
1026        }
1027        String normalizedPath = normalize(path, false);
1028        // if the path is already normalized, return this
1029        if (path.equals(normalizedPath)) {
1030            return this;
1031        }
1032        // get an exact copy of the URI re-calculate the scheme specific part
1033        // since the path of the normalized URI is different from this URI.
1034        URI result = duplicate();
1035        result.path = normalizedPath;
1036        result.setSchemeSpecificPart();
1037        return result;
1038    }
1039
1040    /**
1041     * Tries to parse the authority component of this URI to divide it into the
1042     * host, port, and user-info. If this URI is already determined as a
1043     * ServerAuthority this instance will be returned without changes.
1044     *
1045     * @return this instance with the components of the parsed server authority.
1046     * @throws URISyntaxException
1047     *             if the authority part could not be parsed as a server-based
1048     *             authority.
1049     */
1050    public URI parseServerAuthority() throws URISyntaxException {
1051        if (!serverAuthority) {
1052            parseAuthority(true);
1053        }
1054        return this;
1055    }
1056
1057    /**
1058     * Makes the given URI {@code relative} to a relative URI against the URI
1059     * represented by this instance.
1060     *
1061     * @param relative
1062     *            the URI which has to be relativized against this URI.
1063     * @return the relative URI.
1064     */
1065    public URI relativize(URI relative) {
1066        if (relative.opaque || opaque) {
1067            return relative;
1068        }
1069
1070        if (scheme == null ? relative.scheme != null : !scheme
1071                .equals(relative.scheme)) {
1072            return relative;
1073        }
1074
1075        if (authority == null ? relative.authority != null : !authority
1076                .equals(relative.authority)) {
1077            return relative;
1078        }
1079
1080        // normalize both paths
1081        String thisPath = normalize(path, false);
1082        String relativePath = normalize(relative.path, false);
1083
1084        /*
1085         * if the paths aren't equal, then we need to determine if this URI's
1086         * path is a parent path (begins with) the relative URI's path
1087         */
1088        if (!thisPath.equals(relativePath)) {
1089            // drop everything after the last slash in this path
1090            thisPath = thisPath.substring(0, thisPath.lastIndexOf('/') + 1);
1091
1092            /*
1093             * if the relative URI's path doesn't start with this URI's path,
1094             * then just return the relative URI; the URIs have nothing in
1095             * common
1096             */
1097            if (!relativePath.startsWith(thisPath)) {
1098                return relative;
1099            }
1100        }
1101
1102        URI result = new URI();
1103        result.fragment = relative.fragment;
1104        result.query = relative.query;
1105        // the result URI is the remainder of the relative URI's path
1106        result.path = relativePath.substring(thisPath.length());
1107        result.setSchemeSpecificPart();
1108        return result;
1109    }
1110
1111    /**
1112     * Resolves the given URI {@code relative} against the URI represented by
1113     * this instance.
1114     *
1115     * @param relative
1116     *            the URI which has to be resolved against this URI.
1117     * @return the resolved URI.
1118     */
1119    public URI resolve(URI relative) {
1120        if (relative.absolute || opaque) {
1121            return relative;
1122        }
1123
1124        if (relative.authority != null) {
1125            // If the relative URI has an authority, the result is the relative
1126            // with this URI's scheme.
1127            URI result = relative.duplicate();
1128            result.scheme = scheme;
1129            result.absolute = absolute;
1130            return result;
1131        }
1132
1133        if (relative.path.isEmpty() && relative.scheme == null && relative.query == null) {
1134            // if the relative URI only consists of at most a fragment,
1135            URI result = duplicate();
1136            result.fragment = relative.fragment;
1137            return result;
1138        }
1139
1140        URI result = duplicate();
1141        result.fragment = relative.fragment;
1142        result.query = relative.query;
1143        String resolvedPath;
1144        if (relative.path.startsWith("/")) {
1145            // The relative URI has an absolute path; use it.
1146            resolvedPath = relative.path;
1147        } else if (relative.path.isEmpty()) {
1148            // The relative URI has no path; use the base path.
1149            resolvedPath = path;
1150        } else {
1151            // The relative URI has a relative path; combine the paths.
1152            int endIndex = path.lastIndexOf('/') + 1;
1153            resolvedPath = path.substring(0, endIndex) + relative.path;
1154        }
1155        result.path = UrlUtils.authoritySafePath(result.authority, normalize(resolvedPath, true));
1156        result.setSchemeSpecificPart();
1157        return result;
1158    }
1159
1160    /**
1161     * Helper method used to re-calculate the scheme specific part of the
1162     * resolved or normalized URIs
1163     */
1164    private void setSchemeSpecificPart() {
1165        // ssp = [//authority][path][?query]
1166        StringBuilder ssp = new StringBuilder();
1167        if (authority != null) {
1168            ssp.append("//" + authority);
1169        }
1170        if (path != null) {
1171            ssp.append(path);
1172        }
1173        if (query != null) {
1174            ssp.append("?" + query);
1175        }
1176        schemeSpecificPart = ssp.toString();
1177        // reset string, so that it can be re-calculated correctly when asked.
1178        string = null;
1179    }
1180
1181    /**
1182     * Creates a new URI instance by parsing the given string {@code relative}
1183     * and resolves the created URI against the URI represented by this
1184     * instance.
1185     *
1186     * @param relative
1187     *            the given string to create the new URI instance which has to
1188     *            be resolved later on.
1189     * @return the created and resolved URI.
1190     */
1191    public URI resolve(String relative) {
1192        return resolve(create(relative));
1193    }
1194
1195    private String decode(String s) {
1196        return s != null ? UriCodec.decode(s) : null;
1197    }
1198
1199    /**
1200     * Returns the textual string representation of this URI instance using the
1201     * US-ASCII encoding.
1202     *
1203     * @return the US-ASCII string representation of this URI.
1204     */
1205    public String toASCIIString() {
1206        StringBuilder result = new StringBuilder();
1207        ASCII_ONLY.appendEncoded(result, toString());
1208        return result.toString();
1209    }
1210
1211    /**
1212     * Returns the encoded URI.
1213     */
1214    @Override public String toString() {
1215        if (string != null) {
1216            return string;
1217        }
1218
1219        StringBuilder result = new StringBuilder();
1220        if (scheme != null) {
1221            result.append(scheme);
1222            result.append(':');
1223        }
1224        if (opaque) {
1225            result.append(schemeSpecificPart);
1226        } else {
1227            if (authority != null) {
1228                result.append("//");
1229                result.append(authority);
1230            }
1231
1232            if (path != null) {
1233                result.append(path);
1234            }
1235
1236            if (query != null) {
1237                result.append('?');
1238                result.append(query);
1239            }
1240        }
1241
1242        if (fragment != null) {
1243            result.append('#');
1244            result.append(fragment);
1245        }
1246
1247        string = result.toString();
1248        return string;
1249    }
1250
1251    /*
1252     * Form a string from the components of this URI, similarly to the
1253     * toString() method. But this method converts scheme and host to lowercase,
1254     * and converts escaped octets to lowercase.
1255     */
1256    private String getHashString() {
1257        StringBuilder result = new StringBuilder();
1258        if (scheme != null) {
1259            result.append(scheme.toLowerCase(Locale.US));
1260            result.append(':');
1261        }
1262        if (opaque) {
1263            result.append(schemeSpecificPart);
1264        } else {
1265            if (authority != null) {
1266                result.append("//");
1267                if (host == null) {
1268                    result.append(authority);
1269                } else {
1270                    if (userInfo != null) {
1271                        result.append(userInfo + "@");
1272                    }
1273                    result.append(host.toLowerCase(Locale.US));
1274                    if (port != -1) {
1275                        result.append(":" + port);
1276                    }
1277                }
1278            }
1279
1280            if (path != null) {
1281                result.append(path);
1282            }
1283
1284            if (query != null) {
1285                result.append('?');
1286                result.append(query);
1287            }
1288        }
1289
1290        if (fragment != null) {
1291            result.append('#');
1292            result.append(fragment);
1293        }
1294
1295        return convertHexToLowerCase(result.toString());
1296    }
1297
1298    /**
1299     * Converts this URI instance to a URL.
1300     *
1301     * @return the created URL representing the same resource as this URI.
1302     * @throws MalformedURLException
1303     *             if an error occurs while creating the URL or no protocol
1304     *             handler could be found.
1305     */
1306    public URL toURL() throws MalformedURLException {
1307        if (!absolute) {
1308            throw new IllegalArgumentException("URI is not absolute: " + toString());
1309        }
1310        return new URL(toString());
1311    }
1312
1313    private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
1314        in.defaultReadObject();
1315        try {
1316            parseURI(string, false);
1317        } catch (URISyntaxException e) {
1318            throw new IOException(e.toString());
1319        }
1320    }
1321
1322    private void writeObject(ObjectOutputStream out) throws IOException, ClassNotFoundException {
1323        // call toString() to ensure the value of string field is calculated
1324        toString();
1325        out.defaultWriteObject();
1326    }
1327}
1328