1/* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18package java.net; 19 20import java.io.IOException; 21import java.io.ObjectInputStream; 22import java.io.ObjectOutputStream; 23import java.io.Serializable; 24import java.util.Locale; 25import libcore.net.UriCodec; 26import libcore.net.url.UrlUtils; 27 28/** 29 * A Uniform Resource Identifier that identifies an abstract or physical 30 * resource, as specified by <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC 31 * 2396</a>. 32 * 33 * <h3>Parts of a URI</h3> 34 * A URI is composed of many parts. This class can both parse URI strings into 35 * parts and compose URI strings from parts. 
For example, consider the parts of 36 * this URI: 37 * {@code http://username:password@host:8080/directory/file?query#fragment} 38 * <table> 39 * <tr><th>Component </th><th>Example value </th><th>Also known as</th></tr> 40 * <tr><td>{@link #getScheme() Scheme} </td><td>{@code http} </td><td>protocol</td></tr> 41 * <tr><td>{@link #getSchemeSpecificPart() Scheme-specific part}</td><td>{@code //username:password@host:8080/directory/file?query#fragment}</td><td></td></tr> 42 * <tr><td>{@link #getAuthority() Authority} </td><td>{@code username:password@host:8080} </td><td></td></tr> 43 * <tr><td>{@link #getUserInfo() User Info} </td><td>{@code username:password} </td><td></td></tr> 44 * <tr><td>{@link #getHost() Host} </td><td>{@code host} </td><td></td></tr> 45 * <tr><td>{@link #getPort() Port} </td><td>{@code 8080} </td><td></td></tr> 46 * <tr><td>{@link #getPath() Path} </td><td>{@code /directory/file} </td><td></td></tr> 47 * <tr><td>{@link #getQuery() Query} </td><td>{@code query} </td><td></td></tr> 48 * <tr><td>{@link #getFragment() Fragment} </td><td>{@code fragment} </td><td>ref</td></tr> 49 * </table> 50 * 51 * <h3>Absolute vs. Relative URIs</h3> 52 * URIs are either {@link #isAbsolute() absolute or relative}. 53 * <ul> 54 * <li><strong>Absolute:</strong> {@code http://android.com/robots.txt} 55 * <li><strong>Relative:</strong> {@code robots.txt} 56 * </ul> 57 * 58 * <p>Absolute URIs always have a scheme. If its scheme is supported by {@link 59 * URL}, you can use {@link #toURL} to convert an absolute URI to a URL. 60 * 61 * <p>Relative URIs do not have a scheme and cannot be converted to URLs. If you 62 * have the absolute URI that a relative URI is relative to, you can use {@link 63 * #resolve} to compute the referenced absolute URI. Symmetrically, you can use 64 * {@link #relativize} to compute the relative URI from one URI to another. 
 * <pre>   {@code
 *   URI absolute = new URI("http://android.com/");
 *   URI relative = new URI("robots.txt");
 *   URI resolved = new URI("http://android.com/robots.txt");
 *
 *   // print "http://android.com/robots.txt"
 *   System.out.println(absolute.resolve(relative));
 *
 *   // print "robots.txt"
 *   System.out.println(absolute.relativize(resolved));
 * }</pre>
 *
 * <h3>Opaque vs. Hierarchical URIs</h3>
 * Absolute URIs are either {@link #isOpaque() opaque or hierarchical}. Relative
 * URIs are always hierarchical.
 * <ul>
 *     <li><strong>Hierarchical:</strong> {@code http://android.com/robots.txt}
 *     <li><strong>Opaque:</strong> {@code mailto:robots@example.com}
 * </ul>
 *
 * <p>Opaque URIs have both a scheme and a scheme-specific part that does not
 * begin with the slash character: {@code /}. The contents of the
 * scheme-specific part of an opaque URI are not parsed, so an opaque URI never
 * has an authority, user info, host, port, path or query. An opaque URI may
 * have a fragment, however. A typical opaque URI is
 * {@code mailto:robots@example.com}.
 * <table>
 * <tr><th>Component           </th><th>Example value            </th></tr>
 * <tr><td>Scheme              </td><td>{@code mailto}           </td></tr>
 * <tr><td>Scheme-specific part</td><td>{@code robots@example.com}</td></tr>
 * <tr><td>Fragment            </td><td>                         </td></tr>
 * </table>
 * <p>Hierarchical URIs may have values for any URI component. They always
 * have a non-null path, though that path may be the empty string.
 *
 * <h3>Encoding and Decoding URI Components</h3>
 * Each component of a URI permits a limited set of legal characters. Other
 * characters must first be <i>encoded</i> before they can be embedded in a URI.
 * To recover the original characters from a URI, they may be <i>decoded</i>.
 * <strong>Contrary to what you might expect,</strong> this class uses the
 * term <i>raw</i> to refer to encoded strings.
The non-<i>raw</i> accessors 106 * return decoded strings. For example, consider how this URI is decoded: 107 * {@code http://user:pa55w%3Frd@host:80/doc%7Csearch?q=green%20robots#over%206%22} 108 * <table> 109 * <tr><th>Component </th><th>Legal Characters </th><th>Other Constraints </th><th>Raw Value </th><th>Value</th></tr> 110 * <tr><td>Scheme </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code +-.} </td><td>First character must be in {@code a-z}, {@code A-Z}</td><td> </td><td>{@code http}</td></tr> 111 * <tr><td>Scheme-specific part</td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=?/[]@}</td><td>Non-ASCII characters okay </td><td>{@code //user:pa55w%3Frd@host:80/doc%7Csearch?q=green%20robots}</td><td>{@code //user:pa55w?rd@host:80/doc|search?q=green robots}</td></tr> 112 * <tr><td>Authority </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=@[]} </td><td>Non-ASCII characters okay </td><td>{@code user:pa55w%3Frd@host:80} </td><td>{@code user:pa55w?rd@host:80}</td></tr> 113 * <tr><td>User Info </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=} </td><td>Non-ASCII characters okay </td><td>{@code user:pa55w%3Frd} </td><td>{@code user:pa55w?rd}</td></tr> 114 * <tr><td>Host </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code -.[]} </td><td>Domain name, IPv4 address or [IPv6 address] </td><td> </td><td>host</td></tr> 115 * <tr><td>Port </td><td>{@code 0-9} </td><td> </td><td> </td><td>{@code 80}</td></tr> 116 * <tr><td>Path </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=/@} </td><td>Non-ASCII characters okay </td><td>{@code /doc%7Csearch} </td><td>{@code /doc|search}</td></tr> 117 * <tr><td>Query </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=?/[]@}</td><td>Non-ASCII characters okay </td><td>{@code q=green%20robots} </td><td>{@code q=green robots}</td></tr> 118 * <tr><td>Fragment </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code 
_-!.~'()*,;:$&+=?/[]@}</td><td>Non-ASCII characters okay </td><td>{@code over%206%22} </td><td>{@code over 6"}</td></tr> 119 * </table> 120 * A URI's host, port and scheme are not eligible for encoding and must not 121 * contain illegal characters. 122 * 123 * <p>To encode a URI, invoke any of the multiple-parameter constructors of this 124 * class. These constructors accept your original strings and encode them into 125 * their raw form. 126 * 127 * <p>To decode a URI, invoke the single-string constructor, and then use the 128 * appropriate accessor methods to get the decoded components. 129 * 130 * <p>The {@link URL} class can be used to retrieve resources by their URI. 131 */ 132public final class URI implements Comparable<URI>, Serializable { 133 134 private static final long serialVersionUID = -6052424284110960213l; 135 136 static final String UNRESERVED = "_-!.~\'()*"; 137 static final String PUNCTUATION = ",;:$&+="; 138 139 static final UriCodec USER_INFO_ENCODER = new PartEncoder(""); 140 static final UriCodec PATH_ENCODER = new PartEncoder("/@"); 141 static final UriCodec AUTHORITY_ENCODER = new PartEncoder("@[]"); 142 143 /** for java.net.URL, which foolishly combines these two parts */ 144 static final UriCodec FILE_AND_QUERY_ENCODER = new PartEncoder("/@?"); 145 146 /** for query, fragment, and scheme-specific part */ 147 static final UriCodec ALL_LEGAL_ENCODER = new PartEncoder("?/[]@"); 148 149 /** Retains all ASCII chars including delimiters. */ 150 private static final UriCodec ASCII_ONLY = new UriCodec() { 151 @Override protected boolean isRetained(char c) { 152 return c <= 127; 153 } 154 }; 155 156 /** 157 * Encodes the unescaped characters of {@code s} that are not permitted. 158 * Permitted characters are: 159 * <ul> 160 * <li>Unreserved characters in <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>. 
161 * <li>{@code extraOkayChars}, 162 * <li>non-ASCII, non-control, non-whitespace characters 163 * </ul> 164 */ 165 private static class PartEncoder extends UriCodec { 166 private final String extraLegalCharacters; 167 168 PartEncoder(String extraLegalCharacters) { 169 this.extraLegalCharacters = extraLegalCharacters; 170 } 171 172 @Override protected boolean isRetained(char c) { 173 return UNRESERVED.indexOf(c) != -1 174 || PUNCTUATION.indexOf(c) != -1 175 || extraLegalCharacters.indexOf(c) != -1 176 || (c > 127 && !Character.isSpaceChar(c) && !Character.isISOControl(c)); 177 } 178 } 179 180 private String string; 181 private transient String scheme; 182 private transient String schemeSpecificPart; 183 private transient String authority; 184 private transient String userInfo; 185 private transient String host; 186 private transient int port = -1; 187 private transient String path; 188 private transient String query; 189 private transient String fragment; 190 private transient boolean opaque; 191 private transient boolean absolute; 192 private transient boolean serverAuthority = false; 193 194 private transient int hash = -1; 195 196 private URI() {} 197 198 /** 199 * Creates a new URI instance by parsing {@code spec}. 200 * 201 * @param spec a URI whose illegal characters have all been encoded. 202 */ 203 public URI(String spec) throws URISyntaxException { 204 parseURI(spec, false); 205 } 206 207 /** 208 * Creates a new URI instance of the given unencoded component parts. 209 * 210 * @param scheme the URI scheme, or null for a non-absolute URI. 
211 */ 212 public URI(String scheme, String schemeSpecificPart, String fragment) 213 throws URISyntaxException { 214 StringBuilder uri = new StringBuilder(); 215 if (scheme != null) { 216 uri.append(scheme); 217 uri.append(':'); 218 } 219 if (schemeSpecificPart != null) { 220 ALL_LEGAL_ENCODER.appendEncoded(uri, schemeSpecificPart); 221 } 222 if (fragment != null) { 223 uri.append('#'); 224 ALL_LEGAL_ENCODER.appendEncoded(uri, fragment); 225 } 226 227 parseURI(uri.toString(), false); 228 } 229 230 /** 231 * Creates a new URI instance of the given unencoded component parts. 232 * 233 * @param scheme the URI scheme, or null for a non-absolute URI. 234 */ 235 public URI(String scheme, String userInfo, String host, int port, String path, String query, 236 String fragment) throws URISyntaxException { 237 if (scheme == null && userInfo == null && host == null && path == null 238 && query == null && fragment == null) { 239 this.path = ""; 240 return; 241 } 242 243 if (scheme != null && path != null && !path.isEmpty() && path.charAt(0) != '/') { 244 throw new URISyntaxException(path, "Relative path"); 245 } 246 247 StringBuilder uri = new StringBuilder(); 248 if (scheme != null) { 249 uri.append(scheme); 250 uri.append(':'); 251 } 252 253 if (userInfo != null || host != null || port != -1) { 254 uri.append("//"); 255 } 256 257 if (userInfo != null) { 258 USER_INFO_ENCODER.appendEncoded(uri, userInfo); 259 uri.append('@'); 260 } 261 262 if (host != null) { 263 // check for IPv6 addresses that hasn't been enclosed in square brackets 264 if (host.indexOf(':') != -1 && host.indexOf(']') == -1 && host.indexOf('[') == -1) { 265 host = "[" + host + "]"; 266 } 267 uri.append(host); 268 } 269 270 if (port != -1) { 271 uri.append(':'); 272 uri.append(port); 273 } 274 275 if (path != null) { 276 PATH_ENCODER.appendEncoded(uri, path); 277 } 278 279 if (query != null) { 280 uri.append('?'); 281 ALL_LEGAL_ENCODER.appendEncoded(uri, query); 282 } 283 284 if (fragment != null) { 285 
uri.append('#'); 286 ALL_LEGAL_ENCODER.appendEncoded(uri, fragment); 287 } 288 289 parseURI(uri.toString(), true); 290 } 291 292 /** 293 * Creates a new URI instance of the given unencoded component parts. 294 * 295 * @param scheme the URI scheme, or null for a non-absolute URI. 296 */ 297 public URI(String scheme, String host, String path, String fragment) throws URISyntaxException { 298 this(scheme, null, host, -1, path, null, fragment); 299 } 300 301 /** 302 * Creates a new URI instance of the given unencoded component parts. 303 * 304 * @param scheme the URI scheme, or null for a non-absolute URI. 305 */ 306 public URI(String scheme, String authority, String path, String query, 307 String fragment) throws URISyntaxException { 308 if (scheme != null && path != null && !path.isEmpty() && path.charAt(0) != '/') { 309 throw new URISyntaxException(path, "Relative path"); 310 } 311 312 StringBuilder uri = new StringBuilder(); 313 if (scheme != null) { 314 uri.append(scheme); 315 uri.append(':'); 316 } 317 if (authority != null) { 318 uri.append("//"); 319 AUTHORITY_ENCODER.appendEncoded(uri, authority); 320 } 321 322 if (path != null) { 323 PATH_ENCODER.appendEncoded(uri, path); 324 } 325 if (query != null) { 326 uri.append('?'); 327 ALL_LEGAL_ENCODER.appendEncoded(uri, query); 328 } 329 if (fragment != null) { 330 uri.append('#'); 331 ALL_LEGAL_ENCODER.appendEncoded(uri, fragment); 332 } 333 334 parseURI(uri.toString(), false); 335 } 336 337 /** 338 * Breaks uri into its component parts. 
This first splits URI into scheme, 339 * scheme-specific part and fragment: 340 * [scheme:][scheme-specific part][#fragment] 341 * 342 * Then it breaks the scheme-specific part into authority, path and query: 343 * [//authority][path][?query] 344 * 345 * Finally it delegates to parseAuthority to break the authority into user 346 * info, host and port: 347 * [user-info@][host][:port] 348 */ 349 private void parseURI(String uri, boolean forceServer) throws URISyntaxException { 350 string = uri; 351 352 // "#fragment" 353 int fragmentStart = UrlUtils.findFirstOf(uri, "#", 0, uri.length()); 354 if (fragmentStart < uri.length()) { 355 fragment = ALL_LEGAL_ENCODER.validate(uri, fragmentStart + 1, uri.length(), "fragment"); 356 } 357 358 // scheme: 359 int start; 360 int colon = UrlUtils.findFirstOf(uri, ":", 0, fragmentStart); 361 if (colon < UrlUtils.findFirstOf(uri, "/?#", 0, fragmentStart)) { 362 absolute = true; 363 scheme = validateScheme(uri, colon); 364 start = colon + 1; 365 366 if (start == fragmentStart) { 367 throw new URISyntaxException(uri, "Scheme-specific part expected", start); 368 } 369 370 // URIs with schemes followed by a non-/ char are opaque and need no further parsing. 
371 if (!uri.regionMatches(start, "/", 0, 1)) { 372 opaque = true; 373 schemeSpecificPart = ALL_LEGAL_ENCODER.validate( 374 uri, start, fragmentStart, "scheme specific part"); 375 return; 376 } 377 } else { 378 absolute = false; 379 start = 0; 380 } 381 382 opaque = false; 383 schemeSpecificPart = uri.substring(start, fragmentStart); 384 385 // "//authority" 386 int fileStart; 387 if (uri.regionMatches(start, "//", 0, 2)) { 388 int authorityStart = start + 2; 389 fileStart = UrlUtils.findFirstOf(uri, "/?", authorityStart, fragmentStart); 390 if (authorityStart == uri.length()) { 391 throw new URISyntaxException(uri, "Authority expected", uri.length()); 392 } 393 if (authorityStart < fileStart) { 394 authority = AUTHORITY_ENCODER.validate(uri, authorityStart, fileStart, "authority"); 395 } 396 } else { 397 fileStart = start; 398 } 399 400 // "path" 401 int queryStart = UrlUtils.findFirstOf(uri, "?", fileStart, fragmentStart); 402 path = PATH_ENCODER.validate(uri, fileStart, queryStart, "path"); 403 404 // "?query" 405 if (queryStart < fragmentStart) { 406 query = ALL_LEGAL_ENCODER.validate(uri, queryStart + 1, fragmentStart, "query"); 407 } 408 409 parseAuthority(forceServer); 410 } 411 412 private String validateScheme(String uri, int end) throws URISyntaxException { 413 if (end == 0) { 414 throw new URISyntaxException(uri, "Scheme expected", 0); 415 } 416 417 for (int i = 0; i < end; i++) { 418 if (!UrlUtils.isValidSchemeChar(i, uri.charAt(i))) { 419 throw new URISyntaxException(uri, "Illegal character in scheme", 0); 420 } 421 } 422 423 return uri.substring(0, end); 424 } 425 426 /** 427 * Breaks this URI's authority into user info, host and port parts. 428 * [user-info@][host][:port] 429 * If any part of this fails this method will give up and potentially leave 430 * these fields with their default values. 431 * 432 * @param forceServer true to always throw if the authority cannot be 433 * parsed. 
If false, this method may still throw for some kinds of 434 * errors; this unpredictable behavior is consistent with the RI. 435 */ 436 private void parseAuthority(boolean forceServer) throws URISyntaxException { 437 if (authority == null) { 438 return; 439 } 440 441 String tempUserInfo = null; 442 String temp = authority; 443 int index = temp.indexOf('@'); 444 int hostIndex = 0; 445 if (index != -1) { 446 // remove user info 447 tempUserInfo = temp.substring(0, index); 448 validateUserInfo(authority, tempUserInfo, 0); 449 temp = temp.substring(index + 1); // host[:port] is left 450 hostIndex = index + 1; 451 } 452 453 index = temp.lastIndexOf(':'); 454 int endIndex = temp.indexOf(']'); 455 456 String tempHost; 457 int tempPort = -1; 458 if (index != -1 && endIndex < index) { 459 // determine port and host 460 tempHost = temp.substring(0, index); 461 462 if (index < (temp.length() - 1)) { // port part is not empty 463 try { 464 char firstPortChar = temp.charAt(index + 1); 465 if (firstPortChar >= '0' && firstPortChar <= '9') { 466 // allow only digits, no signs 467 tempPort = Integer.parseInt(temp.substring(index + 1)); 468 } else { 469 if (forceServer) { 470 throw new URISyntaxException(authority, 471 "Invalid port number", hostIndex + index + 1); 472 } 473 return; 474 } 475 } catch (NumberFormatException e) { 476 if (forceServer) { 477 throw new URISyntaxException(authority, 478 "Invalid port number", hostIndex + index + 1); 479 } 480 return; 481 } 482 } 483 } else { 484 tempHost = temp; 485 } 486 487 if (tempHost.isEmpty()) { 488 if (forceServer) { 489 throw new URISyntaxException(authority, "Expected host", hostIndex); 490 } 491 return; 492 } 493 494 if (!isValidHost(forceServer, tempHost)) { 495 return; 496 } 497 498 // this is a server based uri, 499 // fill in the userInfo, host and port fields 500 userInfo = tempUserInfo; 501 host = tempHost; 502 port = tempPort; 503 serverAuthority = true; 504 } 505 506 private void validateUserInfo(String uri, String 
userInfo, int index) 507 throws URISyntaxException { 508 for (int i = 0; i < userInfo.length(); i++) { 509 char ch = userInfo.charAt(i); 510 if (ch == ']' || ch == '[') { 511 throw new URISyntaxException(uri, "Illegal character in userInfo", index + i); 512 } 513 } 514 } 515 516 /** 517 * Returns true if {@code host} is a well-formed host name or IP address. 518 * 519 * @param forceServer true to always throw if the host cannot be parsed. If 520 * false, this method may still throw for some kinds of errors; this 521 * unpredictable behavior is consistent with the RI. 522 */ 523 private boolean isValidHost(boolean forceServer, String host) throws URISyntaxException { 524 if (host.startsWith("[")) { 525 // IPv6 address 526 if (!host.endsWith("]")) { 527 throw new URISyntaxException(host, 528 "Expected a closing square bracket for IPv6 address", 0); 529 } 530 if (InetAddress.isNumeric(host)) { 531 // If it's numeric, the presence of square brackets guarantees 532 // that it's a numeric IPv6 address. 533 return true; 534 } 535 throw new URISyntaxException(host, "Malformed IPv6 address"); 536 } 537 538 // '[' and ']' can only be the first char and last char 539 // of the host name 540 if (host.indexOf('[') != -1 || host.indexOf(']') != -1) { 541 throw new URISyntaxException(host, "Illegal character in host name", 0); 542 } 543 544 int index = host.lastIndexOf('.'); 545 if (index < 0 || index == host.length() - 1 546 || !Character.isDigit(host.charAt(index + 1))) { 547 // domain name 548 if (isValidDomainName(host)) { 549 return true; 550 } 551 if (forceServer) { 552 throw new URISyntaxException(host, "Illegal character in host name", 0); 553 } 554 return false; 555 } 556 557 // IPv4 address? 
558 try { 559 InetAddress ia = InetAddress.parseNumericAddress(host); 560 if (ia instanceof Inet4Address) { 561 return true; 562 } 563 } catch (IllegalArgumentException ignored) { 564 } 565 566 if (forceServer) { 567 throw new URISyntaxException(host, "Malformed IPv4 address", 0); 568 } 569 return false; 570 } 571 572 private boolean isValidDomainName(String host) { 573 try { 574 // The RFCs don't permit underscores in hostnames, but URI has to because 575 // a certain large website doesn't seem to care about standards and specs. 576 // See bugs 18023709, 17579865 and 18016625. 577 UriCodec.validateSimple(host, "_-."); 578 } catch (URISyntaxException e) { 579 return false; 580 } 581 582 String lastLabel = null; 583 for (String token : host.split("\\.")) { 584 lastLabel = token; 585 if (lastLabel.startsWith("-") || lastLabel.endsWith("-")) { 586 return false; 587 } 588 } 589 590 if (lastLabel == null) { 591 return false; 592 } 593 594 if (!lastLabel.equals(host)) { 595 char ch = lastLabel.charAt(0); 596 if (ch >= '0' && ch <= '9') { 597 return false; 598 } 599 } 600 return true; 601 } 602 603 /** 604 * Compares this URI with the given argument {@code uri}. This method will 605 * return a negative value if this URI instance is less than the given 606 * argument and a positive value if this URI instance is greater than the 607 * given argument. The return value {@code 0} indicates that the two 608 * instances represent the same URI. To define the order the single parts of 609 * the URI are compared with each other. String components will be ordered 610 * in the natural case-sensitive way. A hierarchical URI is less than an 611 * opaque URI and if one part is {@code null} the URI with the undefined 612 * part is less than the other one. 613 * 614 * @param uri 615 * the URI this instance has to compare with. 616 * @return the value representing the order of the two instances. 
617 */ 618 public int compareTo(URI uri) { 619 int ret; 620 621 // compare schemes 622 if (scheme == null && uri.scheme != null) { 623 return -1; 624 } else if (scheme != null && uri.scheme == null) { 625 return 1; 626 } else if (scheme != null && uri.scheme != null) { 627 ret = scheme.compareToIgnoreCase(uri.scheme); 628 if (ret != 0) { 629 return ret; 630 } 631 } 632 633 // compare opacities 634 if (!opaque && uri.opaque) { 635 return -1; 636 } else if (opaque && !uri.opaque) { 637 return 1; 638 } else if (opaque && uri.opaque) { 639 ret = schemeSpecificPart.compareTo(uri.schemeSpecificPart); 640 if (ret != 0) { 641 return ret; 642 } 643 } else { 644 645 // otherwise both must be hierarchical 646 647 // compare authorities 648 if (authority != null && uri.authority == null) { 649 return 1; 650 } else if (authority == null && uri.authority != null) { 651 return -1; 652 } else if (authority != null && uri.authority != null) { 653 if (host != null && uri.host != null) { 654 // both are server based, so compare userInfo, host, port 655 if (userInfo != null && uri.userInfo == null) { 656 return 1; 657 } else if (userInfo == null && uri.userInfo != null) { 658 return -1; 659 } else if (userInfo != null && uri.userInfo != null) { 660 ret = userInfo.compareTo(uri.userInfo); 661 if (ret != 0) { 662 return ret; 663 } 664 } 665 666 // userInfo's are the same, compare hostname 667 ret = host.compareToIgnoreCase(uri.host); 668 if (ret != 0) { 669 return ret; 670 } 671 672 // compare port 673 if (port != uri.port) { 674 return port - uri.port; 675 } 676 } else { // one or both are registry based, compare the whole 677 // authority 678 ret = authority.compareTo(uri.authority); 679 if (ret != 0) { 680 return ret; 681 } 682 } 683 } 684 685 // authorities are the same 686 // compare paths 687 ret = path.compareTo(uri.path); 688 if (ret != 0) { 689 return ret; 690 } 691 692 // compare queries 693 694 if (query != null && uri.query == null) { 695 return 1; 696 } else if (query == 
null && uri.query != null) { 697 return -1; 698 } else if (query != null && uri.query != null) { 699 ret = query.compareTo(uri.query); 700 if (ret != 0) { 701 return ret; 702 } 703 } 704 } 705 706 // everything else is identical, so compare fragments 707 if (fragment != null && uri.fragment == null) { 708 return 1; 709 } else if (fragment == null && uri.fragment != null) { 710 return -1; 711 } else if (fragment != null && uri.fragment != null) { 712 ret = fragment.compareTo(uri.fragment); 713 if (ret != 0) { 714 return ret; 715 } 716 } 717 718 // identical 719 return 0; 720 } 721 722 /** 723 * Returns the URI formed by parsing {@code uri}. This method behaves 724 * identically to the string constructor but throws a different exception 725 * on failure. The constructor fails with a checked {@link 726 * URISyntaxException}; this method fails with an unchecked {@link 727 * IllegalArgumentException}. 728 */ 729 public static URI create(String uri) { 730 try { 731 return new URI(uri); 732 } catch (URISyntaxException e) { 733 throw new IllegalArgumentException(e.getMessage()); 734 } 735 } 736 737 private URI duplicate() { 738 URI clone = new URI(); 739 clone.absolute = absolute; 740 clone.authority = authority; 741 clone.fragment = fragment; 742 clone.host = host; 743 clone.opaque = opaque; 744 clone.path = path; 745 clone.port = port; 746 clone.query = query; 747 clone.scheme = scheme; 748 clone.schemeSpecificPart = schemeSpecificPart; 749 clone.userInfo = userInfo; 750 clone.serverAuthority = serverAuthority; 751 return clone; 752 } 753 754 /* 755 * Takes a string that may contain hex sequences like %F1 or %2b and 756 * converts the hex values following the '%' to lowercase 757 */ 758 private String convertHexToLowerCase(String s) { 759 StringBuilder result = new StringBuilder(""); 760 if (s.indexOf('%') == -1) { 761 return s; 762 } 763 764 int index, prevIndex = 0; 765 while ((index = s.indexOf('%', prevIndex)) != -1) { 766 result.append(s.substring(prevIndex, index + 
1)); 767 result.append(s.substring(index + 1, index + 3).toLowerCase(Locale.US)); 768 index += 3; 769 prevIndex = index; 770 } 771 return result.toString(); 772 } 773 774 /** 775 * Returns true if the given URI escaped strings {@code first} and {@code second} are 776 * equal. 777 * 778 * TODO: This method assumes that both strings are escaped using the same escape rules 779 * yet it still performs case insensitive comparison of the escaped sequences. 780 * Why is this necessary ? We can just replace it with first.equals(second) 781 * otherwise. 782 */ 783 private boolean escapedEquals(String first, String second) { 784 // This length test isn't a micro-optimization. We need it because we sometimes 785 // calculate the number of characters to match based on the length of the second 786 // string. If the second string is shorter than the first, we might attempt to match 787 // 0 chars, and regionMatches is specified to return true in that case. 788 if (first.length() != second.length()) { 789 return false; 790 } 791 792 int prevIndex = 0; 793 while (true) { 794 int index = first.indexOf('%', prevIndex); 795 int index1 = second.indexOf('%', prevIndex); 796 if (index != index1) { 797 return false; 798 } 799 800 // index == index1 from this point on. 801 802 if (index == -1) { 803 // No more escapes, match the remainder of the string 804 // normally. 
805 return first.regionMatches(prevIndex, second, prevIndex, 806 second.length() - prevIndex); 807 } 808 809 if (!first.regionMatches(prevIndex, second, prevIndex, (index - prevIndex))) { 810 return false; 811 } 812 813 if (!first.regionMatches(true /* ignore case */, index + 1, second, index + 1, 2)) { 814 return false; 815 } 816 817 index += 3; 818 prevIndex = index; 819 } 820 } 821 822 @Override public boolean equals(Object o) { 823 if (!(o instanceof URI)) { 824 return false; 825 } 826 URI uri = (URI) o; 827 828 if (uri.fragment == null && fragment != null || uri.fragment != null 829 && fragment == null) { 830 return false; 831 } else if (uri.fragment != null && fragment != null) { 832 if (!escapedEquals(uri.fragment, fragment)) { 833 return false; 834 } 835 } 836 837 if (uri.scheme == null && scheme != null || uri.scheme != null 838 && scheme == null) { 839 return false; 840 } else if (uri.scheme != null && scheme != null) { 841 if (!uri.scheme.equalsIgnoreCase(scheme)) { 842 return false; 843 } 844 } 845 846 if (uri.opaque && opaque) { 847 return escapedEquals(uri.schemeSpecificPart, 848 schemeSpecificPart); 849 } else if (!uri.opaque && !opaque) { 850 if (!escapedEquals(path, uri.path)) { 851 return false; 852 } 853 854 if (uri.query != null && query == null || uri.query == null 855 && query != null) { 856 return false; 857 } else if (uri.query != null && query != null) { 858 if (!escapedEquals(uri.query, query)) { 859 return false; 860 } 861 } 862 863 if (uri.authority != null && authority == null 864 || uri.authority == null && authority != null) { 865 return false; 866 } else if (uri.authority != null && authority != null) { 867 if (uri.host != null && host == null || uri.host == null 868 && host != null) { 869 return false; 870 } else if (uri.host == null && host == null) { 871 // both are registry based, so compare the whole authority 872 return escapedEquals(uri.authority, authority); 873 } else { // uri.host != null && host != null, so server-based 
874 if (!host.equalsIgnoreCase(uri.host)) { 875 return false; 876 } 877 878 if (port != uri.port) { 879 return false; 880 } 881 882 if (uri.userInfo != null && userInfo == null 883 || uri.userInfo == null && userInfo != null) { 884 return false; 885 } else if (uri.userInfo != null && userInfo != null) { 886 return escapedEquals(userInfo, uri.userInfo); 887 } else { 888 return true; 889 } 890 } 891 } else { 892 // no authority 893 return true; 894 } 895 896 } else { 897 // one is opaque, the other hierarchical 898 return false; 899 } 900 } 901 902 /** 903 * Returns the scheme of this URI, or null if this URI has no scheme. This 904 * is also known as the protocol. 905 */ 906 public String getScheme() { 907 return scheme; 908 } 909 910 /** 911 * Returns the decoded scheme-specific part of this URI, or null if this URI 912 * has no scheme-specific part. 913 */ 914 public String getSchemeSpecificPart() { 915 return decode(schemeSpecificPart); 916 } 917 918 /** 919 * Returns the encoded scheme-specific part of this URI, or null if this URI 920 * has no scheme-specific part. 921 */ 922 public String getRawSchemeSpecificPart() { 923 return schemeSpecificPart; 924 } 925 926 /** 927 * Returns the decoded authority part of this URI, or null if this URI has 928 * no authority. 929 */ 930 public String getAuthority() { 931 return decode(authority); 932 } 933 934 /** 935 * Returns the encoded authority of this URI, or null if this URI has no 936 * authority. 937 */ 938 public String getRawAuthority() { 939 return authority; 940 } 941 942 /** 943 * Returns the decoded user info of this URI, or null if this URI has no 944 * user info. 945 */ 946 public String getUserInfo() { 947 return decode(userInfo); 948 } 949 950 /** 951 * Returns the encoded user info of this URI, or null if this URI has no 952 * user info. 953 */ 954 public String getRawUserInfo() { 955 return userInfo; 956 } 957 958 /** 959 * Returns the host of this URI, or null if this URI has no host. 
960 */ 961 public String getHost() { 962 return host; 963 } 964 965 /** 966 * Returns the port number of this URI, or {@code -1} if this URI has no 967 * explicit port. 968 */ 969 public int getPort() { 970 return port; 971 } 972 973 /** @hide */ 974 public int getEffectivePort() { 975 return getEffectivePort(scheme, port); 976 } 977 978 /** 979 * Returns the port to use for {@code scheme} connections will use when 980 * {@link #getPort} returns {@code specifiedPort}. 981 * 982 * @hide 983 */ 984 public static int getEffectivePort(String scheme, int specifiedPort) { 985 if (specifiedPort != -1) { 986 return specifiedPort; 987 } 988 989 if ("http".equalsIgnoreCase(scheme)) { 990 return 80; 991 } else if ("https".equalsIgnoreCase(scheme)) { 992 return 443; 993 } else { 994 return -1; 995 } 996 } 997 998 /** 999 * Returns the decoded path of this URI, or null if this URI has no path. 1000 */ 1001 public String getPath() { 1002 return decode(path); 1003 } 1004 1005 /** 1006 * Returns the encoded path of this URI, or null if this URI has no path. 1007 */ 1008 public String getRawPath() { 1009 return path; 1010 } 1011 1012 /** 1013 * Returns the decoded query of this URI, or null if this URI has no query. 1014 */ 1015 public String getQuery() { 1016 return decode(query); 1017 } 1018 1019 /** 1020 * Returns the encoded query of this URI, or null if this URI has no query. 1021 */ 1022 public String getRawQuery() { 1023 return query; 1024 } 1025 1026 /** 1027 * Returns the decoded fragment of this URI, or null if this URI has no 1028 * fragment. 1029 */ 1030 public String getFragment() { 1031 return decode(fragment); 1032 } 1033 1034 /** 1035 * Gets the encoded fragment of this URI, or null if this URI has no 1036 * fragment. 
1037 */ 1038 public String getRawFragment() { 1039 return fragment; 1040 } 1041 1042 @Override public int hashCode() { 1043 if (hash == -1) { 1044 hash = getHashString().hashCode(); 1045 } 1046 return hash; 1047 } 1048 1049 /** 1050 * Returns true if this URI is absolute, which means that a scheme is 1051 * defined. 1052 */ 1053 public boolean isAbsolute() { 1054 // TODO: simplify to 'scheme != null' ? 1055 return absolute; 1056 } 1057 1058 /** 1059 * Returns true if this URI is opaque. Opaque URIs are absolute and have a 1060 * scheme-specific part that does not start with a slash character. All 1061 * parts except scheme, scheme-specific and fragment are undefined. 1062 */ 1063 public boolean isOpaque() { 1064 return opaque; 1065 } 1066 1067 /** 1068 * Returns the normalized path. 1069 */ 1070 private String normalize(String path, boolean discardRelativePrefix) { 1071 path = UrlUtils.canonicalizePath(path, discardRelativePrefix); 1072 1073 /* 1074 * If the path contains a colon before the first colon, prepend 1075 * "./" to differentiate the path from a scheme prefix. 1076 */ 1077 int colon = path.indexOf(':'); 1078 if (colon != -1) { 1079 int slash = path.indexOf('/'); 1080 if (slash == -1 || colon < slash) { 1081 path = "./" + path; 1082 } 1083 } 1084 1085 return path; 1086 } 1087 1088 /** 1089 * Normalizes the path part of this URI. 1090 * 1091 * @return an URI object which represents this instance with a normalized 1092 * path. 1093 */ 1094 public URI normalize() { 1095 if (opaque) { 1096 return this; 1097 } 1098 String normalizedPath = normalize(path, false); 1099 // if the path is already normalized, return this 1100 if (path.equals(normalizedPath)) { 1101 return this; 1102 } 1103 // get an exact copy of the URI re-calculate the scheme specific part 1104 // since the path of the normalized URI is different from this URI. 
1105 URI result = duplicate(); 1106 result.path = normalizedPath; 1107 result.setSchemeSpecificPart(); 1108 return result; 1109 } 1110 1111 /** 1112 * Tries to parse the authority component of this URI to divide it into the 1113 * host, port, and user-info. If this URI is already determined as a 1114 * ServerAuthority this instance will be returned without changes. 1115 * 1116 * @return this instance with the components of the parsed server authority. 1117 * @throws URISyntaxException 1118 * if the authority part could not be parsed as a server-based 1119 * authority. 1120 */ 1121 public URI parseServerAuthority() throws URISyntaxException { 1122 if (!serverAuthority) { 1123 parseAuthority(true); 1124 } 1125 return this; 1126 } 1127 1128 /** 1129 * Makes the given URI {@code relative} to a relative URI against the URI 1130 * represented by this instance. 1131 * 1132 * @param relative 1133 * the URI which has to be relativized against this URI. 1134 * @return the relative URI. 1135 */ 1136 public URI relativize(URI relative) { 1137 if (relative.opaque || opaque) { 1138 return relative; 1139 } 1140 1141 if (scheme == null ? relative.scheme != null : !scheme 1142 .equals(relative.scheme)) { 1143 return relative; 1144 } 1145 1146 if (authority == null ? 
relative.authority != null : !authority 1147 .equals(relative.authority)) { 1148 return relative; 1149 } 1150 1151 // normalize both paths 1152 String thisPath = normalize(path, false); 1153 String relativePath = normalize(relative.path, false); 1154 1155 /* 1156 * if the paths aren't equal, then we need to determine if this URI's 1157 * path is a parent path (begins with) the relative URI's path 1158 */ 1159 if (!thisPath.equals(relativePath)) { 1160 // drop everything after the last slash in this path 1161 thisPath = thisPath.substring(0, thisPath.lastIndexOf('/') + 1); 1162 1163 /* 1164 * if the relative URI's path doesn't start with this URI's path, 1165 * then just return the relative URI; the URIs have nothing in 1166 * common 1167 */ 1168 if (!relativePath.startsWith(thisPath)) { 1169 return relative; 1170 } 1171 } 1172 1173 URI result = new URI(); 1174 result.fragment = relative.fragment; 1175 result.query = relative.query; 1176 // the result URI is the remainder of the relative URI's path 1177 result.path = relativePath.substring(thisPath.length()); 1178 result.setSchemeSpecificPart(); 1179 return result; 1180 } 1181 1182 /** 1183 * Resolves the given URI {@code relative} against the URI represented by 1184 * this instance. 1185 * 1186 * @param relative 1187 * the URI which has to be resolved against this URI. 1188 * @return the resolved URI. 1189 */ 1190 public URI resolve(URI relative) { 1191 if (relative.absolute || opaque) { 1192 return relative; 1193 } 1194 1195 if (relative.authority != null) { 1196 // If the relative URI has an authority, the result is the relative 1197 // with this URI's scheme. 
1198 URI result = relative.duplicate(); 1199 result.scheme = scheme; 1200 result.absolute = absolute; 1201 return result; 1202 } 1203 1204 if (relative.path.isEmpty() && relative.scheme == null && relative.query == null) { 1205 // if the relative URI only consists of at most a fragment, 1206 URI result = duplicate(); 1207 result.fragment = relative.fragment; 1208 return result; 1209 } 1210 1211 URI result = duplicate(); 1212 result.fragment = relative.fragment; 1213 result.query = relative.query; 1214 String resolvedPath; 1215 if (relative.path.startsWith("/")) { 1216 // The relative URI has an absolute path; use it. 1217 resolvedPath = relative.path; 1218 } else if (relative.path.isEmpty()) { 1219 // The relative URI has no path; use the base path. 1220 resolvedPath = path; 1221 } else { 1222 // The relative URI has a relative path; combine the paths. 1223 int endIndex = path.lastIndexOf('/') + 1; 1224 resolvedPath = path.substring(0, endIndex) + relative.path; 1225 } 1226 result.path = UrlUtils.authoritySafePath(result.authority, normalize(resolvedPath, true)); 1227 result.setSchemeSpecificPart(); 1228 return result; 1229 } 1230 1231 /** 1232 * Helper method used to re-calculate the scheme specific part of the 1233 * resolved or normalized URIs 1234 */ 1235 private void setSchemeSpecificPart() { 1236 // ssp = [//authority][path][?query] 1237 StringBuilder ssp = new StringBuilder(); 1238 if (authority != null) { 1239 ssp.append("//" + authority); 1240 } 1241 if (path != null) { 1242 ssp.append(path); 1243 } 1244 if (query != null) { 1245 ssp.append("?" + query); 1246 } 1247 schemeSpecificPart = ssp.toString(); 1248 // reset string, so that it can be re-calculated correctly when asked. 1249 string = null; 1250 } 1251 1252 /** 1253 * Creates a new URI instance by parsing the given string {@code relative} 1254 * and resolves the created URI against the URI represented by this 1255 * instance. 
1256 * 1257 * @param relative 1258 * the given string to create the new URI instance which has to 1259 * be resolved later on. 1260 * @return the created and resolved URI. 1261 */ 1262 public URI resolve(String relative) { 1263 return resolve(create(relative)); 1264 } 1265 1266 private String decode(String s) { 1267 return s != null ? UriCodec.decode(s) : null; 1268 } 1269 1270 /** 1271 * Returns the textual string representation of this URI instance using the 1272 * US-ASCII encoding. 1273 * 1274 * @return the US-ASCII string representation of this URI. 1275 */ 1276 public String toASCIIString() { 1277 StringBuilder result = new StringBuilder(); 1278 ASCII_ONLY.appendEncoded(result, toString()); 1279 return result.toString(); 1280 } 1281 1282 /** 1283 * Returns the encoded URI. 1284 */ 1285 @Override public String toString() { 1286 if (string != null) { 1287 return string; 1288 } 1289 1290 StringBuilder result = new StringBuilder(); 1291 if (scheme != null) { 1292 result.append(scheme); 1293 result.append(':'); 1294 } 1295 if (opaque) { 1296 result.append(schemeSpecificPart); 1297 } else { 1298 if (authority != null) { 1299 result.append("//"); 1300 result.append(authority); 1301 } 1302 1303 if (path != null) { 1304 result.append(path); 1305 } 1306 1307 if (query != null) { 1308 result.append('?'); 1309 result.append(query); 1310 } 1311 } 1312 1313 if (fragment != null) { 1314 result.append('#'); 1315 result.append(fragment); 1316 } 1317 1318 string = result.toString(); 1319 return string; 1320 } 1321 1322 /* 1323 * Form a string from the components of this URI, similarly to the 1324 * toString() method. But this method converts scheme and host to lowercase, 1325 * and converts escaped octets to lowercase. 
1326 */ 1327 private String getHashString() { 1328 StringBuilder result = new StringBuilder(); 1329 if (scheme != null) { 1330 result.append(scheme.toLowerCase(Locale.US)); 1331 result.append(':'); 1332 } 1333 if (opaque) { 1334 result.append(schemeSpecificPart); 1335 } else { 1336 if (authority != null) { 1337 result.append("//"); 1338 if (host == null) { 1339 result.append(authority); 1340 } else { 1341 if (userInfo != null) { 1342 result.append(userInfo + "@"); 1343 } 1344 result.append(host.toLowerCase(Locale.US)); 1345 if (port != -1) { 1346 result.append(":" + port); 1347 } 1348 } 1349 } 1350 1351 if (path != null) { 1352 result.append(path); 1353 } 1354 1355 if (query != null) { 1356 result.append('?'); 1357 result.append(query); 1358 } 1359 } 1360 1361 if (fragment != null) { 1362 result.append('#'); 1363 result.append(fragment); 1364 } 1365 1366 return convertHexToLowerCase(result.toString()); 1367 } 1368 1369 /** 1370 * Converts this URI instance to a URL. 1371 * 1372 * @return the created URL representing the same resource as this URI. 1373 * @throws MalformedURLException 1374 * if an error occurs while creating the URL or no protocol 1375 * handler could be found. 1376 */ 1377 public URL toURL() throws MalformedURLException { 1378 if (!absolute) { 1379 throw new IllegalArgumentException("URI is not absolute: " + toString()); 1380 } 1381 return new URL(toString()); 1382 } 1383 1384 private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException { 1385 in.defaultReadObject(); 1386 try { 1387 parseURI(string, false); 1388 } catch (URISyntaxException e) { 1389 throw new IOException(e.toString()); 1390 } 1391 } 1392 1393 private void writeObject(ObjectOutputStream out) throws IOException, ClassNotFoundException { 1394 // call toString() to ensure the value of string field is calculated 1395 toString(); 1396 out.defaultWriteObject(); 1397 } 1398} 1399