URI.java revision c68609e723a5daa20888abdb640799d4353fd590
1/* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18package java.net; 19 20import java.io.IOException; 21import java.io.ObjectInputStream; 22import java.io.ObjectOutputStream; 23import java.io.Serializable; 24import java.util.Locale; 25import libcore.net.UriCodec; 26import libcore.net.url.UrlUtils; 27 28/** 29 * A Uniform Resource Identifier that identifies an abstract or physical 30 * resource, as specified by <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC 31 * 2396</a>. 32 * 33 * <h3>Parts of a URI</h3> 34 * A URI is composed of many parts. This class can both parse URI strings into 35 * parts and compose URI strings from parts. 
For example, consider the parts of 36 * this URI: 37 * {@code http://username:password@host:8080/directory/file?query#fragment} 38 * <table> 39 * <tr><th>Component </th><th>Example value </th><th>Also known as</th></tr> 40 * <tr><td>{@link #getScheme() Scheme} </td><td>{@code http} </td><td>protocol</td></tr> 41 * <tr><td>{@link #getSchemeSpecificPart() Scheme-specific part}</td><td>{@code //username:password@host:8080/directory/file?query#fragment}</td><td></td></tr> 42 * <tr><td>{@link #getAuthority() Authority} </td><td>{@code username:password@host:8080} </td><td></td></tr> 43 * <tr><td>{@link #getUserInfo() User Info} </td><td>{@code username:password} </td><td></td></tr> 44 * <tr><td>{@link #getHost() Host} </td><td>{@code host} </td><td></td></tr> 45 * <tr><td>{@link #getPort() Port} </td><td>{@code 8080} </td><td></td></tr> 46 * <tr><td>{@link #getPath() Path} </td><td>{@code /directory/file} </td><td></td></tr> 47 * <tr><td>{@link #getQuery() Query} </td><td>{@code query} </td><td></td></tr> 48 * <tr><td>{@link #getFragment() Fragment} </td><td>{@code fragment} </td><td>ref</td></tr> 49 * </table> 50 * 51 * <h3>Encoding and Decoding URI Components</h3> 52 * Each component of a URI permits a limited set of legal characters. Other 53 * characters must first be <i>encoded</i> before they can be embedded in a URI. 54 * To recover the original characters from a URI, they may be <i>decoded</i>. 55 * This class refers to encoded strings as <strong>raw</strong> strings. 
For 56 * example, consider how this URI is decoded: 57 * {@code http://user:pa55w%3Frd@host:80/doc%7Csearch?q=green%20robots#over%206%22} 58 * <table> 59 * <tr><th>Component </th><th>Legal Characters </th><th>Other Constraints </th><th>Raw Value </th><th>Value</th></tr> 60 * <tr><td>Scheme </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code +-.} </td><td>First character must be in {@code a-z}, {@code A-Z}</td><td> </td><td>{@code http}</td></tr> 61 * <tr><td>Scheme-specific part</td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=?/[]@}</td><td>Non-ASCII characters okay </td><td>{@code //user:pa55w%3Frd@host:80/doc%7Csearch?q=green%20robots}</td><td>{@code //user:pa55w?rd@host:80/doc|search?q=green robots}</td></tr> 62 * <tr><td>Authority </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=@[]} </td><td>Non-ASCII characters okay </td><td>{@code user:pa55w%3Frd@host:80} </td><td>{@code user:pa55w?rd@host:80}</td></tr> 63 * <tr><td>User Info </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=} </td><td>Non-ASCII characters okay </td><td>{@code user:pa55w%3Frd} </td><td>{@code user:pa55w?rd}</td></tr> 64 * <tr><td>Host </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code -.[]} </td><td>Domain name, IPv4 address or [IPv6 address] </td><td> </td><td>host</td></tr> 65 * <tr><td>Port </td><td>{@code 0-9} </td><td> </td><td> </td><td>{@code 80}</td></tr> 66 * <tr><td>Path </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=/@} </td><td>Non-ASCII characters okay </td><td>{@code /doc%7Csearch} </td><td>{@code /doc|search}</td></tr> 67 * <tr><td>Query </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=?/[]@}</td><td>Non-ASCII characters okay </td><td>{@code q=green%20robots} </td><td>{@code q=green robots}</td></tr> 68 * <tr><td>Fragment </td><td>{@code 0-9}, {@code a-z}, {@code A-Z}, {@code _-!.~'()*,;:$&+=?/[]@}</td><td>Non-ASCII characters okay </td><td>{@code over%206%22} 
</td><td>{@code over 6"}</td></tr> 69 * </table> 70 * A URI's host, port and scheme are not eligible for encoding and must not 71 * contain illegal characters. 72 * 73 * <p>To encode a URI, invoke any of the multiple-parameter constructors of this 74 * class. These constructors accept your original strings and encode them into 75 * their raw form. 76 * 77 * <p>To decode a URI, invoke the single-string constructor, and then the 78 * appropriate <code>get<i>Component()</i></code> methods to get the decoded 79 * components. 80 * 81 * <p>The {@link URL} class can be used to retrieve resources by their URI. 82 */ 83// TODO: document relative URIs 84// TODO: document opaque URIs 85public final class URI implements Comparable<URI>, Serializable { 86 87 private static final long serialVersionUID = -6052424284110960213l; 88 89 static final String UNRESERVED = "_-!.~\'()*"; 90 static final String PUNCTUATION = ",;:$&+="; 91 92 static final UriCodec USER_INFO_ENCODER = new PartEncoder(""); 93 static final UriCodec PATH_ENCODER = new PartEncoder("/@"); 94 static final UriCodec AUTHORITY_ENCODER = new PartEncoder("@[]"); 95 96 /** for java.net.URL, which foolishly combines these two parts */ 97 static final UriCodec FILE_AND_QUERY_ENCODER = new PartEncoder("/@?"); 98 99 /** for query, fragment, and scheme-specific part */ 100 static final UriCodec ALL_LEGAL_ENCODER = new PartEncoder("?/[]@"); 101 102 /** Retains all ASCII chars including delimiters. */ 103 private static final UriCodec ASCII_ONLY = new UriCodec() { 104 @Override protected boolean isRetained(char c) { 105 return c <= 127; 106 } 107 }; 108 109 /** 110 * Encodes the unescaped characters of {@code s} that are not permitted. 111 * Permitted characters are: 112 * <ul> 113 * <li>Unreserved characters in <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>. 
114 * <li>{@code extraOkayChars}, 115 * <li>non-ASCII, non-control, non-whitespace characters 116 * </ul> 117 */ 118 private static class PartEncoder extends UriCodec { 119 private final String extraLegalCharacters; 120 121 PartEncoder(String extraLegalCharacters) { 122 this.extraLegalCharacters = extraLegalCharacters; 123 } 124 125 @Override protected boolean isRetained(char c) { 126 return UNRESERVED.indexOf(c) != -1 127 || PUNCTUATION.indexOf(c) != -1 128 || extraLegalCharacters.indexOf(c) != -1 129 || (c > 127 && !Character.isSpaceChar(c) && !Character.isISOControl(c)); 130 } 131 } 132 133 private String string; 134 private transient String scheme; 135 private transient String schemeSpecificPart; 136 private transient String authority; 137 private transient String userInfo; 138 private transient String host; 139 private transient int port = -1; 140 private transient String path; 141 private transient String query; 142 private transient String fragment; 143 private transient boolean opaque; 144 private transient boolean absolute; 145 private transient boolean serverAuthority = false; 146 147 private transient int hash = -1; 148 149 private URI() {} 150 151 /** 152 * Creates a new URI instance by parsing {@code spec}. 153 * 154 * @param spec a URI whose illegal characters have all been encoded. 155 */ 156 public URI(String spec) throws URISyntaxException { 157 parseURI(spec, false); 158 } 159 160 /** 161 * Creates a new URI instance of the given unencoded component parts. 162 * 163 * @param scheme the URI scheme, or null for a non-absolute URI. 
164 */ 165 public URI(String scheme, String schemeSpecificPart, String fragment) 166 throws URISyntaxException { 167 StringBuilder uri = new StringBuilder(); 168 if (scheme != null) { 169 uri.append(scheme); 170 uri.append(':'); 171 } 172 if (schemeSpecificPart != null) { 173 ALL_LEGAL_ENCODER.appendEncoded(uri, schemeSpecificPart); 174 } 175 if (fragment != null) { 176 uri.append('#'); 177 ALL_LEGAL_ENCODER.appendEncoded(uri, fragment); 178 } 179 180 parseURI(uri.toString(), false); 181 } 182 183 /** 184 * Creates a new URI instance of the given unencoded component parts. 185 * 186 * @param scheme the URI scheme, or null for a non-absolute URI. 187 */ 188 public URI(String scheme, String userInfo, String host, int port, String path, String query, 189 String fragment) throws URISyntaxException { 190 if (scheme == null && userInfo == null && host == null && path == null 191 && query == null && fragment == null) { 192 this.path = ""; 193 return; 194 } 195 196 if (scheme != null && path != null && !path.isEmpty() && path.charAt(0) != '/') { 197 throw new URISyntaxException(path, "Relative path"); 198 } 199 200 StringBuilder uri = new StringBuilder(); 201 if (scheme != null) { 202 uri.append(scheme); 203 uri.append(':'); 204 } 205 206 if (userInfo != null || host != null || port != -1) { 207 uri.append("//"); 208 } 209 210 if (userInfo != null) { 211 USER_INFO_ENCODER.appendEncoded(uri, userInfo); 212 uri.append('@'); 213 } 214 215 if (host != null) { 216 // check for IPv6 addresses that hasn't been enclosed in square brackets 217 if (host.indexOf(':') != -1 && host.indexOf(']') == -1 && host.indexOf('[') == -1) { 218 host = "[" + host + "]"; 219 } 220 uri.append(host); 221 } 222 223 if (port != -1) { 224 uri.append(':'); 225 uri.append(port); 226 } 227 228 if (path != null) { 229 PATH_ENCODER.appendEncoded(uri, path); 230 } 231 232 if (query != null) { 233 uri.append('?'); 234 ALL_LEGAL_ENCODER.appendEncoded(uri, query); 235 } 236 237 if (fragment != null) { 238 
uri.append('#'); 239 ALL_LEGAL_ENCODER.appendEncoded(uri, fragment); 240 } 241 242 parseURI(uri.toString(), true); 243 } 244 245 /** 246 * Creates a new URI instance of the given unencoded component parts. 247 * 248 * @param scheme the URI scheme, or null for a non-absolute URI. 249 */ 250 public URI(String scheme, String host, String path, String fragment) throws URISyntaxException { 251 this(scheme, null, host, -1, path, null, fragment); 252 } 253 254 /** 255 * Creates a new URI instance of the given unencoded component parts. 256 * 257 * @param scheme the URI scheme, or null for a non-absolute URI. 258 */ 259 public URI(String scheme, String authority, String path, String query, 260 String fragment) throws URISyntaxException { 261 if (scheme != null && path != null && !path.isEmpty() && path.charAt(0) != '/') { 262 throw new URISyntaxException(path, "Relative path"); 263 } 264 265 StringBuilder uri = new StringBuilder(); 266 if (scheme != null) { 267 uri.append(scheme); 268 uri.append(':'); 269 } 270 if (authority != null) { 271 uri.append("//"); 272 AUTHORITY_ENCODER.appendEncoded(uri, authority); 273 } 274 275 if (path != null) { 276 PATH_ENCODER.appendEncoded(uri, path); 277 } 278 if (query != null) { 279 uri.append('?'); 280 ALL_LEGAL_ENCODER.appendEncoded(uri, query); 281 } 282 if (fragment != null) { 283 uri.append('#'); 284 ALL_LEGAL_ENCODER.appendEncoded(uri, fragment); 285 } 286 287 parseURI(uri.toString(), false); 288 } 289 290 /** 291 * Breaks uri into its component parts. 
This first splits URI into scheme, 292 * scheme-specific part and fragment: 293 * [scheme:][scheme-specific part][#fragment] 294 * 295 * Then it breaks the scheme-specific part into authority, path and query: 296 * [//authority][path][?query] 297 * 298 * Finally it delegates to parseAuthority to break the authority into user 299 * info, host and port: 300 * [user-info@][host][:port] 301 */ 302 private void parseURI(String uri, boolean forceServer) throws URISyntaxException { 303 string = uri; 304 305 // "#fragment" 306 int fragmentStart = UrlUtils.findFirstOf(uri, "#", 0, uri.length()); 307 if (fragmentStart < uri.length()) { 308 fragment = ALL_LEGAL_ENCODER.validate(uri, fragmentStart + 1, uri.length(), "fragment"); 309 } 310 311 // scheme: 312 int start; 313 int colon = UrlUtils.findFirstOf(uri, ":", 0, fragmentStart); 314 if (colon < UrlUtils.findFirstOf(uri, "/?#", 0, fragmentStart)) { 315 absolute = true; 316 scheme = validateScheme(uri, colon); 317 start = colon + 1; 318 319 if (start == fragmentStart) { 320 throw new URISyntaxException(uri, "Scheme-specific part expected", start); 321 } 322 323 // URIs with schemes followed by a non-/ char are opaque and need no further parsing. 
324 if (!uri.regionMatches(start, "/", 0, 1)) { 325 opaque = true; 326 schemeSpecificPart = ALL_LEGAL_ENCODER.validate( 327 uri, start, fragmentStart, "scheme specific part"); 328 return; 329 } 330 } else { 331 absolute = false; 332 start = 0; 333 } 334 335 opaque = false; 336 schemeSpecificPart = uri.substring(start, fragmentStart); 337 338 // "//authority" 339 int fileStart; 340 if (uri.regionMatches(start, "//", 0, 2)) { 341 int authorityStart = start + 2; 342 fileStart = UrlUtils.findFirstOf(uri, "/?", authorityStart, fragmentStart); 343 if (authorityStart == uri.length()) { 344 throw new URISyntaxException(uri, "Authority expected", uri.length()); 345 } 346 if (authorityStart < fileStart) { 347 authority = AUTHORITY_ENCODER.validate(uri, authorityStart, fileStart, "authority"); 348 } 349 } else { 350 fileStart = start; 351 } 352 353 // "path" 354 int queryStart = UrlUtils.findFirstOf(uri, "?", fileStart, fragmentStart); 355 path = PATH_ENCODER.validate(uri, fileStart, queryStart, "path"); 356 357 // "?query" 358 if (queryStart < fragmentStart) { 359 query = ALL_LEGAL_ENCODER.validate(uri, queryStart + 1, fragmentStart, "query"); 360 } 361 362 parseAuthority(forceServer); 363 } 364 365 private String validateScheme(String uri, int end) throws URISyntaxException { 366 if (end == 0) { 367 throw new URISyntaxException(uri, "Scheme expected", 0); 368 } 369 370 for (int i = 0; i < end; i++) { 371 if (!UrlUtils.isValidSchemeChar(i, uri.charAt(i))) { 372 throw new URISyntaxException(uri, "Illegal character in scheme", 0); 373 } 374 } 375 376 return uri.substring(0, end); 377 } 378 379 /** 380 * Breaks this URI's authority into user info, host and port parts. 381 * [user-info@][host][:port] 382 * If any part of this fails this method will give up and potentially leave 383 * these fields with their default values. 384 * 385 * @param forceServer true to always throw if the authority cannot be 386 * parsed. 
If false, this method may still throw for some kinds of 387 * errors; this unpredictable behavior is consistent with the RI. 388 */ 389 private void parseAuthority(boolean forceServer) throws URISyntaxException { 390 if (authority == null) { 391 return; 392 } 393 394 String tempUserInfo = null; 395 String temp = authority; 396 int index = temp.indexOf('@'); 397 int hostIndex = 0; 398 if (index != -1) { 399 // remove user info 400 tempUserInfo = temp.substring(0, index); 401 validateUserInfo(authority, tempUserInfo, 0); 402 temp = temp.substring(index + 1); // host[:port] is left 403 hostIndex = index + 1; 404 } 405 406 index = temp.lastIndexOf(':'); 407 int endIndex = temp.indexOf(']'); 408 409 String tempHost; 410 int tempPort = -1; 411 if (index != -1 && endIndex < index) { 412 // determine port and host 413 tempHost = temp.substring(0, index); 414 415 if (index < (temp.length() - 1)) { // port part is not empty 416 try { 417 tempPort = Integer.parseInt(temp.substring(index + 1)); 418 if (tempPort < 0) { 419 if (forceServer) { 420 throw new URISyntaxException(authority, 421 "Invalid port number", hostIndex + index + 1); 422 } 423 return; 424 } 425 } catch (NumberFormatException e) { 426 if (forceServer) { 427 throw new URISyntaxException(authority, 428 "Invalid port number", hostIndex + index + 1); 429 } 430 return; 431 } 432 } 433 } else { 434 tempHost = temp; 435 } 436 437 if (tempHost.isEmpty()) { 438 if (forceServer) { 439 throw new URISyntaxException(authority, "Expected host", hostIndex); 440 } 441 return; 442 } 443 444 if (!isValidHost(forceServer, tempHost)) { 445 return; 446 } 447 448 // this is a server based uri, 449 // fill in the userInfo, host and port fields 450 userInfo = tempUserInfo; 451 host = tempHost; 452 port = tempPort; 453 serverAuthority = true; 454 } 455 456 private void validateUserInfo(String uri, String userInfo, int index) 457 throws URISyntaxException { 458 for (int i = 0; i < userInfo.length(); i++) { 459 char ch = 
userInfo.charAt(i); 460 if (ch == ']' || ch == '[') { 461 throw new URISyntaxException(uri, "Illegal character in userInfo", index + i); 462 } 463 } 464 } 465 466 /** 467 * Returns true if {@code host} is a well-formed host name or IP address. 468 * 469 * @param forceServer true to always throw if the host cannot be parsed. If 470 * false, this method may still throw for some kinds of errors; this 471 * unpredictable behavior is consistent with the RI. 472 */ 473 private boolean isValidHost(boolean forceServer, String host) throws URISyntaxException { 474 if (host.startsWith("[")) { 475 // IPv6 address 476 if (!host.endsWith("]")) { 477 throw new URISyntaxException(host, 478 "Expected a closing square bracket for IPv6 address", 0); 479 } 480 if (InetAddress.isNumeric(host)) { 481 // If it's numeric, the presence of square brackets guarantees 482 // that it's a numeric IPv6 address. 483 return true; 484 } 485 throw new URISyntaxException(host, "Malformed IPv6 address"); 486 } 487 488 // '[' and ']' can only be the first char and last char 489 // of the host name 490 if (host.indexOf('[') != -1 || host.indexOf(']') != -1) { 491 throw new URISyntaxException(host, "Illegal character in host name", 0); 492 } 493 494 int index = host.lastIndexOf('.'); 495 if (index < 0 || index == host.length() - 1 496 || !Character.isDigit(host.charAt(index + 1))) { 497 // domain name 498 if (isValidDomainName(host)) { 499 return true; 500 } 501 if (forceServer) { 502 throw new URISyntaxException(host, "Illegal character in host name", 0); 503 } 504 return false; 505 } 506 507 // IPv4 address? 
508 try { 509 InetAddress ia = InetAddress.parseNumericAddress(host); 510 if (ia instanceof Inet4Address) { 511 return true; 512 } 513 } catch (IllegalArgumentException ignored) { 514 } 515 516 if (forceServer) { 517 throw new URISyntaxException(host, "Malformed IPv4 address", 0); 518 } 519 return false; 520 } 521 522 private boolean isValidDomainName(String host) { 523 try { 524 UriCodec.validateSimple(host, "-."); 525 } catch (URISyntaxException e) { 526 return false; 527 } 528 529 String lastLabel = null; 530 for (String token : host.split("\\.")) { 531 lastLabel = token; 532 if (lastLabel.startsWith("-") || lastLabel.endsWith("-")) { 533 return false; 534 } 535 } 536 537 if (lastLabel == null) { 538 return false; 539 } 540 541 if (!lastLabel.equals(host)) { 542 char ch = lastLabel.charAt(0); 543 if (ch >= '0' && ch <= '9') { 544 return false; 545 } 546 } 547 return true; 548 } 549 550 /** 551 * Compares this URI with the given argument {@code uri}. This method will 552 * return a negative value if this URI instance is less than the given 553 * argument and a positive value if this URI instance is greater than the 554 * given argument. The return value {@code 0} indicates that the two 555 * instances represent the same URI. To define the order the single parts of 556 * the URI are compared with each other. String components will be ordered 557 * in the natural case-sensitive way. A hierarchical URI is less than an 558 * opaque URI and if one part is {@code null} the URI with the undefined 559 * part is less than the other one. 560 * 561 * @param uri 562 * the URI this instance has to compare with. 563 * @return the value representing the order of the two instances. 
564 */ 565 public int compareTo(URI uri) { 566 int ret; 567 568 // compare schemes 569 if (scheme == null && uri.scheme != null) { 570 return -1; 571 } else if (scheme != null && uri.scheme == null) { 572 return 1; 573 } else if (scheme != null && uri.scheme != null) { 574 ret = scheme.compareToIgnoreCase(uri.scheme); 575 if (ret != 0) { 576 return ret; 577 } 578 } 579 580 // compare opacities 581 if (!opaque && uri.opaque) { 582 return -1; 583 } else if (opaque && !uri.opaque) { 584 return 1; 585 } else if (opaque && uri.opaque) { 586 ret = schemeSpecificPart.compareTo(uri.schemeSpecificPart); 587 if (ret != 0) { 588 return ret; 589 } 590 } else { 591 592 // otherwise both must be hierarchical 593 594 // compare authorities 595 if (authority != null && uri.authority == null) { 596 return 1; 597 } else if (authority == null && uri.authority != null) { 598 return -1; 599 } else if (authority != null && uri.authority != null) { 600 if (host != null && uri.host != null) { 601 // both are server based, so compare userInfo, host, port 602 if (userInfo != null && uri.userInfo == null) { 603 return 1; 604 } else if (userInfo == null && uri.userInfo != null) { 605 return -1; 606 } else if (userInfo != null && uri.userInfo != null) { 607 ret = userInfo.compareTo(uri.userInfo); 608 if (ret != 0) { 609 return ret; 610 } 611 } 612 613 // userInfo's are the same, compare hostname 614 ret = host.compareToIgnoreCase(uri.host); 615 if (ret != 0) { 616 return ret; 617 } 618 619 // compare port 620 if (port != uri.port) { 621 return port - uri.port; 622 } 623 } else { // one or both are registry based, compare the whole 624 // authority 625 ret = authority.compareTo(uri.authority); 626 if (ret != 0) { 627 return ret; 628 } 629 } 630 } 631 632 // authorities are the same 633 // compare paths 634 ret = path.compareTo(uri.path); 635 if (ret != 0) { 636 return ret; 637 } 638 639 // compare queries 640 641 if (query != null && uri.query == null) { 642 return 1; 643 } else if (query == 
null && uri.query != null) { 644 return -1; 645 } else if (query != null && uri.query != null) { 646 ret = query.compareTo(uri.query); 647 if (ret != 0) { 648 return ret; 649 } 650 } 651 } 652 653 // everything else is identical, so compare fragments 654 if (fragment != null && uri.fragment == null) { 655 return 1; 656 } else if (fragment == null && uri.fragment != null) { 657 return -1; 658 } else if (fragment != null && uri.fragment != null) { 659 ret = fragment.compareTo(uri.fragment); 660 if (ret != 0) { 661 return ret; 662 } 663 } 664 665 // identical 666 return 0; 667 } 668 669 /** 670 * Returns the URI formed by parsing {@code uri}. This method behaves 671 * identically to the string constructor but throws a different exception 672 * on failure. The constructor fails with a checked {@link 673 * URISyntaxException}; this method fails with an unchecked {@link 674 * IllegalArgumentException}. 675 */ 676 public static URI create(String uri) { 677 try { 678 return new URI(uri); 679 } catch (URISyntaxException e) { 680 throw new IllegalArgumentException(e.getMessage()); 681 } 682 } 683 684 private URI duplicate() { 685 URI clone = new URI(); 686 clone.absolute = absolute; 687 clone.authority = authority; 688 clone.fragment = fragment; 689 clone.host = host; 690 clone.opaque = opaque; 691 clone.path = path; 692 clone.port = port; 693 clone.query = query; 694 clone.scheme = scheme; 695 clone.schemeSpecificPart = schemeSpecificPart; 696 clone.userInfo = userInfo; 697 clone.serverAuthority = serverAuthority; 698 return clone; 699 } 700 701 /* 702 * Takes a string that may contain hex sequences like %F1 or %2b and 703 * converts the hex values following the '%' to lowercase 704 */ 705 private String convertHexToLowerCase(String s) { 706 StringBuilder result = new StringBuilder(""); 707 if (s.indexOf('%') == -1) { 708 return s; 709 } 710 711 int index, prevIndex = 0; 712 while ((index = s.indexOf('%', prevIndex)) != -1) { 713 result.append(s.substring(prevIndex, index + 
1)); 714 result.append(s.substring(index + 1, index + 3).toLowerCase(Locale.US)); 715 index += 3; 716 prevIndex = index; 717 } 718 return result.toString(); 719 } 720 721 /** 722 * Returns true if {@code first} and {@code second} are equal after 723 * unescaping hex sequences like %F1 and %2b. 724 */ 725 private boolean escapedEquals(String first, String second) { 726 if (first.indexOf('%') != second.indexOf('%')) { 727 return first.equals(second); 728 } 729 730 int index, prevIndex = 0; 731 while ((index = first.indexOf('%', prevIndex)) != -1 732 && second.indexOf('%', prevIndex) == index) { 733 boolean match = first.substring(prevIndex, index).equals( 734 second.substring(prevIndex, index)); 735 if (!match) { 736 return false; 737 } 738 739 match = first.substring(index + 1, index + 3).equalsIgnoreCase( 740 second.substring(index + 1, index + 3)); 741 if (!match) { 742 return false; 743 } 744 745 index += 3; 746 prevIndex = index; 747 } 748 return first.substring(prevIndex).equals(second.substring(prevIndex)); 749 } 750 751 @Override public boolean equals(Object o) { 752 if (!(o instanceof URI)) { 753 return false; 754 } 755 URI uri = (URI) o; 756 757 if (uri.fragment == null && fragment != null || uri.fragment != null 758 && fragment == null) { 759 return false; 760 } else if (uri.fragment != null && fragment != null) { 761 if (!escapedEquals(uri.fragment, fragment)) { 762 return false; 763 } 764 } 765 766 if (uri.scheme == null && scheme != null || uri.scheme != null 767 && scheme == null) { 768 return false; 769 } else if (uri.scheme != null && scheme != null) { 770 if (!uri.scheme.equalsIgnoreCase(scheme)) { 771 return false; 772 } 773 } 774 775 if (uri.opaque && opaque) { 776 return escapedEquals(uri.schemeSpecificPart, 777 schemeSpecificPart); 778 } else if (!uri.opaque && !opaque) { 779 if (!escapedEquals(path, uri.path)) { 780 return false; 781 } 782 783 if (uri.query != null && query == null || uri.query == null 784 && query != null) { 785 return false; 
786 } else if (uri.query != null && query != null) { 787 if (!escapedEquals(uri.query, query)) { 788 return false; 789 } 790 } 791 792 if (uri.authority != null && authority == null 793 || uri.authority == null && authority != null) { 794 return false; 795 } else if (uri.authority != null && authority != null) { 796 if (uri.host != null && host == null || uri.host == null 797 && host != null) { 798 return false; 799 } else if (uri.host == null && host == null) { 800 // both are registry based, so compare the whole authority 801 return escapedEquals(uri.authority, authority); 802 } else { // uri.host != null && host != null, so server-based 803 if (!host.equalsIgnoreCase(uri.host)) { 804 return false; 805 } 806 807 if (port != uri.port) { 808 return false; 809 } 810 811 if (uri.userInfo != null && userInfo == null 812 || uri.userInfo == null && userInfo != null) { 813 return false; 814 } else if (uri.userInfo != null && userInfo != null) { 815 return escapedEquals(userInfo, uri.userInfo); 816 } else { 817 return true; 818 } 819 } 820 } else { 821 // no authority 822 return true; 823 } 824 825 } else { 826 // one is opaque, the other hierarchical 827 return false; 828 } 829 } 830 831 /** 832 * Returns the scheme of this URI, or null if this URI has no scheme. This 833 * is also known as the protocol. 834 */ 835 public String getScheme() { 836 return scheme; 837 } 838 839 /** 840 * Returns the decoded scheme-specific part of this URI, or null if this URI 841 * has no scheme-specific part. 842 */ 843 public String getSchemeSpecificPart() { 844 return decode(schemeSpecificPart); 845 } 846 847 /** 848 * Returns the encoded scheme-specific part of this URI, or null if this URI 849 * has no scheme-specific part. 850 */ 851 public String getRawSchemeSpecificPart() { 852 return schemeSpecificPart; 853 } 854 855 /** 856 * Returns the decoded authority part of this URI, or null if this URI has 857 * no authority. 
858 */ 859 public String getAuthority() { 860 return decode(authority); 861 } 862 863 /** 864 * Returns the encoded authority of this URI, or null if this URI has no 865 * authority. 866 */ 867 public String getRawAuthority() { 868 return authority; 869 } 870 871 /** 872 * Returns the decoded user info of this URI, or null if this URI has no 873 * user info. 874 */ 875 public String getUserInfo() { 876 return decode(userInfo); 877 } 878 879 /** 880 * Returns the encoded user info of this URI, or null if this URI has no 881 * user info. 882 */ 883 public String getRawUserInfo() { 884 return userInfo; 885 } 886 887 /** 888 * Returns the host of this URI, or null if this URI has no host. 889 */ 890 public String getHost() { 891 return host; 892 } 893 894 /** 895 * Returns the port number of this URI, or {@code -1} if this URI has no 896 * explicit port. 897 */ 898 public int getPort() { 899 return port; 900 } 901 902 /** @hide */ 903 public int getEffectivePort() { 904 return getEffectivePort(scheme, port); 905 } 906 907 /** 908 * Returns the port to use for {@code scheme} connections will use when 909 * {@link #getPort} returns {@code specifiedPort}. 910 * 911 * @hide 912 */ 913 public static int getEffectivePort(String scheme, int specifiedPort) { 914 if (specifiedPort != -1) { 915 return specifiedPort; 916 } 917 918 if ("http".equalsIgnoreCase(scheme)) { 919 return 80; 920 } else if ("https".equalsIgnoreCase(scheme)) { 921 return 443; 922 } else { 923 return -1; 924 } 925 } 926 927 /** 928 * Returns the decoded path of this URI, or null if this URI has no path. 929 */ 930 public String getPath() { 931 return decode(path); 932 } 933 934 /** 935 * Gets the encoded path of this URI, or null if this URI has no path. 936 */ 937 public String getRawPath() { 938 return path; 939 } 940 941 /** 942 * Returns the decoded query of this URI, or null if this URI has no query. 
943 */ 944 public String getQuery() { 945 return decode(query); 946 } 947 948 /** 949 * Returns the encoded query of this URI, or null if this URI has no query. 950 */ 951 public String getRawQuery() { 952 return query; 953 } 954 955 /** 956 * Returns the decoded fragment of this URI, or null if this URI has no 957 * fragment. 958 */ 959 public String getFragment() { 960 return decode(fragment); 961 } 962 963 /** 964 * Gets the encoded fragment of this URI, or null if this URI has no 965 * fragment. 966 */ 967 public String getRawFragment() { 968 return fragment; 969 } 970 971 @Override public int hashCode() { 972 if (hash == -1) { 973 hash = getHashString().hashCode(); 974 } 975 return hash; 976 } 977 978 /** 979 * Returns true if this URI is absolute, which means that a scheme is 980 * defined. 981 */ 982 public boolean isAbsolute() { 983 // TODO: simplify to 'scheme != null' ? 984 return absolute; 985 } 986 987 /** 988 * Returns true if this URI is opaque. Opaque URIs are absolute and have a 989 * scheme-specific part that does not start with a slash character. All 990 * parts except scheme, scheme-specific and fragment are undefined. 991 */ 992 public boolean isOpaque() { 993 return opaque; 994 } 995 996 /** 997 * Returns the normalized path. 998 */ 999 private String normalize(String path, boolean discardRelativePrefix) { 1000 path = UrlUtils.canonicalizePath(path, discardRelativePrefix); 1001 1002 /* 1003 * If the path contains a colon before the first colon, prepend 1004 * "./" to differentiate the path from a scheme prefix. 1005 */ 1006 int colon = path.indexOf(':'); 1007 if (colon != -1) { 1008 int slash = path.indexOf('/'); 1009 if (slash == -1 || colon < slash) { 1010 path = "./" + path; 1011 } 1012 } 1013 1014 return path; 1015 } 1016 1017 /** 1018 * Normalizes the path part of this URI. 1019 * 1020 * @return an URI object which represents this instance with a normalized 1021 * path. 
1022 */ 1023 public URI normalize() { 1024 if (opaque) { 1025 return this; 1026 } 1027 String normalizedPath = normalize(path, false); 1028 // if the path is already normalized, return this 1029 if (path.equals(normalizedPath)) { 1030 return this; 1031 } 1032 // get an exact copy of the URI re-calculate the scheme specific part 1033 // since the path of the normalized URI is different from this URI. 1034 URI result = duplicate(); 1035 result.path = normalizedPath; 1036 result.setSchemeSpecificPart(); 1037 return result; 1038 } 1039 1040 /** 1041 * Tries to parse the authority component of this URI to divide it into the 1042 * host, port, and user-info. If this URI is already determined as a 1043 * ServerAuthority this instance will be returned without changes. 1044 * 1045 * @return this instance with the components of the parsed server authority. 1046 * @throws URISyntaxException 1047 * if the authority part could not be parsed as a server-based 1048 * authority. 1049 */ 1050 public URI parseServerAuthority() throws URISyntaxException { 1051 if (!serverAuthority) { 1052 parseAuthority(true); 1053 } 1054 return this; 1055 } 1056 1057 /** 1058 * Makes the given URI {@code relative} to a relative URI against the URI 1059 * represented by this instance. 1060 * 1061 * @param relative 1062 * the URI which has to be relativized against this URI. 1063 * @return the relative URI. 1064 */ 1065 public URI relativize(URI relative) { 1066 if (relative.opaque || opaque) { 1067 return relative; 1068 } 1069 1070 if (scheme == null ? relative.scheme != null : !scheme 1071 .equals(relative.scheme)) { 1072 return relative; 1073 } 1074 1075 if (authority == null ? 
relative.authority != null : !authority 1076 .equals(relative.authority)) { 1077 return relative; 1078 } 1079 1080 // normalize both paths 1081 String thisPath = normalize(path, false); 1082 String relativePath = normalize(relative.path, false); 1083 1084 /* 1085 * if the paths aren't equal, then we need to determine if this URI's 1086 * path is a parent path (begins with) the relative URI's path 1087 */ 1088 if (!thisPath.equals(relativePath)) { 1089 // drop everything after the last slash in this path 1090 thisPath = thisPath.substring(0, thisPath.lastIndexOf('/') + 1); 1091 1092 /* 1093 * if the relative URI's path doesn't start with this URI's path, 1094 * then just return the relative URI; the URIs have nothing in 1095 * common 1096 */ 1097 if (!relativePath.startsWith(thisPath)) { 1098 return relative; 1099 } 1100 } 1101 1102 URI result = new URI(); 1103 result.fragment = relative.fragment; 1104 result.query = relative.query; 1105 // the result URI is the remainder of the relative URI's path 1106 result.path = relativePath.substring(thisPath.length()); 1107 result.setSchemeSpecificPart(); 1108 return result; 1109 } 1110 1111 /** 1112 * Resolves the given URI {@code relative} against the URI represented by 1113 * this instance. 1114 * 1115 * @param relative 1116 * the URI which has to be resolved against this URI. 1117 * @return the resolved URI. 1118 */ 1119 public URI resolve(URI relative) { 1120 if (relative.absolute || opaque) { 1121 return relative; 1122 } 1123 1124 if (relative.authority != null) { 1125 // If the relative URI has an authority, the result is the relative 1126 // with this URI's scheme. 
1127 URI result = relative.duplicate(); 1128 result.scheme = scheme; 1129 result.absolute = absolute; 1130 return result; 1131 } 1132 1133 if (relative.path.isEmpty() && relative.scheme == null && relative.query == null) { 1134 // if the relative URI only consists of at most a fragment, 1135 URI result = duplicate(); 1136 result.fragment = relative.fragment; 1137 return result; 1138 } 1139 1140 URI result = duplicate(); 1141 result.fragment = relative.fragment; 1142 result.query = relative.query; 1143 String resolvedPath; 1144 if (relative.path.startsWith("/")) { 1145 // The relative URI has an absolute path; use it. 1146 resolvedPath = relative.path; 1147 } else if (relative.path.isEmpty()) { 1148 // The relative URI has no path; use the base path. 1149 resolvedPath = path; 1150 } else { 1151 // The relative URI has a relative path; combine the paths. 1152 int endIndex = path.lastIndexOf('/') + 1; 1153 resolvedPath = path.substring(0, endIndex) + relative.path; 1154 } 1155 result.path = UrlUtils.authoritySafePath(result.authority, normalize(resolvedPath, true)); 1156 result.setSchemeSpecificPart(); 1157 return result; 1158 } 1159 1160 /** 1161 * Helper method used to re-calculate the scheme specific part of the 1162 * resolved or normalized URIs 1163 */ 1164 private void setSchemeSpecificPart() { 1165 // ssp = [//authority][path][?query] 1166 StringBuilder ssp = new StringBuilder(); 1167 if (authority != null) { 1168 ssp.append("//" + authority); 1169 } 1170 if (path != null) { 1171 ssp.append(path); 1172 } 1173 if (query != null) { 1174 ssp.append("?" + query); 1175 } 1176 schemeSpecificPart = ssp.toString(); 1177 // reset string, so that it can be re-calculated correctly when asked. 1178 string = null; 1179 } 1180 1181 /** 1182 * Creates a new URI instance by parsing the given string {@code relative} 1183 * and resolves the created URI against the URI represented by this 1184 * instance. 
1185 * 1186 * @param relative 1187 * the given string to create the new URI instance which has to 1188 * be resolved later on. 1189 * @return the created and resolved URI. 1190 */ 1191 public URI resolve(String relative) { 1192 return resolve(create(relative)); 1193 } 1194 1195 private String decode(String s) { 1196 return s != null ? UriCodec.decode(s) : null; 1197 } 1198 1199 /** 1200 * Returns the textual string representation of this URI instance using the 1201 * US-ASCII encoding. 1202 * 1203 * @return the US-ASCII string representation of this URI. 1204 */ 1205 public String toASCIIString() { 1206 StringBuilder result = new StringBuilder(); 1207 ASCII_ONLY.appendEncoded(result, toString()); 1208 return result.toString(); 1209 } 1210 1211 /** 1212 * Returns the encoded URI. 1213 */ 1214 @Override public String toString() { 1215 if (string != null) { 1216 return string; 1217 } 1218 1219 StringBuilder result = new StringBuilder(); 1220 if (scheme != null) { 1221 result.append(scheme); 1222 result.append(':'); 1223 } 1224 if (opaque) { 1225 result.append(schemeSpecificPart); 1226 } else { 1227 if (authority != null) { 1228 result.append("//"); 1229 result.append(authority); 1230 } 1231 1232 if (path != null) { 1233 result.append(path); 1234 } 1235 1236 if (query != null) { 1237 result.append('?'); 1238 result.append(query); 1239 } 1240 } 1241 1242 if (fragment != null) { 1243 result.append('#'); 1244 result.append(fragment); 1245 } 1246 1247 string = result.toString(); 1248 return string; 1249 } 1250 1251 /* 1252 * Form a string from the components of this URI, similarly to the 1253 * toString() method. But this method converts scheme and host to lowercase, 1254 * and converts escaped octets to lowercase. 
1255 */ 1256 private String getHashString() { 1257 StringBuilder result = new StringBuilder(); 1258 if (scheme != null) { 1259 result.append(scheme.toLowerCase(Locale.US)); 1260 result.append(':'); 1261 } 1262 if (opaque) { 1263 result.append(schemeSpecificPart); 1264 } else { 1265 if (authority != null) { 1266 result.append("//"); 1267 if (host == null) { 1268 result.append(authority); 1269 } else { 1270 if (userInfo != null) { 1271 result.append(userInfo + "@"); 1272 } 1273 result.append(host.toLowerCase(Locale.US)); 1274 if (port != -1) { 1275 result.append(":" + port); 1276 } 1277 } 1278 } 1279 1280 if (path != null) { 1281 result.append(path); 1282 } 1283 1284 if (query != null) { 1285 result.append('?'); 1286 result.append(query); 1287 } 1288 } 1289 1290 if (fragment != null) { 1291 result.append('#'); 1292 result.append(fragment); 1293 } 1294 1295 return convertHexToLowerCase(result.toString()); 1296 } 1297 1298 /** 1299 * Converts this URI instance to a URL. 1300 * 1301 * @return the created URL representing the same resource as this URI. 1302 * @throws MalformedURLException 1303 * if an error occurs while creating the URL or no protocol 1304 * handler could be found. 1305 */ 1306 public URL toURL() throws MalformedURLException { 1307 if (!absolute) { 1308 throw new IllegalArgumentException("URI is not absolute: " + toString()); 1309 } 1310 return new URL(toString()); 1311 } 1312 1313 private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException { 1314 in.defaultReadObject(); 1315 try { 1316 parseURI(string, false); 1317 } catch (URISyntaxException e) { 1318 throw new IOException(e.toString()); 1319 } 1320 } 1321 1322 private void writeObject(ObjectOutputStream out) throws IOException, ClassNotFoundException { 1323 // call toString() to ensure the value of string field is calculated 1324 toString(); 1325 out.defaultWriteObject(); 1326 } 1327} 1328