URI.java revision 56099d23fcb002b164bff8fb7f14d6ec0453509e
1/* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18package java.net; 19 20import java.io.IOException; 21import java.io.ObjectInputStream; 22import java.io.ObjectOutputStream; 23import java.io.Serializable; 24import java.util.Locale; 25import libcore.net.UriCodec; 26 27/** 28 * This class represents an instance of a URI as defined by RFC 2396. 29 */ 30public final class URI implements Comparable<URI>, Serializable { 31 32 private static final long serialVersionUID = -6052424284110960213l; 33 34 static final String UNRESERVED = "_-!.~\'()*"; 35 static final String PUNCTUATION = ",;:$&+="; 36 37 static final UriCodec USER_INFO_ENCODER = new PartEncoder(""); 38 static final UriCodec PATH_ENCODER = new PartEncoder("/@"); 39 static final UriCodec AUTHORITY_ENCODER = new PartEncoder("@[]"); 40 41 /** for java.net.URL, which foolishly combines these two parts */ 42 static final UriCodec FILE_AND_QUERY_ENCODER = new PartEncoder("/@?"); 43 44 /** for query, fragment, and scheme-specific part */ 45 static final UriCodec ALL_LEGAL_ENCODER = new PartEncoder("?/[]@"); 46 47 /** Retains all ASCII chars including delimiters. 
*/ 48 private static final UriCodec ASCII_ONLY = new UriCodec() { 49 @Override protected boolean isRetained(char c) { 50 return c <= 127; 51 } 52 }; 53 54 /** 55 * Encodes the unescaped characters of {@code s} that are not permitted. 56 * Permitted characters are: 57 * <ul> 58 * <li>Unreserved characters in RFC 2396. 59 * <li>{@code extraOkayChars}, 60 * <li>non-ASCII, non-control, non-whitespace characters 61 * </ul> 62 */ 63 private static class PartEncoder extends UriCodec { 64 private final String extraLegalCharacters; 65 66 PartEncoder(String extraLegalCharacters) { 67 this.extraLegalCharacters = extraLegalCharacters; 68 } 69 70 @Override protected boolean isRetained(char c) { 71 return UNRESERVED.indexOf(c) != -1 72 || PUNCTUATION.indexOf(c) != -1 73 || extraLegalCharacters.indexOf(c) != -1 74 || (c > 127 && !Character.isSpaceChar(c) && !Character.isISOControl(c)); 75 } 76 } 77 78 private String string; 79 private transient String scheme; 80 private transient String schemeSpecificPart; 81 private transient String authority; 82 private transient String userInfo; 83 private transient String host; 84 private transient int port = -1; 85 private transient String path; 86 private transient String query; 87 private transient String fragment; 88 private transient boolean opaque; 89 private transient boolean absolute; 90 private transient boolean serverAuthority = false; 91 92 private transient int hash = -1; 93 94 private URI() {} 95 96 /** 97 * Creates a new URI instance according to the given string {@code uri}. 98 * 99 * @param uri 100 * the textual URI representation to be parsed into a URI object. 101 * @throws URISyntaxException 102 * if the given string {@code uri} doesn't fit to the 103 * specification RFC2396 or could not be parsed correctly. 104 */ 105 public URI(String uri) throws URISyntaxException { 106 parseURI(uri, false); 107 } 108 109 /** 110 * Creates a new URI instance using the given arguments. 
This constructor 111 * first creates a temporary URI string from the given components. This 112 * string will be parsed later on to create the URI instance. 113 * <p> 114 * {@code [scheme:]scheme-specific-part[#fragment]} 115 * 116 * @param scheme 117 * the scheme part of the URI. 118 * @param ssp 119 * the scheme-specific-part of the URI. 120 * @param frag 121 * the fragment part of the URI. 122 * @throws URISyntaxException 123 * if the temporary created string doesn't fit to the 124 * specification RFC2396 or could not be parsed correctly. 125 */ 126 public URI(String scheme, String ssp, String frag) 127 throws URISyntaxException { 128 StringBuilder uri = new StringBuilder(); 129 if (scheme != null) { 130 uri.append(scheme); 131 uri.append(':'); 132 } 133 if (ssp != null) { 134 ALL_LEGAL_ENCODER.appendEncoded(uri, ssp); 135 } 136 if (frag != null) { 137 uri.append('#'); 138 ALL_LEGAL_ENCODER.appendEncoded(uri, frag); 139 } 140 141 parseURI(uri.toString(), false); 142 } 143 144 /** 145 * Creates a new URI instance using the given arguments. This constructor 146 * first creates a temporary URI string from the given components. This 147 * string will be parsed later on to create the URI instance. 148 * <p> 149 * {@code [scheme:][user-info@]host[:port][path][?query][#fragment]} 150 * 151 * @param scheme 152 * the scheme part of the URI. 153 * @param userInfo 154 * the user information of the URI for authentication and 155 * authorization. 156 * @param host 157 * the host name of the URI. 158 * @param port 159 * the port number of the URI. 160 * @param path 161 * the path to the resource on the host. 162 * @param query 163 * the query part of the URI to specify parameters for the 164 * resource. 165 * @param fragment 166 * the fragment part of the URI. 167 * @throws URISyntaxException 168 * if the temporary created string doesn't fit to the 169 * specification RFC2396 or could not be parsed correctly. 
170 */ 171 public URI(String scheme, String userInfo, String host, int port, 172 String path, String query, String fragment) 173 throws URISyntaxException { 174 175 if (scheme == null && userInfo == null && host == null && path == null 176 && query == null && fragment == null) { 177 this.path = ""; 178 return; 179 } 180 181 if (scheme != null && path != null && path.length() > 0 182 && path.charAt(0) != '/') { 183 throw new URISyntaxException(path, "Relative path"); 184 } 185 186 StringBuilder uri = new StringBuilder(); 187 if (scheme != null) { 188 uri.append(scheme); 189 uri.append(':'); 190 } 191 192 if (userInfo != null || host != null || port != -1) { 193 uri.append("//"); 194 } 195 196 if (userInfo != null) { 197 USER_INFO_ENCODER.appendEncoded(uri, userInfo); 198 uri.append('@'); 199 } 200 201 if (host != null) { 202 // check for IPv6 addresses that hasn't been enclosed 203 // in square brackets 204 if (host.indexOf(':') != -1 && host.indexOf(']') == -1 205 && host.indexOf('[') == -1) { 206 host = "[" + host + "]"; 207 } 208 uri.append(host); 209 } 210 211 if (port != -1) { 212 uri.append(':'); 213 uri.append(port); 214 } 215 216 if (path != null) { 217 PATH_ENCODER.appendEncoded(uri, path); 218 } 219 220 if (query != null) { 221 uri.append('?'); 222 ALL_LEGAL_ENCODER.appendEncoded(uri, query); 223 } 224 225 if (fragment != null) { 226 uri.append('#'); 227 ALL_LEGAL_ENCODER.appendEncoded(uri, fragment); 228 } 229 230 parseURI(uri.toString(), true); 231 } 232 233 /** 234 * Creates a new URI instance using the given arguments. This constructor 235 * first creates a temporary URI string from the given components. This 236 * string will be parsed later on to create the URI instance. 237 * <p> 238 * {@code [scheme:]host[path][#fragment]} 239 * 240 * @param scheme 241 * the scheme part of the URI. 242 * @param host 243 * the host name of the URI. 244 * @param path 245 * the path to the resource on the host. 246 * @param fragment 247 * the fragment part of the URI. 
* @throws URISyntaxException
     *             if the temporary created string doesn't fit to the
     *             specification RFC2396 or could not be parsed correctly.
     */
    public URI(String scheme, String host, String path, String fragment)
            throws URISyntaxException {
        // delegate with no user-info, no port (-1) and no query
        this(scheme, null, host, -1, path, null, fragment);
    }

    /**
     * Creates a new URI instance using the given arguments. This constructor
     * first creates a temporary URI string from the given components. This
     * string will be parsed later on to create the URI instance.
     * <p>
     * {@code [scheme:][//authority][path][?query][#fragment]}
     *
     * @param scheme
     *            the scheme part of the URI.
     * @param authority
     *            the authority part of the URI.
     * @param path
     *            the path to the resource on the host.
     * @param query
     *            the query part of the URI to specify parameters for the
     *            resource.
     * @param fragment
     *            the fragment part of the URI.
     * @throws URISyntaxException
     *             if the temporary created string doesn't fit to the
     *             specification RFC2396 or could not be parsed correctly.
278 */ 279 public URI(String scheme, String authority, String path, String query, 280 String fragment) throws URISyntaxException { 281 if (scheme != null && path != null && path.length() > 0 282 && path.charAt(0) != '/') { 283 throw new URISyntaxException(path, "Relative path"); 284 } 285 286 StringBuilder uri = new StringBuilder(); 287 if (scheme != null) { 288 uri.append(scheme); 289 uri.append(':'); 290 } 291 if (authority != null) { 292 uri.append("//"); 293 AUTHORITY_ENCODER.appendEncoded(uri, authority); 294 } 295 296 if (path != null) { 297 PATH_ENCODER.appendEncoded(uri, path); 298 } 299 if (query != null) { 300 uri.append('?'); 301 ALL_LEGAL_ENCODER.appendEncoded(uri, query); 302 } 303 if (fragment != null) { 304 uri.append('#'); 305 ALL_LEGAL_ENCODER.appendEncoded(uri, fragment); 306 } 307 308 parseURI(uri.toString(), false); 309 } 310 311 private void parseURI(String uri, boolean forceServer) throws URISyntaxException { 312 String temp = uri; 313 // assign uri string to the input value per spec 314 string = uri; 315 int index, index1, index2, index3; 316 // parse into Fragment, Scheme, and SchemeSpecificPart 317 // then parse SchemeSpecificPart if necessary 318 319 // Fragment 320 index = temp.indexOf('#'); 321 if (index != -1) { 322 // remove the fragment from the end 323 fragment = temp.substring(index + 1); 324 validateFragment(uri, fragment, index + 1); 325 temp = temp.substring(0, index); 326 } 327 328 // Scheme and SchemeSpecificPart 329 index = index1 = temp.indexOf(':'); 330 index2 = temp.indexOf('/'); 331 index3 = temp.indexOf('?'); 332 333 // if a '/' or '?' 
occurs before the first ':' the uri has no 334 // specified scheme, and is therefore not absolute 335 if (index != -1 && (index2 >= index || index2 == -1) 336 && (index3 >= index || index3 == -1)) { 337 // the characters up to the first ':' comprise the scheme 338 absolute = true; 339 scheme = temp.substring(0, index); 340 if (scheme.length() == 0) { 341 throw new URISyntaxException(uri, "Scheme expected", index); 342 } 343 validateScheme(uri, scheme, 0); 344 schemeSpecificPart = temp.substring(index + 1); 345 if (schemeSpecificPart.length() == 0) { 346 throw new URISyntaxException(uri, "Scheme-specific part expected", index + 1); 347 } 348 } else { 349 absolute = false; 350 schemeSpecificPart = temp; 351 } 352 353 if (scheme == null || schemeSpecificPart.length() > 0 354 && schemeSpecificPart.charAt(0) == '/') { 355 opaque = false; 356 // the URI is hierarchical 357 358 // Query 359 temp = schemeSpecificPart; 360 index = temp.indexOf('?'); 361 if (index != -1) { 362 query = temp.substring(index + 1); 363 temp = temp.substring(0, index); 364 validateQuery(uri, query, index2 + 1 + index); 365 } 366 367 // Authority and Path 368 if (temp.startsWith("//")) { 369 index = temp.indexOf('/', 2); 370 if (index != -1) { 371 authority = temp.substring(2, index); 372 path = temp.substring(index); 373 } else { 374 authority = temp.substring(2); 375 if (authority.length() == 0 && query == null 376 && fragment == null) { 377 throw new URISyntaxException(uri, "Authority expected", uri.length()); 378 } 379 380 path = ""; 381 // nothing left, so path is empty (not null, path should 382 // never be null) 383 } 384 385 if (authority.length() == 0) { 386 authority = null; 387 } else { 388 validateAuthority(uri, authority, index1 + 3); 389 } 390 } else { // no authority specified 391 path = temp; 392 } 393 394 int pathIndex = 0; 395 if (index2 > -1) { 396 pathIndex += index2; 397 } 398 if (index > -1) { 399 pathIndex += index; 400 } 401 validatePath(uri, path, pathIndex); 402 } else { 
// if not hierarchical, URI is opaque 403 opaque = true; 404 validateSsp(uri, schemeSpecificPart, index2 + 2 + index); 405 } 406 407 parseAuthority(forceServer); 408 } 409 410 private void validateScheme(String uri, String scheme, int index) 411 throws URISyntaxException { 412 // first char needs to be an alpha char 413 char ch = scheme.charAt(0); 414 if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'))) { 415 throw new URISyntaxException(uri, "Illegal character in scheme", 0); 416 } 417 418 try { 419 UriCodec.validateSimple(scheme, "+-."); 420 } catch (URISyntaxException e) { 421 throw new URISyntaxException(uri, "Illegal character in scheme", index + e.getIndex()); 422 } 423 } 424 425 private void validateSsp(String uri, String ssp, int index) 426 throws URISyntaxException { 427 try { 428 ALL_LEGAL_ENCODER.validate(ssp); 429 } catch (URISyntaxException e) { 430 throw new URISyntaxException(uri, 431 e.getReason() + " in schemeSpecificPart", index + e.getIndex()); 432 } 433 } 434 435 private void validateAuthority(String uri, String authority, int index) 436 throws URISyntaxException { 437 try { 438 AUTHORITY_ENCODER.validate(authority); 439 } catch (URISyntaxException e) { 440 throw new URISyntaxException(uri, e.getReason() + " in authority", index + e.getIndex()); 441 } 442 } 443 444 private void validatePath(String uri, String path, int index) 445 throws URISyntaxException { 446 try { 447 PATH_ENCODER.validate(path); 448 } catch (URISyntaxException e) { 449 throw new URISyntaxException(uri, e.getReason() + " in path", index + e.getIndex()); 450 } 451 } 452 453 private void validateQuery(String uri, String query, int index) 454 throws URISyntaxException { 455 try { 456 ALL_LEGAL_ENCODER.validate(query); 457 } catch (URISyntaxException e) { 458 throw new URISyntaxException(uri, e.getReason() + " in query", index + e.getIndex()); 459 460 } 461 } 462 463 private void validateFragment(String uri, String fragment, int index) 464 throws URISyntaxException { 465 
try { 466 ALL_LEGAL_ENCODER.validate(fragment); 467 } catch (URISyntaxException e) { 468 throw new URISyntaxException(uri, e.getReason() + " in fragment", index + e.getIndex()); 469 } 470 } 471 472 /** 473 * Parse the authority string into its component parts: user info, 474 * host, and port. This operation doesn't apply to registry URIs, and 475 * calling it on such <i>may</i> result in a syntax exception. 476 * 477 * @param forceServer true to always throw if the authority cannot be 478 * parsed. If false, this method may still throw for some kinds of 479 * errors; this unpredictable behavior is consistent with the RI. 480 */ 481 private void parseAuthority(boolean forceServer) throws URISyntaxException { 482 if (authority == null) { 483 return; 484 } 485 486 String tempUserInfo = null; 487 String temp = authority; 488 int index = temp.indexOf('@'); 489 int hostIndex = 0; 490 if (index != -1) { 491 // remove user info 492 tempUserInfo = temp.substring(0, index); 493 validateUserInfo(authority, tempUserInfo, 0); 494 temp = temp.substring(index + 1); // host[:port] is left 495 hostIndex = index + 1; 496 } 497 498 index = temp.lastIndexOf(':'); 499 int endIndex = temp.indexOf(']'); 500 501 String tempHost; 502 int tempPort = -1; 503 if (index != -1 && endIndex < index) { 504 // determine port and host 505 tempHost = temp.substring(0, index); 506 507 if (index < (temp.length() - 1)) { // port part is not empty 508 try { 509 tempPort = Integer.parseInt(temp.substring(index + 1)); 510 if (tempPort < 0) { 511 if (forceServer) { 512 throw new URISyntaxException(authority, 513 "Invalid port number", hostIndex + index + 1); 514 } 515 return; 516 } 517 } catch (NumberFormatException e) { 518 if (forceServer) { 519 throw new URISyntaxException(authority, 520 "Invalid port number", hostIndex + index + 1); 521 } 522 return; 523 } 524 } 525 } else { 526 tempHost = temp; 527 } 528 529 if (tempHost.isEmpty()) { 530 if (forceServer) { 531 throw new URISyntaxException(authority, 
"Expected host", hostIndex); 532 } 533 return; 534 } 535 536 if (!isValidHost(forceServer, tempHost)) { 537 return; 538 } 539 540 // this is a server based uri, 541 // fill in the userInfo, host and port fields 542 userInfo = tempUserInfo; 543 host = tempHost; 544 port = tempPort; 545 serverAuthority = true; 546 } 547 548 private void validateUserInfo(String uri, String userInfo, int index) 549 throws URISyntaxException { 550 for (int i = 0; i < userInfo.length(); i++) { 551 char ch = userInfo.charAt(i); 552 if (ch == ']' || ch == '[') { 553 throw new URISyntaxException(uri, "Illegal character in userInfo", index + i); 554 } 555 } 556 } 557 558 /** 559 * Returns true if {@code host} is a well-formed host name or IP address. 560 * 561 * @param forceServer true to always throw if the host cannot be parsed. If 562 * false, this method may still throw for some kinds of errors; this 563 * unpredictable behavior is consistent with the RI. 564 */ 565 private boolean isValidHost(boolean forceServer, String host) throws URISyntaxException { 566 if (host.startsWith("[")) { 567 // IPv6 address 568 if (!host.endsWith("]")) { 569 throw new URISyntaxException(host, 570 "Expected a closing square bracket for IPv6 address", 0); 571 } 572 byte[] bytes = InetAddress.ipStringToByteArray(host); 573 /* 574 * The native IP parser may return 4 bytes for addresses like 575 * "[::FFFF:127.0.0.1]". This is allowed, but we must not accept 576 * IPv4-formatted addresses in square braces like "[127.0.0.1]". 
577 */ 578 if (bytes != null && (bytes.length == 16 || bytes.length == 4 && host.contains(":"))) { 579 return true; 580 } 581 throw new URISyntaxException(host, "Malformed IPv6 address"); 582 } 583 584 // '[' and ']' can only be the first char and last char 585 // of the host name 586 if (host.indexOf('[') != -1 || host.indexOf(']') != -1) { 587 throw new URISyntaxException(host, "Illegal character in host name", 0); 588 } 589 590 int index = host.lastIndexOf('.'); 591 if (index < 0 || index == host.length() - 1 592 || !Character.isDigit(host.charAt(index + 1))) { 593 // domain name 594 if (isValidDomainName(host)) { 595 return true; 596 } 597 if (forceServer) { 598 throw new URISyntaxException(host, "Illegal character in host name", 0); 599 } 600 return false; 601 } 602 603 // IPv4 address 604 byte[] bytes = InetAddress.ipStringToByteArray(host); 605 if (bytes != null && bytes.length == 4) { 606 return true; 607 } 608 609 if (forceServer) { 610 throw new URISyntaxException(host, "Malformed IPv4 address", 0); 611 } 612 return false; 613 } 614 615 private boolean isValidDomainName(String host) { 616 try { 617 UriCodec.validateSimple(host, "-."); 618 } catch (URISyntaxException e) { 619 return false; 620 } 621 622 String lastLabel = null; 623 for (String token : host.split("\\.")) { 624 lastLabel = token; 625 if (lastLabel.startsWith("-") || lastLabel.endsWith("-")) { 626 return false; 627 } 628 } 629 630 if (lastLabel == null) { 631 return false; 632 } 633 634 if (!lastLabel.equals(host)) { 635 char ch = lastLabel.charAt(0); 636 if (ch >= '0' && ch <= '9') { 637 return false; 638 } 639 } 640 return true; 641 } 642 643 /** 644 * Compares this URI with the given argument {@code uri}. This method will 645 * return a negative value if this URI instance is less than the given 646 * argument and a positive value if this URI instance is greater than the 647 * given argument. The return value {@code 0} indicates that the two 648 * instances represent the same URI. 
To define the order the single parts of 649 * the URI are compared with each other. String components will be ordered 650 * in the natural case-sensitive way. A hierarchical URI is less than an 651 * opaque URI and if one part is {@code null} the URI with the undefined 652 * part is less than the other one. 653 * 654 * @param uri 655 * the URI this instance has to compare with. 656 * @return the value representing the order of the two instances. 657 */ 658 public int compareTo(URI uri) { 659 int ret; 660 661 // compare schemes 662 if (scheme == null && uri.scheme != null) { 663 return -1; 664 } else if (scheme != null && uri.scheme == null) { 665 return 1; 666 } else if (scheme != null && uri.scheme != null) { 667 ret = scheme.compareToIgnoreCase(uri.scheme); 668 if (ret != 0) { 669 return ret; 670 } 671 } 672 673 // compare opacities 674 if (!opaque && uri.opaque) { 675 return -1; 676 } else if (opaque && !uri.opaque) { 677 return 1; 678 } else if (opaque && uri.opaque) { 679 ret = schemeSpecificPart.compareTo(uri.schemeSpecificPart); 680 if (ret != 0) { 681 return ret; 682 } 683 } else { 684 685 // otherwise both must be hierarchical 686 687 // compare authorities 688 if (authority != null && uri.authority == null) { 689 return 1; 690 } else if (authority == null && uri.authority != null) { 691 return -1; 692 } else if (authority != null && uri.authority != null) { 693 if (host != null && uri.host != null) { 694 // both are server based, so compare userInfo, host, port 695 if (userInfo != null && uri.userInfo == null) { 696 return 1; 697 } else if (userInfo == null && uri.userInfo != null) { 698 return -1; 699 } else if (userInfo != null && uri.userInfo != null) { 700 ret = userInfo.compareTo(uri.userInfo); 701 if (ret != 0) { 702 return ret; 703 } 704 } 705 706 // userInfo's are the same, compare hostname 707 ret = host.compareToIgnoreCase(uri.host); 708 if (ret != 0) { 709 return ret; 710 } 711 712 // compare port 713 if (port != uri.port) { 714 return port - 
uri.port; 715 } 716 } else { // one or both are registry based, compare the whole 717 // authority 718 ret = authority.compareTo(uri.authority); 719 if (ret != 0) { 720 return ret; 721 } 722 } 723 } 724 725 // authorities are the same 726 // compare paths 727 ret = path.compareTo(uri.path); 728 if (ret != 0) { 729 return ret; 730 } 731 732 // compare queries 733 734 if (query != null && uri.query == null) { 735 return 1; 736 } else if (query == null && uri.query != null) { 737 return -1; 738 } else if (query != null && uri.query != null) { 739 ret = query.compareTo(uri.query); 740 if (ret != 0) { 741 return ret; 742 } 743 } 744 } 745 746 // everything else is identical, so compare fragments 747 if (fragment != null && uri.fragment == null) { 748 return 1; 749 } else if (fragment == null && uri.fragment != null) { 750 return -1; 751 } else if (fragment != null && uri.fragment != null) { 752 ret = fragment.compareTo(uri.fragment); 753 if (ret != 0) { 754 return ret; 755 } 756 } 757 758 // identical 759 return 0; 760 } 761 762 /** 763 * Returns the URI formed by parsing {@code uri}. This method behaves 764 * identically to the string constructor but throws a different exception 765 * on failure. The constructor fails with a checked {@link 766 * URISyntaxException}; this method fails with an unchecked {@link 767 * IllegalArgumentException}. 
768 */ 769 public static URI create(String uri) { 770 try { 771 return new URI(uri); 772 } catch (URISyntaxException e) { 773 throw new IllegalArgumentException(e.getMessage()); 774 } 775 } 776 777 private URI duplicate() { 778 URI clone = new URI(); 779 clone.absolute = absolute; 780 clone.authority = authority; 781 clone.fragment = fragment; 782 clone.host = host; 783 clone.opaque = opaque; 784 clone.path = path; 785 clone.port = port; 786 clone.query = query; 787 clone.scheme = scheme; 788 clone.schemeSpecificPart = schemeSpecificPart; 789 clone.userInfo = userInfo; 790 clone.serverAuthority = serverAuthority; 791 return clone; 792 } 793 794 /* 795 * Takes a string that may contain hex sequences like %F1 or %2b and 796 * converts the hex values following the '%' to lowercase 797 */ 798 private String convertHexToLowerCase(String s) { 799 StringBuilder result = new StringBuilder(""); 800 if (s.indexOf('%') == -1) { 801 return s; 802 } 803 804 int index, prevIndex = 0; 805 while ((index = s.indexOf('%', prevIndex)) != -1) { 806 result.append(s.substring(prevIndex, index + 1)); 807 result.append(s.substring(index + 1, index + 3).toLowerCase(Locale.US)); 808 index += 3; 809 prevIndex = index; 810 } 811 return result.toString(); 812 } 813 814 /** 815 * Returns true if {@code first} and {@code second} are equal after 816 * unescaping hex sequences like %F1 and %2b. 
817 */ 818 private boolean escapedEquals(String first, String second) { 819 if (first.indexOf('%') != second.indexOf('%')) { 820 return first.equals(second); 821 } 822 823 int index, prevIndex = 0; 824 while ((index = first.indexOf('%', prevIndex)) != -1 825 && second.indexOf('%', prevIndex) == index) { 826 boolean match = first.substring(prevIndex, index).equals( 827 second.substring(prevIndex, index)); 828 if (!match) { 829 return false; 830 } 831 832 match = first.substring(index + 1, index + 3).equalsIgnoreCase( 833 second.substring(index + 1, index + 3)); 834 if (!match) { 835 return false; 836 } 837 838 index += 3; 839 prevIndex = index; 840 } 841 return first.substring(prevIndex).equals(second.substring(prevIndex)); 842 } 843 844 /** 845 * Compares this URI instance with the given argument {@code o} and 846 * determines if both are equal. Two URI instances are equal if all single 847 * parts are identical in their meaning. 848 * 849 * @param o 850 * the URI this instance has to be compared with. 851 * @return {@code true} if both URI instances point to the same resource, 852 * {@code false} otherwise. 
853 */ 854 @Override 855 public boolean equals(Object o) { 856 if (!(o instanceof URI)) { 857 return false; 858 } 859 URI uri = (URI) o; 860 861 if (uri.fragment == null && fragment != null || uri.fragment != null 862 && fragment == null) { 863 return false; 864 } else if (uri.fragment != null && fragment != null) { 865 if (!escapedEquals(uri.fragment, fragment)) { 866 return false; 867 } 868 } 869 870 if (uri.scheme == null && scheme != null || uri.scheme != null 871 && scheme == null) { 872 return false; 873 } else if (uri.scheme != null && scheme != null) { 874 if (!uri.scheme.equalsIgnoreCase(scheme)) { 875 return false; 876 } 877 } 878 879 if (uri.opaque && opaque) { 880 return escapedEquals(uri.schemeSpecificPart, 881 schemeSpecificPart); 882 } else if (!uri.opaque && !opaque) { 883 if (!escapedEquals(path, uri.path)) { 884 return false; 885 } 886 887 if (uri.query != null && query == null || uri.query == null 888 && query != null) { 889 return false; 890 } else if (uri.query != null && query != null) { 891 if (!escapedEquals(uri.query, query)) { 892 return false; 893 } 894 } 895 896 if (uri.authority != null && authority == null 897 || uri.authority == null && authority != null) { 898 return false; 899 } else if (uri.authority != null && authority != null) { 900 if (uri.host != null && host == null || uri.host == null 901 && host != null) { 902 return false; 903 } else if (uri.host == null && host == null) { 904 // both are registry based, so compare the whole authority 905 return escapedEquals(uri.authority, authority); 906 } else { // uri.host != null && host != null, so server-based 907 if (!host.equalsIgnoreCase(uri.host)) { 908 return false; 909 } 910 911 if (port != uri.port) { 912 return false; 913 } 914 915 if (uri.userInfo != null && userInfo == null 916 || uri.userInfo == null && userInfo != null) { 917 return false; 918 } else if (uri.userInfo != null && userInfo != null) { 919 return escapedEquals(userInfo, uri.userInfo); 920 } else { 921 return 
true; 922 } 923 } 924 } else { 925 // no authority 926 return true; 927 } 928 929 } else { 930 // one is opaque, the other hierarchical 931 return false; 932 } 933 } 934 935 /** 936 * Gets the decoded authority part of this URI. 937 * 938 * @return the decoded authority part or {@code null} if undefined. 939 */ 940 public String getAuthority() { 941 return decode(authority); 942 } 943 944 /** 945 * Gets the decoded fragment part of this URI. 946 * 947 * @return the decoded fragment part or {@code null} if undefined. 948 */ 949 public String getFragment() { 950 return decode(fragment); 951 } 952 953 /** 954 * Gets the host part of this URI. 955 * 956 * @return the host part or {@code null} if undefined. 957 */ 958 public String getHost() { 959 return host; 960 } 961 962 /** 963 * Gets the decoded path part of this URI. 964 * 965 * @return the decoded path part or {@code null} if undefined. 966 */ 967 public String getPath() { 968 return decode(path); 969 } 970 971 /** 972 * Gets the port number of this URI. 973 * 974 * @return the port number or {@code -1} if undefined. 975 */ 976 public int getPort() { 977 return port; 978 } 979 980 /** @hide */ 981 public int getEffectivePort() { 982 return getEffectivePort(scheme, port); 983 } 984 985 /** 986 * Returns the port to use for {@code scheme} connections will use when 987 * {@link #getPort} returns {@code specifiedPort}. 988 * 989 * @hide 990 */ 991 public static int getEffectivePort(String scheme, int specifiedPort) { 992 if (specifiedPort != -1) { 993 return specifiedPort; 994 } 995 996 if ("http".equalsIgnoreCase(scheme)) { 997 return 80; 998 } else if ("https".equalsIgnoreCase(scheme)) { 999 return 443; 1000 } else { 1001 return -1; 1002 } 1003 } 1004 1005 /** 1006 * Gets the decoded query part of this URI. 1007 * 1008 * @return the decoded query part or {@code null} if undefined. 
1009 */ 1010 public String getQuery() { 1011 return decode(query); 1012 } 1013 1014 /** 1015 * Gets the authority part of this URI in raw form. 1016 * 1017 * @return the encoded authority part or {@code null} if undefined. 1018 */ 1019 public String getRawAuthority() { 1020 return authority; 1021 } 1022 1023 /** 1024 * Gets the fragment part of this URI in raw form. 1025 * 1026 * @return the encoded fragment part or {@code null} if undefined. 1027 */ 1028 public String getRawFragment() { 1029 return fragment; 1030 } 1031 1032 /** 1033 * Gets the path part of this URI in raw form. 1034 * 1035 * @return the encoded path part or {@code null} if undefined. 1036 */ 1037 public String getRawPath() { 1038 return path; 1039 } 1040 1041 /** 1042 * Gets the query part of this URI in raw form. 1043 * 1044 * @return the encoded query part or {@code null} if undefined. 1045 */ 1046 public String getRawQuery() { 1047 return query; 1048 } 1049 1050 /** 1051 * Gets the scheme-specific part of this URI in raw form. 1052 * 1053 * @return the encoded scheme-specific part or {@code null} if undefined. 1054 */ 1055 public String getRawSchemeSpecificPart() { 1056 return schemeSpecificPart; 1057 } 1058 1059 /** 1060 * Gets the user-info part of this URI in raw form. 1061 * 1062 * @return the encoded user-info part or {@code null} if undefined. 1063 */ 1064 public String getRawUserInfo() { 1065 return userInfo; 1066 } 1067 1068 /** 1069 * Gets the scheme part of this URI. 1070 * 1071 * @return the scheme part or {@code null} if undefined. 1072 */ 1073 public String getScheme() { 1074 return scheme; 1075 } 1076 1077 /** 1078 * Gets the decoded scheme-specific part of this URI. 1079 * 1080 * @return the decoded scheme-specific part or {@code null} if undefined. 1081 */ 1082 public String getSchemeSpecificPart() { 1083 return decode(schemeSpecificPart); 1084 } 1085 1086 /** 1087 * Gets the decoded user-info part of this URI. 
1088 * 1089 * @return the decoded user-info part or {@code null} if undefined. 1090 */ 1091 public String getUserInfo() { 1092 return decode(userInfo); 1093 } 1094 1095 /** 1096 * Gets the hashcode value of this URI instance. 1097 * 1098 * @return the appropriate hashcode value. 1099 */ 1100 @Override 1101 public int hashCode() { 1102 if (hash == -1) { 1103 hash = getHashString().hashCode(); 1104 } 1105 return hash; 1106 } 1107 1108 /** 1109 * Indicates whether this URI is absolute, which means that a scheme part is 1110 * defined in this URI. 1111 * 1112 * @return {@code true} if this URI is absolute, {@code false} otherwise. 1113 */ 1114 public boolean isAbsolute() { 1115 return absolute; 1116 } 1117 1118 /** 1119 * Indicates whether this URI is opaque or not. An opaque URI is absolute 1120 * and has a scheme-specific part which does not start with a slash 1121 * character. All parts except scheme, scheme-specific and fragment are 1122 * undefined. 1123 * 1124 * @return {@code true} if the URI is opaque, {@code false} otherwise. 1125 */ 1126 public boolean isOpaque() { 1127 return opaque; 1128 } 1129 1130 /* 1131 * normalize path, and return the resulting string 1132 */ 1133 private String normalize(String path) { 1134 // count the number of '/'s, to determine number of segments 1135 int index = -1; 1136 int pathLength = path.length(); 1137 int size = 0; 1138 if (pathLength > 0 && path.charAt(0) != '/') { 1139 size++; 1140 } 1141 while ((index = path.indexOf('/', index + 1)) != -1) { 1142 if (index + 1 < pathLength && path.charAt(index + 1) != '/') { 1143 size++; 1144 } 1145 } 1146 1147 String[] segList = new String[size]; 1148 boolean[] include = new boolean[size]; 1149 1150 // break the path into segments and store in the list 1151 int current = 0; 1152 int index2; 1153 index = (pathLength > 0 && path.charAt(0) == '/') ? 
1 : 0; 1154 while ((index2 = path.indexOf('/', index + 1)) != -1) { 1155 segList[current++] = path.substring(index, index2); 1156 index = index2 + 1; 1157 } 1158 1159 // if current==size, then the last character was a slash 1160 // and there are no more segments 1161 if (current < size) { 1162 segList[current] = path.substring(index); 1163 } 1164 1165 // determine which segments get included in the normalized path 1166 for (int i = 0; i < size; i++) { 1167 include[i] = true; 1168 if (segList[i].equals("..")) { 1169 int remove = i - 1; 1170 // search back to find a segment to remove, if possible 1171 while (remove > -1 && !include[remove]) { 1172 remove--; 1173 } 1174 // if we find a segment to remove, remove it and the ".." 1175 // segment 1176 if (remove > -1 && !segList[remove].equals("..")) { 1177 include[remove] = false; 1178 include[i] = false; 1179 } 1180 } else if (segList[i].equals(".")) { 1181 include[i] = false; 1182 } 1183 } 1184 1185 // put the path back together 1186 StringBuilder newPath = new StringBuilder(); 1187 if (path.startsWith("/")) { 1188 newPath.append('/'); 1189 } 1190 1191 for (int i = 0; i < segList.length; i++) { 1192 if (include[i]) { 1193 newPath.append(segList[i]); 1194 newPath.append('/'); 1195 } 1196 } 1197 1198 // if we used at least one segment and the path previously ended with 1199 // a slash and the last segment is still used, then delete the extra 1200 // trailing '/' 1201 if (!path.endsWith("/") && segList.length > 0 1202 && include[segList.length - 1]) { 1203 newPath.deleteCharAt(newPath.length() - 1); 1204 } 1205 1206 String result = newPath.toString(); 1207 1208 // check for a ':' in the first segment if one exists, 1209 // prepend "./" to normalize 1210 index = result.indexOf(':'); 1211 index2 = result.indexOf('/'); 1212 if (index != -1 && (index < index2 || index2 == -1)) { 1213 newPath.insert(0, "./"); 1214 result = newPath.toString(); 1215 } 1216 return result; 1217 } 1218 1219 /** 1220 * Normalizes the path part of 
this URI. 1221 * 1222 * @return an URI object which represents this instance with a normalized 1223 * path. 1224 */ 1225 public URI normalize() { 1226 if (opaque) { 1227 return this; 1228 } 1229 String normalizedPath = normalize(path); 1230 // if the path is already normalized, return this 1231 if (path.equals(normalizedPath)) { 1232 return this; 1233 } 1234 // get an exact copy of the URI re-calculate the scheme specific part 1235 // since the path of the normalized URI is different from this URI. 1236 URI result = duplicate(); 1237 result.path = normalizedPath; 1238 result.setSchemeSpecificPart(); 1239 return result; 1240 } 1241 1242 /** 1243 * Tries to parse the authority component of this URI to divide it into the 1244 * host, port, and user-info. If this URI is already determined as a 1245 * ServerAuthority this instance will be returned without changes. 1246 * 1247 * @return this instance with the components of the parsed server authority. 1248 * @throws URISyntaxException 1249 * if the authority part could not be parsed as a server-based 1250 * authority. 1251 */ 1252 public URI parseServerAuthority() throws URISyntaxException { 1253 if (!serverAuthority) { 1254 parseAuthority(true); 1255 } 1256 return this; 1257 } 1258 1259 /** 1260 * Makes the given URI {@code relative} to a relative URI against the URI 1261 * represented by this instance. 1262 * 1263 * @param relative 1264 * the URI which has to be relativized against this URI. 1265 * @return the relative URI. 1266 */ 1267 public URI relativize(URI relative) { 1268 if (relative.opaque || opaque) { 1269 return relative; 1270 } 1271 1272 if (scheme == null ? relative.scheme != null : !scheme 1273 .equals(relative.scheme)) { 1274 return relative; 1275 } 1276 1277 if (authority == null ? 
relative.authority != null : !authority 1278 .equals(relative.authority)) { 1279 return relative; 1280 } 1281 1282 // normalize both paths 1283 String thisPath = normalize(path); 1284 String relativePath = normalize(relative.path); 1285 1286 /* 1287 * if the paths aren't equal, then we need to determine if this URI's 1288 * path is a parent path (begins with) the relative URI's path 1289 */ 1290 if (!thisPath.equals(relativePath)) { 1291 // if this URI's path doesn't end in a '/', add one 1292 if (!thisPath.endsWith("/")) { 1293 thisPath = thisPath + '/'; 1294 } 1295 /* 1296 * if the relative URI's path doesn't start with this URI's path, 1297 * then just return the relative URI; the URIs have nothing in 1298 * common 1299 */ 1300 if (!relativePath.startsWith(thisPath)) { 1301 return relative; 1302 } 1303 } 1304 1305 URI result = new URI(); 1306 result.fragment = relative.fragment; 1307 result.query = relative.query; 1308 // the result URI is the remainder of the relative URI's path 1309 result.path = relativePath.substring(thisPath.length()); 1310 result.setSchemeSpecificPart(); 1311 return result; 1312 } 1313 1314 /** 1315 * Resolves the given URI {@code relative} against the URI represented by 1316 * this instance. 1317 * 1318 * @param relative 1319 * the URI which has to be resolved against this URI. 1320 * @return the resolved URI. 1321 */ 1322 public URI resolve(URI relative) { 1323 if (relative.absolute || opaque) { 1324 return relative; 1325 } 1326 1327 URI result; 1328 if (relative.path.isEmpty() && relative.scheme == null 1329 && relative.authority == null && relative.query == null 1330 && relative.fragment != null) { 1331 // if the relative URI only consists of fragment, 1332 // the resolved URI is very similar to this URI, 1333 // except that it has the fragment from the relative URI. 
1334 result = duplicate(); 1335 result.fragment = relative.fragment; 1336 // no need to re-calculate the scheme specific part, 1337 // since fragment is not part of scheme specific part. 1338 return result; 1339 } 1340 1341 if (relative.authority != null) { 1342 // if the relative URI has authority, 1343 // the resolved URI is almost the same as the relative URI, 1344 // except that it has the scheme of this URI. 1345 result = relative.duplicate(); 1346 result.scheme = scheme; 1347 result.absolute = absolute; 1348 } else { 1349 // since relative URI has no authority, 1350 // the resolved URI is very similar to this URI, 1351 // except that it has the query and fragment of the relative URI, 1352 // and the path is different. 1353 result = duplicate(); 1354 result.fragment = relative.fragment; 1355 result.query = relative.query; 1356 if (relative.path.startsWith("/")) { 1357 result.path = relative.path; 1358 } else { 1359 // resolve a relative reference 1360 int endIndex = path.lastIndexOf('/') + 1; 1361 result.path = normalize(path.substring(0, endIndex) 1362 + relative.path); 1363 } 1364 // re-calculate the scheme specific part since 1365 // query and path of the resolved URI is different from this URI. 1366 result.setSchemeSpecificPart(); 1367 } 1368 return result; 1369 } 1370 1371 /** 1372 * Helper method used to re-calculate the scheme specific part of the 1373 * resolved or normalized URIs 1374 */ 1375 private void setSchemeSpecificPart() { 1376 // ssp = [//authority][path][?query] 1377 StringBuilder ssp = new StringBuilder(); 1378 if (authority != null) { 1379 ssp.append("//" + authority); 1380 } 1381 if (path != null) { 1382 ssp.append(path); 1383 } 1384 if (query != null) { 1385 ssp.append("?" + query); 1386 } 1387 schemeSpecificPart = ssp.toString(); 1388 // reset string, so that it can be re-calculated correctly when asked. 
1389 string = null; 1390 } 1391 1392 /** 1393 * Creates a new URI instance by parsing the given string {@code relative} 1394 * and resolves the created URI against the URI represented by this 1395 * instance. 1396 * 1397 * @param relative 1398 * the given string to create the new URI instance which has to 1399 * be resolved later on. 1400 * @return the created and resolved URI. 1401 */ 1402 public URI resolve(String relative) { 1403 return resolve(create(relative)); 1404 } 1405 1406 private String decode(String s) { 1407 return s != null ? UriCodec.decode(s) : null; 1408 } 1409 1410 /** 1411 * Returns the textual string representation of this URI instance using the 1412 * US-ASCII encoding. 1413 * 1414 * @return the US-ASCII string representation of this URI. 1415 */ 1416 public String toASCIIString() { 1417 StringBuilder result = new StringBuilder(); 1418 ASCII_ONLY.appendEncoded(result, toString()); 1419 return result.toString(); 1420 } 1421 1422 /** 1423 * Returns the textual string representation of this URI instance. 1424 * 1425 * @return the textual string representation of this URI. 1426 */ 1427 @Override 1428 public String toString() { 1429 if (string == null) { 1430 StringBuilder result = new StringBuilder(); 1431 if (scheme != null) { 1432 result.append(scheme); 1433 result.append(':'); 1434 } 1435 if (opaque) { 1436 result.append(schemeSpecificPart); 1437 } else { 1438 if (authority != null) { 1439 result.append("//"); 1440 result.append(authority); 1441 } 1442 1443 if (path != null) { 1444 result.append(path); 1445 } 1446 1447 if (query != null) { 1448 result.append('?'); 1449 result.append(query); 1450 } 1451 } 1452 1453 if (fragment != null) { 1454 result.append('#'); 1455 result.append(fragment); 1456 } 1457 1458 string = result.toString(); 1459 } 1460 return string; 1461 } 1462 1463 /* 1464 * Form a string from the components of this URI, similarly to the 1465 * toString() method. 
But this method converts scheme and host to lowercase, 1466 * and converts escaped octets to lowercase. 1467 */ 1468 private String getHashString() { 1469 StringBuilder result = new StringBuilder(); 1470 if (scheme != null) { 1471 result.append(scheme.toLowerCase(Locale.US)); 1472 result.append(':'); 1473 } 1474 if (opaque) { 1475 result.append(schemeSpecificPart); 1476 } else { 1477 if (authority != null) { 1478 result.append("//"); 1479 if (host == null) { 1480 result.append(authority); 1481 } else { 1482 if (userInfo != null) { 1483 result.append(userInfo + "@"); 1484 } 1485 result.append(host.toLowerCase(Locale.US)); 1486 if (port != -1) { 1487 result.append(":" + port); 1488 } 1489 } 1490 } 1491 1492 if (path != null) { 1493 result.append(path); 1494 } 1495 1496 if (query != null) { 1497 result.append('?'); 1498 result.append(query); 1499 } 1500 } 1501 1502 if (fragment != null) { 1503 result.append('#'); 1504 result.append(fragment); 1505 } 1506 1507 return convertHexToLowerCase(result.toString()); 1508 } 1509 1510 /** 1511 * Converts this URI instance to a URL. 1512 * 1513 * @return the created URL representing the same resource as this URI. 1514 * @throws MalformedURLException 1515 * if an error occurs while creating the URL or no protocol 1516 * handler could be found. 
*/
    public URL toURL() throws MalformedURLException {
        if (!absolute) {
            throw new IllegalArgumentException("URI is not absolute: " + toString());
        }
        return new URL(toString());
    }

    /**
     * Restores a URI from a stream. Only {@code string} is serialized; all
     * other fields are transient and are rebuilt by re-parsing that string.
     *
     * @param in the stream to read the serialized form from.
     * @throws IOException
     *             if reading fails or the stored string is not a valid URI;
     *             in the latter case the {@link URISyntaxException} is
     *             attached as the cause.
     * @throws ClassNotFoundException
     *             if a class of the serialized form cannot be found.
     */
    private void readObject(ObjectInputStream in) throws IOException,
            ClassNotFoundException {
        in.defaultReadObject();
        // Field initializers are not executed during deserialization, so the
        // transient fields start out as 0 instead of their -1 sentinels.
        // Restore the sentinels before parsing: otherwise hashCode() would
        // treat 0 as an already computed hash, and a URI without an explicit
        // port could report port 0 if parsing does not assign the field.
        port = -1;
        hash = -1;
        try {
            parseURI(string, false);
        } catch (URISyntaxException e) {
            // keep the original message text, but preserve the cause as well
            throw new IOException(e.toString(), e);
        }
    }

    /**
     * Writes this URI to a stream. The textual form is computed first so that
     * the {@code string} field, the only non-transient state, is populated.
     *
     * @param out the stream to write the serialized form to.
     * @throws IOException if writing fails.
     * @throws ClassNotFoundException declared for historical reasons only.
     */
    private void writeObject(ObjectOutputStream out) throws IOException,
            ClassNotFoundException {
        // call toString() to ensure the value of string field is calculated
        toString();
        out.defaultWriteObject();
    }
}