URI.java revision 753dcd862b31e85766225590d90ba0b9f481176f
1/* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18package java.net; 19 20import java.io.IOException; 21import java.io.ObjectInputStream; 22import java.io.ObjectOutputStream; 23import java.io.Serializable; 24import java.io.UnsupportedEncodingException; 25import java.util.StringTokenizer; 26import org.apache.harmony.luni.platform.INetworkSystem; 27import org.apache.harmony.luni.platform.Platform; 28 29/** 30 * This class represents an instance of a URI as defined by RFC 2396. 
31 */ 32public final class URI implements Comparable<URI>, Serializable { 33 34 private final static INetworkSystem NETWORK_SYSTEM = Platform.getNetworkSystem(); 35 36 private static final long serialVersionUID = -6052424284110960213l; 37 38 static final String UNRESERVED = "_-!.~\'()*"; 39 static final String PUNCTUATION = ",;:$&+="; 40 static final String RESERVED = PUNCTUATION + "?/[]@"; 41 static final String SOME_LEGAL = UNRESERVED + PUNCTUATION; 42 static final String ALL_LEGAL = UNRESERVED + RESERVED; 43 44 private String string; 45 private transient String scheme; 46 private transient String schemeSpecificPart; 47 private transient String authority; 48 private transient String userInfo; 49 private transient String host; 50 private transient int port = -1; 51 private transient String path; 52 private transient String query; 53 private transient String fragment; 54 private transient boolean opaque; 55 private transient boolean absolute; 56 private transient boolean serverAuthority = false; 57 58 private transient int hash = -1; 59 60 private URI() {} 61 62 /** 63 * Creates a new URI instance according to the given string {@code uri}. 64 * 65 * @param uri 66 * the textual URI representation to be parsed into a URI object. 67 * @throws URISyntaxException 68 * if the given string {@code uri} doesn't fit to the 69 * specification RFC2396 or could not be parsed correctly. 70 */ 71 public URI(String uri) throws URISyntaxException { 72 parseURI(uri, false); 73 } 74 75 /** 76 * Creates a new URI instance using the given arguments. This constructor 77 * first creates a temporary URI string from the given components. This 78 * string will be parsed later on to create the URI instance. 79 * <p> 80 * {@code [scheme:]scheme-specific-part[#fragment]} 81 * 82 * @param scheme 83 * the scheme part of the URI. 84 * @param ssp 85 * the scheme-specific-part of the URI. 86 * @param frag 87 * the fragment part of the URI. 
88 * @throws URISyntaxException 89 * if the temporary created string doesn't fit to the 90 * specification RFC2396 or could not be parsed correctly. 91 */ 92 public URI(String scheme, String ssp, String frag) 93 throws URISyntaxException { 94 StringBuilder uri = new StringBuilder(); 95 if (scheme != null) { 96 uri.append(scheme); 97 uri.append(':'); 98 } 99 if (ssp != null) { 100 // QUOTE ILLEGAL CHARACTERS 101 uri.append(quoteComponent(ssp, ALL_LEGAL)); 102 } 103 if (frag != null) { 104 uri.append('#'); 105 // QUOTE ILLEGAL CHARACTERS 106 uri.append(quoteComponent(frag, ALL_LEGAL)); 107 } 108 109 parseURI(uri.toString(), false); 110 } 111 112 /** 113 * Creates a new URI instance using the given arguments. This constructor 114 * first creates a temporary URI string from the given components. This 115 * string will be parsed later on to create the URI instance. 116 * <p> 117 * {@code [scheme:][user-info@]host[:port][path][?query][#fragment]} 118 * 119 * @param scheme 120 * the scheme part of the URI. 121 * @param userInfo 122 * the user information of the URI for authentication and 123 * authorization. 124 * @param host 125 * the host name of the URI. 126 * @param port 127 * the port number of the URI. 128 * @param path 129 * the path to the resource on the host. 130 * @param query 131 * the query part of the URI to specify parameters for the 132 * resource. 133 * @param fragment 134 * the fragment part of the URI. 135 * @throws URISyntaxException 136 * if the temporary created string doesn't fit to the 137 * specification RFC2396 or could not be parsed correctly. 
138 */ 139 public URI(String scheme, String userInfo, String host, int port, 140 String path, String query, String fragment) 141 throws URISyntaxException { 142 143 if (scheme == null && userInfo == null && host == null && path == null 144 && query == null && fragment == null) { 145 this.path = ""; 146 return; 147 } 148 149 if (scheme != null && path != null && path.length() > 0 150 && path.charAt(0) != '/') { 151 throw new URISyntaxException(path, "Relative path"); 152 } 153 154 StringBuilder uri = new StringBuilder(); 155 if (scheme != null) { 156 uri.append(scheme); 157 uri.append(':'); 158 } 159 160 if (userInfo != null || host != null || port != -1) { 161 uri.append("//"); 162 } 163 164 if (userInfo != null) { 165 // QUOTE ILLEGAL CHARACTERS in userInfo 166 uri.append(quoteComponent(userInfo, SOME_LEGAL)); 167 uri.append('@'); 168 } 169 170 if (host != null) { 171 // check for IPv6 addresses that hasn't been enclosed 172 // in square brackets 173 if (host.indexOf(':') != -1 && host.indexOf(']') == -1 174 && host.indexOf('[') == -1) { 175 host = "[" + host + "]"; 176 } 177 uri.append(host); 178 } 179 180 if (port != -1) { 181 uri.append(':'); 182 uri.append(port); 183 } 184 185 if (path != null) { 186 // QUOTE ILLEGAL CHARS 187 uri.append(quoteComponent(path, "/@" + SOME_LEGAL)); 188 } 189 190 if (query != null) { 191 uri.append('?'); 192 // QUOTE ILLEGAL CHARS 193 uri.append(quoteComponent(query, ALL_LEGAL)); 194 } 195 196 if (fragment != null) { 197 // QUOTE ILLEGAL CHARS 198 uri.append('#'); 199 uri.append(quoteComponent(fragment, ALL_LEGAL)); 200 } 201 202 parseURI(uri.toString(), true); 203 } 204 205 /** 206 * Creates a new URI instance using the given arguments. This constructor 207 * first creates a temporary URI string from the given components. This 208 * string will be parsed later on to create the URI instance. 209 * <p> 210 * {@code [scheme:]host[path][#fragment]} 211 * 212 * @param scheme 213 * the scheme part of the URI. 
214 * @param host 215 * the host name of the URI. 216 * @param path 217 * the path to the resource on the host. 218 * @param fragment 219 * the fragment part of the URI. 220 * @throws URISyntaxException 221 * if the temporary created string doesn't fit to the 222 * specification RFC2396 or could not be parsed correctly. 223 */ 224 public URI(String scheme, String host, String path, String fragment) 225 throws URISyntaxException { 226 this(scheme, null, host, -1, path, null, fragment); 227 } 228 229 /** 230 * Creates a new URI instance using the given arguments. This constructor 231 * first creates a temporary URI string from the given components. This 232 * string will be parsed later on to create the URI instance. 233 * <p> 234 * {@code [scheme:][//authority][path][?query][#fragment]} 235 * 236 * @param scheme 237 * the scheme part of the URI. 238 * @param authority 239 * the authority part of the URI. 240 * @param path 241 * the path to the resource on the host. 242 * @param query 243 * the query part of the URI to specify parameters for the 244 * resource. 245 * @param fragment 246 * the fragment part of the URI. 247 * @throws URISyntaxException 248 * if the temporary created string doesn't fit to the 249 * specification RFC2396 or could not be parsed correctly. 
250 */ 251 public URI(String scheme, String authority, String path, String query, 252 String fragment) throws URISyntaxException { 253 if (scheme != null && path != null && path.length() > 0 254 && path.charAt(0) != '/') { 255 throw new URISyntaxException(path, "Relative path"); 256 } 257 258 StringBuilder uri = new StringBuilder(); 259 if (scheme != null) { 260 uri.append(scheme); 261 uri.append(':'); 262 } 263 if (authority != null) { 264 uri.append("//"); 265 // QUOTE ILLEGAL CHARS 266 uri.append(quoteComponent(authority, "@[]" + SOME_LEGAL)); 267 } 268 269 if (path != null) { 270 // QUOTE ILLEGAL CHARS 271 uri.append(quoteComponent(path, "/@" + SOME_LEGAL)); 272 } 273 if (query != null) { 274 // QUOTE ILLEGAL CHARS 275 uri.append('?'); 276 uri.append(quoteComponent(query, ALL_LEGAL)); 277 } 278 if (fragment != null) { 279 // QUOTE ILLEGAL CHARS 280 uri.append('#'); 281 uri.append(quoteComponent(fragment, ALL_LEGAL)); 282 } 283 284 parseURI(uri.toString(), false); 285 } 286 287 private void parseURI(String uri, boolean forceServer) throws URISyntaxException { 288 String temp = uri; 289 // assign uri string to the input value per spec 290 string = uri; 291 int index, index1, index2, index3; 292 // parse into Fragment, Scheme, and SchemeSpecificPart 293 // then parse SchemeSpecificPart if necessary 294 295 // Fragment 296 index = temp.indexOf('#'); 297 if (index != -1) { 298 // remove the fragment from the end 299 fragment = temp.substring(index + 1); 300 validateFragment(uri, fragment, index + 1); 301 temp = temp.substring(0, index); 302 } 303 304 // Scheme and SchemeSpecificPart 305 index = index1 = temp.indexOf(':'); 306 index2 = temp.indexOf('/'); 307 index3 = temp.indexOf('?'); 308 309 // if a '/' or '?' 
occurs before the first ':' the uri has no 310 // specified scheme, and is therefore not absolute 311 if (index != -1 && (index2 >= index || index2 == -1) 312 && (index3 >= index || index3 == -1)) { 313 // the characters up to the first ':' comprise the scheme 314 absolute = true; 315 scheme = temp.substring(0, index); 316 if (scheme.length() == 0) { 317 throw new URISyntaxException(uri, "Scheme expected", index); 318 } 319 validateScheme(uri, scheme, 0); 320 schemeSpecificPart = temp.substring(index + 1); 321 if (schemeSpecificPart.length() == 0) { 322 throw new URISyntaxException(uri, "Scheme-specific part expected", index + 1); 323 } 324 } else { 325 absolute = false; 326 schemeSpecificPart = temp; 327 } 328 329 if (scheme == null || schemeSpecificPart.length() > 0 330 && schemeSpecificPart.charAt(0) == '/') { 331 opaque = false; 332 // the URI is hierarchical 333 334 // Query 335 temp = schemeSpecificPart; 336 index = temp.indexOf('?'); 337 if (index != -1) { 338 query = temp.substring(index + 1); 339 temp = temp.substring(0, index); 340 validateQuery(uri, query, index2 + 1 + index); 341 } 342 343 // Authority and Path 344 if (temp.startsWith("//")) { 345 index = temp.indexOf('/', 2); 346 if (index != -1) { 347 authority = temp.substring(2, index); 348 path = temp.substring(index); 349 } else { 350 authority = temp.substring(2); 351 if (authority.length() == 0 && query == null 352 && fragment == null) { 353 throw new URISyntaxException(uri, "Authority expected", uri.length()); 354 } 355 356 path = ""; 357 // nothing left, so path is empty (not null, path should 358 // never be null) 359 } 360 361 if (authority.length() == 0) { 362 authority = null; 363 } else { 364 validateAuthority(uri, authority, index1 + 3); 365 } 366 } else { // no authority specified 367 path = temp; 368 } 369 370 int pathIndex = 0; 371 if (index2 > -1) { 372 pathIndex += index2; 373 } 374 if (index > -1) { 375 pathIndex += index; 376 } 377 validatePath(uri, path, pathIndex); 378 } else { 
// if not hierarchical, URI is opaque 379 opaque = true; 380 validateSsp(uri, schemeSpecificPart, index2 + 2 + index); 381 } 382 383 parseAuthority(forceServer); 384 } 385 386 private void validateScheme(String uri, String scheme, int index) 387 throws URISyntaxException { 388 // first char needs to be an alpha char 389 char ch = scheme.charAt(0); 390 if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'))) { 391 throw new URISyntaxException(uri, "Illegal character in scheme", 0); 392 } 393 394 try { 395 URIEncoderDecoder.validateSimple(scheme, "+-."); 396 } catch (URISyntaxException e) { 397 throw new URISyntaxException(uri, "Illegal character in scheme", index + e.getIndex()); 398 } 399 } 400 401 private void validateSsp(String uri, String ssp, int index) 402 throws URISyntaxException { 403 try { 404 URIEncoderDecoder.validate(ssp, ALL_LEGAL); 405 } catch (URISyntaxException e) { 406 throw new URISyntaxException(uri, 407 e.getReason() + " in schemeSpecificPart", index + e.getIndex()); 408 } 409 } 410 411 private void validateAuthority(String uri, String authority, int index) 412 throws URISyntaxException { 413 try { 414 URIEncoderDecoder.validate(authority, "@[]" + SOME_LEGAL); 415 } catch (URISyntaxException e) { 416 throw new URISyntaxException(uri, e.getReason() + " in authority", index + e.getIndex()); 417 } 418 } 419 420 private void validatePath(String uri, String path, int index) 421 throws URISyntaxException { 422 try { 423 URIEncoderDecoder.validate(path, "/@" + SOME_LEGAL); 424 } catch (URISyntaxException e) { 425 throw new URISyntaxException(uri, e.getReason() + " in path", index + e.getIndex()); 426 } 427 } 428 429 private void validateQuery(String uri, String query, int index) 430 throws URISyntaxException { 431 try { 432 URIEncoderDecoder.validate(query, ALL_LEGAL); 433 } catch (URISyntaxException e) { 434 throw new URISyntaxException(uri, e.getReason() + " in query", index + e.getIndex()); 435 436 } 437 } 438 439 private void 
validateFragment(String uri, String fragment, int index) 440 throws URISyntaxException { 441 try { 442 URIEncoderDecoder.validate(fragment, ALL_LEGAL); 443 } catch (URISyntaxException e) { 444 throw new URISyntaxException(uri, e.getReason() + " in fragment", index + e.getIndex()); 445 } 446 } 447 448 /** 449 * Parse the authority string into its component parts: user info, 450 * host, and port. This operation doesn't apply to registry URIs, and 451 * calling it on such <i>may</i> result in a syntax exception. 452 * 453 * @param forceServer true to always throw if the authority cannot be 454 * parsed. If false, this method may still throw for some kinds of 455 * errors; this unpredictable behaviour is consistent with the RI. 456 */ 457 private void parseAuthority(boolean forceServer) throws URISyntaxException { 458 if (authority == null) { 459 return; 460 } 461 462 String tempUserInfo = null; 463 String temp = authority; 464 int index = temp.indexOf('@'); 465 int hostIndex = 0; 466 if (index != -1) { 467 // remove user info 468 tempUserInfo = temp.substring(0, index); 469 validateUserInfo(authority, tempUserInfo, 0); 470 temp = temp.substring(index + 1); // host[:port] is left 471 hostIndex = index + 1; 472 } 473 474 index = temp.lastIndexOf(':'); 475 int endIndex = temp.indexOf(']'); 476 477 String tempHost; 478 int tempPort = -1; 479 if (index != -1 && endIndex < index) { 480 // determine port and host 481 tempHost = temp.substring(0, index); 482 483 if (index < (temp.length() - 1)) { // port part is not empty 484 try { 485 tempPort = Integer.parseInt(temp.substring(index + 1)); 486 if (tempPort < 0) { 487 if (forceServer) { 488 throw new URISyntaxException(authority, 489 "Invalid port number", hostIndex + index + 1); 490 } 491 return; 492 } 493 } catch (NumberFormatException e) { 494 if (forceServer) { 495 throw new URISyntaxException(authority, 496 "Invalid port number", hostIndex + index + 1); 497 } 498 return; 499 } 500 } 501 } else { 502 tempHost = temp; 503 
} 504 505 if (tempHost.isEmpty()) { 506 if (forceServer) { 507 throw new URISyntaxException(authority, "Expected host", hostIndex); 508 } 509 return; 510 } 511 512 if (!isValidHost(forceServer, tempHost)) { 513 return; 514 } 515 516 // this is a server based uri, 517 // fill in the userInfo, host and port fields 518 userInfo = tempUserInfo; 519 host = tempHost; 520 port = tempPort; 521 serverAuthority = true; 522 } 523 524 private void validateUserInfo(String uri, String userInfo, int index) 525 throws URISyntaxException { 526 for (int i = 0; i < userInfo.length(); i++) { 527 char ch = userInfo.charAt(i); 528 if (ch == ']' || ch == '[') { 529 throw new URISyntaxException(uri, "Illegal character in userInfo", index + i); 530 } 531 } 532 } 533 534 /** 535 * Returns true if {@code host} is a well-formed host name or IP address. 536 * 537 * @param forceServer true to always throw if the host cannot be parsed. If 538 * false, this method may still throw for some kinds of errors; this 539 * unpredictable behaviour is consistent with the RI. 540 */ 541 private boolean isValidHost(boolean forceServer, String host) throws URISyntaxException { 542 if (host.startsWith("[")) { 543 // IPv6 address 544 if (!host.endsWith("]")) { 545 throw new URISyntaxException(host, 546 "Expected a closing square bracket for IPv6 address", 0); 547 } 548 try { 549 byte[] bytes = InetAddress.ipStringToByteArray(host); 550 /* 551 * The native IP parser may return 4 bytes for addresses like 552 * "[::FFFF:127.0.0.1]". This is allowed, but we must not accept 553 * IPv4-formatted addresses in square braces like "[127.0.0.1]". 
554 */ 555 if (bytes.length == 16 || bytes.length == 4 && host.contains(":")) { 556 return true; 557 } 558 } catch (UnknownHostException e) { 559 } 560 throw new URISyntaxException(host, "Malformed IPv6 address"); 561 } 562 563 // '[' and ']' can only be the first char and last char 564 // of the host name 565 if (host.indexOf('[') != -1 || host.indexOf(']') != -1) { 566 throw new URISyntaxException(host, "Illegal character in host name", 0); 567 } 568 569 int index = host.lastIndexOf('.'); 570 if (index < 0 || index == host.length() - 1 571 || !Character.isDigit(host.charAt(index + 1))) { 572 // domain name 573 if (isValidDomainName(host)) { 574 return true; 575 } 576 if (forceServer) { 577 throw new URISyntaxException(host, "Illegal character in host name", 0); 578 } 579 return false; 580 } 581 582 // IPv4 address 583 try { 584 if (InetAddress.ipStringToByteArray(host).length == 4) { 585 return true; 586 } 587 } catch (UnknownHostException e) { 588 } 589 590 if (forceServer) { 591 throw new URISyntaxException(host, "Malformed IPv4 address", 0); 592 } 593 return false; 594 } 595 596 private boolean isValidDomainName(String host) { 597 try { 598 URIEncoderDecoder.validateSimple(host, "-."); 599 } catch (URISyntaxException e) { 600 return false; 601 } 602 603 String lastLabel = null; 604 StringTokenizer st = new StringTokenizer(host, "."); 605 while (st.hasMoreTokens()) { 606 lastLabel = st.nextToken(); 607 if (lastLabel.startsWith("-") || lastLabel.endsWith("-")) { 608 return false; 609 } 610 } 611 612 if (lastLabel == null) { 613 return false; 614 } 615 616 if (!lastLabel.equals(host)) { 617 char ch = lastLabel.charAt(0); 618 if (ch >= '0' && ch <= '9') { 619 return false; 620 } 621 } 622 return true; 623 } 624 625 /** 626 * Quote illegal chars for each component, but not the others 627 * 628 * @param component java.lang.String the component to be converted 629 * @param legalSet the legal character set allowed in the component 630 * @return java.lang.String the 
* converted string
*/
private String quoteComponent(String component, String legalSet) {
    try {
        /*
         * Use a different encoder than URLEncoder since: 1. chars like "/",
         * "#", "@" etc needs to be preserved instead of being encoded, 2.
         * UTF-8 char set needs to be used for encoding instead of default
         * platform one
         */
        return URIEncoderDecoder.quoteIllegal(component, legalSet);
    } catch (UnsupportedEncodingException e) {
        // Same message as before, but the original exception is now kept
        // as the cause so its stack trace is not lost.
        throw new RuntimeException(e.toString(), e);
    }
}

/**
 * Compares this URI with the given argument {@code uri}. This method will
 * return a negative value if this URI instance is less than the given
 * argument and a positive value if this URI instance is greater than the
 * given argument. The return value {@code 0} indicates that the two
 * instances represent the same URI. To define the order the single parts of
 * the URI are compared with each other. String components will be ordered
 * in the natural case-sensitive way. A hierarchical URI is less than an
 * opaque URI and if one part is {@code null} the URI with the undefined
 * part is less than the other one.
 *
 * @param uri
 *            the URI this instance has to compare with.
 * @return the value representing the order of the two instances.
660 */ 661 public int compareTo(URI uri) { 662 int ret; 663 664 // compare schemes 665 if (scheme == null && uri.scheme != null) { 666 return -1; 667 } else if (scheme != null && uri.scheme == null) { 668 return 1; 669 } else if (scheme != null && uri.scheme != null) { 670 ret = scheme.compareToIgnoreCase(uri.scheme); 671 if (ret != 0) { 672 return ret; 673 } 674 } 675 676 // compare opacities 677 if (!opaque && uri.opaque) { 678 return -1; 679 } else if (opaque && !uri.opaque) { 680 return 1; 681 } else if (opaque && uri.opaque) { 682 ret = schemeSpecificPart.compareTo(uri.schemeSpecificPart); 683 if (ret != 0) { 684 return ret; 685 } 686 } else { 687 688 // otherwise both must be hierarchical 689 690 // compare authorities 691 if (authority != null && uri.authority == null) { 692 return 1; 693 } else if (authority == null && uri.authority != null) { 694 return -1; 695 } else if (authority != null && uri.authority != null) { 696 if (host != null && uri.host != null) { 697 // both are server based, so compare userInfo, host, port 698 if (userInfo != null && uri.userInfo == null) { 699 return 1; 700 } else if (userInfo == null && uri.userInfo != null) { 701 return -1; 702 } else if (userInfo != null && uri.userInfo != null) { 703 ret = userInfo.compareTo(uri.userInfo); 704 if (ret != 0) { 705 return ret; 706 } 707 } 708 709 // userInfo's are the same, compare hostname 710 ret = host.compareToIgnoreCase(uri.host); 711 if (ret != 0) { 712 return ret; 713 } 714 715 // compare port 716 if (port != uri.port) { 717 return port - uri.port; 718 } 719 } else { // one or both are registry based, compare the whole 720 // authority 721 ret = authority.compareTo(uri.authority); 722 if (ret != 0) { 723 return ret; 724 } 725 } 726 } 727 728 // authorities are the same 729 // compare paths 730 ret = path.compareTo(uri.path); 731 if (ret != 0) { 732 return ret; 733 } 734 735 // compare queries 736 737 if (query != null && uri.query == null) { 738 return 1; 739 } else if (query == 
null && uri.query != null) { 740 return -1; 741 } else if (query != null && uri.query != null) { 742 ret = query.compareTo(uri.query); 743 if (ret != 0) { 744 return ret; 745 } 746 } 747 } 748 749 // everything else is identical, so compare fragments 750 if (fragment != null && uri.fragment == null) { 751 return 1; 752 } else if (fragment == null && uri.fragment != null) { 753 return -1; 754 } else if (fragment != null && uri.fragment != null) { 755 ret = fragment.compareTo(uri.fragment); 756 if (ret != 0) { 757 return ret; 758 } 759 } 760 761 // identical 762 return 0; 763 } 764 765 /** 766 * Returns the URI formed by parsing {@code uri}. This method behaves 767 * identically to the string constructor but throws a different exception 768 * on failure. The constructor fails with a checked {@link 769 * URISyntaxException}; this method fails with an unchecked {@link 770 * IllegalArgumentException}. 771 */ 772 public static URI create(String uri) { 773 try { 774 return new URI(uri); 775 } catch (URISyntaxException e) { 776 throw new IllegalArgumentException(e.getMessage()); 777 } 778 } 779 780 private URI duplicate() { 781 URI clone = new URI(); 782 clone.absolute = absolute; 783 clone.authority = authority; 784 clone.fragment = fragment; 785 clone.host = host; 786 clone.opaque = opaque; 787 clone.path = path; 788 clone.port = port; 789 clone.query = query; 790 clone.scheme = scheme; 791 clone.schemeSpecificPart = schemeSpecificPart; 792 clone.userInfo = userInfo; 793 clone.serverAuthority = serverAuthority; 794 return clone; 795 } 796 797 /* 798 * Takes a string that may contain hex sequences like %F1 or %2b and 799 * converts the hex values following the '%' to lowercase 800 */ 801 private String convertHexToLowerCase(String s) { 802 StringBuilder result = new StringBuilder(""); 803 if (s.indexOf('%') == -1) { 804 return s; 805 } 806 807 int index, prevIndex = 0; 808 while ((index = s.indexOf('%', prevIndex)) != -1) { 809 result.append(s.substring(prevIndex, index + 
1)); 810 result.append(s.substring(index + 1, index + 3).toLowerCase()); 811 index += 3; 812 prevIndex = index; 813 } 814 return result.toString(); 815 } 816 817 /** 818 * Returns true if {@code first} and {@code second} are equal after 819 * unescaping hex sequences like %F1 and %2b. 820 */ 821 private boolean escapedEquals(String first, String second) { 822 if (first.indexOf('%') != second.indexOf('%')) { 823 return first.equals(second); 824 } 825 826 int index, prevIndex = 0; 827 while ((index = first.indexOf('%', prevIndex)) != -1 828 && second.indexOf('%', prevIndex) == index) { 829 boolean match = first.substring(prevIndex, index).equals( 830 second.substring(prevIndex, index)); 831 if (!match) { 832 return false; 833 } 834 835 match = first.substring(index + 1, index + 3).equalsIgnoreCase( 836 second.substring(index + 1, index + 3)); 837 if (!match) { 838 return false; 839 } 840 841 index += 3; 842 prevIndex = index; 843 } 844 return first.substring(prevIndex).equals(second.substring(prevIndex)); 845 } 846 847 /** 848 * Compares this URI instance with the given argument {@code o} and 849 * determines if both are equal. Two URI instances are equal if all single 850 * parts are identical in their meaning. 851 * 852 * @param o 853 * the URI this instance has to be compared with. 854 * @return {@code true} if both URI instances point to the same resource, 855 * {@code false} otherwise. 
856 */ 857 @Override 858 public boolean equals(Object o) { 859 if (!(o instanceof URI)) { 860 return false; 861 } 862 URI uri = (URI) o; 863 864 if (uri.fragment == null && fragment != null || uri.fragment != null 865 && fragment == null) { 866 return false; 867 } else if (uri.fragment != null && fragment != null) { 868 if (!escapedEquals(uri.fragment, fragment)) { 869 return false; 870 } 871 } 872 873 if (uri.scheme == null && scheme != null || uri.scheme != null 874 && scheme == null) { 875 return false; 876 } else if (uri.scheme != null && scheme != null) { 877 if (!uri.scheme.equalsIgnoreCase(scheme)) { 878 return false; 879 } 880 } 881 882 if (uri.opaque && opaque) { 883 return escapedEquals(uri.schemeSpecificPart, 884 schemeSpecificPart); 885 } else if (!uri.opaque && !opaque) { 886 if (!escapedEquals(path, uri.path)) { 887 return false; 888 } 889 890 if (uri.query != null && query == null || uri.query == null 891 && query != null) { 892 return false; 893 } else if (uri.query != null && query != null) { 894 if (!escapedEquals(uri.query, query)) { 895 return false; 896 } 897 } 898 899 if (uri.authority != null && authority == null 900 || uri.authority == null && authority != null) { 901 return false; 902 } else if (uri.authority != null && authority != null) { 903 if (uri.host != null && host == null || uri.host == null 904 && host != null) { 905 return false; 906 } else if (uri.host == null && host == null) { 907 // both are registry based, so compare the whole authority 908 return escapedEquals(uri.authority, authority); 909 } else { // uri.host != null && host != null, so server-based 910 if (!host.equalsIgnoreCase(uri.host)) { 911 return false; 912 } 913 914 if (port != uri.port) { 915 return false; 916 } 917 918 if (uri.userInfo != null && userInfo == null 919 || uri.userInfo == null && userInfo != null) { 920 return false; 921 } else if (uri.userInfo != null && userInfo != null) { 922 return escapedEquals(userInfo, uri.userInfo); 923 } else { 924 return 
true; 925 } 926 } 927 } else { 928 // no authority 929 return true; 930 } 931 932 } else { 933 // one is opaque, the other hierarchical 934 return false; 935 } 936 } 937 938 /** 939 * Gets the decoded authority part of this URI. 940 * 941 * @return the decoded authority part or {@code null} if undefined. 942 */ 943 public String getAuthority() { 944 return decode(authority); 945 } 946 947 /** 948 * Gets the decoded fragment part of this URI. 949 * 950 * @return the decoded fragment part or {@code null} if undefined. 951 */ 952 public String getFragment() { 953 return decode(fragment); 954 } 955 956 /** 957 * Gets the host part of this URI. 958 * 959 * @return the host part or {@code null} if undefined. 960 */ 961 public String getHost() { 962 return host; 963 } 964 965 /** 966 * Gets the decoded path part of this URI. 967 * 968 * @return the decoded path part or {@code null} if undefined. 969 */ 970 public String getPath() { 971 return decode(path); 972 } 973 974 /** 975 * Gets the port number of this URI. 976 * 977 * @return the port number or {@code -1} if undefined. 978 */ 979 public int getPort() { 980 return port; 981 } 982 983 /** 984 * Returns the port of {@code host} that requests to this URI shall use. 985 * Unlike {@code getPort}, this returns the default port (80 or 443) for 986 * built-in protocols when known. 987 * 988 * @hide 989 */ 990 public int getEffectivePort() { 991 if (port != -1) { 992 return port; 993 } 994 995 if ("http".equalsIgnoreCase(scheme)) { 996 return 80; 997 } else if ("https".equalsIgnoreCase(scheme)) { 998 return 443; 999 } else { 1000 return -1; 1001 } 1002 } 1003 1004 /** 1005 * Gets the decoded query part of this URI. 1006 * 1007 * @return the decoded query part or {@code null} if undefined. 1008 */ 1009 public String getQuery() { 1010 return decode(query); 1011 } 1012 1013 /** 1014 * Gets the authority part of this URI in raw form. 1015 * 1016 * @return the encoded authority part or {@code null} if undefined. 
1017 */ 1018 public String getRawAuthority() { 1019 return authority; 1020 } 1021 1022 /** 1023 * Gets the fragment part of this URI in raw form. 1024 * 1025 * @return the encoded fragment part or {@code null} if undefined. 1026 */ 1027 public String getRawFragment() { 1028 return fragment; 1029 } 1030 1031 /** 1032 * Gets the path part of this URI in raw form. 1033 * 1034 * @return the encoded path part or {@code null} if undefined. 1035 */ 1036 public String getRawPath() { 1037 return path; 1038 } 1039 1040 /** 1041 * Gets the query part of this URI in raw form. 1042 * 1043 * @return the encoded query part or {@code null} if undefined. 1044 */ 1045 public String getRawQuery() { 1046 return query; 1047 } 1048 1049 /** 1050 * Gets the scheme-specific part of this URI in raw form. 1051 * 1052 * @return the encoded scheme-specific part or {@code null} if undefined. 1053 */ 1054 public String getRawSchemeSpecificPart() { 1055 return schemeSpecificPart; 1056 } 1057 1058 /** 1059 * Gets the user-info part of this URI in raw form. 1060 * 1061 * @return the encoded user-info part or {@code null} if undefined. 1062 */ 1063 public String getRawUserInfo() { 1064 return userInfo; 1065 } 1066 1067 /** 1068 * Gets the scheme part of this URI. 1069 * 1070 * @return the scheme part or {@code null} if undefined. 1071 */ 1072 public String getScheme() { 1073 return scheme; 1074 } 1075 1076 /** 1077 * Gets the decoded scheme-specific part of this URI. 1078 * 1079 * @return the decoded scheme-specific part or {@code null} if undefined. 1080 */ 1081 public String getSchemeSpecificPart() { 1082 return decode(schemeSpecificPart); 1083 } 1084 1085 /** 1086 * Gets the decoded user-info part of this URI. 1087 * 1088 * @return the decoded user-info part or {@code null} if undefined. 1089 */ 1090 public String getUserInfo() { 1091 return decode(userInfo); 1092 } 1093 1094 /** 1095 * Gets the hashcode value of this URI instance. 1096 * 1097 * @return the appropriate hashcode value. 
1098 */ 1099 @Override 1100 public int hashCode() { 1101 if (hash == -1) { 1102 hash = getHashString().hashCode(); 1103 } 1104 return hash; 1105 } 1106 1107 /** 1108 * Indicates whether this URI is absolute, which means that a scheme part is 1109 * defined in this URI. 1110 * 1111 * @return {@code true} if this URI is absolute, {@code false} otherwise. 1112 */ 1113 public boolean isAbsolute() { 1114 return absolute; 1115 } 1116 1117 /** 1118 * Indicates whether this URI is opaque or not. An opaque URI is absolute 1119 * and has a scheme-specific part which does not start with a slash 1120 * character. All parts except scheme, scheme-specific and fragment are 1121 * undefined. 1122 * 1123 * @return {@code true} if the URI is opaque, {@code false} otherwise. 1124 */ 1125 public boolean isOpaque() { 1126 return opaque; 1127 } 1128 1129 /* 1130 * normalize path, and return the resulting string 1131 */ 1132 private String normalize(String path) { 1133 // count the number of '/'s, to determine number of segments 1134 int index = -1; 1135 int pathLength = path.length(); 1136 int size = 0; 1137 if (pathLength > 0 && path.charAt(0) != '/') { 1138 size++; 1139 } 1140 while ((index = path.indexOf('/', index + 1)) != -1) { 1141 if (index + 1 < pathLength && path.charAt(index + 1) != '/') { 1142 size++; 1143 } 1144 } 1145 1146 String[] segList = new String[size]; 1147 boolean[] include = new boolean[size]; 1148 1149 // break the path into segments and store in the list 1150 int current = 0; 1151 int index2; 1152 index = (pathLength > 0 && path.charAt(0) == '/') ? 
1 : 0; 1153 while ((index2 = path.indexOf('/', index + 1)) != -1) { 1154 segList[current++] = path.substring(index, index2); 1155 index = index2 + 1; 1156 } 1157 1158 // if current==size, then the last character was a slash 1159 // and there are no more segments 1160 if (current < size) { 1161 segList[current] = path.substring(index); 1162 } 1163 1164 // determine which segments get included in the normalized path 1165 for (int i = 0; i < size; i++) { 1166 include[i] = true; 1167 if (segList[i].equals("..")) { 1168 int remove = i - 1; 1169 // search back to find a segment to remove, if possible 1170 while (remove > -1 && !include[remove]) { 1171 remove--; 1172 } 1173 // if we find a segment to remove, remove it and the ".." 1174 // segment 1175 if (remove > -1 && !segList[remove].equals("..")) { 1176 include[remove] = false; 1177 include[i] = false; 1178 } 1179 } else if (segList[i].equals(".")) { 1180 include[i] = false; 1181 } 1182 } 1183 1184 // put the path back together 1185 StringBuilder newPath = new StringBuilder(); 1186 if (path.startsWith("/")) { 1187 newPath.append('/'); 1188 } 1189 1190 for (int i = 0; i < segList.length; i++) { 1191 if (include[i]) { 1192 newPath.append(segList[i]); 1193 newPath.append('/'); 1194 } 1195 } 1196 1197 // if we used at least one segment and the path previously ended with 1198 // a slash and the last segment is still used, then delete the extra 1199 // trailing '/' 1200 if (!path.endsWith("/") && segList.length > 0 1201 && include[segList.length - 1]) { 1202 newPath.deleteCharAt(newPath.length() - 1); 1203 } 1204 1205 String result = newPath.toString(); 1206 1207 // check for a ':' in the first segment if one exists, 1208 // prepend "./" to normalize 1209 index = result.indexOf(':'); 1210 index2 = result.indexOf('/'); 1211 if (index != -1 && (index < index2 || index2 == -1)) { 1212 newPath.insert(0, "./"); 1213 result = newPath.toString(); 1214 } 1215 return result; 1216 } 1217 1218 /** 1219 * Normalizes the path part of 
this URI. 1220 * 1221 * @return an URI object which represents this instance with a normalized 1222 * path. 1223 */ 1224 public URI normalize() { 1225 if (opaque) { 1226 return this; 1227 } 1228 String normalizedPath = normalize(path); 1229 // if the path is already normalized, return this 1230 if (path.equals(normalizedPath)) { 1231 return this; 1232 } 1233 // get an exact copy of the URI re-calculate the scheme specific part 1234 // since the path of the normalized URI is different from this URI. 1235 URI result = duplicate(); 1236 result.path = normalizedPath; 1237 result.setSchemeSpecificPart(); 1238 return result; 1239 } 1240 1241 /** 1242 * Tries to parse the authority component of this URI to divide it into the 1243 * host, port, and user-info. If this URI is already determined as a 1244 * ServerAuthority this instance will be returned without changes. 1245 * 1246 * @return this instance with the components of the parsed server authority. 1247 * @throws URISyntaxException 1248 * if the authority part could not be parsed as a server-based 1249 * authority. 1250 */ 1251 public URI parseServerAuthority() throws URISyntaxException { 1252 if (!serverAuthority) { 1253 parseAuthority(true); 1254 } 1255 return this; 1256 } 1257 1258 /** 1259 * Makes the given URI {@code relative} to a relative URI against the URI 1260 * represented by this instance. 1261 * 1262 * @param relative 1263 * the URI which has to be relativized against this URI. 1264 * @return the relative URI. 1265 */ 1266 public URI relativize(URI relative) { 1267 if (relative.opaque || opaque) { 1268 return relative; 1269 } 1270 1271 if (scheme == null ? relative.scheme != null : !scheme 1272 .equals(relative.scheme)) { 1273 return relative; 1274 } 1275 1276 if (authority == null ? 
relative.authority != null : !authority 1277 .equals(relative.authority)) { 1278 return relative; 1279 } 1280 1281 // normalize both paths 1282 String thisPath = normalize(path); 1283 String relativePath = normalize(relative.path); 1284 1285 /* 1286 * if the paths aren't equal, then we need to determine if this URI's 1287 * path is a parent path (begins with) the relative URI's path 1288 */ 1289 if (!thisPath.equals(relativePath)) { 1290 // if this URI's path doesn't end in a '/', add one 1291 if (!thisPath.endsWith("/")) { 1292 thisPath = thisPath + '/'; 1293 } 1294 /* 1295 * if the relative URI's path doesn't start with this URI's path, 1296 * then just return the relative URI; the URIs have nothing in 1297 * common 1298 */ 1299 if (!relativePath.startsWith(thisPath)) { 1300 return relative; 1301 } 1302 } 1303 1304 URI result = new URI(); 1305 result.fragment = relative.fragment; 1306 result.query = relative.query; 1307 // the result URI is the remainder of the relative URI's path 1308 result.path = relativePath.substring(thisPath.length()); 1309 result.setSchemeSpecificPart(); 1310 return result; 1311 } 1312 1313 /** 1314 * Resolves the given URI {@code relative} against the URI represented by 1315 * this instance. 1316 * 1317 * @param relative 1318 * the URI which has to be resolved against this URI. 1319 * @return the resolved URI. 1320 */ 1321 public URI resolve(URI relative) { 1322 if (relative.absolute || opaque) { 1323 return relative; 1324 } 1325 1326 URI result; 1327 if (relative.path.isEmpty() && relative.scheme == null 1328 && relative.authority == null && relative.query == null 1329 && relative.fragment != null) { 1330 // if the relative URI only consists of fragment, 1331 // the resolved URI is very similar to this URI, 1332 // except that it has the fragment from the relative URI. 
1333 result = duplicate(); 1334 result.fragment = relative.fragment; 1335 // no need to re-calculate the scheme specific part, 1336 // since fragment is not part of scheme specific part. 1337 return result; 1338 } 1339 1340 if (relative.authority != null) { 1341 // if the relative URI has authority, 1342 // the resolved URI is almost the same as the relative URI, 1343 // except that it has the scheme of this URI. 1344 result = relative.duplicate(); 1345 result.scheme = scheme; 1346 result.absolute = absolute; 1347 } else { 1348 // since relative URI has no authority, 1349 // the resolved URI is very similar to this URI, 1350 // except that it has the query and fragment of the relative URI, 1351 // and the path is different. 1352 result = duplicate(); 1353 result.fragment = relative.fragment; 1354 result.query = relative.query; 1355 if (relative.path.startsWith("/")) { 1356 result.path = relative.path; 1357 } else { 1358 // resolve a relative reference 1359 int endIndex = path.lastIndexOf('/') + 1; 1360 result.path = normalize(path.substring(0, endIndex) 1361 + relative.path); 1362 } 1363 // re-calculate the scheme specific part since 1364 // query and path of the resolved URI is different from this URI. 1365 result.setSchemeSpecificPart(); 1366 } 1367 return result; 1368 } 1369 1370 /** 1371 * Helper method used to re-calculate the scheme specific part of the 1372 * resolved or normalized URIs 1373 */ 1374 private void setSchemeSpecificPart() { 1375 // ssp = [//authority][path][?query] 1376 StringBuilder ssp = new StringBuilder(); 1377 if (authority != null) { 1378 ssp.append("//" + authority); 1379 } 1380 if (path != null) { 1381 ssp.append(path); 1382 } 1383 if (query != null) { 1384 ssp.append("?" + query); 1385 } 1386 schemeSpecificPart = ssp.toString(); 1387 // reset string, so that it can be re-calculated correctly when asked. 
1388 string = null; 1389 } 1390 1391 /** 1392 * Creates a new URI instance by parsing the given string {@code relative} 1393 * and resolves the created URI against the URI represented by this 1394 * instance. 1395 * 1396 * @param relative 1397 * the given string to create the new URI instance which has to 1398 * be resolved later on. 1399 * @return the created and resolved URI. 1400 */ 1401 public URI resolve(String relative) { 1402 return resolve(create(relative)); 1403 } 1404 1405 /** 1406 * Encode unicode chars that are not part of US-ASCII char set into the 1407 * escaped form 1408 * 1409 * i.e. The Euro currency symbol is encoded as "%E2%82%AC". 1410 */ 1411 private String encodeNonAscii(String s) { 1412 try { 1413 /* 1414 * Use a different encoder than URLEncoder since: 1. chars like "/", 1415 * "#", "@" etc needs to be preserved instead of being encoded, 2. 1416 * UTF-8 char set needs to be used for encoding instead of default 1417 * platform one 3. Only other chars need to be converted 1418 */ 1419 return URIEncoderDecoder.encodeOthers(s); 1420 } catch (UnsupportedEncodingException e) { 1421 throw new RuntimeException(e.toString()); 1422 } 1423 } 1424 1425 private String decode(String s) { 1426 if (s == null) { 1427 return s; 1428 } 1429 1430 try { 1431 return URIEncoderDecoder.decode(s); 1432 } catch (UnsupportedEncodingException e) { 1433 throw new RuntimeException(e.toString()); 1434 } 1435 } 1436 1437 /** 1438 * Returns the textual string representation of this URI instance using the 1439 * US-ASCII encoding. 1440 * 1441 * @return the US-ASCII string representation of this URI. 1442 */ 1443 public String toASCIIString() { 1444 return encodeNonAscii(toString()); 1445 } 1446 1447 /** 1448 * Returns the textual string representation of this URI instance. 1449 * 1450 * @return the textual string representation of this URI. 
1451 */ 1452 @Override 1453 public String toString() { 1454 if (string == null) { 1455 StringBuilder result = new StringBuilder(); 1456 if (scheme != null) { 1457 result.append(scheme); 1458 result.append(':'); 1459 } 1460 if (opaque) { 1461 result.append(schemeSpecificPart); 1462 } else { 1463 if (authority != null) { 1464 result.append("//"); 1465 result.append(authority); 1466 } 1467 1468 if (path != null) { 1469 result.append(path); 1470 } 1471 1472 if (query != null) { 1473 result.append('?'); 1474 result.append(query); 1475 } 1476 } 1477 1478 if (fragment != null) { 1479 result.append('#'); 1480 result.append(fragment); 1481 } 1482 1483 string = result.toString(); 1484 } 1485 return string; 1486 } 1487 1488 /* 1489 * Form a string from the components of this URI, similarly to the 1490 * toString() method. But this method converts scheme and host to lowercase, 1491 * and converts escaped octets to lowercase. 1492 */ 1493 private String getHashString() { 1494 StringBuilder result = new StringBuilder(); 1495 if (scheme != null) { 1496 result.append(scheme.toLowerCase()); 1497 result.append(':'); 1498 } 1499 if (opaque) { 1500 result.append(schemeSpecificPart); 1501 } else { 1502 if (authority != null) { 1503 result.append("//"); 1504 if (host == null) { 1505 result.append(authority); 1506 } else { 1507 if (userInfo != null) { 1508 result.append(userInfo + "@"); 1509 } 1510 result.append(host.toLowerCase()); 1511 if (port != -1) { 1512 result.append(":" + port); 1513 } 1514 } 1515 } 1516 1517 if (path != null) { 1518 result.append(path); 1519 } 1520 1521 if (query != null) { 1522 result.append('?'); 1523 result.append(query); 1524 } 1525 } 1526 1527 if (fragment != null) { 1528 result.append('#'); 1529 result.append(fragment); 1530 } 1531 1532 return convertHexToLowerCase(result.toString()); 1533 } 1534 1535 /** 1536 * Converts this URI instance to a URL. 1537 * 1538 * @return the created URL representing the same resource as this URI. 
* @throws MalformedURLException
     *             if an error occurs while creating the URL or no protocol
     *             handler could be found.
     * @throws IllegalArgumentException
     *             if this URI is not absolute.
     */
    public URL toURL() throws MalformedURLException {
        if (!absolute) {
            throw new IllegalArgumentException("URI is not absolute: " + toString());
        }
        return new URL(toString());
    }

    /**
     * Restores this URI from a stream: reads the serialized {@code string}
     * field and re-parses it to rebuild the transient component fields.
     *
     * @throws IOException
     *             if reading fails or the stored string is not a valid URI;
     *             in the latter case the {@code URISyntaxException} is
     *             attached as the cause.
     */
    private void readObject(ObjectInputStream in) throws IOException,
            ClassNotFoundException {
        in.defaultReadObject();
        try {
            parseURI(string, false);
        } catch (URISyntaxException e) {
            // same message as before, but keep the parse failure as the cause
            throw new IOException(e.toString(), e);
        }
    }

    /**
     * Writes this URI to a stream. Only the {@code string} field is
     * non-transient, so it is computed first if it has not been built yet.
     */
    private void writeObject(ObjectOutputStream out) throws IOException,
            ClassNotFoundException {
        // call toString() to ensure the value of string field is calculated
        toString();
        out.defaultWriteObject();
    }
}