URI.java revision 118abc3050371812703e4fabf03f4399d01fb28c
1/* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18package java.net; 19 20import java.io.IOException; 21import java.io.ObjectInputStream; 22import java.io.ObjectOutputStream; 23import java.io.Serializable; 24import java.io.UnsupportedEncodingException; 25import java.util.Locale; 26import org.apache.harmony.luni.platform.INetworkSystem; 27import org.apache.harmony.luni.platform.Platform; 28 29/** 30 * This class represents an instance of a URI as defined by RFC 2396. 
31 */ 32public final class URI implements Comparable<URI>, Serializable { 33 34 private static final long serialVersionUID = -6052424284110960213l; 35 36 static final String UNRESERVED = "_-!.~\'()*"; 37 static final String PUNCTUATION = ",;:$&+="; 38 static final String RESERVED = PUNCTUATION + "?/[]@"; 39 static final String SOME_LEGAL = UNRESERVED + PUNCTUATION; 40 static final String ALL_LEGAL = UNRESERVED + RESERVED; 41 42 private String string; 43 private transient String scheme; 44 private transient String schemeSpecificPart; 45 private transient String authority; 46 private transient String userInfo; 47 private transient String host; 48 private transient int port = -1; 49 private transient String path; 50 private transient String query; 51 private transient String fragment; 52 private transient boolean opaque; 53 private transient boolean absolute; 54 private transient boolean serverAuthority = false; 55 56 private transient int hash = -1; 57 58 private URI() {} 59 60 /** 61 * Creates a new URI instance according to the given string {@code uri}. 62 * 63 * @param uri 64 * the textual URI representation to be parsed into a URI object. 65 * @throws URISyntaxException 66 * if the given string {@code uri} doesn't fit to the 67 * specification RFC2396 or could not be parsed correctly. 68 */ 69 public URI(String uri) throws URISyntaxException { 70 parseURI(uri, false); 71 } 72 73 /** 74 * Creates a new URI instance using the given arguments. This constructor 75 * first creates a temporary URI string from the given components. This 76 * string will be parsed later on to create the URI instance. 77 * <p> 78 * {@code [scheme:]scheme-specific-part[#fragment]} 79 * 80 * @param scheme 81 * the scheme part of the URI. 82 * @param ssp 83 * the scheme-specific-part of the URI. 84 * @param frag 85 * the fragment part of the URI. 
86 * @throws URISyntaxException 87 * if the temporary created string doesn't fit to the 88 * specification RFC2396 or could not be parsed correctly. 89 */ 90 public URI(String scheme, String ssp, String frag) 91 throws URISyntaxException { 92 StringBuilder uri = new StringBuilder(); 93 if (scheme != null) { 94 uri.append(scheme); 95 uri.append(':'); 96 } 97 if (ssp != null) { 98 // QUOTE ILLEGAL CHARACTERS 99 uri.append(quoteComponent(ssp, ALL_LEGAL)); 100 } 101 if (frag != null) { 102 uri.append('#'); 103 // QUOTE ILLEGAL CHARACTERS 104 uri.append(quoteComponent(frag, ALL_LEGAL)); 105 } 106 107 parseURI(uri.toString(), false); 108 } 109 110 /** 111 * Creates a new URI instance using the given arguments. This constructor 112 * first creates a temporary URI string from the given components. This 113 * string will be parsed later on to create the URI instance. 114 * <p> 115 * {@code [scheme:][user-info@]host[:port][path][?query][#fragment]} 116 * 117 * @param scheme 118 * the scheme part of the URI. 119 * @param userInfo 120 * the user information of the URI for authentication and 121 * authorization. 122 * @param host 123 * the host name of the URI. 124 * @param port 125 * the port number of the URI. 126 * @param path 127 * the path to the resource on the host. 128 * @param query 129 * the query part of the URI to specify parameters for the 130 * resource. 131 * @param fragment 132 * the fragment part of the URI. 133 * @throws URISyntaxException 134 * if the temporary created string doesn't fit to the 135 * specification RFC2396 or could not be parsed correctly. 
136 */ 137 public URI(String scheme, String userInfo, String host, int port, 138 String path, String query, String fragment) 139 throws URISyntaxException { 140 141 if (scheme == null && userInfo == null && host == null && path == null 142 && query == null && fragment == null) { 143 this.path = ""; 144 return; 145 } 146 147 if (scheme != null && path != null && path.length() > 0 148 && path.charAt(0) != '/') { 149 throw new URISyntaxException(path, "Relative path"); 150 } 151 152 StringBuilder uri = new StringBuilder(); 153 if (scheme != null) { 154 uri.append(scheme); 155 uri.append(':'); 156 } 157 158 if (userInfo != null || host != null || port != -1) { 159 uri.append("//"); 160 } 161 162 if (userInfo != null) { 163 // QUOTE ILLEGAL CHARACTERS in userInfo 164 uri.append(quoteComponent(userInfo, SOME_LEGAL)); 165 uri.append('@'); 166 } 167 168 if (host != null) { 169 // check for IPv6 addresses that hasn't been enclosed 170 // in square brackets 171 if (host.indexOf(':') != -1 && host.indexOf(']') == -1 172 && host.indexOf('[') == -1) { 173 host = "[" + host + "]"; 174 } 175 uri.append(host); 176 } 177 178 if (port != -1) { 179 uri.append(':'); 180 uri.append(port); 181 } 182 183 if (path != null) { 184 // QUOTE ILLEGAL CHARS 185 uri.append(quoteComponent(path, "/@" + SOME_LEGAL)); 186 } 187 188 if (query != null) { 189 uri.append('?'); 190 // QUOTE ILLEGAL CHARS 191 uri.append(quoteComponent(query, ALL_LEGAL)); 192 } 193 194 if (fragment != null) { 195 // QUOTE ILLEGAL CHARS 196 uri.append('#'); 197 uri.append(quoteComponent(fragment, ALL_LEGAL)); 198 } 199 200 parseURI(uri.toString(), true); 201 } 202 203 /** 204 * Creates a new URI instance using the given arguments. This constructor 205 * first creates a temporary URI string from the given components. This 206 * string will be parsed later on to create the URI instance. 207 * <p> 208 * {@code [scheme:]host[path][#fragment]} 209 * 210 * @param scheme 211 * the scheme part of the URI. 
* @param host
     *            the host name of the URI.
     * @param path
     *            the path to the resource on the host.
     * @param fragment
     *            the fragment part of the URI.
     * @throws URISyntaxException
     *             if the temporary created string doesn't fit to the
     *             specification RFC2396 or could not be parsed correctly.
     */
    public URI(String scheme, String host, String path, String fragment)
            throws URISyntaxException {
        // Delegates to the seven-argument constructor with no user-info,
        // no port (-1) and no query.
        this(scheme, null, host, -1, path, null, fragment);
    }

    /**
     * Creates a new URI instance using the given arguments. This constructor
     * first creates a temporary URI string from the given components. This
     * string will be parsed later on to create the URI instance.
     * <p>
     * {@code [scheme:][//authority][path][?query][#fragment]}
     *
     * @param scheme
     *            the scheme part of the URI.
     * @param authority
     *            the authority part of the URI.
     * @param path
     *            the path to the resource on the host.
     * @param query
     *            the query part of the URI to specify parameters for the
     *            resource.
     * @param fragment
     *            the fragment part of the URI.
     * @throws URISyntaxException
     *             if the temporary created string doesn't fit to the
     *             specification RFC2396 or could not be parsed correctly.
248 */ 249 public URI(String scheme, String authority, String path, String query, 250 String fragment) throws URISyntaxException { 251 if (scheme != null && path != null && path.length() > 0 252 && path.charAt(0) != '/') { 253 throw new URISyntaxException(path, "Relative path"); 254 } 255 256 StringBuilder uri = new StringBuilder(); 257 if (scheme != null) { 258 uri.append(scheme); 259 uri.append(':'); 260 } 261 if (authority != null) { 262 uri.append("//"); 263 // QUOTE ILLEGAL CHARS 264 uri.append(quoteComponent(authority, "@[]" + SOME_LEGAL)); 265 } 266 267 if (path != null) { 268 // QUOTE ILLEGAL CHARS 269 uri.append(quoteComponent(path, "/@" + SOME_LEGAL)); 270 } 271 if (query != null) { 272 // QUOTE ILLEGAL CHARS 273 uri.append('?'); 274 uri.append(quoteComponent(query, ALL_LEGAL)); 275 } 276 if (fragment != null) { 277 // QUOTE ILLEGAL CHARS 278 uri.append('#'); 279 uri.append(quoteComponent(fragment, ALL_LEGAL)); 280 } 281 282 parseURI(uri.toString(), false); 283 } 284 285 private void parseURI(String uri, boolean forceServer) throws URISyntaxException { 286 String temp = uri; 287 // assign uri string to the input value per spec 288 string = uri; 289 int index, index1, index2, index3; 290 // parse into Fragment, Scheme, and SchemeSpecificPart 291 // then parse SchemeSpecificPart if necessary 292 293 // Fragment 294 index = temp.indexOf('#'); 295 if (index != -1) { 296 // remove the fragment from the end 297 fragment = temp.substring(index + 1); 298 validateFragment(uri, fragment, index + 1); 299 temp = temp.substring(0, index); 300 } 301 302 // Scheme and SchemeSpecificPart 303 index = index1 = temp.indexOf(':'); 304 index2 = temp.indexOf('/'); 305 index3 = temp.indexOf('?'); 306 307 // if a '/' or '?' 
occurs before the first ':' the uri has no 308 // specified scheme, and is therefore not absolute 309 if (index != -1 && (index2 >= index || index2 == -1) 310 && (index3 >= index || index3 == -1)) { 311 // the characters up to the first ':' comprise the scheme 312 absolute = true; 313 scheme = temp.substring(0, index); 314 if (scheme.length() == 0) { 315 throw new URISyntaxException(uri, "Scheme expected", index); 316 } 317 validateScheme(uri, scheme, 0); 318 schemeSpecificPart = temp.substring(index + 1); 319 if (schemeSpecificPart.length() == 0) { 320 throw new URISyntaxException(uri, "Scheme-specific part expected", index + 1); 321 } 322 } else { 323 absolute = false; 324 schemeSpecificPart = temp; 325 } 326 327 if (scheme == null || schemeSpecificPart.length() > 0 328 && schemeSpecificPart.charAt(0) == '/') { 329 opaque = false; 330 // the URI is hierarchical 331 332 // Query 333 temp = schemeSpecificPart; 334 index = temp.indexOf('?'); 335 if (index != -1) { 336 query = temp.substring(index + 1); 337 temp = temp.substring(0, index); 338 validateQuery(uri, query, index2 + 1 + index); 339 } 340 341 // Authority and Path 342 if (temp.startsWith("//")) { 343 index = temp.indexOf('/', 2); 344 if (index != -1) { 345 authority = temp.substring(2, index); 346 path = temp.substring(index); 347 } else { 348 authority = temp.substring(2); 349 if (authority.length() == 0 && query == null 350 && fragment == null) { 351 throw new URISyntaxException(uri, "Authority expected", uri.length()); 352 } 353 354 path = ""; 355 // nothing left, so path is empty (not null, path should 356 // never be null) 357 } 358 359 if (authority.length() == 0) { 360 authority = null; 361 } else { 362 validateAuthority(uri, authority, index1 + 3); 363 } 364 } else { // no authority specified 365 path = temp; 366 } 367 368 int pathIndex = 0; 369 if (index2 > -1) { 370 pathIndex += index2; 371 } 372 if (index > -1) { 373 pathIndex += index; 374 } 375 validatePath(uri, path, pathIndex); 376 } else { 
// if not hierarchical, URI is opaque 377 opaque = true; 378 validateSsp(uri, schemeSpecificPart, index2 + 2 + index); 379 } 380 381 parseAuthority(forceServer); 382 } 383 384 private void validateScheme(String uri, String scheme, int index) 385 throws URISyntaxException { 386 // first char needs to be an alpha char 387 char ch = scheme.charAt(0); 388 if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'))) { 389 throw new URISyntaxException(uri, "Illegal character in scheme", 0); 390 } 391 392 try { 393 URIEncoderDecoder.validateSimple(scheme, "+-."); 394 } catch (URISyntaxException e) { 395 throw new URISyntaxException(uri, "Illegal character in scheme", index + e.getIndex()); 396 } 397 } 398 399 private void validateSsp(String uri, String ssp, int index) 400 throws URISyntaxException { 401 try { 402 URIEncoderDecoder.validate(ssp, ALL_LEGAL); 403 } catch (URISyntaxException e) { 404 throw new URISyntaxException(uri, 405 e.getReason() + " in schemeSpecificPart", index + e.getIndex()); 406 } 407 } 408 409 private void validateAuthority(String uri, String authority, int index) 410 throws URISyntaxException { 411 try { 412 URIEncoderDecoder.validate(authority, "@[]" + SOME_LEGAL); 413 } catch (URISyntaxException e) { 414 throw new URISyntaxException(uri, e.getReason() + " in authority", index + e.getIndex()); 415 } 416 } 417 418 private void validatePath(String uri, String path, int index) 419 throws URISyntaxException { 420 try { 421 URIEncoderDecoder.validate(path, "/@" + SOME_LEGAL); 422 } catch (URISyntaxException e) { 423 throw new URISyntaxException(uri, e.getReason() + " in path", index + e.getIndex()); 424 } 425 } 426 427 private void validateQuery(String uri, String query, int index) 428 throws URISyntaxException { 429 try { 430 URIEncoderDecoder.validate(query, ALL_LEGAL); 431 } catch (URISyntaxException e) { 432 throw new URISyntaxException(uri, e.getReason() + " in query", index + e.getIndex()); 433 434 } 435 } 436 437 private void 
validateFragment(String uri, String fragment, int index) 438 throws URISyntaxException { 439 try { 440 URIEncoderDecoder.validate(fragment, ALL_LEGAL); 441 } catch (URISyntaxException e) { 442 throw new URISyntaxException(uri, e.getReason() + " in fragment", index + e.getIndex()); 443 } 444 } 445 446 /** 447 * Parse the authority string into its component parts: user info, 448 * host, and port. This operation doesn't apply to registry URIs, and 449 * calling it on such <i>may</i> result in a syntax exception. 450 * 451 * @param forceServer true to always throw if the authority cannot be 452 * parsed. If false, this method may still throw for some kinds of 453 * errors; this unpredictable behavior is consistent with the RI. 454 */ 455 private void parseAuthority(boolean forceServer) throws URISyntaxException { 456 if (authority == null) { 457 return; 458 } 459 460 String tempUserInfo = null; 461 String temp = authority; 462 int index = temp.indexOf('@'); 463 int hostIndex = 0; 464 if (index != -1) { 465 // remove user info 466 tempUserInfo = temp.substring(0, index); 467 validateUserInfo(authority, tempUserInfo, 0); 468 temp = temp.substring(index + 1); // host[:port] is left 469 hostIndex = index + 1; 470 } 471 472 index = temp.lastIndexOf(':'); 473 int endIndex = temp.indexOf(']'); 474 475 String tempHost; 476 int tempPort = -1; 477 if (index != -1 && endIndex < index) { 478 // determine port and host 479 tempHost = temp.substring(0, index); 480 481 if (index < (temp.length() - 1)) { // port part is not empty 482 try { 483 tempPort = Integer.parseInt(temp.substring(index + 1)); 484 if (tempPort < 0) { 485 if (forceServer) { 486 throw new URISyntaxException(authority, 487 "Invalid port number", hostIndex + index + 1); 488 } 489 return; 490 } 491 } catch (NumberFormatException e) { 492 if (forceServer) { 493 throw new URISyntaxException(authority, 494 "Invalid port number", hostIndex + index + 1); 495 } 496 return; 497 } 498 } 499 } else { 500 tempHost = temp; 501 } 
502 503 if (tempHost.isEmpty()) { 504 if (forceServer) { 505 throw new URISyntaxException(authority, "Expected host", hostIndex); 506 } 507 return; 508 } 509 510 if (!isValidHost(forceServer, tempHost)) { 511 return; 512 } 513 514 // this is a server based uri, 515 // fill in the userInfo, host and port fields 516 userInfo = tempUserInfo; 517 host = tempHost; 518 port = tempPort; 519 serverAuthority = true; 520 } 521 522 private void validateUserInfo(String uri, String userInfo, int index) 523 throws URISyntaxException { 524 for (int i = 0; i < userInfo.length(); i++) { 525 char ch = userInfo.charAt(i); 526 if (ch == ']' || ch == '[') { 527 throw new URISyntaxException(uri, "Illegal character in userInfo", index + i); 528 } 529 } 530 } 531 532 /** 533 * Returns true if {@code host} is a well-formed host name or IP address. 534 * 535 * @param forceServer true to always throw if the host cannot be parsed. If 536 * false, this method may still throw for some kinds of errors; this 537 * unpredictable behavior is consistent with the RI. 538 */ 539 private boolean isValidHost(boolean forceServer, String host) throws URISyntaxException { 540 if (host.startsWith("[")) { 541 // IPv6 address 542 if (!host.endsWith("]")) { 543 throw new URISyntaxException(host, 544 "Expected a closing square bracket for IPv6 address", 0); 545 } 546 byte[] bytes = InetAddress.ipStringToByteArray(host); 547 /* 548 * The native IP parser may return 4 bytes for addresses like 549 * "[::FFFF:127.0.0.1]". This is allowed, but we must not accept 550 * IPv4-formatted addresses in square braces like "[127.0.0.1]". 
551 */ 552 if (bytes != null && (bytes.length == 16 || bytes.length == 4 && host.contains(":"))) { 553 return true; 554 } 555 throw new URISyntaxException(host, "Malformed IPv6 address"); 556 } 557 558 // '[' and ']' can only be the first char and last char 559 // of the host name 560 if (host.indexOf('[') != -1 || host.indexOf(']') != -1) { 561 throw new URISyntaxException(host, "Illegal character in host name", 0); 562 } 563 564 int index = host.lastIndexOf('.'); 565 if (index < 0 || index == host.length() - 1 566 || !Character.isDigit(host.charAt(index + 1))) { 567 // domain name 568 if (isValidDomainName(host)) { 569 return true; 570 } 571 if (forceServer) { 572 throw new URISyntaxException(host, "Illegal character in host name", 0); 573 } 574 return false; 575 } 576 577 // IPv4 address 578 byte[] bytes = InetAddress.ipStringToByteArray(host); 579 if (bytes != null && bytes.length == 4) { 580 return true; 581 } 582 583 if (forceServer) { 584 throw new URISyntaxException(host, "Malformed IPv4 address", 0); 585 } 586 return false; 587 } 588 589 private boolean isValidDomainName(String host) { 590 try { 591 URIEncoderDecoder.validateSimple(host, "-."); 592 } catch (URISyntaxException e) { 593 return false; 594 } 595 596 String lastLabel = null; 597 for (String token : host.split("\\.")) { 598 lastLabel = token; 599 if (lastLabel.startsWith("-") || lastLabel.endsWith("-")) { 600 return false; 601 } 602 } 603 604 if (lastLabel == null) { 605 return false; 606 } 607 608 if (!lastLabel.equals(host)) { 609 char ch = lastLabel.charAt(0); 610 if (ch >= '0' && ch <= '9') { 611 return false; 612 } 613 } 614 return true; 615 } 616 617 /** 618 * Quote illegal chars for each component, but not the others 619 * 620 * @param component java.lang.String the component to be converted 621 * @param legalSet the legal character set allowed in the component 622 * @return java.lang.String the converted string 623 */ 624 private String quoteComponent(String component, String legalSet) { 
625 try { 626 /* 627 * Use a different encoder than URLEncoder since: 1. chars like "/", 628 * "#", "@" etc needs to be preserved instead of being encoded, 2. 629 * UTF-8 char set needs to be used for encoding instead of default 630 * platform one 631 */ 632 return URIEncoderDecoder.quoteIllegal(component, legalSet); 633 } catch (UnsupportedEncodingException e) { 634 throw new RuntimeException(e.toString()); 635 } 636 } 637 638 /** 639 * Compares this URI with the given argument {@code uri}. This method will 640 * return a negative value if this URI instance is less than the given 641 * argument and a positive value if this URI instance is greater than the 642 * given argument. The return value {@code 0} indicates that the two 643 * instances represent the same URI. To define the order the single parts of 644 * the URI are compared with each other. String components will be ordered 645 * in the natural case-sensitive way. A hierarchical URI is less than an 646 * opaque URI and if one part is {@code null} the URI with the undefined 647 * part is less than the other one. 648 * 649 * @param uri 650 * the URI this instance has to compare with. 651 * @return the value representing the order of the two instances. 
652 */ 653 public int compareTo(URI uri) { 654 int ret; 655 656 // compare schemes 657 if (scheme == null && uri.scheme != null) { 658 return -1; 659 } else if (scheme != null && uri.scheme == null) { 660 return 1; 661 } else if (scheme != null && uri.scheme != null) { 662 ret = scheme.compareToIgnoreCase(uri.scheme); 663 if (ret != 0) { 664 return ret; 665 } 666 } 667 668 // compare opacities 669 if (!opaque && uri.opaque) { 670 return -1; 671 } else if (opaque && !uri.opaque) { 672 return 1; 673 } else if (opaque && uri.opaque) { 674 ret = schemeSpecificPart.compareTo(uri.schemeSpecificPart); 675 if (ret != 0) { 676 return ret; 677 } 678 } else { 679 680 // otherwise both must be hierarchical 681 682 // compare authorities 683 if (authority != null && uri.authority == null) { 684 return 1; 685 } else if (authority == null && uri.authority != null) { 686 return -1; 687 } else if (authority != null && uri.authority != null) { 688 if (host != null && uri.host != null) { 689 // both are server based, so compare userInfo, host, port 690 if (userInfo != null && uri.userInfo == null) { 691 return 1; 692 } else if (userInfo == null && uri.userInfo != null) { 693 return -1; 694 } else if (userInfo != null && uri.userInfo != null) { 695 ret = userInfo.compareTo(uri.userInfo); 696 if (ret != 0) { 697 return ret; 698 } 699 } 700 701 // userInfo's are the same, compare hostname 702 ret = host.compareToIgnoreCase(uri.host); 703 if (ret != 0) { 704 return ret; 705 } 706 707 // compare port 708 if (port != uri.port) { 709 return port - uri.port; 710 } 711 } else { // one or both are registry based, compare the whole 712 // authority 713 ret = authority.compareTo(uri.authority); 714 if (ret != 0) { 715 return ret; 716 } 717 } 718 } 719 720 // authorities are the same 721 // compare paths 722 ret = path.compareTo(uri.path); 723 if (ret != 0) { 724 return ret; 725 } 726 727 // compare queries 728 729 if (query != null && uri.query == null) { 730 return 1; 731 } else if (query == 
null && uri.query != null) { 732 return -1; 733 } else if (query != null && uri.query != null) { 734 ret = query.compareTo(uri.query); 735 if (ret != 0) { 736 return ret; 737 } 738 } 739 } 740 741 // everything else is identical, so compare fragments 742 if (fragment != null && uri.fragment == null) { 743 return 1; 744 } else if (fragment == null && uri.fragment != null) { 745 return -1; 746 } else if (fragment != null && uri.fragment != null) { 747 ret = fragment.compareTo(uri.fragment); 748 if (ret != 0) { 749 return ret; 750 } 751 } 752 753 // identical 754 return 0; 755 } 756 757 /** 758 * Returns the URI formed by parsing {@code uri}. This method behaves 759 * identically to the string constructor but throws a different exception 760 * on failure. The constructor fails with a checked {@link 761 * URISyntaxException}; this method fails with an unchecked {@link 762 * IllegalArgumentException}. 763 */ 764 public static URI create(String uri) { 765 try { 766 return new URI(uri); 767 } catch (URISyntaxException e) { 768 throw new IllegalArgumentException(e.getMessage()); 769 } 770 } 771 772 private URI duplicate() { 773 URI clone = new URI(); 774 clone.absolute = absolute; 775 clone.authority = authority; 776 clone.fragment = fragment; 777 clone.host = host; 778 clone.opaque = opaque; 779 clone.path = path; 780 clone.port = port; 781 clone.query = query; 782 clone.scheme = scheme; 783 clone.schemeSpecificPart = schemeSpecificPart; 784 clone.userInfo = userInfo; 785 clone.serverAuthority = serverAuthority; 786 return clone; 787 } 788 789 /* 790 * Takes a string that may contain hex sequences like %F1 or %2b and 791 * converts the hex values following the '%' to lowercase 792 */ 793 private String convertHexToLowerCase(String s) { 794 StringBuilder result = new StringBuilder(""); 795 if (s.indexOf('%') == -1) { 796 return s; 797 } 798 799 int index, prevIndex = 0; 800 while ((index = s.indexOf('%', prevIndex)) != -1) { 801 result.append(s.substring(prevIndex, index + 
1)); 802 result.append(s.substring(index + 1, index + 3).toLowerCase(Locale.US)); 803 index += 3; 804 prevIndex = index; 805 } 806 return result.toString(); 807 } 808 809 /** 810 * Returns true if {@code first} and {@code second} are equal after 811 * unescaping hex sequences like %F1 and %2b. 812 */ 813 private boolean escapedEquals(String first, String second) { 814 if (first.indexOf('%') != second.indexOf('%')) { 815 return first.equals(second); 816 } 817 818 int index, prevIndex = 0; 819 while ((index = first.indexOf('%', prevIndex)) != -1 820 && second.indexOf('%', prevIndex) == index) { 821 boolean match = first.substring(prevIndex, index).equals( 822 second.substring(prevIndex, index)); 823 if (!match) { 824 return false; 825 } 826 827 match = first.substring(index + 1, index + 3).equalsIgnoreCase( 828 second.substring(index + 1, index + 3)); 829 if (!match) { 830 return false; 831 } 832 833 index += 3; 834 prevIndex = index; 835 } 836 return first.substring(prevIndex).equals(second.substring(prevIndex)); 837 } 838 839 /** 840 * Compares this URI instance with the given argument {@code o} and 841 * determines if both are equal. Two URI instances are equal if all single 842 * parts are identical in their meaning. 843 * 844 * @param o 845 * the URI this instance has to be compared with. 846 * @return {@code true} if both URI instances point to the same resource, 847 * {@code false} otherwise. 
848 */ 849 @Override 850 public boolean equals(Object o) { 851 if (!(o instanceof URI)) { 852 return false; 853 } 854 URI uri = (URI) o; 855 856 if (uri.fragment == null && fragment != null || uri.fragment != null 857 && fragment == null) { 858 return false; 859 } else if (uri.fragment != null && fragment != null) { 860 if (!escapedEquals(uri.fragment, fragment)) { 861 return false; 862 } 863 } 864 865 if (uri.scheme == null && scheme != null || uri.scheme != null 866 && scheme == null) { 867 return false; 868 } else if (uri.scheme != null && scheme != null) { 869 if (!uri.scheme.equalsIgnoreCase(scheme)) { 870 return false; 871 } 872 } 873 874 if (uri.opaque && opaque) { 875 return escapedEquals(uri.schemeSpecificPart, 876 schemeSpecificPart); 877 } else if (!uri.opaque && !opaque) { 878 if (!escapedEquals(path, uri.path)) { 879 return false; 880 } 881 882 if (uri.query != null && query == null || uri.query == null 883 && query != null) { 884 return false; 885 } else if (uri.query != null && query != null) { 886 if (!escapedEquals(uri.query, query)) { 887 return false; 888 } 889 } 890 891 if (uri.authority != null && authority == null 892 || uri.authority == null && authority != null) { 893 return false; 894 } else if (uri.authority != null && authority != null) { 895 if (uri.host != null && host == null || uri.host == null 896 && host != null) { 897 return false; 898 } else if (uri.host == null && host == null) { 899 // both are registry based, so compare the whole authority 900 return escapedEquals(uri.authority, authority); 901 } else { // uri.host != null && host != null, so server-based 902 if (!host.equalsIgnoreCase(uri.host)) { 903 return false; 904 } 905 906 if (port != uri.port) { 907 return false; 908 } 909 910 if (uri.userInfo != null && userInfo == null 911 || uri.userInfo == null && userInfo != null) { 912 return false; 913 } else if (uri.userInfo != null && userInfo != null) { 914 return escapedEquals(userInfo, uri.userInfo); 915 } else { 916 return 
true; 917 } 918 } 919 } else { 920 // no authority 921 return true; 922 } 923 924 } else { 925 // one is opaque, the other hierarchical 926 return false; 927 } 928 } 929 930 /** 931 * Gets the decoded authority part of this URI. 932 * 933 * @return the decoded authority part or {@code null} if undefined. 934 */ 935 public String getAuthority() { 936 return decode(authority); 937 } 938 939 /** 940 * Gets the decoded fragment part of this URI. 941 * 942 * @return the decoded fragment part or {@code null} if undefined. 943 */ 944 public String getFragment() { 945 return decode(fragment); 946 } 947 948 /** 949 * Gets the host part of this URI. 950 * 951 * @return the host part or {@code null} if undefined. 952 */ 953 public String getHost() { 954 return host; 955 } 956 957 /** 958 * Gets the decoded path part of this URI. 959 * 960 * @return the decoded path part or {@code null} if undefined. 961 */ 962 public String getPath() { 963 return decode(path); 964 } 965 966 /** 967 * Gets the port number of this URI. 968 * 969 * @return the port number or {@code -1} if undefined. 970 */ 971 public int getPort() { 972 return port; 973 } 974 975 /** @hide */ 976 public int getEffectivePort() { 977 return getEffectivePort(scheme, port); 978 } 979 980 /** 981 * Returns the port to use for {@code scheme} connections will use when 982 * {@link #getPort} returns {@code specifiedPort}. 983 * 984 * @hide 985 */ 986 public static int getEffectivePort(String scheme, int specifiedPort) { 987 if (specifiedPort != -1) { 988 return specifiedPort; 989 } 990 991 if ("http".equalsIgnoreCase(scheme)) { 992 return 80; 993 } else if ("https".equalsIgnoreCase(scheme)) { 994 return 443; 995 } else { 996 return -1; 997 } 998 } 999 1000 /** 1001 * Gets the decoded query part of this URI. 1002 * 1003 * @return the decoded query part or {@code null} if undefined. 
*/
    public String getQuery() {
        return decode(query);
    }

    /**
     * Gets the authority part of this URI in raw form, i.e. the stored
     * {@code authority} field without passing it through {@code decode}.
     *
     * @return the encoded authority part or {@code null} if undefined.
     */
    public String getRawAuthority() {
        return authority;
    }

    /**
     * Gets the fragment part of this URI in raw form, i.e. the stored
     * {@code fragment} field without passing it through {@code decode}.
     *
     * @return the encoded fragment part or {@code null} if undefined.
     */
    public String getRawFragment() {
        return fragment;
    }

    /**
     * Gets the path part of this URI in raw form, i.e. the stored
     * {@code path} field without passing it through {@code decode}.
     *
     * @return the encoded path part or {@code null} if undefined.
     */
    public String getRawPath() {
        return path;
    }

    /**
     * Gets the query part of this URI in raw form, i.e. the stored
     * {@code query} field without passing it through {@code decode}.
     *
     * @return the encoded query part or {@code null} if undefined.
     */
    public String getRawQuery() {
        return query;
    }

    /**
     * Gets the scheme-specific part of this URI in raw form, i.e. the stored
     * {@code schemeSpecificPart} field without passing it through
     * {@code decode}.
     *
     * @return the encoded scheme-specific part or {@code null} if undefined.
     */
    public String getRawSchemeSpecificPart() {
        return schemeSpecificPart;
    }

    /**
     * Gets the user-info part of this URI in raw form, i.e. the stored
     * {@code userInfo} field without passing it through {@code decode}.
     *
     * @return the encoded user-info part or {@code null} if undefined.
     */
    public String getRawUserInfo() {
        return userInfo;
    }

    /**
     * Gets the scheme part of this URI.
     *
     * @return the scheme part or {@code null} if undefined.
     */
    public String getScheme() {
        return scheme;
    }

    /**
     * Gets the decoded scheme-specific part of this URI.
     *
     * @return the decoded scheme-specific part or {@code null} if undefined.
     */
    public String getSchemeSpecificPart() {
        return decode(schemeSpecificPart);
    }

    /**
     * Gets the decoded user-info part of this URI.
*
     * @return the decoded user-info part or {@code null} if undefined.
     */
    public String getUserInfo() {
        return decode(userInfo);
    }

    /**
     * Gets the hashcode value of this URI instance. The value is derived from
     * {@code getHashString()} on first use and cached in {@code hash}.
     *
     * @return the appropriate hashcode value.
     */
    @Override
    public int hashCode() {
        // -1 marks "not computed yet"; a hash that really is -1 is simply
        // recomputed on every call
        if (hash == -1) {
            hash = getHashString().hashCode();
        }
        return hash;
    }

    /**
     * Indicates whether this URI is absolute, which means that a scheme part is
     * defined in this URI.
     *
     * @return {@code true} if this URI is absolute, {@code false} otherwise.
     */
    public boolean isAbsolute() {
        return absolute;
    }

    /**
     * Indicates whether this URI is opaque or not. An opaque URI is absolute
     * and has a scheme-specific part which does not start with a slash
     * character. All parts except scheme, scheme-specific and fragment are
     * undefined.
     *
     * @return {@code true} if the URI is opaque, {@code false} otherwise.
     */
    public boolean isOpaque() {
        return opaque;
    }

    /*
     * normalize path, and return the resulting string
     *
     * The path is split at '/' into segments; "." segments are dropped and a
     * ".." segment is dropped together with the closest preceding segment
     * that is still included and is not ".." itself. If the first segment of
     * the result contains a ':', "./" is prepended.
     *
     * NOTE(review): the segment count skips a '/' that is directly followed
     * by another '/', while the split loop below does not. Tracing "a///b"
     * by hand gives "a//" (the "b" is lost), and "a/////b" appears to index
     * past the end of segList -- confirm and decide how runs of slashes
     * should be treated.
     */
    private String normalize(String path) {
        // count the number of '/'s, to determine number of segments
        int index = -1;
        int pathLength = path.length();
        int size = 0;
        // a path that does not start with '/' begins with a segment
        if (pathLength > 0 && path.charAt(0) != '/') {
            size++;
        }
        while ((index = path.indexOf('/', index + 1)) != -1) {
            // a '/' starts a new segment unless it is the last character or
            // is followed by another '/'
            if (index + 1 < pathLength && path.charAt(index + 1) != '/') {
                size++;
            }
        }

        String[] segList = new String[size];
        boolean[] include = new boolean[size];

        // break the path into segments and store in the list
        int current = 0;
        int index2;
        // skip a leading '/'; it is re-added when the path is rebuilt
        index = (pathLength > 0 && path.charAt(0) == '/') ? 1 : 0;
        while ((index2 = path.indexOf('/', index + 1)) != -1) {
            segList[current++] = path.substring(index, index2);
            index = index2 + 1;
        }

        // if current==size, then the last character was a slash
        // and there are no more segments
        if (current < size) {
            segList[current] = path.substring(index);
        }

        // determine which segments get included in the normalized path
        for (int i = 0; i < size; i++) {
            include[i] = true;
            if (segList[i].equals("..")) {
                int remove = i - 1;
                // search back to find a segment to remove, if possible
                while (remove > -1 && !include[remove]) {
                    remove--;
                }
                // if we find a segment to remove, remove it and the ".."
                // segment
                if (remove > -1 && !segList[remove].equals("..")) {
                    include[remove] = false;
                    include[i] = false;
                }
            } else if (segList[i].equals(".")) {
                include[i] = false;
            }
        }

        // put the path back together
        StringBuilder newPath = new StringBuilder();
        if (path.startsWith("/")) {
            newPath.append('/');
        }

        for (int i = 0; i < segList.length; i++) {
            if (include[i]) {
                newPath.append(segList[i]);
                newPath.append('/');
            }
        }

        // if the path did NOT previously end with a slash, there is at least
        // one segment and the last segment is still used, then delete the
        // extra trailing '/' appended by the loop above
        if (!path.endsWith("/") && segList.length > 0
                && include[segList.length - 1]) {
            newPath.deleteCharAt(newPath.length() - 1);
        }

        String result = newPath.toString();

        // check for a ':' in the first segment if one exists,
        // prepend "./" to normalize
        index = result.indexOf(':');
        index2 = result.indexOf('/');
        if (index != -1 && (index < index2 || index2 == -1)) {
            newPath.insert(0, "./");
            result = newPath.toString();
        }
        return result;
    }

    /**
     * Normalizes the path part of
this URI. 1216 * 1217 * @return an URI object which represents this instance with a normalized 1218 * path. 1219 */ 1220 public URI normalize() { 1221 if (opaque) { 1222 return this; 1223 } 1224 String normalizedPath = normalize(path); 1225 // if the path is already normalized, return this 1226 if (path.equals(normalizedPath)) { 1227 return this; 1228 } 1229 // get an exact copy of the URI re-calculate the scheme specific part 1230 // since the path of the normalized URI is different from this URI. 1231 URI result = duplicate(); 1232 result.path = normalizedPath; 1233 result.setSchemeSpecificPart(); 1234 return result; 1235 } 1236 1237 /** 1238 * Tries to parse the authority component of this URI to divide it into the 1239 * host, port, and user-info. If this URI is already determined as a 1240 * ServerAuthority this instance will be returned without changes. 1241 * 1242 * @return this instance with the components of the parsed server authority. 1243 * @throws URISyntaxException 1244 * if the authority part could not be parsed as a server-based 1245 * authority. 1246 */ 1247 public URI parseServerAuthority() throws URISyntaxException { 1248 if (!serverAuthority) { 1249 parseAuthority(true); 1250 } 1251 return this; 1252 } 1253 1254 /** 1255 * Makes the given URI {@code relative} to a relative URI against the URI 1256 * represented by this instance. 1257 * 1258 * @param relative 1259 * the URI which has to be relativized against this URI. 1260 * @return the relative URI. 1261 */ 1262 public URI relativize(URI relative) { 1263 if (relative.opaque || opaque) { 1264 return relative; 1265 } 1266 1267 if (scheme == null ? relative.scheme != null : !scheme 1268 .equals(relative.scheme)) { 1269 return relative; 1270 } 1271 1272 if (authority == null ? 
relative.authority != null : !authority 1273 .equals(relative.authority)) { 1274 return relative; 1275 } 1276 1277 // normalize both paths 1278 String thisPath = normalize(path); 1279 String relativePath = normalize(relative.path); 1280 1281 /* 1282 * if the paths aren't equal, then we need to determine if this URI's 1283 * path is a parent path (begins with) the relative URI's path 1284 */ 1285 if (!thisPath.equals(relativePath)) { 1286 // if this URI's path doesn't end in a '/', add one 1287 if (!thisPath.endsWith("/")) { 1288 thisPath = thisPath + '/'; 1289 } 1290 /* 1291 * if the relative URI's path doesn't start with this URI's path, 1292 * then just return the relative URI; the URIs have nothing in 1293 * common 1294 */ 1295 if (!relativePath.startsWith(thisPath)) { 1296 return relative; 1297 } 1298 } 1299 1300 URI result = new URI(); 1301 result.fragment = relative.fragment; 1302 result.query = relative.query; 1303 // the result URI is the remainder of the relative URI's path 1304 result.path = relativePath.substring(thisPath.length()); 1305 result.setSchemeSpecificPart(); 1306 return result; 1307 } 1308 1309 /** 1310 * Resolves the given URI {@code relative} against the URI represented by 1311 * this instance. 1312 * 1313 * @param relative 1314 * the URI which has to be resolved against this URI. 1315 * @return the resolved URI. 1316 */ 1317 public URI resolve(URI relative) { 1318 if (relative.absolute || opaque) { 1319 return relative; 1320 } 1321 1322 URI result; 1323 if (relative.path.isEmpty() && relative.scheme == null 1324 && relative.authority == null && relative.query == null 1325 && relative.fragment != null) { 1326 // if the relative URI only consists of fragment, 1327 // the resolved URI is very similar to this URI, 1328 // except that it has the fragment from the relative URI. 
1329 result = duplicate(); 1330 result.fragment = relative.fragment; 1331 // no need to re-calculate the scheme specific part, 1332 // since fragment is not part of scheme specific part. 1333 return result; 1334 } 1335 1336 if (relative.authority != null) { 1337 // if the relative URI has authority, 1338 // the resolved URI is almost the same as the relative URI, 1339 // except that it has the scheme of this URI. 1340 result = relative.duplicate(); 1341 result.scheme = scheme; 1342 result.absolute = absolute; 1343 } else { 1344 // since relative URI has no authority, 1345 // the resolved URI is very similar to this URI, 1346 // except that it has the query and fragment of the relative URI, 1347 // and the path is different. 1348 result = duplicate(); 1349 result.fragment = relative.fragment; 1350 result.query = relative.query; 1351 if (relative.path.startsWith("/")) { 1352 result.path = relative.path; 1353 } else { 1354 // resolve a relative reference 1355 int endIndex = path.lastIndexOf('/') + 1; 1356 result.path = normalize(path.substring(0, endIndex) 1357 + relative.path); 1358 } 1359 // re-calculate the scheme specific part since 1360 // query and path of the resolved URI is different from this URI. 1361 result.setSchemeSpecificPart(); 1362 } 1363 return result; 1364 } 1365 1366 /** 1367 * Helper method used to re-calculate the scheme specific part of the 1368 * resolved or normalized URIs 1369 */ 1370 private void setSchemeSpecificPart() { 1371 // ssp = [//authority][path][?query] 1372 StringBuilder ssp = new StringBuilder(); 1373 if (authority != null) { 1374 ssp.append("//" + authority); 1375 } 1376 if (path != null) { 1377 ssp.append(path); 1378 } 1379 if (query != null) { 1380 ssp.append("?" + query); 1381 } 1382 schemeSpecificPart = ssp.toString(); 1383 // reset string, so that it can be re-calculated correctly when asked. 
1384 string = null; 1385 } 1386 1387 /** 1388 * Creates a new URI instance by parsing the given string {@code relative} 1389 * and resolves the created URI against the URI represented by this 1390 * instance. 1391 * 1392 * @param relative 1393 * the given string to create the new URI instance which has to 1394 * be resolved later on. 1395 * @return the created and resolved URI. 1396 */ 1397 public URI resolve(String relative) { 1398 return resolve(create(relative)); 1399 } 1400 1401 /** 1402 * Encode unicode chars that are not part of US-ASCII char set into the 1403 * escaped form 1404 * 1405 * i.e. The Euro currency symbol is encoded as "%E2%82%AC". 1406 */ 1407 private String encodeNonAscii(String s) { 1408 try { 1409 /* 1410 * Use a different encoder than URLEncoder since: 1. chars like "/", 1411 * "#", "@" etc needs to be preserved instead of being encoded, 2. 1412 * UTF-8 char set needs to be used for encoding instead of default 1413 * platform one 3. Only other chars need to be converted 1414 */ 1415 return URIEncoderDecoder.encodeOthers(s); 1416 } catch (UnsupportedEncodingException e) { 1417 throw new RuntimeException(e.toString()); 1418 } 1419 } 1420 1421 private String decode(String s) { 1422 if (s == null) { 1423 return s; 1424 } 1425 1426 try { 1427 return URIEncoderDecoder.decode(s); 1428 } catch (UnsupportedEncodingException e) { 1429 throw new RuntimeException(e.toString()); 1430 } 1431 } 1432 1433 /** 1434 * Returns the textual string representation of this URI instance using the 1435 * US-ASCII encoding. 1436 * 1437 * @return the US-ASCII string representation of this URI. 1438 */ 1439 public String toASCIIString() { 1440 return encodeNonAscii(toString()); 1441 } 1442 1443 /** 1444 * Returns the textual string representation of this URI instance. 1445 * 1446 * @return the textual string representation of this URI. 
1447 */ 1448 @Override 1449 public String toString() { 1450 if (string == null) { 1451 StringBuilder result = new StringBuilder(); 1452 if (scheme != null) { 1453 result.append(scheme); 1454 result.append(':'); 1455 } 1456 if (opaque) { 1457 result.append(schemeSpecificPart); 1458 } else { 1459 if (authority != null) { 1460 result.append("//"); 1461 result.append(authority); 1462 } 1463 1464 if (path != null) { 1465 result.append(path); 1466 } 1467 1468 if (query != null) { 1469 result.append('?'); 1470 result.append(query); 1471 } 1472 } 1473 1474 if (fragment != null) { 1475 result.append('#'); 1476 result.append(fragment); 1477 } 1478 1479 string = result.toString(); 1480 } 1481 return string; 1482 } 1483 1484 /* 1485 * Form a string from the components of this URI, similarly to the 1486 * toString() method. But this method converts scheme and host to lowercase, 1487 * and converts escaped octets to lowercase. 1488 */ 1489 private String getHashString() { 1490 StringBuilder result = new StringBuilder(); 1491 if (scheme != null) { 1492 result.append(scheme.toLowerCase(Locale.US)); 1493 result.append(':'); 1494 } 1495 if (opaque) { 1496 result.append(schemeSpecificPart); 1497 } else { 1498 if (authority != null) { 1499 result.append("//"); 1500 if (host == null) { 1501 result.append(authority); 1502 } else { 1503 if (userInfo != null) { 1504 result.append(userInfo + "@"); 1505 } 1506 result.append(host.toLowerCase(Locale.US)); 1507 if (port != -1) { 1508 result.append(":" + port); 1509 } 1510 } 1511 } 1512 1513 if (path != null) { 1514 result.append(path); 1515 } 1516 1517 if (query != null) { 1518 result.append('?'); 1519 result.append(query); 1520 } 1521 } 1522 1523 if (fragment != null) { 1524 result.append('#'); 1525 result.append(fragment); 1526 } 1527 1528 return convertHexToLowerCase(result.toString()); 1529 } 1530 1531 /** 1532 * Converts this URI instance to a URL. 1533 * 1534 * @return the created URL representing the same resource as this URI. 
* @throws MalformedURLException
     *             if an error occurs while creating the URL or no protocol
     *             handler could be found.
     * @throws IllegalArgumentException
     *             if this URI is not absolute.
     */
    public URL toURL() throws MalformedURLException {
        if (!absolute) {
            throw new IllegalArgumentException("URI is not absolute: " + toString());
        }
        return new URL(toString());
    }

    /*
     * Restores this URI from a stream. Only the string form is serialized;
     * all other fields are transient and are rebuilt by parsing that string
     * again.
     *
     * Throws IOException if the stored string can no longer be parsed; the
     * URISyntaxException is attached as the cause.
     */
    private void readObject(ObjectInputStream in) throws IOException,
            ClassNotFoundException {
        in.defaultReadObject();
        // Deserialization does not run this class's constructors or field
        // initializers, so the transient fields start out as 0/false/null.
        // Restore the two whose declared initial value is not the type
        // default before re-parsing: -1 stands for "no port" and for "hash
        // not computed yet". Without this, hashCode() would take the 0 left
        // in 'hash' for a cached value and return it for every
        // deserialized URI.
        port = -1;
        hash = -1;
        try {
            parseURI(string, false);
        } catch (URISyntaxException e) {
            // same message as before, but keep the cause for diagnosis
            IOException failure = new IOException(e.toString());
            failure.initCause(e);
            throw failure;
        }
    }

    /*
     * Writes this URI to a stream. The string field is the only
     * non-transient state, so it has to be filled in before the default
     * serialization runs.
     */
    private void writeObject(ObjectOutputStream out) throws IOException,
            ClassNotFoundException {
        // call toString() to ensure the value of string field is calculated
        toString();
        out.defaultWriteObject();
    }
}