URI.java revision fdb2704414a9ed92394ada0d1395e4db86889465
1/* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18package java.net; 19 20import java.io.IOException; 21import java.io.ObjectInputStream; 22import java.io.ObjectOutputStream; 23import java.io.Serializable; 24import java.io.UnsupportedEncodingException; 25import java.util.StringTokenizer; 26 27import org.apache.harmony.luni.util.Msg; 28 29/** 30 * This class represents an instance of a URI as defined by RFC 2396. 
31 */ 32public final class URI implements Comparable<URI>, Serializable { 33 34 private static final long serialVersionUID = -6052424284110960213l; 35 36 static final String unreserved = "_-!.~\'()*"; //$NON-NLS-1$ 37 38 static final String punct = ",;:$&+="; //$NON-NLS-1$ 39 40 static final String reserved = punct + "?/[]@"; //$NON-NLS-1$ 41 42 static final String someLegal = unreserved + punct; 43 44 static final String allLegal = unreserved + reserved; 45 46 private String string; 47 48 private transient String scheme; 49 50 private transient String schemespecificpart; 51 52 private transient String authority; 53 54 private transient String userinfo; 55 56 private transient String host; 57 58 private transient int port = -1; 59 60 private transient String path; 61 62 private transient String query; 63 64 private transient String fragment; 65 66 private transient boolean opaque; 67 68 private transient boolean absolute; 69 70 private transient boolean serverAuthority = false; 71 72 private transient int hash = -1; 73 74 private URI() { 75 } 76 77 public URI(String uri) throws URISyntaxException { 78 new Helper().parseURI(uri, false); 79 } 80 81 public URI(String scheme, String ssp, String frag) 82 throws URISyntaxException { 83 StringBuffer uri = new StringBuffer(); 84 if (scheme != null) { 85 uri.append(scheme); 86 uri.append(':'); 87 } 88 if (ssp != null) { 89 // QUOTE ILLEGAL CHARACTERS 90 uri.append(quoteComponent(ssp, allLegal)); 91 } 92 if (frag != null) { 93 uri.append('#'); 94 // QUOTE ILLEGAL CHARACTERS 95 uri.append(quoteComponent(frag, allLegal)); 96 } 97 98 new Helper().parseURI(uri.toString(), false); 99 } 100 101 public URI(String scheme, String userinfo, String host, int port, 102 String path, String query, String fragment) 103 throws URISyntaxException { 104 105 if (scheme == null && userinfo == null && host == null && path == null 106 && query == null && fragment == null) { 107 this.path = ""; //$NON-NLS-1$ 108 return; 109 } 110 111 if (scheme != 
null && path != null && path.length() > 0 112 && path.charAt(0) != '/') { 113 throw new URISyntaxException(path, Msg.getString("K0302")); //$NON-NLS-1$ 114 } 115 116 StringBuffer uri = new StringBuffer(); 117 if (scheme != null) { 118 uri.append(scheme); 119 uri.append(':'); 120 } 121 122 if (userinfo != null || host != null || port != -1) { 123 uri.append("//"); //$NON-NLS-1$ 124 } 125 126 if (userinfo != null) { 127 // QUOTE ILLEGAL CHARACTERS in userinfo 128 uri.append(quoteComponent(userinfo, someLegal)); 129 uri.append('@'); 130 } 131 132 if (host != null) { 133 // check for ipv6 addresses that hasn't been enclosed 134 // in square brackets 135 if (host.indexOf(':') != -1 && host.indexOf(']') == -1 136 && host.indexOf('[') == -1) { 137 host = "[" + host + "]"; //$NON-NLS-1$ //$NON-NLS-2$ 138 } 139 uri.append(host); 140 } 141 142 if (port != -1) { 143 uri.append(':'); 144 uri.append(port); 145 } 146 147 if (path != null) { 148 // QUOTE ILLEGAL CHARS 149 uri.append(quoteComponent(path, "/@" + someLegal)); //$NON-NLS-1$ 150 } 151 152 if (query != null) { 153 uri.append('?'); 154 // QUOTE ILLEGAL CHARS 155 uri.append(quoteComponent(query, allLegal)); 156 } 157 158 if (fragment != null) { 159 // QUOTE ILLEGAL CHARS 160 uri.append('#'); 161 uri.append(quoteComponent(fragment, allLegal)); 162 } 163 164 new Helper().parseURI(uri.toString(), true); 165 } 166 167 public URI(String scheme, String host, String path, String fragment) 168 throws URISyntaxException { 169 this(scheme, null, host, -1, path, null, fragment); 170 } 171 172 public URI(String scheme, String authority, String path, String query, 173 String fragment) throws URISyntaxException { 174 if (scheme != null && path != null && path.length() > 0 175 && path.charAt(0) != '/') { 176 throw new URISyntaxException(path, Msg.getString("K0302")); //$NON-NLS-1$ 177 } 178 179 StringBuffer uri = new StringBuffer(); 180 if (scheme != null) { 181 uri.append(scheme); 182 uri.append(':'); 183 } 184 if (authority != null) 
{ 185 uri.append("//"); //$NON-NLS-1$ 186 // QUOTE ILLEGAL CHARS 187 uri.append(quoteComponent(authority, "@[]" + someLegal)); //$NON-NLS-1$ 188 } 189 190 if (path != null) { 191 // QUOTE ILLEGAL CHARS 192 uri.append(quoteComponent(path, "/@" + someLegal)); //$NON-NLS-1$ 193 } 194 if (query != null) { 195 // QUOTE ILLEGAL CHARS 196 uri.append('?'); 197 uri.append(quoteComponent(query, allLegal)); 198 } 199 if (fragment != null) { 200 // QUOTE ILLEGAL CHARS 201 uri.append('#'); 202 uri.append(quoteComponent(fragment, allLegal)); 203 } 204 205 new Helper().parseURI(uri.toString(), false); 206 } 207 208 private class Helper { 209 210 private void parseURI(String uri, boolean forceServer) 211 throws URISyntaxException { 212 String temp = uri; 213 // assign uri string to the input value per spec 214 string = uri; 215 int index, index1, index2, index3; 216 // parse into Fragment, Scheme, and SchemeSpecificPart 217 // then parse SchemeSpecificPart if necessary 218 219 // Fragment 220 index = temp.indexOf('#'); 221 if (index != -1) { 222 // remove the fragment from the end 223 fragment = temp.substring(index + 1); 224 validateFragment(uri, fragment, index + 1); 225 temp = temp.substring(0, index); 226 } 227 228 // Scheme and SchemeSpecificPart 229 index = index1 = temp.indexOf(':'); 230 index2 = temp.indexOf('/'); 231 index3 = temp.indexOf('?'); 232 233 // if a '/' or '?' 
occurs before the first ':' the uri has no 234 // specified scheme, and is therefore not absolute 235 if (index != -1 && (index2 >= index || index2 == -1) 236 && (index3 >= index || index3 == -1)) { 237 // the characters up to the first ':' comprise the scheme 238 absolute = true; 239 scheme = temp.substring(0, index); 240 if (scheme.length() == 0) { 241 throw new URISyntaxException(uri, Msg.getString("K0342"), //$NON-NLS-1$ 242 index); 243 } 244 validateScheme(uri, scheme, 0); 245 schemespecificpart = temp.substring(index + 1); 246 if (schemespecificpart.length() == 0) { 247 throw new URISyntaxException(uri, Msg.getString("K0303"), //$NON-NLS-1$ 248 index + 1); 249 } 250 } else { 251 absolute = false; 252 schemespecificpart = temp; 253 } 254 255 if (scheme == null || schemespecificpart.length() > 0 256 && schemespecificpart.charAt(0) == '/') { 257 opaque = false; 258 // the URI is hierarchical 259 260 // Query 261 temp = schemespecificpart; 262 index = temp.indexOf('?'); 263 if (index != -1) { 264 query = temp.substring(index + 1); 265 temp = temp.substring(0, index); 266 validateQuery(uri, query, index2 + 1 + index); 267 } 268 269 // Authority and Path 270 if (temp.startsWith("//")) { //$NON-NLS-1$ 271 index = temp.indexOf('/', 2); 272 if (index != -1) { 273 authority = temp.substring(2, index); 274 path = temp.substring(index); 275 } else { 276 authority = temp.substring(2); 277 if (authority.length() == 0 && query == null 278 && fragment == null) { 279 throw new URISyntaxException(uri, Msg 280 .getString("K0304"), uri.length()); //$NON-NLS-1$ 281 } 282 283 path = ""; //$NON-NLS-1$ 284 // nothing left, so path is empty (not null, path should 285 // never be null) 286 } 287 288 if (authority.length() == 0) { 289 authority = null; 290 } else { 291 validateAuthority(uri, authority, index1 + 3); 292 } 293 } else { // no authority specified 294 path = temp; 295 } 296 297 int pathIndex = 0; 298 if (index2 > -1) { 299 pathIndex += index2; 300 } 301 if (index > -1) { 
302 pathIndex += index; 303 } 304 validatePath(uri, path, pathIndex); 305 } else { // if not hierarchical, URI is opaque 306 opaque = true; 307 validateSsp(uri, schemespecificpart, index2 + 2 + index); 308 } 309 310 parseAuthority(forceServer); 311 } 312 313 private void validateScheme(String uri, String scheme, int index) 314 throws URISyntaxException { 315 // first char needs to be an alpha char 316 char ch = scheme.charAt(0); 317 if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'))) { 318 throw new URISyntaxException(uri, Msg.getString("K0305"), 0); //$NON-NLS-1$ 319 } 320 321 try { 322 URIEncoderDecoder.validateSimple(scheme, "+-."); //$NON-NLS-1$ 323 } catch (URISyntaxException e) { 324 throw new URISyntaxException(uri, Msg.getString("K0305"), index //$NON-NLS-1$ 325 + e.getIndex()); 326 } 327 } 328 329 private void validateSsp(String uri, String ssp, int index) 330 throws URISyntaxException { 331 try { 332 URIEncoderDecoder.validate(ssp, allLegal); 333 } catch (URISyntaxException e) { 334 throw new URISyntaxException(uri, Msg.getString("K0306", e //$NON-NLS-1$ 335 .getReason()), index + e.getIndex()); 336 } 337 } 338 339 private void validateAuthority(String uri, String authority, int index) 340 throws URISyntaxException { 341 try { 342 URIEncoderDecoder.validate(authority, "@[]" + someLegal); //$NON-NLS-1$ 343 } catch (URISyntaxException e) { 344 throw new URISyntaxException(uri, Msg.getString("K0307", e //$NON-NLS-1$ 345 .getReason()), index + e.getIndex()); 346 } 347 } 348 349 private void validatePath(String uri, String path, int index) 350 throws URISyntaxException { 351 try { 352 URIEncoderDecoder.validate(path, "/@" + someLegal); //$NON-NLS-1$ 353 } catch (URISyntaxException e) { 354 throw new URISyntaxException(uri, Msg.getString("K0308", e //$NON-NLS-1$ 355 .getReason()), index + e.getIndex()); 356 } 357 } 358 359 private void validateQuery(String uri, String query, int index) 360 throws URISyntaxException { 361 try { 362 
URIEncoderDecoder.validate(query, allLegal); 363 } catch (URISyntaxException e) { 364 throw new URISyntaxException(uri, Msg.getString("K0309", e //$NON-NLS-1$ 365 .getReason()), index + e.getIndex()); 366 367 } 368 } 369 370 private void validateFragment(String uri, String fragment, int index) 371 throws URISyntaxException { 372 try { 373 URIEncoderDecoder.validate(fragment, allLegal); 374 } catch (URISyntaxException e) { 375 throw new URISyntaxException(uri, Msg.getString("K030a", e //$NON-NLS-1$ 376 .getReason()), index + e.getIndex()); 377 } 378 } 379 380 /** 381 * determine the host, port and userinfo if the authority parses 382 * successfully to a server based authority 383 * 384 * behavour in error cases: if forceServer is true, throw 385 * URISyntaxException with the proper diagnostic messages. if 386 * forceServer is false assume this is a registry based uri, and just 387 * return leaving the host, port and userinfo fields undefined. 388 * 389 * and there are some error cases where URISyntaxException is thrown 390 * regardless of the forceServer parameter e.g. 
malformed ipv6 address 391 */ 392 private void parseAuthority(boolean forceServer) 393 throws URISyntaxException { 394 if (authority == null) { 395 return; 396 } 397 398 String temp, tempUserinfo = null, tempHost = null; 399 int index, hostindex = 0; 400 int tempPort = -1; 401 402 temp = authority; 403 index = temp.indexOf('@'); 404 if (index != -1) { 405 // remove user info 406 tempUserinfo = temp.substring(0, index); 407 validateUserinfo(authority, tempUserinfo, 0); 408 temp = temp.substring(index + 1); // host[:port] is left 409 hostindex = index + 1; 410 } 411 412 index = temp.lastIndexOf(':'); 413 int endindex = temp.indexOf(']'); 414 415 if (index != -1 && endindex < index) { 416 // determine port and host 417 tempHost = temp.substring(0, index); 418 419 if (index < (temp.length() - 1)) { // port part is not empty 420 try { 421 tempPort = Integer.parseInt(temp.substring(index + 1)); 422 if (tempPort < 0) { 423 if (forceServer) { 424 throw new URISyntaxException( 425 authority, 426 Msg.getString("K00b1"), hostindex + index + 1); //$NON-NLS-1$ 427 } 428 return; 429 } 430 } catch (NumberFormatException e) { 431 if (forceServer) { 432 throw new URISyntaxException(authority, Msg 433 .getString("K00b1"), hostindex + index + 1); //$NON-NLS-1$ 434 } 435 return; 436 } 437 } 438 } else { 439 tempHost = temp; 440 } 441 442 if (tempHost.equals("")) { //$NON-NLS-1$ 443 if (forceServer) { 444 throw new URISyntaxException(authority, Msg 445 .getString("K030c"), hostindex); //$NON-NLS-1$ 446 } 447 return; 448 } 449 450 if (!isValidHost(forceServer, tempHost)) { 451 return; 452 } 453 454 // this is a server based uri, 455 // fill in the userinfo, host and port fields 456 userinfo = tempUserinfo; 457 host = tempHost; 458 port = tempPort; 459 serverAuthority = true; 460 } 461 462 private void validateUserinfo(String uri, String userinfo, int index) 463 throws URISyntaxException { 464 for (int i = 0; i < userinfo.length(); i++) { 465 char ch = userinfo.charAt(i); 466 if (ch == 
']' || ch == '[') { 467 throw new URISyntaxException(uri, Msg.getString("K030d"), //$NON-NLS-1$ 468 index + i); 469 } 470 } 471 } 472 473 /** 474 * distinguish between IPv4, IPv6, domain name and validate it based on 475 * its type 476 */ 477 private boolean isValidHost(boolean forceServer, String host) 478 throws URISyntaxException { 479 if (host.charAt(0) == '[') { 480 // ipv6 address 481 if (host.charAt(host.length() - 1) != ']') { 482 throw new URISyntaxException(host, 483 Msg.getString("K030e"), 0); //$NON-NLS-1$ 484 } 485 if (!isValidIP6Address(host)) { 486 throw new URISyntaxException(host, Msg.getString("K030f")); //$NON-NLS-1$ 487 } 488 return true; 489 } 490 491 // '[' and ']' can only be the first char and last char 492 // of the host name 493 if (host.indexOf('[') != -1 || host.indexOf(']') != -1) { 494 throw new URISyntaxException(host, Msg.getString("K0310"), 0); //$NON-NLS-1$ 495 } 496 497 int index = host.lastIndexOf('.'); 498 if (index < 0 || index == host.length() - 1 499 || !Character.isDigit(host.charAt(index + 1))) { 500 // domain name 501 if (isValidDomainName(host)) { 502 return true; 503 } 504 if (forceServer) { 505 throw new URISyntaxException(host, 506 Msg.getString("K0310"), 0); //$NON-NLS-1$ 507 } 508 return false; 509 } 510 511 // IPv4 address 512 if (isValidIPv4Address(host)) { 513 return true; 514 } 515 if (forceServer) { 516 throw new URISyntaxException(host, Msg.getString("K0311"), 0); //$NON-NLS-1$ 517 } 518 return false; 519 } 520 521 private boolean isValidDomainName(String host) { 522 try { 523 URIEncoderDecoder.validateSimple(host, "-."); //$NON-NLS-1$ 524 } catch (URISyntaxException e) { 525 return false; 526 } 527 528 String label = null; 529 StringTokenizer st = new StringTokenizer(host, "."); //$NON-NLS-1$ 530 while (st.hasMoreTokens()) { 531 label = st.nextToken(); 532 if (label.startsWith("-") || label.endsWith("-")) { //$NON-NLS-1$ //$NON-NLS-2$ 533 return false; 534 } 535 } 536 537 if (!label.equals(host)) { 538 char ch 
= label.charAt(0); 539 if (ch >= '0' && ch <= '9') { 540 return false; 541 } 542 } 543 return true; 544 } 545 546 private boolean isValidIPv4Address(String host) { 547 int index; 548 int index2; 549 try { 550 int num; 551 index = host.indexOf('.'); 552 num = Integer.parseInt(host.substring(0, index)); 553 if (num < 0 || num > 255) { 554 return false; 555 } 556 index2 = host.indexOf('.', index + 1); 557 num = Integer.parseInt(host.substring(index + 1, index2)); 558 if (num < 0 || num > 255) { 559 return false; 560 } 561 index = host.indexOf('.', index2 + 1); 562 num = Integer.parseInt(host.substring(index2 + 1, index)); 563 if (num < 0 || num > 255) { 564 return false; 565 } 566 num = Integer.parseInt(host.substring(index + 1)); 567 if (num < 0 || num > 255) { 568 return false; 569 } 570 } catch (Exception e) { 571 return false; 572 } 573 return true; 574 } 575 576 private boolean isValidIP6Address(String ipAddress) { 577 int length = ipAddress.length(); 578 boolean doubleColon = false; 579 int numberOfColons = 0; 580 int numberOfPeriods = 0; 581 String word = ""; //$NON-NLS-1$ 582 char c = 0; 583 char prevChar = 0; 584 int offset = 0; // offset for [] ip addresses 585 586 if (length < 2) { 587 return false; 588 } 589 590 for (int i = 0; i < length; i++) { 591 prevChar = c; 592 c = ipAddress.charAt(i); 593 switch (c) { 594 595 // case for an open bracket [x:x:x:...x] 596 case '[': 597 if (i != 0) { 598 return false; // must be first character 599 } 600 if (ipAddress.charAt(length - 1) != ']') { 601 return false; // must have a close ] 602 } 603 if ((ipAddress.charAt(1) == ':') 604 && (ipAddress.charAt(2) != ':')) { 605 return false; 606 } 607 offset = 1; 608 if (length < 4) { 609 return false; 610 } 611 break; 612 613 // case for a closed bracket at end of IP [x:x:x:...x] 614 case ']': 615 if (i != length - 1) { 616 return false; // must be last charcter 617 } 618 if (ipAddress.charAt(0) != '[') { 619 return false; // must have a open [ 620 } 621 break; 622 623 // 
case for the last 32-bits represented as IPv4 624 // x:x:x:x:x:x:d.d.d.d 625 case '.': 626 numberOfPeriods++; 627 if (numberOfPeriods > 3) { 628 return false; 629 } 630 if (!isValidIP4Word(word)) { 631 return false; 632 } 633 if (numberOfColons != 6 && !doubleColon) { 634 return false; 635 } 636 // a special case ::1:2:3:4:5:d.d.d.d allows 7 colons 637 // with 638 // an IPv4 ending, otherwise 7 :'s is bad 639 if (numberOfColons == 7 640 && ipAddress.charAt(0 + offset) != ':' 641 && ipAddress.charAt(1 + offset) != ':') { 642 return false; 643 } 644 word = ""; //$NON-NLS-1$ 645 break; 646 647 case ':': 648 numberOfColons++; 649 if (numberOfColons > 7) { 650 return false; 651 } 652 if (numberOfPeriods > 0) { 653 return false; 654 } 655 if (prevChar == ':') { 656 if (doubleColon) { 657 return false; 658 } 659 doubleColon = true; 660 } 661 word = ""; //$NON-NLS-1$ 662 break; 663 664 default: 665 if (word.length() > 3) { 666 return false; 667 } 668 if (!isValidHexChar(c)) { 669 return false; 670 } 671 word += c; 672 } 673 } 674 675 // Check if we have an IPv4 ending 676 if (numberOfPeriods > 0) { 677 if (numberOfPeriods != 3 || !isValidIP4Word(word)) { 678 return false; 679 } 680 } else { 681 // If we're at then end and we haven't had 7 colons then there 682 // is a problem unless we encountered a doubleColon 683 if (numberOfColons != 7 && !doubleColon) { 684 return false; 685 } 686 687 // If we have an empty word at the end, it means we ended in 688 // either a : or a . 
689 // If we did not end in :: then this is invalid 690 if (word == "" && ipAddress.charAt(length - 1 - offset) != ':' //$NON-NLS-1$ 691 && ipAddress.charAt(length - 2 - offset) != ':') { 692 return false; 693 } 694 } 695 696 return true; 697 } 698 699 private boolean isValidIP4Word(String word) { 700 char c; 701 if (word.length() < 1 || word.length() > 3) { 702 return false; 703 } 704 for (int i = 0; i < word.length(); i++) { 705 c = word.charAt(i); 706 if (!(c >= '0' && c <= '9')) { 707 return false; 708 } 709 } 710 if (Integer.parseInt(word) > 255) { 711 return false; 712 } 713 return true; 714 } 715 716 private boolean isValidHexChar(char c) { 717 718 return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') 719 || (c >= 'a' && c <= 'f'); 720 } 721 } 722 723 /* 724 * Quote illegal chars for each component, but not the others 725 * 726 * @param component java.lang.String the component to be converted @param 727 * legalset java.lang.String the legal character set allowed in the 728 * component s @return java.lang.String the converted string 729 */ 730 private String quoteComponent(String component, String legalset) { 731 try { 732 /* 733 * Use a different encoder than URLEncoder since: 1. chars like "/", 734 * "#", "@" etc needs to be preserved instead of being encoded, 2. 
735 * UTF-8 char set needs to be used for encoding instead of default 736 * platform one 737 */ 738 return URIEncoderDecoder.quoteIllegal(component, legalset); 739 } catch (UnsupportedEncodingException e) { 740 throw new RuntimeException(e.toString()); 741 } 742 } 743 744 public int compareTo(URI uri) { 745 int ret = 0; 746 747 // compare schemes 748 if (scheme == null && uri.scheme != null) { 749 return -1; 750 } else if (scheme != null && uri.scheme == null) { 751 return 1; 752 } else if (scheme != null && uri.scheme != null) { 753 ret = scheme.compareToIgnoreCase(uri.scheme); 754 if (ret != 0) { 755 return ret; 756 } 757 } 758 759 // compare opacities 760 if (!opaque && uri.opaque) { 761 return -1; 762 } else if (opaque && !uri.opaque) { 763 return 1; 764 } else if (opaque && uri.opaque) { 765 ret = schemespecificpart.compareTo(uri.schemespecificpart); 766 if (ret != 0) { 767 return ret; 768 } 769 } else { 770 771 // otherwise both must be hierarchical 772 773 // compare authorities 774 if (authority != null && uri.authority == null) { 775 return 1; 776 } else if (authority == null && uri.authority != null) { 777 return -1; 778 } else if (authority != null && uri.authority != null) { 779 if (host != null && uri.host != null) { 780 // both are server based, so compare userinfo, host, port 781 if (userinfo != null && uri.userinfo == null) { 782 return 1; 783 } else if (userinfo == null && uri.userinfo != null) { 784 return -1; 785 } else if (userinfo != null && uri.userinfo != null) { 786 ret = userinfo.compareTo(uri.userinfo); 787 if (ret != 0) { 788 return ret; 789 } 790 } 791 792 // userinfo's are the same, compare hostname 793 ret = host.compareToIgnoreCase(uri.host); 794 if (ret != 0) { 795 return ret; 796 } 797 798 // compare port 799 if (port != uri.port) { 800 return port - uri.port; 801 } 802 } else { // one or both are registry based, compare the whole 803 // authority 804 ret = authority.compareTo(uri.authority); 805 if (ret != 0) { 806 return ret; 807 
} 808 } 809 } 810 811 // authorities are the same 812 // compare paths 813 ret = path.compareTo(uri.path); 814 if (ret != 0) { 815 return ret; 816 } 817 818 // compare queries 819 820 if (query != null && uri.query == null) { 821 return 1; 822 } else if (query == null && uri.query != null) { 823 return -1; 824 } else if (query != null && uri.query != null) { 825 ret = query.compareTo(uri.query); 826 if (ret != 0) { 827 return ret; 828 } 829 } 830 } 831 832 // everything else is identical, so compare fragments 833 if (fragment != null && uri.fragment == null) { 834 return 1; 835 } else if (fragment == null && uri.fragment != null) { 836 return -1; 837 } else if (fragment != null && uri.fragment != null) { 838 ret = fragment.compareTo(uri.fragment); 839 if (ret != 0) { 840 return ret; 841 } 842 } 843 844 // identical 845 return 0; 846 } 847 848 public static URI create(String uri) { 849 URI result = null; 850 try { 851 result = new URI(uri); 852 } catch (URISyntaxException e) { 853 throw new IllegalArgumentException(e.getMessage()); 854 } 855 return result; 856 } 857 858 private URI duplicate() { 859 URI clone = new URI(); 860 clone.absolute = absolute; 861 clone.authority = authority; 862 clone.fragment = fragment; 863 clone.host = host; 864 clone.opaque = opaque; 865 clone.path = path; 866 clone.port = port; 867 clone.query = query; 868 clone.scheme = scheme; 869 clone.schemespecificpart = schemespecificpart; 870 clone.userinfo = userinfo; 871 clone.serverAuthority = serverAuthority; 872 return clone; 873 } 874 875 /* 876 * Takes a string that may contain hex sequences like %F1 or %2b and 877 * converts the hex values following the '%' to lowercase 878 */ 879 private String convertHexToLowerCase(String s) { 880 StringBuffer result = new StringBuffer(""); //$NON-NLS-1$ 881 if (s.indexOf('%') == -1) { 882 return s; 883 } 884 885 int index = 0, previndex = 0; 886 while ((index = s.indexOf('%', previndex)) != -1) { 887 result.append(s.substring(previndex, index + 1)); 
888 result.append(s.substring(index + 1, index + 3).toLowerCase()); 889 index += 3; 890 previndex = index; 891 } 892 return result.toString(); 893 } 894 895 /* 896 * Takes two strings that may contain hex sequences like %F1 or %2b and 897 * compares them, ignoring case for the hex values hex values must always 898 * occur in pairs like above 899 */ 900 private boolean equalsHexCaseInsensitive(String first, String second) { 901 if (first.indexOf('%') != second.indexOf('%')) { 902 return first.equals(second); 903 } 904 905 int index = 0, previndex = 0; 906 while ((index = first.indexOf('%', previndex)) != -1 907 && second.indexOf('%', previndex) == index) { 908 boolean match = first.substring(previndex, index).equals( 909 second.substring(previndex, index)); 910 if (!match) { 911 return false; 912 } 913 914 match = first.substring(index + 1, index + 3).equalsIgnoreCase( 915 second.substring(index + 1, index + 3)); 916 if (!match) { 917 return false; 918 } 919 920 index += 3; 921 previndex = index; 922 } 923 return first.substring(previndex).equals(second.substring(previndex)); 924 } 925 926 @Override 927 public boolean equals(Object o) { 928 if (!(o instanceof URI)) { 929 return false; 930 } 931 URI uri = (URI) o; 932 933 if (uri.fragment == null && fragment != null || uri.fragment != null 934 && fragment == null) { 935 return false; 936 } else if (uri.fragment != null && fragment != null) { 937 if (!equalsHexCaseInsensitive(uri.fragment, fragment)) { 938 return false; 939 } 940 } 941 942 if (uri.scheme == null && scheme != null || uri.scheme != null 943 && scheme == null) { 944 return false; 945 } else if (uri.scheme != null && scheme != null) { 946 if (!uri.scheme.equalsIgnoreCase(scheme)) { 947 return false; 948 } 949 } 950 951 if (uri.opaque && opaque) { 952 return equalsHexCaseInsensitive(uri.schemespecificpart, 953 schemespecificpart); 954 } else if (!uri.opaque && !opaque) { 955 if (!equalsHexCaseInsensitive(path, uri.path)) { 956 return false; 957 } 958 959 
if (uri.query != null && query == null || uri.query == null 960 && query != null) { 961 return false; 962 } else if (uri.query != null && query != null) { 963 if (!equalsHexCaseInsensitive(uri.query, query)) { 964 return false; 965 } 966 } 967 968 if (uri.authority != null && authority == null 969 || uri.authority == null && authority != null) { 970 return false; 971 } else if (uri.authority != null && authority != null) { 972 if (uri.host != null && host == null || uri.host == null 973 && host != null) { 974 return false; 975 } else if (uri.host == null && host == null) { 976 // both are registry based, so compare the whole authority 977 return equalsHexCaseInsensitive(uri.authority, authority); 978 } else { // uri.host != null && host != null, so server-based 979 if (!host.equalsIgnoreCase(uri.host)) { 980 return false; 981 } 982 983 if (port != uri.port) { 984 return false; 985 } 986 987 if (uri.userinfo != null && userinfo == null 988 || uri.userinfo == null && userinfo != null) { 989 return false; 990 } else if (uri.userinfo != null && userinfo != null) { 991 return equalsHexCaseInsensitive(userinfo, uri.userinfo); 992 } else { 993 return true; 994 } 995 } 996 } else { 997 // no authority 998 return true; 999 } 1000 1001 } else { 1002 // one is opaque, the other hierarchical 1003 return false; 1004 } 1005 } 1006 1007 public String getAuthority() { 1008 return decode(authority); 1009 } 1010 1011 /** 1012 * Returns the fragment component. 1013 * 1014 * @return String 1015 */ 1016 public String getFragment() { 1017 return decode(fragment); 1018 } 1019 1020 /** 1021 * Returns the host component. 1022 * 1023 * @return String 1024 */ 1025 public String getHost() { 1026 return host; 1027 } 1028 1029 /** 1030 * Returns the path component. 1031 * 1032 * @return String 1033 */ 1034 public String getPath() { 1035 return decode(path); 1036 } 1037 1038 /** 1039 * Returns the port number. 
1040 * 1041 * @return int 1042 */ 1043 public int getPort() { 1044 return port; 1045 } 1046 1047 /** 1048 * Returns the query component. 1049 * 1050 * @return String 1051 */ 1052 public String getQuery() { 1053 return decode(query); 1054 } 1055 1056 /** 1057 * Returns the authority component in raw form. 1058 * 1059 * @return String 1060 */ 1061 public String getRawAuthority() { 1062 return authority; 1063 } 1064 1065 /** 1066 * Returns the fragment component in raw form. 1067 * 1068 * @return String 1069 */ 1070 public String getRawFragment() { 1071 return fragment; 1072 } 1073 1074 /** 1075 * Returns the path component in raw form. 1076 * 1077 * @return String 1078 */ 1079 public String getRawPath() { 1080 return path; 1081 } 1082 1083 /** 1084 * Returns the query component in raw form. 1085 * 1086 * @return String 1087 */ 1088 public String getRawQuery() { 1089 return query; 1090 } 1091 1092 /** 1093 * Returns the scheme-specific part component in raw form. 1094 * 1095 * @return String 1096 */ 1097 public String getRawSchemeSpecificPart() { 1098 return schemespecificpart; 1099 } 1100 1101 /** 1102 * Returns the user-info component in raw form. 1103 * 1104 * @return String 1105 */ 1106 public String getRawUserInfo() { 1107 return userinfo; 1108 } 1109 1110 /** 1111 * Returns the scheme. 1112 * 1113 * @return String 1114 */ 1115 public String getScheme() { 1116 return scheme; 1117 } 1118 1119 /** 1120 * Returns the scheme-specific part component. 1121 * 1122 * @return String 1123 */ 1124 public String getSchemeSpecificPart() { 1125 return decode(schemespecificpart); 1126 } 1127 1128 /** 1129 * Returns the userinfo. 
*
     * @return String
     */
    public String getUserInfo() {
        return decode(userinfo);
    }

    /**
     * Returns the hash code of this URI. The value is derived from the
     * case-normalized string built by getHashString() and cached in the
     * <code>hash</code> field; -1 marks "not yet computed".
     *
     * @return the hash code
     */
    @Override
    public int hashCode() {
        if (hash == -1) {
            hash = getHashString().hashCode();
        }
        return hash;
    }

    /**
     * Indicates whether this URI is absolute
     *
     * @return boolean
     */
    public boolean isAbsolute() {
        return absolute;
    }

    /**
     * Indicates whether this URI is opaque
     *
     * @return true if the URI is opaque, otherwise false
     */
    public boolean isOpaque() {
        return opaque;
    }

    /*
     * Normalizes a hierarchical path and returns the resulting string.
     *
     * "." segments are dropped, and each ".." segment cancels the closest
     * preceding segment that is still present and is not itself "..". A run
     * of consecutive '/' characters is treated as a single separator, so
     * empty segments never appear in the result. A leading '/' is preserved.
     * The result ends with '/' when the input did, or when the last segment
     * of the input was removed. If the first segment of the result contains a
     * ':' the result is prefixed with "./" so that it cannot be mistaken for
     * a scheme.
     */
    private String normalize(String path) {
        // StringTokenizer never yields empty tokens, so the number of
        // segments counted and the number extracted always agree, however
        // many consecutive slashes the path contains.
        StringTokenizer tokenizer = new StringTokenizer(path, "/"); //$NON-NLS-1$
        int size = tokenizer.countTokens();
        String[] seglist = new String[size];
        for (int i = 0; i < size; i++) {
            seglist[i] = tokenizer.nextToken();
        }

        // determine which segments get included in the normalized path
        boolean[] include = new boolean[size];
        for (int i = 0; i < size; i++) {
            include[i] = true;
            if (seglist[i].equals("..")) { //$NON-NLS-1$
                int remove = i - 1;
                // search back to find a segment to remove, if possible
                while (remove > -1 && !include[remove]) {
                    remove--;
                }
                // if we find a segment to remove, remove it and the ".."
                // segment; a surviving ".." cannot be cancelled
                if (remove > -1 && !seglist[remove].equals("..")) { //$NON-NLS-1$
                    include[remove] = false;
                    include[i] = false;
                }
            } else if (seglist[i].equals(".")) { //$NON-NLS-1$
                include[i] = false;
            }
        }

        // put the path back together, every kept segment followed by '/'
        StringBuilder newpath = new StringBuilder();
        if (path.startsWith("/")) { //$NON-NLS-1$
            newpath.append('/');
        }
        for (int i = 0; i < size; i++) {
            if (include[i]) {
                newpath.append(seglist[i]);
                newpath.append('/');
            }
        }

        // if the path did not end with a slash and its last segment is
        // still used, then delete the extra trailing '/' added above
        if (!path.endsWith("/") && size > 0 //$NON-NLS-1$
                && include[size - 1]) {
            newpath.deleteCharAt(newpath.length() - 1);
        }

        String result = newpath.toString();

        // check for a ':' in the first segment if one exists,
        // prepend "./" to normalize
        int colon = result.indexOf(':');
        int slash = result.indexOf('/');
        if (colon != -1 && (colon < slash || slash == -1)) {
            newpath.insert(0, "./"); //$NON-NLS-1$
            result = newpath.toString();
        }
        return result;
    }

    /**
     * Normalizes the path of this URI.
     *
     * @return this URI if it is opaque or its path is already normalized,
     *         otherwise a copy of this URI carrying the normalized path
     */
    public URI normalize() {
        if (opaque) {
            return this;
        }
        String normalizedPath = normalize(path);
        // if the path is already normalized, return this
        if (path.equals(normalizedPath)) {
            return this;
        }
        // get an exact copy of the URI and re-calculate the scheme specific
        // part since the path of the normalized URI is different from this
        // URI.
        URI result = duplicate();
        result.path = normalizedPath;
        result.setSchemeSpecificPart();
        return result;
    }

    /**
     * Return this uri instance if it has already been determined as a
     * ServerAuthority. Otherwise try to parse it again as a server authority
     * to produce a URISyntaxException with the proper diagnostic message.
     *
     * @return this URI
     * @throws URISyntaxException
     *             if the authority cannot be parsed as a server authority
     */
    public URI parseServerAuthority() throws URISyntaxException {
        if (!serverAuthority) {
            new Helper().parseAuthority(true);
        }
        return this;
    }

    /**
     * Relativizes the given URI against this URI.
     *
     * @param relative
     *            the URI to make relative to this URI
     * @return <code>relative</code> itself when either URI is opaque, when
     *         scheme or authority differ, or when this URI's normalized path
     *         is not a prefix of the given URI's normalized path; otherwise a
     *         new URI made of the remaining path plus the query and fragment
     *         of <code>relative</code>
     */
    public URI relativize(URI relative) {
        if (relative.opaque || opaque) {
            return relative;
        }

        if (scheme == null ? relative.scheme != null : !scheme
                .equals(relative.scheme)) {
            return relative;
        }

        if (authority == null ? relative.authority != null : !authority
                .equals(relative.authority)) {
            return relative;
        }

        // normalize both paths
        String thisPath = normalize(path);
        String relativePath = normalize(relative.path);

        /*
         * if the paths aren't equal, then we need to determine if this URI's
         * path is a parent path (begins with) the relative URI's path
         */
        if (!thisPath.equals(relativePath)) {
            // if this URI's path doesn't end in a '/', add one
            if (!thisPath.endsWith("/")) { //$NON-NLS-1$
                thisPath = thisPath + '/';
            }
            /*
             * if the relative URI's path doesn't start with this URI's path,
             * then just return the relative URI; the URIs have nothing in
             * common
             */
            if (!relativePath.startsWith(thisPath)) {
                return relative;
            }
        }

        URI result = new URI();
        result.fragment = relative.fragment;
        result.query = relative.query;
        // the result URI is the remainder of the relative URI's path
        result.path = relativePath.substring(thisPath.length());
        // a freshly constructed URI has no scheme specific part yet; derive
        // it from the path and query that were just assigned
        result.setSchemeSpecificPart();
        return result;
    }

    /**
     * Resolves the given URI against this URI.
     *
     * @param relative
     *            the URI to resolve
     * @return <code>relative</code> itself if it is absolute or this URI is
     *         opaque, otherwise a new URI combining the components of both
     */
    public URI resolve(URI relative) {
        if (relative.absolute || opaque) {
            return relative;
        }

        URI result;
        if (relative.path.equals("") && relative.scheme == null //$NON-NLS-1$
                && relative.authority == null && relative.query == null
                && relative.fragment != null) {
            // if the relative URI only consists of fragment,
            // the resolved URI is very similar to this URI,
            // except that it has the fragment from the relative URI.
            result = duplicate();
            result.fragment = relative.fragment;
            // no need to re-calculate the scheme specific part,
            // since fragment is not part of scheme specific part.
            return result;
        }

        if (relative.authority != null) {
            // if the relative URI has authority,
            // the resolved URI is almost the same as the relative URI,
            // except that it has the scheme of this URI.
            result = relative.duplicate();
            result.scheme = scheme;
            result.absolute = absolute;
        } else {
            // since relative URI has no authority,
            // the resolved URI is very similar to this URI,
            // except that it has the query and fragment of the relative URI,
            // and the path is different.
            result = duplicate();
            result.fragment = relative.fragment;
            result.query = relative.query;
            if (relative.path.startsWith("/")) { //$NON-NLS-1$
                result.path = relative.path;
            } else {
                // resolve a relative reference: keep this URI's path up to
                // and including its last '/', then append the relative path
                int endindex = path.lastIndexOf('/') + 1;
                result.path = normalize(path.substring(0, endindex)
                        + relative.path);
            }
            // re-calculate the scheme specific part since
            // query and path of the resolved URI is different from this URI.
            result.setSchemeSpecificPart();
        }
        return result;
    }

    /**
     * Helper method used to re-calculate the scheme specific part of the
     * resolved, relativized or normalized URIs. Also clears the cached
     * string form so that toString() rebuilds it.
     */
    private void setSchemeSpecificPart() {
        // ssp = [//authority][path][?query]
        StringBuilder ssp = new StringBuilder();
        if (authority != null) {
            ssp.append("//"); //$NON-NLS-1$
            ssp.append(authority);
        }
        if (path != null) {
            ssp.append(path);
        }
        if (query != null) {
            ssp.append('?');
            ssp.append(query);
        }
        schemespecificpart = ssp.toString();
        // reset string, so that it can be re-calculated correctly when asked.
        string = null;
    }

    /**
     * Parses the given string with create(String) and resolves the resulting
     * URI against this URI.
     *
     * @param relative
     *            the string form of the URI to resolve
     * @return the resolved URI
     */
    public URI resolve(String relative) {
        return resolve(create(relative));
    }

    /*
     * Encode unicode chars that are not part of US-ASCII char set into the
     * escaped form
     *
     * i.e. The Euro currency symbol is encoded as "%E2%82%AC".
     *
     * @param s java.lang.String the string to be converted
     *
     * @return java.lang.String the converted string
     */
    private String encodeOthers(String s) {
        try {
            /*
             * Use a different encoder than URLEncoder since: 1. chars like
             * "/", "#", "@" etc needs to be preserved instead of being
             * encoded, 2. UTF-8 char set needs to be used for encoding
             * instead of default platform one 3. Only other chars need to be
             * converted
             */
            return URIEncoderDecoder.encodeOthers(s);
        } catch (UnsupportedEncodingException e) {
            // keep the original exception as the cause for diagnostics
            throw new RuntimeException(e.toString(), e);
        }
    }

    /*
     * Decodes the escaped octets of the given component via
     * URIEncoderDecoder.decode. A null component is returned unchanged.
     */
    private String decode(String s) {
        if (s == null) {
            return s;
        }

        try {
            return URIEncoderDecoder.decode(s);
        } catch (UnsupportedEncodingException e) {
            // keep the original exception as the cause for diagnostics
            throw new RuntimeException(e.toString(), e);
        }
    }

    /**
     * Returns the string form of this URI with the characters outside the
     * US-ASCII char set converted to their escaped form.
     *
     * @return the encoded string form of this URI
     */
    public String toASCIIString() {
        return encodeOthers(toString());
    }

    /**
     * Returns the string form of this URI. The string is assembled from the
     * components on first use and cached in the <code>string</code> field.
     *
     * @return the string form of this URI
     */
    @Override
    public String toString() {
        if (string == null) {
            StringBuilder result = new StringBuilder();
            if (scheme != null) {
                result.append(scheme);
                result.append(':');
            }
            if (opaque) {
                result.append(schemespecificpart);
            } else {
                if (authority != null) {
                    result.append("//"); //$NON-NLS-1$
                    result.append(authority);
                }

                if (path != null) {
                    result.append(path);
                }

                if (query != null) {
                    result.append('?');
                    result.append(query);
                }
            }

            if (fragment != null) {
                result.append('#');
                result.append(fragment);
            }

            string = result.toString();
        }
        return string;
    }

    /*
     * Form a string from the components of this URI, similarly to the
     * toString() method. But this method converts scheme and host to
     * lowercase, and converts escaped octets to lowercase.
     *
     * The lower-casing uses a fixed locale so that the result, and therefore
     * the hash code, does not depend on the default locale of the platform.
     */
    private String getHashString() {
        StringBuilder result = new StringBuilder();
        if (scheme != null) {
            result.append(scheme.toLowerCase(java.util.Locale.US));
            result.append(':');
        }
        if (opaque) {
            result.append(schemespecificpart);
        } else {
            if (authority != null) {
                result.append("//"); //$NON-NLS-1$
                if (host == null) {
                    result.append(authority);
                } else {
                    if (userinfo != null) {
                        result.append(userinfo);
                        result.append('@');
                    }
                    result.append(host.toLowerCase(java.util.Locale.US));
                    if (port != -1) {
                        result.append(':');
                        result.append(port);
                    }
                }
            }

            if (path != null) {
                result.append(path);
            }

            if (query != null) {
                result.append('?');
                result.append(query);
            }
        }

        if (fragment != null) {
            result.append('#');
            result.append(fragment);
        }

        return convertHexToLowerCase(result.toString());
    }

    /**
     * Converts this URI to a URL built from its string form.
     *
     * @return the URL created from toString()
     * @throws IllegalArgumentException
     *             if this URI is not absolute
     * @throws MalformedURLException
     *             if the URL constructor rejects the string form
     */
    public URL toURL() throws MalformedURLException {
        if (!absolute) {
            throw new IllegalArgumentException(Msg.getString("K0312") + ": " //$NON-NLS-1$//$NON-NLS-2$
                    + toString());
        }
        return new URL(toString());
    }

    /*
     * Restores this URI from a stream: only the string form is serialized,
     * every other component is transient and is rebuilt by re-parsing it.
     */
    private void readObject(ObjectInputStream in) throws IOException,
            ClassNotFoundException {
        in.defaultReadObject();
        // Deserialization does not run the field initializers, so the
        // transient int fields hold 0 rather than their declared "unset"
        // marker -1. Restore the markers before parsing, otherwise
        // hashCode() would return the stale 0 and an undefined port would be
        // reported as 0.
        port = -1;
        hash = -1;
        try {
            new Helper().parseURI(string, false);
        } catch (URISyntaxException e) {
            IOException ioe = new IOException(e.toString());
            ioe.initCause(e);
            throw ioe;
        }
    }

    /*
     * Writes this URI to a stream; only the non-transient string form is
     * written by the default mechanism.
     */
    private void writeObject(ObjectOutputStream out) throws IOException,
            ClassNotFoundException {
        // call toString() to ensure the value of string field is calculated
        toString();
        out.defaultWriteObject();
    }
}