// URI.java revision fdb2704414a9ed92394ada0d1395e4db86889465
/*
 *  Licensed to the Apache Software Foundation (ASF) under one or more
 *  contributor license agreements.  See the NOTICE file distributed with
 *  this work for additional information regarding copyright ownership.
 *  The ASF licenses this file to You under the Apache License, Version 2.0
 *  (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
17
18package java.net;
19
20import java.io.IOException;
21import java.io.ObjectInputStream;
22import java.io.ObjectOutputStream;
23import java.io.Serializable;
24import java.io.UnsupportedEncodingException;
25import java.util.StringTokenizer;
26
27import org.apache.harmony.luni.util.Msg;
28
/**
 * This class represents an instance of a URI as defined by RFC 2396.
 */
32public final class URI implements Comparable<URI>, Serializable {
33
34    private static final long serialVersionUID = -6052424284110960213l;
35
36    static final String unreserved = "_-!.~\'()*"; //$NON-NLS-1$
37
38    static final String punct = ",;:$&+="; //$NON-NLS-1$
39
40    static final String reserved = punct + "?/[]@"; //$NON-NLS-1$
41
42    static final String someLegal = unreserved + punct;
43
44    static final String allLegal = unreserved + reserved;
45
46    private String string;
47
48    private transient String scheme;
49
50    private transient String schemespecificpart;
51
52    private transient String authority;
53
54    private transient String userinfo;
55
56    private transient String host;
57
58    private transient int port = -1;
59
60    private transient String path;
61
62    private transient String query;
63
64    private transient String fragment;
65
66    private transient boolean opaque;
67
68    private transient boolean absolute;
69
70    private transient boolean serverAuthority = false;
71
72    private transient int hash = -1;
73
74    private URI() {
75    }
76
77    public URI(String uri) throws URISyntaxException {
78        new Helper().parseURI(uri, false);
79    }
80
81    public URI(String scheme, String ssp, String frag)
82            throws URISyntaxException {
83        StringBuffer uri = new StringBuffer();
84        if (scheme != null) {
85            uri.append(scheme);
86            uri.append(':');
87        }
88        if (ssp != null) {
89            // QUOTE ILLEGAL CHARACTERS
90            uri.append(quoteComponent(ssp, allLegal));
91        }
92        if (frag != null) {
93            uri.append('#');
94            // QUOTE ILLEGAL CHARACTERS
95            uri.append(quoteComponent(frag, allLegal));
96        }
97
98        new Helper().parseURI(uri.toString(), false);
99    }
100
101    public URI(String scheme, String userinfo, String host, int port,
102            String path, String query, String fragment)
103            throws URISyntaxException {
104
105        if (scheme == null && userinfo == null && host == null && path == null
106                && query == null && fragment == null) {
107            this.path = ""; //$NON-NLS-1$
108            return;
109        }
110
111        if (scheme != null && path != null && path.length() > 0
112                && path.charAt(0) != '/') {
113            throw new URISyntaxException(path, Msg.getString("K0302")); //$NON-NLS-1$
114        }
115
116        StringBuffer uri = new StringBuffer();
117        if (scheme != null) {
118            uri.append(scheme);
119            uri.append(':');
120        }
121
122        if (userinfo != null || host != null || port != -1) {
123            uri.append("//"); //$NON-NLS-1$
124        }
125
126        if (userinfo != null) {
127            // QUOTE ILLEGAL CHARACTERS in userinfo
128            uri.append(quoteComponent(userinfo, someLegal));
129            uri.append('@');
130        }
131
132        if (host != null) {
133            // check for ipv6 addresses that hasn't been enclosed
134            // in square brackets
135            if (host.indexOf(':') != -1 && host.indexOf(']') == -1
136                    && host.indexOf('[') == -1) {
137                host = "[" + host + "]"; //$NON-NLS-1$ //$NON-NLS-2$
138            }
139            uri.append(host);
140        }
141
142        if (port != -1) {
143            uri.append(':');
144            uri.append(port);
145        }
146
147        if (path != null) {
148            // QUOTE ILLEGAL CHARS
149            uri.append(quoteComponent(path, "/@" + someLegal)); //$NON-NLS-1$
150        }
151
152        if (query != null) {
153            uri.append('?');
154            // QUOTE ILLEGAL CHARS
155            uri.append(quoteComponent(query, allLegal));
156        }
157
158        if (fragment != null) {
159            // QUOTE ILLEGAL CHARS
160            uri.append('#');
161            uri.append(quoteComponent(fragment, allLegal));
162        }
163
164        new Helper().parseURI(uri.toString(), true);
165    }
166
167    public URI(String scheme, String host, String path, String fragment)
168            throws URISyntaxException {
169        this(scheme, null, host, -1, path, null, fragment);
170    }
171
172    public URI(String scheme, String authority, String path, String query,
173            String fragment) throws URISyntaxException {
174        if (scheme != null && path != null && path.length() > 0
175                && path.charAt(0) != '/') {
176            throw new URISyntaxException(path, Msg.getString("K0302")); //$NON-NLS-1$
177        }
178
179        StringBuffer uri = new StringBuffer();
180        if (scheme != null) {
181            uri.append(scheme);
182            uri.append(':');
183        }
184        if (authority != null) {
185            uri.append("//"); //$NON-NLS-1$
186            // QUOTE ILLEGAL CHARS
187            uri.append(quoteComponent(authority, "@[]" + someLegal)); //$NON-NLS-1$
188        }
189
190        if (path != null) {
191            // QUOTE ILLEGAL CHARS
192            uri.append(quoteComponent(path, "/@" + someLegal)); //$NON-NLS-1$
193        }
194        if (query != null) {
195            // QUOTE ILLEGAL CHARS
196            uri.append('?');
197            uri.append(quoteComponent(query, allLegal));
198        }
199        if (fragment != null) {
200            // QUOTE ILLEGAL CHARS
201            uri.append('#');
202            uri.append(quoteComponent(fragment, allLegal));
203        }
204
205        new Helper().parseURI(uri.toString(), false);
206    }
207
208    private class Helper {
209
210        private void parseURI(String uri, boolean forceServer)
211                throws URISyntaxException {
212            String temp = uri;
213            // assign uri string to the input value per spec
214            string = uri;
215            int index, index1, index2, index3;
216            // parse into Fragment, Scheme, and SchemeSpecificPart
217            // then parse SchemeSpecificPart if necessary
218
219            // Fragment
220            index = temp.indexOf('#');
221            if (index != -1) {
222                // remove the fragment from the end
223                fragment = temp.substring(index + 1);
224                validateFragment(uri, fragment, index + 1);
225                temp = temp.substring(0, index);
226            }
227
228            // Scheme and SchemeSpecificPart
229            index = index1 = temp.indexOf(':');
230            index2 = temp.indexOf('/');
231            index3 = temp.indexOf('?');
232
233            // if a '/' or '?' occurs before the first ':' the uri has no
234            // specified scheme, and is therefore not absolute
235            if (index != -1 && (index2 >= index || index2 == -1)
236                    && (index3 >= index || index3 == -1)) {
237                // the characters up to the first ':' comprise the scheme
238                absolute = true;
239                scheme = temp.substring(0, index);
240                if (scheme.length() == 0) {
241                    throw new URISyntaxException(uri, Msg.getString("K0342"), //$NON-NLS-1$
242                            index);
243                }
244                validateScheme(uri, scheme, 0);
245                schemespecificpart = temp.substring(index + 1);
246                if (schemespecificpart.length() == 0) {
247                    throw new URISyntaxException(uri, Msg.getString("K0303"), //$NON-NLS-1$
248                            index + 1);
249                }
250            } else {
251                absolute = false;
252                schemespecificpart = temp;
253            }
254
255            if (scheme == null || schemespecificpart.length() > 0
256                    && schemespecificpart.charAt(0) == '/') {
257                opaque = false;
258                // the URI is hierarchical
259
260                // Query
261                temp = schemespecificpart;
262                index = temp.indexOf('?');
263                if (index != -1) {
264                    query = temp.substring(index + 1);
265                    temp = temp.substring(0, index);
266                    validateQuery(uri, query, index2 + 1 + index);
267                }
268
269                // Authority and Path
270                if (temp.startsWith("//")) { //$NON-NLS-1$
271                    index = temp.indexOf('/', 2);
272                    if (index != -1) {
273                        authority = temp.substring(2, index);
274                        path = temp.substring(index);
275                    } else {
276                        authority = temp.substring(2);
277                        if (authority.length() == 0 && query == null
278                                && fragment == null) {
279                            throw new URISyntaxException(uri, Msg
280                                    .getString("K0304"), uri.length()); //$NON-NLS-1$
281                        }
282
283                        path = ""; //$NON-NLS-1$
284                        // nothing left, so path is empty (not null, path should
285                        // never be null)
286                    }
287
288                    if (authority.length() == 0) {
289                        authority = null;
290                    } else {
291                        validateAuthority(uri, authority, index1 + 3);
292                    }
293                } else { // no authority specified
294                    path = temp;
295                }
296
297                int pathIndex = 0;
298                if (index2 > -1) {
299                    pathIndex += index2;
300                }
301                if (index > -1) {
302                    pathIndex += index;
303                }
304                validatePath(uri, path, pathIndex);
305            } else { // if not hierarchical, URI is opaque
306                opaque = true;
307                validateSsp(uri, schemespecificpart, index2 + 2 + index);
308            }
309
310            parseAuthority(forceServer);
311        }
312
313        private void validateScheme(String uri, String scheme, int index)
314                throws URISyntaxException {
315            // first char needs to be an alpha char
316            char ch = scheme.charAt(0);
317            if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'))) {
318                throw new URISyntaxException(uri, Msg.getString("K0305"), 0); //$NON-NLS-1$
319            }
320
321            try {
322                URIEncoderDecoder.validateSimple(scheme, "+-."); //$NON-NLS-1$
323            } catch (URISyntaxException e) {
324                throw new URISyntaxException(uri, Msg.getString("K0305"), index //$NON-NLS-1$
325                        + e.getIndex());
326            }
327        }
328
329        private void validateSsp(String uri, String ssp, int index)
330                throws URISyntaxException {
331            try {
332                URIEncoderDecoder.validate(ssp, allLegal);
333            } catch (URISyntaxException e) {
334                throw new URISyntaxException(uri, Msg.getString("K0306", e //$NON-NLS-1$
335                        .getReason()), index + e.getIndex());
336            }
337        }
338
339        private void validateAuthority(String uri, String authority, int index)
340                throws URISyntaxException {
341            try {
342                URIEncoderDecoder.validate(authority, "@[]" + someLegal); //$NON-NLS-1$
343            } catch (URISyntaxException e) {
344                throw new URISyntaxException(uri, Msg.getString("K0307", e //$NON-NLS-1$
345                        .getReason()), index + e.getIndex());
346            }
347        }
348
349        private void validatePath(String uri, String path, int index)
350                throws URISyntaxException {
351            try {
352                URIEncoderDecoder.validate(path, "/@" + someLegal); //$NON-NLS-1$
353            } catch (URISyntaxException e) {
354                throw new URISyntaxException(uri, Msg.getString("K0308", e //$NON-NLS-1$
355                        .getReason()), index + e.getIndex());
356            }
357        }
358
359        private void validateQuery(String uri, String query, int index)
360                throws URISyntaxException {
361            try {
362                URIEncoderDecoder.validate(query, allLegal);
363            } catch (URISyntaxException e) {
364                throw new URISyntaxException(uri, Msg.getString("K0309", e //$NON-NLS-1$
365                        .getReason()), index + e.getIndex());
366
367            }
368        }
369
370        private void validateFragment(String uri, String fragment, int index)
371                throws URISyntaxException {
372            try {
373                URIEncoderDecoder.validate(fragment, allLegal);
374            } catch (URISyntaxException e) {
375                throw new URISyntaxException(uri, Msg.getString("K030a", e //$NON-NLS-1$
376                        .getReason()), index + e.getIndex());
377            }
378        }
379
380        /**
381         * determine the host, port and userinfo if the authority parses
382         * successfully to a server based authority
383         *
384         * behavour in error cases: if forceServer is true, throw
385         * URISyntaxException with the proper diagnostic messages. if
386         * forceServer is false assume this is a registry based uri, and just
387         * return leaving the host, port and userinfo fields undefined.
388         *
389         * and there are some error cases where URISyntaxException is thrown
390         * regardless of the forceServer parameter e.g. malformed ipv6 address
391         */
392        private void parseAuthority(boolean forceServer)
393                throws URISyntaxException {
394            if (authority == null) {
395                return;
396            }
397
398            String temp, tempUserinfo = null, tempHost = null;
399            int index, hostindex = 0;
400            int tempPort = -1;
401
402            temp = authority;
403            index = temp.indexOf('@');
404            if (index != -1) {
405                // remove user info
406                tempUserinfo = temp.substring(0, index);
407                validateUserinfo(authority, tempUserinfo, 0);
408                temp = temp.substring(index + 1); // host[:port] is left
409                hostindex = index + 1;
410            }
411
412            index = temp.lastIndexOf(':');
413            int endindex = temp.indexOf(']');
414
415            if (index != -1 && endindex < index) {
416                // determine port and host
417                tempHost = temp.substring(0, index);
418
419                if (index < (temp.length() - 1)) { // port part is not empty
420                    try {
421                        tempPort = Integer.parseInt(temp.substring(index + 1));
422                        if (tempPort < 0) {
423                            if (forceServer) {
424                                throw new URISyntaxException(
425                                        authority,
426                                        Msg.getString("K00b1"), hostindex + index + 1); //$NON-NLS-1$
427                            }
428                            return;
429                        }
430                    } catch (NumberFormatException e) {
431                        if (forceServer) {
432                            throw new URISyntaxException(authority, Msg
433                                    .getString("K00b1"), hostindex + index + 1); //$NON-NLS-1$
434                        }
435                        return;
436                    }
437                }
438            } else {
439                tempHost = temp;
440            }
441
442            if (tempHost.equals("")) { //$NON-NLS-1$
443                if (forceServer) {
444                    throw new URISyntaxException(authority, Msg
445                            .getString("K030c"), hostindex); //$NON-NLS-1$
446                }
447                return;
448            }
449
450            if (!isValidHost(forceServer, tempHost)) {
451                return;
452            }
453
454            // this is a server based uri,
455            // fill in the userinfo, host and port fields
456            userinfo = tempUserinfo;
457            host = tempHost;
458            port = tempPort;
459            serverAuthority = true;
460        }
461
462        private void validateUserinfo(String uri, String userinfo, int index)
463                throws URISyntaxException {
464            for (int i = 0; i < userinfo.length(); i++) {
465                char ch = userinfo.charAt(i);
466                if (ch == ']' || ch == '[') {
467                    throw new URISyntaxException(uri, Msg.getString("K030d"), //$NON-NLS-1$
468                            index + i);
469                }
470            }
471        }
472
473        /**
474         * distinguish between IPv4, IPv6, domain name and validate it based on
475         * its type
476         */
477        private boolean isValidHost(boolean forceServer, String host)
478                throws URISyntaxException {
479            if (host.charAt(0) == '[') {
480                // ipv6 address
481                if (host.charAt(host.length() - 1) != ']') {
482                    throw new URISyntaxException(host,
483                            Msg.getString("K030e"), 0); //$NON-NLS-1$
484                }
485                if (!isValidIP6Address(host)) {
486                    throw new URISyntaxException(host, Msg.getString("K030f")); //$NON-NLS-1$
487                }
488                return true;
489            }
490
491            // '[' and ']' can only be the first char and last char
492            // of the host name
493            if (host.indexOf('[') != -1 || host.indexOf(']') != -1) {
494                throw new URISyntaxException(host, Msg.getString("K0310"), 0); //$NON-NLS-1$
495            }
496
497            int index = host.lastIndexOf('.');
498            if (index < 0 || index == host.length() - 1
499                    || !Character.isDigit(host.charAt(index + 1))) {
500                // domain name
501                if (isValidDomainName(host)) {
502                    return true;
503                }
504                if (forceServer) {
505                    throw new URISyntaxException(host,
506                            Msg.getString("K0310"), 0); //$NON-NLS-1$
507                }
508                return false;
509            }
510
511            // IPv4 address
512            if (isValidIPv4Address(host)) {
513                return true;
514            }
515            if (forceServer) {
516                throw new URISyntaxException(host, Msg.getString("K0311"), 0); //$NON-NLS-1$
517            }
518            return false;
519        }
520
521        private boolean isValidDomainName(String host) {
522            try {
523                URIEncoderDecoder.validateSimple(host, "-."); //$NON-NLS-1$
524            } catch (URISyntaxException e) {
525                return false;
526            }
527
528            String label = null;
529            StringTokenizer st = new StringTokenizer(host, "."); //$NON-NLS-1$
530            while (st.hasMoreTokens()) {
531                label = st.nextToken();
532                if (label.startsWith("-") || label.endsWith("-")) { //$NON-NLS-1$ //$NON-NLS-2$
533                    return false;
534                }
535            }
536
537            if (!label.equals(host)) {
538                char ch = label.charAt(0);
539                if (ch >= '0' && ch <= '9') {
540                    return false;
541                }
542            }
543            return true;
544        }
545
546        private boolean isValidIPv4Address(String host) {
547            int index;
548            int index2;
549            try {
550                int num;
551                index = host.indexOf('.');
552                num = Integer.parseInt(host.substring(0, index));
553                if (num < 0 || num > 255) {
554                    return false;
555                }
556                index2 = host.indexOf('.', index + 1);
557                num = Integer.parseInt(host.substring(index + 1, index2));
558                if (num < 0 || num > 255) {
559                    return false;
560                }
561                index = host.indexOf('.', index2 + 1);
562                num = Integer.parseInt(host.substring(index2 + 1, index));
563                if (num < 0 || num > 255) {
564                    return false;
565                }
566                num = Integer.parseInt(host.substring(index + 1));
567                if (num < 0 || num > 255) {
568                    return false;
569                }
570            } catch (Exception e) {
571                return false;
572            }
573            return true;
574        }
575
576        private boolean isValidIP6Address(String ipAddress) {
577            int length = ipAddress.length();
578            boolean doubleColon = false;
579            int numberOfColons = 0;
580            int numberOfPeriods = 0;
581            String word = ""; //$NON-NLS-1$
582            char c = 0;
583            char prevChar = 0;
584            int offset = 0; // offset for [] ip addresses
585
586            if (length < 2) {
587                return false;
588            }
589
590            for (int i = 0; i < length; i++) {
591                prevChar = c;
592                c = ipAddress.charAt(i);
593                switch (c) {
594
595                    // case for an open bracket [x:x:x:...x]
596                    case '[':
597                        if (i != 0) {
598                            return false; // must be first character
599                        }
600                        if (ipAddress.charAt(length - 1) != ']') {
601                            return false; // must have a close ]
602                        }
603                        if ((ipAddress.charAt(1) == ':')
604                                && (ipAddress.charAt(2) != ':')) {
605                            return false;
606                        }
607                        offset = 1;
608                        if (length < 4) {
609                            return false;
610                        }
611                        break;
612
613                    // case for a closed bracket at end of IP [x:x:x:...x]
614                    case ']':
615                        if (i != length - 1) {
616                            return false; // must be last charcter
617                        }
618                        if (ipAddress.charAt(0) != '[') {
619                            return false; // must have a open [
620                        }
621                        break;
622
623                    // case for the last 32-bits represented as IPv4
624                    // x:x:x:x:x:x:d.d.d.d
625                    case '.':
626                        numberOfPeriods++;
627                        if (numberOfPeriods > 3) {
628                            return false;
629                        }
630                        if (!isValidIP4Word(word)) {
631                            return false;
632                        }
633                        if (numberOfColons != 6 && !doubleColon) {
634                            return false;
635                        }
636                        // a special case ::1:2:3:4:5:d.d.d.d allows 7 colons
637                        // with
638                        // an IPv4 ending, otherwise 7 :'s is bad
639                        if (numberOfColons == 7
640                                && ipAddress.charAt(0 + offset) != ':'
641                                && ipAddress.charAt(1 + offset) != ':') {
642                            return false;
643                        }
644                        word = ""; //$NON-NLS-1$
645                        break;
646
647                    case ':':
648                        numberOfColons++;
649                        if (numberOfColons > 7) {
650                            return false;
651                        }
652                        if (numberOfPeriods > 0) {
653                            return false;
654                        }
655                        if (prevChar == ':') {
656                            if (doubleColon) {
657                                return false;
658                            }
659                            doubleColon = true;
660                        }
661                        word = ""; //$NON-NLS-1$
662                        break;
663
664                    default:
665                        if (word.length() > 3) {
666                            return false;
667                        }
668                        if (!isValidHexChar(c)) {
669                            return false;
670                        }
671                        word += c;
672                }
673            }
674
675            // Check if we have an IPv4 ending
676            if (numberOfPeriods > 0) {
677                if (numberOfPeriods != 3 || !isValidIP4Word(word)) {
678                    return false;
679                }
680            } else {
681                // If we're at then end and we haven't had 7 colons then there
682                // is a problem unless we encountered a doubleColon
683                if (numberOfColons != 7 && !doubleColon) {
684                    return false;
685                }
686
687                // If we have an empty word at the end, it means we ended in
688                // either a : or a .
689                // If we did not end in :: then this is invalid
690                if (word == "" && ipAddress.charAt(length - 1 - offset) != ':' //$NON-NLS-1$
691                        && ipAddress.charAt(length - 2 - offset) != ':') {
692                    return false;
693                }
694            }
695
696            return true;
697        }
698
699        private boolean isValidIP4Word(String word) {
700            char c;
701            if (word.length() < 1 || word.length() > 3) {
702                return false;
703            }
704            for (int i = 0; i < word.length(); i++) {
705                c = word.charAt(i);
706                if (!(c >= '0' && c <= '9')) {
707                    return false;
708                }
709            }
710            if (Integer.parseInt(word) > 255) {
711                return false;
712            }
713            return true;
714        }
715
716        private boolean isValidHexChar(char c) {
717
718            return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F')
719                    || (c >= 'a' && c <= 'f');
720        }
721    }
722
723    /*
724     * Quote illegal chars for each component, but not the others
725     *
726     * @param component java.lang.String the component to be converted @param
727     * legalset java.lang.String the legal character set allowed in the
728     * component s @return java.lang.String the converted string
729     */
730    private String quoteComponent(String component, String legalset) {
731        try {
732            /*
733             * Use a different encoder than URLEncoder since: 1. chars like "/",
734             * "#", "@" etc needs to be preserved instead of being encoded, 2.
735             * UTF-8 char set needs to be used for encoding instead of default
736             * platform one
737             */
738            return URIEncoderDecoder.quoteIllegal(component, legalset);
739        } catch (UnsupportedEncodingException e) {
740            throw new RuntimeException(e.toString());
741        }
742    }
743
744    public int compareTo(URI uri) {
745        int ret = 0;
746
747        // compare schemes
748        if (scheme == null && uri.scheme != null) {
749            return -1;
750        } else if (scheme != null && uri.scheme == null) {
751            return 1;
752        } else if (scheme != null && uri.scheme != null) {
753            ret = scheme.compareToIgnoreCase(uri.scheme);
754            if (ret != 0) {
755                return ret;
756            }
757        }
758
759        // compare opacities
760        if (!opaque && uri.opaque) {
761            return -1;
762        } else if (opaque && !uri.opaque) {
763            return 1;
764        } else if (opaque && uri.opaque) {
765            ret = schemespecificpart.compareTo(uri.schemespecificpart);
766            if (ret != 0) {
767                return ret;
768            }
769        } else {
770
771            // otherwise both must be hierarchical
772
773            // compare authorities
774            if (authority != null && uri.authority == null) {
775                return 1;
776            } else if (authority == null && uri.authority != null) {
777                return -1;
778            } else if (authority != null && uri.authority != null) {
779                if (host != null && uri.host != null) {
780                    // both are server based, so compare userinfo, host, port
781                    if (userinfo != null && uri.userinfo == null) {
782                        return 1;
783                    } else if (userinfo == null && uri.userinfo != null) {
784                        return -1;
785                    } else if (userinfo != null && uri.userinfo != null) {
786                        ret = userinfo.compareTo(uri.userinfo);
787                        if (ret != 0) {
788                            return ret;
789                        }
790                    }
791
792                    // userinfo's are the same, compare hostname
793                    ret = host.compareToIgnoreCase(uri.host);
794                    if (ret != 0) {
795                        return ret;
796                    }
797
798                    // compare port
799                    if (port != uri.port) {
800                        return port - uri.port;
801                    }
802                } else { // one or both are registry based, compare the whole
803                    // authority
804                    ret = authority.compareTo(uri.authority);
805                    if (ret != 0) {
806                        return ret;
807                    }
808                }
809            }
810
811            // authorities are the same
812            // compare paths
813            ret = path.compareTo(uri.path);
814            if (ret != 0) {
815                return ret;
816            }
817
818            // compare queries
819
820            if (query != null && uri.query == null) {
821                return 1;
822            } else if (query == null && uri.query != null) {
823                return -1;
824            } else if (query != null && uri.query != null) {
825                ret = query.compareTo(uri.query);
826                if (ret != 0) {
827                    return ret;
828                }
829            }
830        }
831
832        // everything else is identical, so compare fragments
833        if (fragment != null && uri.fragment == null) {
834            return 1;
835        } else if (fragment == null && uri.fragment != null) {
836            return -1;
837        } else if (fragment != null && uri.fragment != null) {
838            ret = fragment.compareTo(uri.fragment);
839            if (ret != 0) {
840                return ret;
841            }
842        }
843
844        // identical
845        return 0;
846    }
847
848    public static URI create(String uri) {
849        URI result = null;
850        try {
851            result = new URI(uri);
852        } catch (URISyntaxException e) {
853            throw new IllegalArgumentException(e.getMessage());
854        }
855        return result;
856    }
857
858    private URI duplicate() {
859        URI clone = new URI();
860        clone.absolute = absolute;
861        clone.authority = authority;
862        clone.fragment = fragment;
863        clone.host = host;
864        clone.opaque = opaque;
865        clone.path = path;
866        clone.port = port;
867        clone.query = query;
868        clone.scheme = scheme;
869        clone.schemespecificpart = schemespecificpart;
870        clone.userinfo = userinfo;
871        clone.serverAuthority = serverAuthority;
872        return clone;
873    }
874
875    /*
876     * Takes a string that may contain hex sequences like %F1 or %2b and
877     * converts the hex values following the '%' to lowercase
878     */
879    private String convertHexToLowerCase(String s) {
880        StringBuffer result = new StringBuffer(""); //$NON-NLS-1$
881        if (s.indexOf('%') == -1) {
882            return s;
883        }
884
885        int index = 0, previndex = 0;
886        while ((index = s.indexOf('%', previndex)) != -1) {
887            result.append(s.substring(previndex, index + 1));
888            result.append(s.substring(index + 1, index + 3).toLowerCase());
889            index += 3;
890            previndex = index;
891        }
892        return result.toString();
893    }
894
895    /*
896     * Takes two strings that may contain hex sequences like %F1 or %2b and
897     * compares them, ignoring case for the hex values hex values must always
898     * occur in pairs like above
899     */
900    private boolean equalsHexCaseInsensitive(String first, String second) {
901        if (first.indexOf('%') != second.indexOf('%')) {
902            return first.equals(second);
903        }
904
905        int index = 0, previndex = 0;
906        while ((index = first.indexOf('%', previndex)) != -1
907                && second.indexOf('%', previndex) == index) {
908            boolean match = first.substring(previndex, index).equals(
909                    second.substring(previndex, index));
910            if (!match) {
911                return false;
912            }
913
914            match = first.substring(index + 1, index + 3).equalsIgnoreCase(
915                    second.substring(index + 1, index + 3));
916            if (!match) {
917                return false;
918            }
919
920            index += 3;
921            previndex = index;
922        }
923        return first.substring(previndex).equals(second.substring(previndex));
924    }
925
926    @Override
927    public boolean equals(Object o) {
928        if (!(o instanceof URI)) {
929            return false;
930        }
931        URI uri = (URI) o;
932
933        if (uri.fragment == null && fragment != null || uri.fragment != null
934                && fragment == null) {
935            return false;
936        } else if (uri.fragment != null && fragment != null) {
937            if (!equalsHexCaseInsensitive(uri.fragment, fragment)) {
938                return false;
939            }
940        }
941
942        if (uri.scheme == null && scheme != null || uri.scheme != null
943                && scheme == null) {
944            return false;
945        } else if (uri.scheme != null && scheme != null) {
946            if (!uri.scheme.equalsIgnoreCase(scheme)) {
947                return false;
948            }
949        }
950
951        if (uri.opaque && opaque) {
952            return equalsHexCaseInsensitive(uri.schemespecificpart,
953                    schemespecificpart);
954        } else if (!uri.opaque && !opaque) {
955            if (!equalsHexCaseInsensitive(path, uri.path)) {
956                return false;
957            }
958
959            if (uri.query != null && query == null || uri.query == null
960                    && query != null) {
961                return false;
962            } else if (uri.query != null && query != null) {
963                if (!equalsHexCaseInsensitive(uri.query, query)) {
964                    return false;
965                }
966            }
967
968            if (uri.authority != null && authority == null
969                    || uri.authority == null && authority != null) {
970                return false;
971            } else if (uri.authority != null && authority != null) {
972                if (uri.host != null && host == null || uri.host == null
973                        && host != null) {
974                    return false;
975                } else if (uri.host == null && host == null) {
976                    // both are registry based, so compare the whole authority
977                    return equalsHexCaseInsensitive(uri.authority, authority);
978                } else { // uri.host != null && host != null, so server-based
979                    if (!host.equalsIgnoreCase(uri.host)) {
980                        return false;
981                    }
982
983                    if (port != uri.port) {
984                        return false;
985                    }
986
987                    if (uri.userinfo != null && userinfo == null
988                            || uri.userinfo == null && userinfo != null) {
989                        return false;
990                    } else if (uri.userinfo != null && userinfo != null) {
991                        return equalsHexCaseInsensitive(userinfo, uri.userinfo);
992                    } else {
993                        return true;
994                    }
995                }
996            } else {
997                // no authority
998                return true;
999            }
1000
1001        } else {
1002            // one is opaque, the other hierarchical
1003            return false;
1004        }
1005    }
1006
1007    public String getAuthority() {
1008        return decode(authority);
1009    }
1010
1011    /**
1012     * Returns the fragment component.
1013     *
1014     * @return String
1015     */
1016    public String getFragment() {
1017        return decode(fragment);
1018    }
1019
1020    /**
1021     * Returns the host component.
1022     *
1023     * @return String
1024     */
1025    public String getHost() {
1026        return host;
1027    }
1028
1029    /**
1030     * Returns the path component.
1031     *
1032     * @return String
1033     */
1034    public String getPath() {
1035        return decode(path);
1036    }
1037
1038    /**
1039     * Returns the port number.
1040     *
1041     * @return int
1042     */
1043    public int getPort() {
1044        return port;
1045    }
1046
1047    /**
1048     * Returns the query component.
1049     *
1050     * @return String
1051     */
1052    public String getQuery() {
1053        return decode(query);
1054    }
1055
1056    /**
1057     * Returns the authority component in raw form.
1058     *
1059     * @return String
1060     */
1061    public String getRawAuthority() {
1062        return authority;
1063    }
1064
1065    /**
1066     * Returns the fragment component in raw form.
1067     *
1068     * @return String
1069     */
1070    public String getRawFragment() {
1071        return fragment;
1072    }
1073
1074    /**
1075     * Returns the path component in raw form.
1076     *
1077     * @return String
1078     */
1079    public String getRawPath() {
1080        return path;
1081    }
1082
1083    /**
1084     * Returns the query component in raw form.
1085     *
1086     * @return String
1087     */
1088    public String getRawQuery() {
1089        return query;
1090    }
1091
1092    /**
1093     * Returns the scheme-specific part component in raw form.
1094     *
1095     * @return String
1096     */
1097    public String getRawSchemeSpecificPart() {
1098        return schemespecificpart;
1099    }
1100
1101    /**
1102     * Returns the user-info component in raw form.
1103     *
1104     * @return String
1105     */
1106    public String getRawUserInfo() {
1107        return userinfo;
1108    }
1109
1110    /**
1111     * Returns the scheme.
1112     *
1113     * @return String
1114     */
1115    public String getScheme() {
1116        return scheme;
1117    }
1118
1119    /**
1120     * Returns the scheme-specific part component.
1121     *
1122     * @return String
1123     */
1124    public String getSchemeSpecificPart() {
1125        return decode(schemespecificpart);
1126    }
1127
1128    /**
1129     * Returns the userinfo.
1130     *
1131     * @return String
1132     */
1133    public String getUserInfo() {
1134        return decode(userinfo);
1135    }
1136
1137    @Override
1138    public int hashCode() {
1139        if (hash == -1) {
1140            hash = getHashString().hashCode();
1141        }
1142        return hash;
1143    }
1144
1145    /**
1146     * Indicates whether this URI is absolute
1147     *
1148     * @return boolean
1149     */
1150    public boolean isAbsolute() {
1151        return absolute;
1152    }
1153
1154    /**
1155     * Indicates whether this URI is opaque
1156     *
1157     * @return true if the URI is opaque, otherwise false
1158     */
1159    public boolean isOpaque() {
1160        return opaque;
1161    }
1162
1163    /*
1164     * normalize path, and return the resulting string
1165     */
1166    private String normalize(String path) {
1167        // count the number of '/'s, to determine number of segments
1168        int index = -1;
1169        int pathlen = path.length();
1170        int size = 0;
1171        if (pathlen > 0 && path.charAt(0) != '/') {
1172            size++;
1173        }
1174        while ((index = path.indexOf('/', index + 1)) != -1) {
1175            if (index + 1 < pathlen && path.charAt(index + 1) != '/') {
1176                size++;
1177            }
1178        }
1179
1180        String[] seglist = new String[size];
1181        boolean[] include = new boolean[size];
1182
1183        // break the path into segments and store in the list
1184        int current = 0;
1185        int index2 = 0;
1186        index = (pathlen > 0 && path.charAt(0) == '/') ? 1 : 0;
1187        while ((index2 = path.indexOf('/', index + 1)) != -1) {
1188            seglist[current++] = path.substring(index, index2);
1189            index = index2 + 1;
1190        }
1191
1192        // if current==size, then the last character was a slash
1193        // and there are no more segments
1194        if (current < size) {
1195            seglist[current] = path.substring(index);
1196        }
1197
1198        // determine which segments get included in the normalized path
1199        for (int i = 0; i < size; i++) {
1200            include[i] = true;
1201            if (seglist[i].equals("..")) { //$NON-NLS-1$
1202                int remove = i - 1;
1203                // search back to find a segment to remove, if possible
1204                while (remove > -1 && !include[remove]) {
1205                    remove--;
1206                }
1207                // if we find a segment to remove, remove it and the ".."
1208                // segment
1209                if (remove > -1 && !seglist[remove].equals("..")) { //$NON-NLS-1$
1210                    include[remove] = false;
1211                    include[i] = false;
1212                }
1213            } else if (seglist[i].equals(".")) { //$NON-NLS-1$
1214                include[i] = false;
1215            }
1216        }
1217
1218        // put the path back together
1219        StringBuffer newpath = new StringBuffer();
1220        if (path.startsWith("/")) { //$NON-NLS-1$
1221            newpath.append('/');
1222        }
1223
1224        for (int i = 0; i < seglist.length; i++) {
1225            if (include[i]) {
1226                newpath.append(seglist[i]);
1227                newpath.append('/');
1228            }
1229        }
1230
1231        // if we used at least one segment and the path previously ended with
1232        // a slash and the last segment is still used, then delete the extra
1233        // trailing '/'
1234        if (!path.endsWith("/") && seglist.length > 0 //$NON-NLS-1$
1235                && include[seglist.length - 1]) {
1236            newpath.deleteCharAt(newpath.length() - 1);
1237        }
1238
1239        String result = newpath.toString();
1240
1241        // check for a ':' in the first segment if one exists,
1242        // prepend "./" to normalize
1243        index = result.indexOf(':');
1244        index2 = result.indexOf('/');
1245        if (index != -1 && (index < index2 || index2 == -1)) {
1246            newpath.insert(0, "./"); //$NON-NLS-1$
1247            result = newpath.toString();
1248        }
1249        return result;
1250    }
1251
1252    public URI normalize() {
1253        if (opaque) {
1254            return this;
1255        }
1256        String normalizedPath = normalize(path);
1257        // if the path is already normalized, return this
1258        if (path.equals(normalizedPath)) {
1259            return this;
1260        }
1261        // get an exact copy of the URI re-calculate the scheme specific part
1262        // since the path of the normalized URI is different from this URI.
1263        URI result = duplicate();
1264        result.path = normalizedPath;
1265        result.setSchemeSpecificPart();
1266        return result;
1267    }
1268
1269    /**
1270     * Return this uri instance if it has already been determined as a
1271     * ServerAuthority Otherwise try to parse it again as a server authority to
1272     * produce a URISyntaxException with the proper diagnostic message.
1273     */
1274    public URI parseServerAuthority() throws URISyntaxException {
1275        if (!serverAuthority) {
1276            new Helper().parseAuthority(true);
1277        }
1278        return this;
1279    }
1280
1281    public URI relativize(URI relative) {
1282        if (relative.opaque || opaque) {
1283            return relative;
1284        }
1285
1286        if (scheme == null ? relative.scheme != null : !scheme
1287                .equals(relative.scheme)) {
1288            return relative;
1289        }
1290
1291        if (authority == null ? relative.authority != null : !authority
1292                .equals(relative.authority)) {
1293            return relative;
1294        }
1295
1296        // normalize both paths
1297        String thisPath = normalize(path);
1298        String relativePath = normalize(relative.path);
1299
1300        /*
1301         * if the paths aren't equal, then we need to determine if this URI's
1302         * path is a parent path (begins with) the relative URI's path
1303         */
1304        if (!thisPath.equals(relativePath)) {
1305            // if this URI's path doesn't end in a '/', add one
1306            if (!thisPath.endsWith("/")) { //$NON-NLS-1$
1307                thisPath = thisPath + '/';
1308            }
1309            /*
1310             * if the relative URI's path doesn't start with this URI's path,
1311             * then just return the relative URI; the URIs have nothing in
1312             * common
1313             */
1314            if (!relativePath.startsWith(thisPath)) {
1315                return relative;
1316            }
1317        }
1318
1319        URI result = new URI();
1320        result.fragment = relative.fragment;
1321        result.query = relative.query;
1322        // the result URI is the remainder of the relative URI's path
1323        result.path = relativePath.substring(thisPath.length());
1324        return result;
1325    }
1326
1327    public URI resolve(URI relative) {
1328        if (relative.absolute || opaque) {
1329            return relative;
1330        }
1331
1332        URI result;
1333        if (relative.path.equals("") && relative.scheme == null //$NON-NLS-1$
1334                && relative.authority == null && relative.query == null
1335                && relative.fragment != null) {
1336            // if the relative URI only consists of fragment,
1337            // the resolved URI is very similar to this URI,
1338            // except that it has the fragement from the relative URI.
1339            result = duplicate();
1340            result.fragment = relative.fragment;
1341            // no need to re-calculate the scheme specific part,
1342            // since fragment is not part of scheme specific part.
1343            return result;
1344        }
1345
1346        if (relative.authority != null) {
1347            // if the relative URI has authority,
1348            // the resolved URI is almost the same as the relative URI,
1349            // except that it has the scheme of this URI.
1350            result = relative.duplicate();
1351            result.scheme = scheme;
1352            result.absolute = absolute;
1353        } else {
1354            // since relative URI has no authority,
1355            // the resolved URI is very similar to this URI,
1356            // except that it has the query and fragment of the relative URI,
1357            // and the path is different.
1358            result = duplicate();
1359            result.fragment = relative.fragment;
1360            result.query = relative.query;
1361            if (relative.path.startsWith("/")) { //$NON-NLS-1$
1362                result.path = relative.path;
1363            } else {
1364                // resolve a relative reference
1365                int endindex = path.lastIndexOf('/') + 1;
1366                result.path = normalize(path.substring(0, endindex)
1367                        + relative.path);
1368            }
1369            // re-calculate the scheme specific part since
1370            // query and path of the resolved URI is different from this URI.
1371            result.setSchemeSpecificPart();
1372        }
1373        return result;
1374    }
1375
1376    /**
1377     * Helper method used to re-calculate the scheme specific part of the
1378     * resolved or normalized URIs
1379     */
1380    private void setSchemeSpecificPart() {
1381        // ssp = [//authority][path][?query]
1382        StringBuffer ssp = new StringBuffer();
1383        if (authority != null) {
1384            ssp.append("//" + authority); //$NON-NLS-1$
1385        }
1386        if (path != null) {
1387            ssp.append(path);
1388        }
1389        if (query != null) {
1390            ssp.append("?" + query); //$NON-NLS-1$
1391        }
1392        schemespecificpart = ssp.toString();
1393        // reset string, so that it can be re-calculated correctly when asked.
1394        string = null;
1395    }
1396
1397    public URI resolve(String relative) {
1398        return resolve(create(relative));
1399    }
1400
1401    /*
1402     * Encode unicode chars that are not part of US-ASCII char set into the
1403     * escaped form
1404     *
1405     * i.e. The Euro currency symbol is encoded as "%E2%82%AC".
1406     *
1407     * @param component java.lang.String the component to be converted @param
1408     * legalset java.lang.String the legal character set allowed in the
1409     * component s @return java.lang.String the converted string
1410     */
1411    private String encodeOthers(String s) {
1412        try {
1413            /*
1414             * Use a different encoder than URLEncoder since: 1. chars like "/",
1415             * "#", "@" etc needs to be preserved instead of being encoded, 2.
1416             * UTF-8 char set needs to be used for encoding instead of default
1417             * platform one 3. Only other chars need to be converted
1418             */
1419            return URIEncoderDecoder.encodeOthers(s);
1420        } catch (UnsupportedEncodingException e) {
1421            throw new RuntimeException(e.toString());
1422        }
1423    }
1424
1425    private String decode(String s) {
1426        if (s == null) {
1427            return s;
1428        }
1429
1430        try {
1431            return URIEncoderDecoder.decode(s);
1432        } catch (UnsupportedEncodingException e) {
1433            throw new RuntimeException(e.toString());
1434        }
1435    }
1436
1437    public String toASCIIString() {
1438        return encodeOthers(toString());
1439    }
1440
1441    @Override
1442    public String toString() {
1443        if (string == null) {
1444            StringBuffer result = new StringBuffer();
1445            if (scheme != null) {
1446                result.append(scheme);
1447                result.append(':');
1448            }
1449            if (opaque) {
1450                result.append(schemespecificpart);
1451            } else {
1452                if (authority != null) {
1453                    result.append("//"); //$NON-NLS-1$
1454                    result.append(authority);
1455                }
1456
1457                if (path != null) {
1458                    result.append(path);
1459                }
1460
1461                if (query != null) {
1462                    result.append('?');
1463                    result.append(query);
1464                }
1465            }
1466
1467            if (fragment != null) {
1468                result.append('#');
1469                result.append(fragment);
1470            }
1471
1472            string = result.toString();
1473        }
1474        return string;
1475    }
1476
1477    /*
1478     * Form a string from the components of this URI, similarly to the
1479     * toString() method. But this method converts scheme and host to lowercase,
1480     * and converts escaped octets to lowercase.
1481     */
1482    private String getHashString() {
1483        StringBuffer result = new StringBuffer();
1484        if (scheme != null) {
1485            result.append(scheme.toLowerCase());
1486            result.append(':');
1487        }
1488        if (opaque) {
1489            result.append(schemespecificpart);
1490        } else {
1491            if (authority != null) {
1492                result.append("//"); //$NON-NLS-1$
1493                if (host == null) {
1494                    result.append(authority);
1495                } else {
1496                    if (userinfo != null) {
1497                        result.append(userinfo + "@"); //$NON-NLS-1$
1498                    }
1499                    result.append(host.toLowerCase());
1500                    if (port != -1) {
1501                        result.append(":" + port); //$NON-NLS-1$
1502                    }
1503                }
1504            }
1505
1506            if (path != null) {
1507                result.append(path);
1508            }
1509
1510            if (query != null) {
1511                result.append('?');
1512                result.append(query);
1513            }
1514        }
1515
1516        if (fragment != null) {
1517            result.append('#');
1518            result.append(fragment);
1519        }
1520
1521        return convertHexToLowerCase(result.toString());
1522    }
1523
1524    public URL toURL() throws MalformedURLException {
1525        if (!absolute) {
1526            throw new IllegalArgumentException(Msg.getString("K0312") + ": " //$NON-NLS-1$//$NON-NLS-2$
1527                    + toString());
1528        }
1529        return new URL(toString());
1530    }
1531
1532     private void readObject(ObjectInputStream in) throws IOException,
1533            ClassNotFoundException {
1534        in.defaultReadObject();
1535        try {
1536            new Helper().parseURI(string, false);
1537        } catch (URISyntaxException e) {
1538            throw new IOException(e.toString());
1539        }
1540    }
1541
1542    private void writeObject(ObjectOutputStream out) throws IOException,
1543            ClassNotFoundException {
1544        // call toString() to ensure the value of string field is calculated
1545        toString();
1546        out.defaultWriteObject();
1547    }
1548}
1549