URLStreamHandler.java revision 6c434a1215049a76ba82fea69e8c5aa76cad955b
1/*
2 *  Licensed to the Apache Software Foundation (ASF) under one or more
3 *  contributor license agreements.  See the NOTICE file distributed with
4 *  this work for additional information regarding copyright ownership.
5 *  The ASF licenses this file to You under the Apache License, Version 2.0
6 *  (the "License"); you may not use this file except in compliance with
7 *  the License.  You may obtain a copy of the License at
8 *
9 *     http://www.apache.org/licenses/LICENSE-2.0
10 *
11 *  Unless required by applicable law or agreed to in writing, software
12 *  distributed under the License is distributed on an "AS IS" BASIS,
13 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 *  See the License for the specific language governing permissions and
15 *  limitations under the License.
16 */
17
18package java.net;
19
20import java.io.IOException;
21import java.nio.charset.Charsets;
22import java.util.Locale;
23import libcore.util.Objects;
24import org.apache.harmony.luni.util.URLUtil;
25
26/**
27 * The abstract class {@code URLStreamHandler} is the base for all classes which
28 * can handle the communication with a URL object over a particular protocol
29 * type.
30 */
31public abstract class URLStreamHandler {
    /**
     * Establishes a new connection to the resource specified by the URL
     * {@code u}. Since each protocol has its own way of connecting, this
     * method is abstract and must be overridden by every concrete subclass.
     *
     * @param u
     *            the URL to the resource where a connection has to be opened.
     * @return the opened URLConnection to the specified resource.
     * @throws IOException
     *             if an I/O error occurs while opening the connection.
     */
    protected abstract URLConnection openConnection(URL u) throws IOException;
44
    /**
     * Establishes a new connection to the resource specified by the URL
     * {@code u} using the given {@code proxy}.
     *
     * <p>This base implementation does not support proxies: it always throws
     * {@code UnsupportedOperationException}. Protocol handlers that can
     * connect through a proxy must override this method.
     *
     * @param u
     *            the URL to the resource where a connection has to be opened.
     * @param proxy
     *            the proxy that is used to make the connection.
     * @return the opened URLConnection to the specified resource.
     * @throws IOException
     *             if an I/O error occurs during opening the connection.
     * @throws IllegalArgumentException
     *             if any argument is {@code null} or the type of proxy is
     *             wrong (contract for overriding implementations; this base
     *             implementation performs no argument checks).
     * @throws UnsupportedOperationException
     *             if the protocol handler doesn't support this method.
     */
    protected URLConnection openConnection(URL u, Proxy proxy) throws IOException {
        // Default behaviour: proxies are unsupported unless a subclass overrides this.
        throw new UnsupportedOperationException();
    }
66
67    /**
68     * Parses the clear text URL in {@code str} into a URL object. URL strings
69     * generally have the following format:
70     * <p>
71     * http://www.company.com/java/file1.java#reference
72     * <p>
73     * The string is parsed in HTTP format. If the protocol has a different URL
74     * format this method must be overridden.
75     *
76     * @param u
77     *            the URL to fill in the parsed clear text URL parts.
78     * @param str
79     *            the URL string that is to be parsed.
80     * @param start
81     *            the string position from where to begin parsing.
82     * @param end
83     *            the string position to stop parsing.
84     * @see #toExternalForm
85     * @see URL
86     */
87    protected void parseURL(URL u, String str, int start, int end) {
88        // For compatibility, refer to Harmony-2941
89        if (str.startsWith("//", start)
90                && str.indexOf('/', start + 2) == -1
91                && end <= Integer.MIN_VALUE + 1) {
92            throw new StringIndexOutOfBoundsException(end - 2 - start);
93        }
94        if (end < start) {
95            if (this != u.strmHandler) {
96                throw new SecurityException();
97            }
98            return;
99        }
100        String parseString = "";
101        if (start < end) {
102            parseString = str.substring(start, end);
103        }
104        end -= start;
105        int fileIdx = 0;
106
107        // Default is to use info from context
108        String host = u.getHost();
109        int port = u.getPort();
110        String ref = u.getRef();
111        String file = u.getPath();
112        String query = u.getQuery();
113        String authority = u.getAuthority();
114        String userInfo = u.getUserInfo();
115
116        int refIdx = parseString.indexOf('#', 0);
117        if (parseString.startsWith("//")) {
118            int hostIdx = 2;
119            port = -1;
120            fileIdx = parseString.indexOf('/', hostIdx);
121            int questionMarkIndex = parseString.indexOf('?', hostIdx);
122            if (questionMarkIndex != -1 && (fileIdx == -1 || fileIdx > questionMarkIndex)) {
123                fileIdx = questionMarkIndex;
124            }
125            if (fileIdx == -1) {
126                fileIdx = end;
127                // Use default
128                file = "";
129            }
130            int hostEnd = fileIdx;
131            if (refIdx != -1 && refIdx < fileIdx) {
132                hostEnd = refIdx;
133                fileIdx = refIdx;
134                file = "";
135            }
136            int userIdx = parseString.lastIndexOf('@', hostEnd);
137            authority = parseString.substring(hostIdx, hostEnd);
138            if (userIdx != -1) {
139                userInfo = parseString.substring(hostIdx, userIdx);
140                hostIdx = userIdx + 1;
141            }
142
143            int endOfIPv6Addr = parseString.indexOf(']', hostIdx);
144            if (endOfIPv6Addr >= hostEnd) {
145                endOfIPv6Addr = -1;
146            }
147
148            // the port separator must be immediately after an IPv6 address "http://[::1]:80/"
149            int portIdx = -1;
150            if (endOfIPv6Addr != -1) {
151                int maybeColon = endOfIPv6Addr + 1;
152                if (maybeColon < hostEnd && parseString.charAt(maybeColon) == ':') {
153                    portIdx = maybeColon;
154                }
155            } else {
156                portIdx = parseString.indexOf(':', hostIdx);
157            }
158
159            if (portIdx == -1 || portIdx > hostEnd) {
160                host = parseString.substring(hostIdx, hostEnd);
161            } else {
162                host = parseString.substring(hostIdx, portIdx);
163                String portString = parseString.substring(portIdx + 1, hostEnd);
164                if (portString.length() == 0) {
165                    port = -1;
166                } else {
167                    port = Integer.parseInt(portString);
168                }
169            }
170        }
171
172        if (refIdx > -1) {
173            ref = parseString.substring(refIdx + 1, end);
174        }
175        int fileEnd = (refIdx == -1 ? end : refIdx);
176
177        int queryIdx = parseString.lastIndexOf('?', fileEnd);
178        boolean canonicalize = false;
179        if (queryIdx > -1) {
180            query = parseString.substring(queryIdx + 1, fileEnd);
181            if (queryIdx == 0 && file != null) {
182                if (file.isEmpty()) {
183                    file = "/";
184                } else if (file.startsWith("/")) {
185                    canonicalize = true;
186                }
187                int last = file.lastIndexOf('/') + 1;
188                file = file.substring(0, last);
189            }
190            fileEnd = queryIdx;
191        } else
192        // Don't inherit query unless only the ref is changed
193        if (refIdx != 0) {
194            query = null;
195        }
196
197        if (fileIdx > -1) {
198            if (fileIdx < end && parseString.charAt(fileIdx) == '/') {
199                file = parseString.substring(fileIdx, fileEnd);
200            } else if (fileEnd > fileIdx) {
201                if (file == null) {
202                    file = "";
203                } else if (file.isEmpty()) {
204                    file = "/";
205                } else if (file.startsWith("/")) {
206                    canonicalize = true;
207                }
208                int last = file.lastIndexOf('/') + 1;
209                if (last == 0) {
210                    file = parseString.substring(fileIdx, fileEnd);
211                } else {
212                    file = file.substring(0, last)
213                            + parseString.substring(fileIdx, fileEnd);
214                }
215            }
216        }
217        if (file == null) {
218            file = "";
219        }
220
221        if (host == null) {
222            host = "";
223        }
224
225        if (canonicalize) {
226            // modify file if there's any relative referencing
227            file = URLUtil.canonicalizePath(file);
228        }
229
230        setURL(u, u.getProtocol(), host, port, authority, userInfo, file,
231                query, ref);
232    }
233
234    /**
235     * Sets the fields of the URL {@code u} to the values of the supplied
236     * arguments.
237     *
238     * @param u
239     *            the non-null URL object to be set.
240     * @param protocol
241     *            the protocol.
242     * @param host
243     *            the host name.
244     * @param port
245     *            the port number.
246     * @param file
247     *            the file component.
248     * @param ref
249     *            the reference.
250     * @deprecated use setURL(URL, String String, int, String, String, String,
251     *             String, String) instead.
252     */
253    @Deprecated
254    protected void setURL(URL u, String protocol, String host, int port,
255            String file, String ref) {
256        if (this != u.strmHandler) {
257            throw new SecurityException();
258        }
259        u.set(protocol, host, port, file, ref);
260    }
261
262    /**
263     * Sets the fields of the URL {@code u} to the values of the supplied
264     * arguments.
265     *
266     * @param u
267     *            the non-null URL object to be set.
268     * @param protocol
269     *            the protocol.
270     * @param host
271     *            the host name.
272     * @param port
273     *            the port number.
274     * @param authority
275     *            the authority.
276     * @param userInfo
277     *            the user info.
278     * @param file
279     *            the file component.
280     * @param query
281     *            the query.
282     * @param ref
283     *            the reference.
284     */
285    protected void setURL(URL u, String protocol, String host, int port,
286            String authority, String userInfo, String file, String query,
287            String ref) {
288        if (this != u.strmHandler) {
289            throw new SecurityException();
290        }
291        u.set(protocol, host, port, authority, userInfo, file, query, ref);
292    }
293
    /**
     * Returns the clear text representation of a given URL using HTTP format.
     * Delegates to {@code toExternalForm(url, false)}, i.e. the components are
     * emitted as stored, without escaping illegal characters.
     *
     * @param url
     *            the URL object to be converted.
     * @return the clear text representation of the specified URL.
     * @see #parseURL
     * @see URL#toExternalForm()
     */
    protected String toExternalForm(URL url) {
        return toExternalForm(url, false);
    }
306
307    String toExternalForm(URL url, boolean escapeIllegalCharacters) {
308        StringBuilder result = new StringBuilder();
309        result.append(url.getProtocol());
310        result.append(':');
311
312        String authority = url.getAuthority();
313        if (authority != null && !authority.isEmpty()) {
314            result.append("//");
315            if (escapeIllegalCharacters) {
316                authority = fixEncoding(authority, "$,;@&=+:[]");
317            }
318            result.append(authority);
319        }
320
321        String fileAndQuery = url.getFile();
322        if (fileAndQuery != null) {
323            if (escapeIllegalCharacters) {
324                fileAndQuery = fixEncoding(fileAndQuery, "$,;@&=+:/?");
325            }
326            result.append(fileAndQuery);
327        }
328
329        String ref = url.getRef();
330        if (ref != null) {
331            result.append('#');
332            if (escapeIllegalCharacters) {
333                ref = fixEncoding(ref, "$,;@&=+:/?[]");
334            }
335            result.append(ref);
336        }
337
338        return result.toString();
339    }
340
341    /**
342     * Escapes the unescaped characters of {@code s} that are not permitted.
343     * Permitted characters are:
344     * <ul>
345     *   <li>Unreserved characters in RFC 2396.
346     *   <li>{@code extraOkayChars},
347     *   <li>non-ASCII, non-control, non-whitespace characters
348     * </ul>
349     *
350     * <p>Unlike the methods in {@code URI}, this method ignores input that has
351     * already been escaped. For example, input of "hello%20world" is unchanged
352     * by this method but would be double-escaped to "hello%2520world" by URI.
353     *
354     * <p>UTF-8 is used to encode escaped characters. A single input character
355     * like "\u0080" may be encoded to multiple octets like %C2%80.
356     */
357    private String fixEncoding(String s, String extraPermittedChars) {
358        StringBuilder result = null;
359        int copiedCount = 0;
360
361        for (int i = 0; i < s.length(); i++) {
362            char c = s.charAt(i);
363
364            if (c == '%') {
365                i += 2; // this is a 3-character sequence like "%20"
366                continue;
367            }
368
369            // unreserved characters: alphanum | mark
370            if (c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || c >= '0' && c <= '9'
371                    || c == '-' || c == '_' || c == '.' || c == '!' || c == '~'
372                    || c == '*' || c == '\'' || c == '(' || c == ')') {
373                continue;
374            }
375
376            // characters permitted in this context
377            if (extraPermittedChars.indexOf(c) != -1) {
378                continue;
379            }
380
381            // other characters
382            if (c > 0x7f && !Character.isISOControl(c) && !Character.isSpaceChar(c)) {
383                continue;
384            }
385
386            /*
387             * We've encountered a character that must be escaped.
388             */
389            if (result == null) {
390                result = new StringBuilder();
391            }
392            result.append(s, copiedCount, i);
393
394            if (c < 0x7f) {
395                appendHex(result, c);
396            } else {
397                for (byte b : s.substring(i, i + 1).getBytes(Charsets.UTF_8)) {
398                    appendHex(result, b & 0xff);
399                }
400            }
401
402            copiedCount = i + 1;
403        }
404
405        if (result == null) {
406            return s;
407        } else {
408            result.append(s, copiedCount, s.length());
409            return result.toString();
410        }
411    }
412
413    private void appendHex(StringBuilder stringBuilder, int b) {
414        String hex = Integer.toHexString(b).toUpperCase(Locale.US);
415        stringBuilder.append(hex.length() == 1 ? "%0" : "%").append(hex); // always 2 hex digits
416    }
417
418    /**
419     * Compares two URL objects whether they represent the same URL. Two URLs
420     * are equal if they have the same file, host, port, protocol, query, and
421     * reference components.
422     *
423     * @param url1
424     *            the first URL to compare.
425     * @param url2
426     *            the second URL to compare.
427     * @return {@code true} if the URLs are the same, {@code false} otherwise.
428     * @see #hashCode
429     */
430    protected boolean equals(URL url1, URL url2) {
431        if (!sameFile(url1, url2)) {
432            return false;
433        }
434        return Objects.equal(url1.getRef(), url2.getRef())
435                && Objects.equal(url1.getQuery(), url2.getQuery());
436    }
437
    /**
     * Returns the default port of the protocol used by the handled URL. This
     * base implementation always returns {@code -1}; handlers for protocols
     * that have a well-known port are expected to override it.
     *
     * @return the appropriate default port number of the protocol.
     */
    protected int getDefaultPort() {
        return -1;
    }
447
448    /**
449     * Returns the host address of the given URL.
450     *
451     * @param url
452     *            the URL object where to read the host address from.
453     * @return the host address of the specified URL.
454     */
455    protected InetAddress getHostAddress(URL url) {
456        try {
457            String host = url.getHost();
458            if (host == null || host.length() == 0) {
459                return null;
460            }
461            return InetAddress.getByName(host);
462        } catch (UnknownHostException e) {
463            return null;
464        }
465    }
466
    /**
     * Returns the hashcode value for the given URL object. The value is the
     * hash of the string produced by {@link #toExternalForm(URL)}.
     *
     * <p>NOTE(review): {@link #equals(URL, URL)} compares hosts
     * case-insensitively and may fall back to DNS resolution, whereas this
     * hash is computed from the literal external form. URLs that compare equal
     * may therefore hash differently - confirm whether callers rely on the
     * equals/hashCode contract.
     *
     * @param url
     *            the URL to determine the hashcode.
     * @return the hashcode of the given URL.
     */
    protected int hashCode(URL url) {
        return toExternalForm(url).hashCode();
    }
477
478    /**
479     * Compares two URL objects whether they refer to the same host.
480     *
481     * @param a the first URL to be compared.
482     * @param b the second URL to be compared.
483     * @return {@code true} if both URLs refer to the same host, {@code false}
484     *         otherwise.
485     */
486    protected boolean hostsEqual(URL a, URL b) {
487        /*
488         * URLs with the same case-insensitive host name have equal hosts
489         */
490        String aHost = getHost(a);
491        String bHost = getHost(b);
492        if (aHost != null && aHost.equalsIgnoreCase(bHost)) {
493            return true;
494        }
495
496        /*
497         * Call out to DNS to resolve the host addresses. If this succeeds for
498         * both addresses and both addresses yield the same InetAddress, report
499         * equality.
500         *
501         * Although it's consistent with historical behavior of the RI, this
502         * approach is fundamentally broken. In particular, acting upon this
503         * result is bogus because a single server may serve content for many
504         * unrelated host names.
505         */
506        InetAddress aResolved = getHostAddress(a);
507        return aResolved != null && aResolved.equals(getHostAddress(b));
508    }
509
510    /**
511     * Compares two URL objects whether they refer to the same file. In the
512     * comparison included are the URL components protocol, host, port and file.
513     *
514     * @param url1
515     *            the first URL to be compared.
516     * @param url2
517     *            the second URL to be compared.
518     * @return {@code true} if both URLs refer to the same file, {@code false}
519     *         otherwise.
520     */
521    protected boolean sameFile(URL url1, URL url2) {
522        return Objects.equal(url1.getProtocol(), url2.getProtocol())
523                && Objects.equal(url1.getFile(), url2.getFile())
524                && hostsEqual(url1, url2)
525                && url1.getEffectivePort() == url2.getEffectivePort();
526    }
527
528    /*
529     * If the URL host is empty while protocal is file, the host is regarded as
530     * localhost.
531     */
532    private static String getHost(URL url) {
533        String host = url.getHost();
534        if ("file".equals(url.getProtocol()) && host != null && host.isEmpty()) {
535            host = "localhost";
536        }
537        return host;
538    }
539}
540