URLUtil.java revision f013e1afd1e68af5e3b868c26a653bbfb39538f8
1/*
2 * Copyright (C) 2006 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package android.webkit;
18
19import java.io.UnsupportedEncodingException;
20import java.util.regex.Matcher;
21import java.util.regex.Pattern;
22
23import android.net.Uri;
24import android.net.ParseException;
25import android.net.WebAddress;
26import android.util.Config;
27import android.util.Log;
28
29public final class URLUtil {
30
31    private static final String LOGTAG = "webkit";
32
33    static final String ASSET_BASE = "file:///android_asset/";
34    static final String FILE_BASE = "file://";
35    static final String PROXY_BASE = "file:///cookieless_proxy/";
36
37    /**
38     * Cleans up (if possible) user-entered web addresses
39     */
40    public static String guessUrl(String inUrl) {
41
42        String retVal = inUrl;
43        WebAddress webAddress;
44
45        Log.v(LOGTAG, "guessURL before queueRequest: " + inUrl);
46
47        if (inUrl.length() == 0) return inUrl;
48        if (inUrl.startsWith("about:")) return inUrl;
49        // Do not try to interpret data scheme URLs
50        if (inUrl.startsWith("data:")) return inUrl;
51        // Do not try to interpret file scheme URLs
52        if (inUrl.startsWith("file:")) return inUrl;
53        // Do not try to interpret javascript scheme URLs
54        if (inUrl.startsWith("javascript:")) return inUrl;
55
56        // bug 762454: strip period off end of url
57        if (inUrl.endsWith(".") == true) {
58            inUrl = inUrl.substring(0, inUrl.length() - 1);
59        }
60
61        try {
62            webAddress = new WebAddress(inUrl);
63        } catch (ParseException ex) {
64
65            if (Config.LOGV) {
66                Log.v(LOGTAG, "smartUrlFilter: failed to parse url = " + inUrl);
67            }
68            return retVal;
69        }
70
71        // Check host
72        if (webAddress.mHost.indexOf('.') == -1) {
73            // no dot: user probably entered a bare domain.  try .com
74            webAddress.mHost = "www." + webAddress.mHost + ".com";
75        }
76        return webAddress.toString();
77    }
78
79    public static String composeSearchUrl(String inQuery, String template,
80                                          String queryPlaceHolder) {
81        int placeHolderIndex = template.indexOf(queryPlaceHolder);
82        if (placeHolderIndex < 0) {
83            return null;
84        }
85
86        String query;
87        StringBuilder buffer = new StringBuilder();
88        buffer.append(template.substring(0, placeHolderIndex));
89
90        try {
91            query = java.net.URLEncoder.encode(inQuery, "utf-8");
92            buffer.append(query);
93        } catch (UnsupportedEncodingException ex) {
94            return null;
95        }
96
97        buffer.append(template.substring(
98                placeHolderIndex + queryPlaceHolder.length()));
99
100        return buffer.toString();
101    }
102
103    public static byte[] decode(byte[] url) throws IllegalArgumentException {
104        if (url.length == 0) {
105            return new byte[0];
106        }
107
108        // Create a new byte array with the same length to ensure capacity
109        byte[] tempData = new byte[url.length];
110
111        int tempCount = 0;
112        for (int i = 0; i < url.length; i++) {
113            byte b = url[i];
114            if (b == '%') {
115                if (url.length - i > 2) {
116                    b = (byte) (parseHex(url[i + 1]) * 16
117                            + parseHex(url[i + 2]));
118                    i += 2;
119                } else {
120                    throw new IllegalArgumentException("Invalid format");
121                }
122            }
123            tempData[tempCount++] = b;
124        }
125        byte[] retData = new byte[tempCount];
126        System.arraycopy(tempData, 0, retData, 0, tempCount);
127        return retData;
128    }
129
130    private static int parseHex(byte b) {
131        if (b >= '0' && b <= '9') return (b - '0');
132        if (b >= 'A' && b <= 'F') return (b - 'A' + 10);
133        if (b >= 'a' && b <= 'f') return (b - 'a' + 10);
134
135        throw new IllegalArgumentException("Invalid hex char '" + b + "'");
136    }
137
138    /**
139     * @return True iff the url is an asset file.
140     */
141    public static boolean isAssetUrl(String url) {
142        return (null != url) && url.startsWith(ASSET_BASE);
143    }
144
145    /**
146     * @return True iff the url is an proxy url to allow cookieless network
147     * requests from a file url.
148     * @deprecated Cookieless proxy is no longer supported.
149     */
150    public static boolean isCookielessProxyUrl(String url) {
151        return (null != url) && url.startsWith(PROXY_BASE);
152    }
153
154    /**
155     * @return True iff the url is a local file.
156     */
157    public static boolean isFileUrl(String url) {
158        return (null != url) && (url.startsWith(FILE_BASE) &&
159                                 !url.startsWith(ASSET_BASE) &&
160                                 !url.startsWith(PROXY_BASE));
161    }
162
163    /**
164     * @return True iff the url is an about: url.
165     */
166    public static boolean isAboutUrl(String url) {
167        return (null != url) && url.startsWith("about:");
168    }
169
170    /**
171     * @return True iff the url is a data: url.
172     */
173    public static boolean isDataUrl(String url) {
174        return (null != url) && url.startsWith("data:");
175    }
176
177    /**
178     * @return True iff the url is a javascript: url.
179     */
180    public static boolean isJavaScriptUrl(String url) {
181        return (null != url) && url.startsWith("javascript:");
182    }
183
184    /**
185     * @return True iff the url is an http: url.
186     */
187    public static boolean isHttpUrl(String url) {
188        return (null != url) &&
189               (url.length() > 6) &&
190               url.substring(0, 7).equalsIgnoreCase("http://");
191    }
192
193    /**
194     * @return True iff the url is an https: url.
195     */
196    public static boolean isHttpsUrl(String url) {
197        return (null != url) &&
198               (url.length() > 7) &&
199               url.substring(0, 8).equalsIgnoreCase("https://");
200    }
201
202    /**
203     * @return True iff the url is a network url.
204     */
205    public static boolean isNetworkUrl(String url) {
206        if (url == null || url.length() == 0) {
207            return false;
208        }
209        return isHttpUrl(url) || isHttpsUrl(url);
210    }
211
212    /**
213     * @return True iff the url is a content: url.
214     */
215    public static boolean isContentUrl(String url) {
216        return (null != url) && url.startsWith("content:");
217    }
218
219    /**
220     * @return True iff the url is valid.
221     */
222    public static boolean isValidUrl(String url) {
223        if (url == null || url.length() == 0) {
224            return false;
225        }
226
227        return (isAssetUrl(url) ||
228                isFileUrl(url) ||
229                isAboutUrl(url) ||
230                isHttpUrl(url) ||
231                isHttpsUrl(url) ||
232                isJavaScriptUrl(url) ||
233                isContentUrl(url));
234    }
235
236    /**
237     * Strips the url of the anchor.
238     */
239    public static String stripAnchor(String url) {
240        int anchorIndex = url.indexOf('#');
241        if (anchorIndex != -1) {
242            return url.substring(0, anchorIndex);
243        }
244        return url;
245    }
246
247    /**
248     * Guesses canonical filename that a download would have, using
249     * the URL and contentDisposition. File extension, if not defined,
250     * is added based on the mimetype
251     * @param url Url to the content
252     * @param contentDisposition Content-Disposition HTTP header or null
253     * @param mimeType Mime-type of the content or null
254     *
255     * @return suggested filename
256     */
257    public static final String guessFileName(
258            String url,
259            String contentDisposition,
260            String mimeType) {
261        String filename = null;
262        String extension = null;
263
264        // If we couldn't do anything with the hint, move toward the content disposition
265        if (filename == null && contentDisposition != null) {
266            filename = parseContentDisposition(contentDisposition);
267            if (filename != null) {
268                int index = filename.lastIndexOf('/') + 1;
269                if (index > 0) {
270                    filename = filename.substring(index);
271                }
272            }
273        }
274
275        // If all the other http-related approaches failed, use the plain uri
276        if (filename == null) {
277            String decodedUrl = Uri.decode(url);
278            if (decodedUrl != null) {
279                int queryIndex = decodedUrl.indexOf('?');
280                // If there is a query string strip it, same as desktop browsers
281                if (queryIndex > 0) {
282                    decodedUrl = decodedUrl.substring(0, queryIndex);
283                }
284                if (!decodedUrl.endsWith("/")) {
285                    int index = decodedUrl.lastIndexOf('/') + 1;
286                    if (index > 0) {
287                        filename = decodedUrl.substring(index);
288                    }
289                }
290            }
291        }
292
293        // Finally, if couldn't get filename from URI, get a generic filename
294        if (filename == null) {
295            filename = "downloadfile";
296        }
297
298        // Split filename between base and extension
299        // Add an extension if filename does not have one
300        int dotIndex = filename.indexOf('.');
301        if (dotIndex < 0) {
302            if (mimeType != null) {
303                extension = MimeTypeMap.getSingleton().getExtensionFromMimeType(mimeType);
304                if (extension != null) {
305                    extension = "." + extension;
306                }
307            }
308            if (extension == null) {
309                if (mimeType != null && mimeType.toLowerCase().startsWith("text/")) {
310                    if (mimeType.equalsIgnoreCase("text/html")) {
311                        extension = ".html";
312                    } else {
313                        extension = ".txt";
314                    }
315                } else {
316                    extension = ".bin";
317                }
318            }
319        } else {
320            if (mimeType != null) {
321                // Compare the last segment of the extension against the mime type.
322                // If there's a mismatch, discard the entire extension.
323                int lastDotIndex = filename.lastIndexOf('.');
324                String typeFromExt = MimeTypeMap.getSingleton().getMimeTypeFromExtension(
325                        filename.substring(lastDotIndex + 1));
326                if (typeFromExt != null && !typeFromExt.equalsIgnoreCase(mimeType)) {
327                    extension = MimeTypeMap.getSingleton().getExtensionFromMimeType(mimeType);
328                    if (extension != null) {
329                        extension = "." + extension;
330                    }
331                }
332            }
333            if (extension == null) {
334                extension = filename.substring(dotIndex);
335            }
336            filename = filename.substring(0, dotIndex);
337        }
338
339        return filename + extension;
340    }
341
342    /** Regex used to parse content-disposition headers */
343    private static final Pattern CONTENT_DISPOSITION_PATTERN =
344            Pattern.compile("attachment;\\s*filename\\s*=\\s*\"([^\"]*)\"");
345
346    /*
347     * Parse the Content-Disposition HTTP Header. The format of the header
348     * is defined here: http://www.w3.org/Protocols/rfc2616/rfc2616-sec19.html
349     * This header provides a filename for content that is going to be
350     * downloaded to the file system. We only support the attachment type.
351     */
352    private static String parseContentDisposition(String contentDisposition) {
353        try {
354            Matcher m = CONTENT_DISPOSITION_PATTERN.matcher(contentDisposition);
355            if (m.find()) {
356                return m.group(1);
357            }
358        } catch (IllegalStateException ex) {
359             // This function is defined as returning null when it can't parse the header
360        }
361        return null;
362    }
363}
364