URLUtil.java revision 4a51c20ce607c74914f90fd897f04080121ac13b
1/*
2 * Copyright (C) 2006 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package android.webkit;
18
19import java.io.UnsupportedEncodingException;
20import java.util.regex.Matcher;
21import java.util.regex.Pattern;
22
23import android.net.Uri;
24import android.net.ParseException;
25import android.net.WebAddress;
26import android.util.Log;
27
28public final class URLUtil {
29
30    private static final String LOGTAG = "webkit";
31
32    static final String ASSET_BASE = "file:///android_asset/";
33    static final String FILE_BASE = "file://";
34    static final String PROXY_BASE = "file:///cookieless_proxy/";
35
36    /**
37     * Cleans up (if possible) user-entered web addresses
38     */
39    public static String guessUrl(String inUrl) {
40
41        String retVal = inUrl;
42        WebAddress webAddress;
43
44        Log.v(LOGTAG, "guessURL before queueRequest: " + inUrl);
45
46        if (inUrl.length() == 0) return inUrl;
47        if (inUrl.startsWith("about:")) return inUrl;
48        // Do not try to interpret data scheme URLs
49        if (inUrl.startsWith("data:")) return inUrl;
50        // Do not try to interpret file scheme URLs
51        if (inUrl.startsWith("file:")) return inUrl;
52        // Do not try to interpret javascript scheme URLs
53        if (inUrl.startsWith("javascript:")) return inUrl;
54
55        // bug 762454: strip period off end of url
56        if (inUrl.endsWith(".") == true) {
57            inUrl = inUrl.substring(0, inUrl.length() - 1);
58        }
59
60        try {
61            webAddress = new WebAddress(inUrl);
62        } catch (ParseException ex) {
63
64            if (DebugFlags.URL_UTIL) {
65                Log.v(LOGTAG, "smartUrlFilter: failed to parse url = " + inUrl);
66            }
67            return retVal;
68        }
69
70        // Check host
71        if (webAddress.mHost.indexOf('.') == -1) {
72            // no dot: user probably entered a bare domain.  try .com
73            webAddress.mHost = "www." + webAddress.mHost + ".com";
74        }
75        return webAddress.toString();
76    }
77
78    public static String composeSearchUrl(String inQuery, String template,
79                                          String queryPlaceHolder) {
80        int placeHolderIndex = template.indexOf(queryPlaceHolder);
81        if (placeHolderIndex < 0) {
82            return null;
83        }
84
85        String query;
86        StringBuilder buffer = new StringBuilder();
87        buffer.append(template.substring(0, placeHolderIndex));
88
89        try {
90            query = java.net.URLEncoder.encode(inQuery, "utf-8");
91            buffer.append(query);
92        } catch (UnsupportedEncodingException ex) {
93            return null;
94        }
95
96        buffer.append(template.substring(
97                placeHolderIndex + queryPlaceHolder.length()));
98
99        return buffer.toString();
100    }
101
102    public static byte[] decode(byte[] url) throws IllegalArgumentException {
103        if (url.length == 0) {
104            return new byte[0];
105        }
106
107        // Create a new byte array with the same length to ensure capacity
108        byte[] tempData = new byte[url.length];
109
110        int tempCount = 0;
111        for (int i = 0; i < url.length; i++) {
112            byte b = url[i];
113            if (b == '%') {
114                if (url.length - i > 2) {
115                    b = (byte) (parseHex(url[i + 1]) * 16
116                            + parseHex(url[i + 2]));
117                    i += 2;
118                } else {
119                    throw new IllegalArgumentException("Invalid format");
120                }
121            }
122            tempData[tempCount++] = b;
123        }
124        byte[] retData = new byte[tempCount];
125        System.arraycopy(tempData, 0, retData, 0, tempCount);
126        return retData;
127    }
128
129    /**
130     * @return True iff the url is correctly URL encoded
131     */
132    static boolean verifyURLEncoding(String url) {
133        int count = url.length();
134        if (count == 0) {
135            return false;
136        }
137
138        int index = url.indexOf('%');
139        while (index >= 0 && index < count) {
140            if (index < count - 2) {
141                try {
142                    parseHex((byte) url.charAt(++index));
143                    parseHex((byte) url.charAt(++index));
144                } catch (IllegalArgumentException e) {
145                    return false;
146                }
147            } else {
148                return false;
149            }
150            index = url.indexOf('%', index + 1);
151        }
152        return true;
153    }
154
155    private static int parseHex(byte b) {
156        if (b >= '0' && b <= '9') return (b - '0');
157        if (b >= 'A' && b <= 'F') return (b - 'A' + 10);
158        if (b >= 'a' && b <= 'f') return (b - 'a' + 10);
159
160        throw new IllegalArgumentException("Invalid hex char '" + b + "'");
161    }
162
163    /**
164     * @return True iff the url is an asset file.
165     */
166    public static boolean isAssetUrl(String url) {
167        return (null != url) && url.startsWith(ASSET_BASE);
168    }
169
170    /**
171     * @return True iff the url is an proxy url to allow cookieless network
172     * requests from a file url.
173     * @deprecated Cookieless proxy is no longer supported.
174     */
175    @Deprecated
176    public static boolean isCookielessProxyUrl(String url) {
177        return (null != url) && url.startsWith(PROXY_BASE);
178    }
179
180    /**
181     * @return True iff the url is a local file.
182     */
183    public static boolean isFileUrl(String url) {
184        return (null != url) && (url.startsWith(FILE_BASE) &&
185                                 !url.startsWith(ASSET_BASE) &&
186                                 !url.startsWith(PROXY_BASE));
187    }
188
189    /**
190     * @return True iff the url is an about: url.
191     */
192    public static boolean isAboutUrl(String url) {
193        return (null != url) && url.startsWith("about:");
194    }
195
196    /**
197     * @return True iff the url is a data: url.
198     */
199    public static boolean isDataUrl(String url) {
200        return (null != url) && url.startsWith("data:");
201    }
202
203    /**
204     * @return True iff the url is a javascript: url.
205     */
206    public static boolean isJavaScriptUrl(String url) {
207        return (null != url) && url.startsWith("javascript:");
208    }
209
210    /**
211     * @return True iff the url is an http: url.
212     */
213    public static boolean isHttpUrl(String url) {
214        return (null != url) &&
215               (url.length() > 6) &&
216               url.substring(0, 7).equalsIgnoreCase("http://");
217    }
218
219    /**
220     * @return True iff the url is an https: url.
221     */
222    public static boolean isHttpsUrl(String url) {
223        return (null != url) &&
224               (url.length() > 7) &&
225               url.substring(0, 8).equalsIgnoreCase("https://");
226    }
227
228    /**
229     * @return True iff the url is a network url.
230     */
231    public static boolean isNetworkUrl(String url) {
232        if (url == null || url.length() == 0) {
233            return false;
234        }
235        return isHttpUrl(url) || isHttpsUrl(url);
236    }
237
238    /**
239     * @return True iff the url is a content: url.
240     */
241    public static boolean isContentUrl(String url) {
242        return (null != url) && url.startsWith("content:");
243    }
244
245    /**
246     * @return True iff the url is valid.
247     */
248    public static boolean isValidUrl(String url) {
249        if (url == null || url.length() == 0) {
250            return false;
251        }
252
253        return (isAssetUrl(url) ||
254                isFileUrl(url) ||
255                isAboutUrl(url) ||
256                isHttpUrl(url) ||
257                isHttpsUrl(url) ||
258                isJavaScriptUrl(url) ||
259                isContentUrl(url));
260    }
261
262    /**
263     * Strips the url of the anchor.
264     */
265    public static String stripAnchor(String url) {
266        int anchorIndex = url.indexOf('#');
267        if (anchorIndex != -1) {
268            return url.substring(0, anchorIndex);
269        }
270        return url;
271    }
272
273    /**
274     * Guesses canonical filename that a download would have, using
275     * the URL and contentDisposition. File extension, if not defined,
276     * is added based on the mimetype
277     * @param url Url to the content
278     * @param contentDisposition Content-Disposition HTTP header or null
279     * @param mimeType Mime-type of the content or null
280     *
281     * @return suggested filename
282     */
283    public static final String guessFileName(
284            String url,
285            String contentDisposition,
286            String mimeType) {
287        String filename = null;
288        String extension = null;
289
290        // If we couldn't do anything with the hint, move toward the content disposition
291        if (filename == null && contentDisposition != null) {
292            filename = parseContentDisposition(contentDisposition);
293            if (filename != null) {
294                int index = filename.lastIndexOf('/') + 1;
295                if (index > 0) {
296                    filename = filename.substring(index);
297                }
298            }
299        }
300
301        // If all the other http-related approaches failed, use the plain uri
302        if (filename == null) {
303            String decodedUrl = Uri.decode(url);
304            if (decodedUrl != null) {
305                int queryIndex = decodedUrl.indexOf('?');
306                // If there is a query string strip it, same as desktop browsers
307                if (queryIndex > 0) {
308                    decodedUrl = decodedUrl.substring(0, queryIndex);
309                }
310                if (!decodedUrl.endsWith("/")) {
311                    int index = decodedUrl.lastIndexOf('/') + 1;
312                    if (index > 0) {
313                        filename = decodedUrl.substring(index);
314                    }
315                }
316            }
317        }
318
319        // Finally, if couldn't get filename from URI, get a generic filename
320        if (filename == null) {
321            filename = "downloadfile";
322        }
323
324        // Split filename between base and extension
325        // Add an extension if filename does not have one
326        int dotIndex = filename.indexOf('.');
327        if (dotIndex < 0) {
328            if (mimeType != null) {
329                extension = MimeTypeMap.getSingleton().getExtensionFromMimeType(mimeType);
330                if (extension != null) {
331                    extension = "." + extension;
332                }
333            }
334            if (extension == null) {
335                if (mimeType != null && mimeType.toLowerCase().startsWith("text/")) {
336                    if (mimeType.equalsIgnoreCase("text/html")) {
337                        extension = ".html";
338                    } else {
339                        extension = ".txt";
340                    }
341                } else {
342                    extension = ".bin";
343                }
344            }
345        } else {
346            if (mimeType != null) {
347                // Compare the last segment of the extension against the mime type.
348                // If there's a mismatch, discard the entire extension.
349                int lastDotIndex = filename.lastIndexOf('.');
350                String typeFromExt = MimeTypeMap.getSingleton().getMimeTypeFromExtension(
351                        filename.substring(lastDotIndex + 1));
352                if (typeFromExt != null && !typeFromExt.equalsIgnoreCase(mimeType)) {
353                    extension = MimeTypeMap.getSingleton().getExtensionFromMimeType(mimeType);
354                    if (extension != null) {
355                        extension = "." + extension;
356                    }
357                }
358            }
359            if (extension == null) {
360                extension = filename.substring(dotIndex);
361            }
362            filename = filename.substring(0, dotIndex);
363        }
364
365        return filename + extension;
366    }
367
368    /** Regex used to parse content-disposition headers */
369    private static final Pattern CONTENT_DISPOSITION_PATTERN =
370            Pattern.compile("attachment;\\s*filename\\s*=\\s*\"([^\"]*)\"");
371
372    /*
373     * Parse the Content-Disposition HTTP Header. The format of the header
374     * is defined here: http://www.w3.org/Protocols/rfc2616/rfc2616-sec19.html
375     * This header provides a filename for content that is going to be
376     * downloaded to the file system. We only support the attachment type.
377     */
378    static String parseContentDisposition(String contentDisposition) {
379        try {
380            Matcher m = CONTENT_DISPOSITION_PATTERN.matcher(contentDisposition);
381            if (m.find()) {
382                return m.group(1);
383            }
384        } catch (IllegalStateException ex) {
385             // This function is defined as returning null when it can't parse the header
386        }
387        return null;
388    }
389}
390