// URLUtil.java revision eb8be973c7982fe3ece0aeaeca379c3b3cdced0c
/*
 * Copyright (C) 2006 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17package android.webkit;
18
19import java.io.UnsupportedEncodingException;
20import java.util.regex.Matcher;
21import java.util.regex.Pattern;
22
23import android.net.Uri;
24import android.net.ParseException;
25import android.net.WebAddress;
26import android.util.Log;
27
28public final class URLUtil {
29
30    private static final String LOGTAG = "webkit";
31
32    // to refer to bar.png under your package's asset/foo/ directory, use
33    // "file:///android_asset/foo/bar.png".
34    static final String ASSET_BASE = "file:///android_asset/";
35    // to refer to bar.png under your package's res/drawable/ directory, use
36    // "file:///android_res/drawable/bar.png". Use "drawable" to refer to
37    // "drawable-hdpi" directory as well.
38    static final String RESOURCE_BASE = "file:///android_res/";
39    static final String FILE_BASE = "file://";
40    static final String PROXY_BASE = "file:///cookieless_proxy/";
41
42    /**
43     * Cleans up (if possible) user-entered web addresses
44     */
45    public static String guessUrl(String inUrl) {
46
47        String retVal = inUrl;
48        WebAddress webAddress;
49
50        if (DebugFlags.URL_UTIL) Log.v(LOGTAG, "guessURL before queueRequest: " + inUrl);
51
52        if (inUrl.length() == 0) return inUrl;
53        if (inUrl.startsWith("about:")) return inUrl;
54        // Do not try to interpret data scheme URLs
55        if (inUrl.startsWith("data:")) return inUrl;
56        // Do not try to interpret file scheme URLs
57        if (inUrl.startsWith("file:")) return inUrl;
58        // Do not try to interpret javascript scheme URLs
59        if (inUrl.startsWith("javascript:")) return inUrl;
60
61        // bug 762454: strip period off end of url
62        if (inUrl.endsWith(".") == true) {
63            inUrl = inUrl.substring(0, inUrl.length() - 1);
64        }
65
66        try {
67            webAddress = new WebAddress(inUrl);
68        } catch (ParseException ex) {
69
70            if (DebugFlags.URL_UTIL) {
71                Log.v(LOGTAG, "smartUrlFilter: failed to parse url = " + inUrl);
72            }
73            return retVal;
74        }
75
76        // Check host
77        if (webAddress.getHost().indexOf('.') == -1) {
78            // no dot: user probably entered a bare domain.  try .com
79            webAddress.setHost("www." + webAddress.getHost() + ".com");
80        }
81        return webAddress.toString();
82    }
83
84    public static String composeSearchUrl(String inQuery, String template,
85                                          String queryPlaceHolder) {
86        int placeHolderIndex = template.indexOf(queryPlaceHolder);
87        if (placeHolderIndex < 0) {
88            return null;
89        }
90
91        String query;
92        StringBuilder buffer = new StringBuilder();
93        buffer.append(template.substring(0, placeHolderIndex));
94
95        try {
96            query = java.net.URLEncoder.encode(inQuery, "utf-8");
97            buffer.append(query);
98        } catch (UnsupportedEncodingException ex) {
99            return null;
100        }
101
102        buffer.append(template.substring(
103                placeHolderIndex + queryPlaceHolder.length()));
104
105        return buffer.toString();
106    }
107
108    public static byte[] decode(byte[] url) throws IllegalArgumentException {
109        if (url.length == 0) {
110            return new byte[0];
111        }
112
113        // Create a new byte array with the same length to ensure capacity
114        byte[] tempData = new byte[url.length];
115
116        int tempCount = 0;
117        for (int i = 0; i < url.length; i++) {
118            byte b = url[i];
119            if (b == '%') {
120                if (url.length - i > 2) {
121                    b = (byte) (parseHex(url[i + 1]) * 16
122                            + parseHex(url[i + 2]));
123                    i += 2;
124                } else {
125                    throw new IllegalArgumentException("Invalid format");
126                }
127            }
128            tempData[tempCount++] = b;
129        }
130        byte[] retData = new byte[tempCount];
131        System.arraycopy(tempData, 0, retData, 0, tempCount);
132        return retData;
133    }
134
135    /**
136     * @return True iff the url is correctly URL encoded
137     */
138    static boolean verifyURLEncoding(String url) {
139        int count = url.length();
140        if (count == 0) {
141            return false;
142        }
143
144        int index = url.indexOf('%');
145        while (index >= 0 && index < count) {
146            if (index < count - 2) {
147                try {
148                    parseHex((byte) url.charAt(++index));
149                    parseHex((byte) url.charAt(++index));
150                } catch (IllegalArgumentException e) {
151                    return false;
152                }
153            } else {
154                return false;
155            }
156            index = url.indexOf('%', index + 1);
157        }
158        return true;
159    }
160
161    private static int parseHex(byte b) {
162        if (b >= '0' && b <= '9') return (b - '0');
163        if (b >= 'A' && b <= 'F') return (b - 'A' + 10);
164        if (b >= 'a' && b <= 'f') return (b - 'a' + 10);
165
166        throw new IllegalArgumentException("Invalid hex char '" + b + "'");
167    }
168
169    /**
170     * @return True iff the url is an asset file.
171     */
172    public static boolean isAssetUrl(String url) {
173        return (null != url) && url.startsWith(ASSET_BASE);
174    }
175
176    /**
177     * @return True iff the url is a resource file.
178     * @hide
179     */
180    public static boolean isResourceUrl(String url) {
181        return (null != url) && url.startsWith(RESOURCE_BASE);
182    }
183
184    /**
185     * @return True iff the url is an proxy url to allow cookieless network
186     * requests from a file url.
187     * @deprecated Cookieless proxy is no longer supported.
188     */
189    @Deprecated
190    public static boolean isCookielessProxyUrl(String url) {
191        return (null != url) && url.startsWith(PROXY_BASE);
192    }
193
194    /**
195     * @return True iff the url is a local file.
196     */
197    public static boolean isFileUrl(String url) {
198        return (null != url) && (url.startsWith(FILE_BASE) &&
199                                 !url.startsWith(ASSET_BASE) &&
200                                 !url.startsWith(PROXY_BASE));
201    }
202
203    /**
204     * @return True iff the url is an about: url.
205     */
206    public static boolean isAboutUrl(String url) {
207        return (null != url) && url.startsWith("about:");
208    }
209
210    /**
211     * @return True iff the url is a data: url.
212     */
213    public static boolean isDataUrl(String url) {
214        return (null != url) && url.startsWith("data:");
215    }
216
217    /**
218     * @return True iff the url is a javascript: url.
219     */
220    public static boolean isJavaScriptUrl(String url) {
221        return (null != url) && url.startsWith("javascript:");
222    }
223
224    /**
225     * @return True iff the url is an http: url.
226     */
227    public static boolean isHttpUrl(String url) {
228        return (null != url) &&
229               (url.length() > 6) &&
230               url.substring(0, 7).equalsIgnoreCase("http://");
231    }
232
233    /**
234     * @return True iff the url is an https: url.
235     */
236    public static boolean isHttpsUrl(String url) {
237        return (null != url) &&
238               (url.length() > 7) &&
239               url.substring(0, 8).equalsIgnoreCase("https://");
240    }
241
242    /**
243     * @return True iff the url is a network url.
244     */
245    public static boolean isNetworkUrl(String url) {
246        if (url == null || url.length() == 0) {
247            return false;
248        }
249        return isHttpUrl(url) || isHttpsUrl(url);
250    }
251
252    /**
253     * @return True iff the url is a content: url.
254     */
255    public static boolean isContentUrl(String url) {
256        return (null != url) && url.startsWith("content:");
257    }
258
259    /**
260     * @return True iff the url is valid.
261     */
262    public static boolean isValidUrl(String url) {
263        if (url == null || url.length() == 0) {
264            return false;
265        }
266
267        return (isAssetUrl(url) ||
268                isResourceUrl(url) ||
269                isFileUrl(url) ||
270                isAboutUrl(url) ||
271                isHttpUrl(url) ||
272                isHttpsUrl(url) ||
273                isJavaScriptUrl(url) ||
274                isContentUrl(url));
275    }
276
277    /**
278     * Strips the url of the anchor.
279     */
280    public static String stripAnchor(String url) {
281        int anchorIndex = url.indexOf('#');
282        if (anchorIndex != -1) {
283            return url.substring(0, anchorIndex);
284        }
285        return url;
286    }
287
288    /**
289     * Guesses canonical filename that a download would have, using
290     * the URL and contentDisposition. File extension, if not defined,
291     * is added based on the mimetype
292     * @param url Url to the content
293     * @param contentDisposition Content-Disposition HTTP header or null
294     * @param mimeType Mime-type of the content or null
295     *
296     * @return suggested filename
297     */
298    public static final String guessFileName(
299            String url,
300            String contentDisposition,
301            String mimeType) {
302        String filename = null;
303        String extension = null;
304
305        // If we couldn't do anything with the hint, move toward the content disposition
306        if (filename == null && contentDisposition != null) {
307            filename = parseContentDisposition(contentDisposition);
308            if (filename != null) {
309                int index = filename.lastIndexOf('/') + 1;
310                if (index > 0) {
311                    filename = filename.substring(index);
312                }
313            }
314        }
315
316        // If all the other http-related approaches failed, use the plain uri
317        if (filename == null) {
318            String decodedUrl = Uri.decode(url);
319            if (decodedUrl != null) {
320                int queryIndex = decodedUrl.indexOf('?');
321                // If there is a query string strip it, same as desktop browsers
322                if (queryIndex > 0) {
323                    decodedUrl = decodedUrl.substring(0, queryIndex);
324                }
325                if (!decodedUrl.endsWith("/")) {
326                    int index = decodedUrl.lastIndexOf('/') + 1;
327                    if (index > 0) {
328                        filename = decodedUrl.substring(index);
329                    }
330                }
331            }
332        }
333
334        // Finally, if couldn't get filename from URI, get a generic filename
335        if (filename == null) {
336            filename = "downloadfile";
337        }
338
339        // Split filename between base and extension
340        // Add an extension if filename does not have one
341        int dotIndex = filename.indexOf('.');
342        if (dotIndex < 0) {
343            if (mimeType != null) {
344                extension = MimeTypeMap.getSingleton().getExtensionFromMimeType(mimeType);
345                if (extension != null) {
346                    extension = "." + extension;
347                }
348            }
349            if (extension == null) {
350                if (mimeType != null && mimeType.toLowerCase().startsWith("text/")) {
351                    if (mimeType.equalsIgnoreCase("text/html")) {
352                        extension = ".html";
353                    } else {
354                        extension = ".txt";
355                    }
356                } else {
357                    extension = ".bin";
358                }
359            }
360        } else {
361            if (mimeType != null) {
362                // Compare the last segment of the extension against the mime type.
363                // If there's a mismatch, discard the entire extension.
364                int lastDotIndex = filename.lastIndexOf('.');
365                String typeFromExt = MimeTypeMap.getSingleton().getMimeTypeFromExtension(
366                        filename.substring(lastDotIndex + 1));
367                if (typeFromExt != null && !typeFromExt.equalsIgnoreCase(mimeType)) {
368                    extension = MimeTypeMap.getSingleton().getExtensionFromMimeType(mimeType);
369                    if (extension != null) {
370                        extension = "." + extension;
371                    }
372                }
373            }
374            if (extension == null) {
375                extension = filename.substring(dotIndex);
376            }
377            filename = filename.substring(0, dotIndex);
378        }
379
380        return filename + extension;
381    }
382
383    /** Regex used to parse content-disposition headers */
384    private static final Pattern CONTENT_DISPOSITION_PATTERN =
385            Pattern.compile("attachment;\\s*filename\\s*=\\s*(\"?)([^\"]*)\\1\\s*$",
386            Pattern.CASE_INSENSITIVE);
387
388    /*
389     * Parse the Content-Disposition HTTP Header. The format of the header
390     * is defined here: http://www.w3.org/Protocols/rfc2616/rfc2616-sec19.html
391     * This header provides a filename for content that is going to be
392     * downloaded to the file system. We only support the attachment type.
393     * Note that RFC 2616 specifies the filename value must be double-quoted.
394     * Unfortunately some servers do not quote the value so to maintain
395     * consistent behaviour with other browsers, we allow unquoted values too.
396     */
397    static String parseContentDisposition(String contentDisposition) {
398        try {
399            Matcher m = CONTENT_DISPOSITION_PATTERN.matcher(contentDisposition);
400            if (m.find()) {
401                return m.group(2);
402            }
403        } catch (IllegalStateException ex) {
404             // This function is defined as returning null when it can't parse the header
405        }
406        return null;
407    }
408}
409