1/*
2 * Copyright (C) 2006 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package android.webkit;
18
19import java.io.UnsupportedEncodingException;
20import java.util.Locale;
21import java.util.regex.Matcher;
22import java.util.regex.Pattern;
23
24import android.net.Uri;
25import android.net.ParseException;
26import android.net.WebAddress;
27import android.util.Log;
28
29public final class URLUtil {
30
    // Tag passed to Log for the debug output of this class.
    private static final String LOGTAG = "webkit";

    // Prefix of asset URLs. To refer to bar.png under your package's
    // asset/foo/ directory, use "file:///android_asset/foo/bar.png".
    static final String ASSET_BASE = "file:///android_asset/";
    // Prefix of resource URLs. To refer to bar.png under your package's
    // res/drawable/ directory, use "file:///android_res/drawable/bar.png".
    // Use "drawable" to refer to "drawable-hdpi" directory as well.
    static final String RESOURCE_BASE = "file:///android_res/";
    // Prefix shared by every file scheme URL; the asset, resource and proxy
    // prefixes in this class all start with it.
    static final String FILE_BASE = "file://";
    // Prefix tested by isCookielessProxyUrl(), which is deprecated.
    static final String PROXY_BASE = "file:///cookieless_proxy/";
    // Prefix tested by isContentUrl().
    static final String CONTENT_BASE = "content:";
43
44    /**
45     * Cleans up (if possible) user-entered web addresses
46     */
47    public static String guessUrl(String inUrl) {
48
49        String retVal = inUrl;
50        WebAddress webAddress;
51
52        if (DebugFlags.URL_UTIL) Log.v(LOGTAG, "guessURL before queueRequest: " + inUrl);
53
54        if (inUrl.length() == 0) return inUrl;
55        if (inUrl.startsWith("about:")) return inUrl;
56        // Do not try to interpret data scheme URLs
57        if (inUrl.startsWith("data:")) return inUrl;
58        // Do not try to interpret file scheme URLs
59        if (inUrl.startsWith("file:")) return inUrl;
60        // Do not try to interpret javascript scheme URLs
61        if (inUrl.startsWith("javascript:")) return inUrl;
62
63        // bug 762454: strip period off end of url
64        if (inUrl.endsWith(".") == true) {
65            inUrl = inUrl.substring(0, inUrl.length() - 1);
66        }
67
68        try {
69            webAddress = new WebAddress(inUrl);
70        } catch (ParseException ex) {
71
72            if (DebugFlags.URL_UTIL) {
73                Log.v(LOGTAG, "smartUrlFilter: failed to parse url = " + inUrl);
74            }
75            return retVal;
76        }
77
78        // Check host
79        if (webAddress.getHost().indexOf('.') == -1) {
80            // no dot: user probably entered a bare domain.  try .com
81            webAddress.setHost("www." + webAddress.getHost() + ".com");
82        }
83        return webAddress.toString();
84    }
85
86    public static String composeSearchUrl(String inQuery, String template,
87                                          String queryPlaceHolder) {
88        int placeHolderIndex = template.indexOf(queryPlaceHolder);
89        if (placeHolderIndex < 0) {
90            return null;
91        }
92
93        String query;
94        StringBuilder buffer = new StringBuilder();
95        buffer.append(template.substring(0, placeHolderIndex));
96
97        try {
98            query = java.net.URLEncoder.encode(inQuery, "utf-8");
99            buffer.append(query);
100        } catch (UnsupportedEncodingException ex) {
101            return null;
102        }
103
104        buffer.append(template.substring(
105                placeHolderIndex + queryPlaceHolder.length()));
106
107        return buffer.toString();
108    }
109
110    public static byte[] decode(byte[] url) throws IllegalArgumentException {
111        if (url.length == 0) {
112            return new byte[0];
113        }
114
115        // Create a new byte array with the same length to ensure capacity
116        byte[] tempData = new byte[url.length];
117
118        int tempCount = 0;
119        for (int i = 0; i < url.length; i++) {
120            byte b = url[i];
121            if (b == '%') {
122                if (url.length - i > 2) {
123                    b = (byte) (parseHex(url[i + 1]) * 16
124                            + parseHex(url[i + 2]));
125                    i += 2;
126                } else {
127                    throw new IllegalArgumentException("Invalid format");
128                }
129            }
130            tempData[tempCount++] = b;
131        }
132        byte[] retData = new byte[tempCount];
133        System.arraycopy(tempData, 0, retData, 0, tempCount);
134        return retData;
135    }
136
137    /**
138     * @return True iff the url is correctly URL encoded
139     */
140    static boolean verifyURLEncoding(String url) {
141        int count = url.length();
142        if (count == 0) {
143            return false;
144        }
145
146        int index = url.indexOf('%');
147        while (index >= 0 && index < count) {
148            if (index < count - 2) {
149                try {
150                    parseHex((byte) url.charAt(++index));
151                    parseHex((byte) url.charAt(++index));
152                } catch (IllegalArgumentException e) {
153                    return false;
154                }
155            } else {
156                return false;
157            }
158            index = url.indexOf('%', index + 1);
159        }
160        return true;
161    }
162
163    private static int parseHex(byte b) {
164        if (b >= '0' && b <= '9') return (b - '0');
165        if (b >= 'A' && b <= 'F') return (b - 'A' + 10);
166        if (b >= 'a' && b <= 'f') return (b - 'a' + 10);
167
168        throw new IllegalArgumentException("Invalid hex char '" + b + "'");
169    }
170
171    /**
172     * @return True iff the url is an asset file.
173     */
174    public static boolean isAssetUrl(String url) {
175        return (null != url) && url.startsWith(ASSET_BASE);
176    }
177
178    /**
179     * @return True iff the url is a resource file.
180     * @hide
181     */
182    public static boolean isResourceUrl(String url) {
183        return (null != url) && url.startsWith(RESOURCE_BASE);
184    }
185
186    /**
187     * @return True iff the url is a proxy url to allow cookieless network
188     * requests from a file url.
189     * @deprecated Cookieless proxy is no longer supported.
190     */
191    @Deprecated
192    public static boolean isCookielessProxyUrl(String url) {
193        return (null != url) && url.startsWith(PROXY_BASE);
194    }
195
196    /**
197     * @return True iff the url is a local file.
198     */
199    public static boolean isFileUrl(String url) {
200        return (null != url) && (url.startsWith(FILE_BASE) &&
201                                 !url.startsWith(ASSET_BASE) &&
202                                 !url.startsWith(PROXY_BASE));
203    }
204
205    /**
206     * @return True iff the url is an about: url.
207     */
208    public static boolean isAboutUrl(String url) {
209        return (null != url) && url.startsWith("about:");
210    }
211
212    /**
213     * @return True iff the url is a data: url.
214     */
215    public static boolean isDataUrl(String url) {
216        return (null != url) && url.startsWith("data:");
217    }
218
219    /**
220     * @return True iff the url is a javascript: url.
221     */
222    public static boolean isJavaScriptUrl(String url) {
223        return (null != url) && url.startsWith("javascript:");
224    }
225
226    /**
227     * @return True iff the url is an http: url.
228     */
229    public static boolean isHttpUrl(String url) {
230        return (null != url) &&
231               (url.length() > 6) &&
232               url.substring(0, 7).equalsIgnoreCase("http://");
233    }
234
235    /**
236     * @return True iff the url is an https: url.
237     */
238    public static boolean isHttpsUrl(String url) {
239        return (null != url) &&
240               (url.length() > 7) &&
241               url.substring(0, 8).equalsIgnoreCase("https://");
242    }
243
244    /**
245     * @return True iff the url is a network url.
246     */
247    public static boolean isNetworkUrl(String url) {
248        if (url == null || url.length() == 0) {
249            return false;
250        }
251        return isHttpUrl(url) || isHttpsUrl(url);
252    }
253
254    /**
255     * @return True iff the url is a content: url.
256     */
257    public static boolean isContentUrl(String url) {
258        return (null != url) && url.startsWith(CONTENT_BASE);
259    }
260
261    /**
262     * @return True iff the url is valid.
263     */
264    public static boolean isValidUrl(String url) {
265        if (url == null || url.length() == 0) {
266            return false;
267        }
268
269        return (isAssetUrl(url) ||
270                isResourceUrl(url) ||
271                isFileUrl(url) ||
272                isAboutUrl(url) ||
273                isHttpUrl(url) ||
274                isHttpsUrl(url) ||
275                isJavaScriptUrl(url) ||
276                isContentUrl(url));
277    }
278
279    /**
280     * Strips the url of the anchor.
281     */
282    public static String stripAnchor(String url) {
283        int anchorIndex = url.indexOf('#');
284        if (anchorIndex != -1) {
285            return url.substring(0, anchorIndex);
286        }
287        return url;
288    }
289
290    /**
291     * Guesses canonical filename that a download would have, using
292     * the URL and contentDisposition. File extension, if not defined,
293     * is added based on the mimetype
294     * @param url Url to the content
295     * @param contentDisposition Content-Disposition HTTP header or null
296     * @param mimeType Mime-type of the content or null
297     *
298     * @return suggested filename
299     */
300    public static final String guessFileName(
301            String url,
302            String contentDisposition,
303            String mimeType) {
304        String filename = null;
305        String extension = null;
306
307        // If we couldn't do anything with the hint, move toward the content disposition
308        if (filename == null && contentDisposition != null) {
309            filename = parseContentDisposition(contentDisposition);
310            if (filename != null) {
311                int index = filename.lastIndexOf('/') + 1;
312                if (index > 0) {
313                    filename = filename.substring(index);
314                }
315            }
316        }
317
318        // If all the other http-related approaches failed, use the plain uri
319        if (filename == null) {
320            String decodedUrl = Uri.decode(url);
321            if (decodedUrl != null) {
322                int queryIndex = decodedUrl.indexOf('?');
323                // If there is a query string strip it, same as desktop browsers
324                if (queryIndex > 0) {
325                    decodedUrl = decodedUrl.substring(0, queryIndex);
326                }
327                if (!decodedUrl.endsWith("/")) {
328                    int index = decodedUrl.lastIndexOf('/') + 1;
329                    if (index > 0) {
330                        filename = decodedUrl.substring(index);
331                    }
332                }
333            }
334        }
335
336        // Finally, if couldn't get filename from URI, get a generic filename
337        if (filename == null) {
338            filename = "downloadfile";
339        }
340
341        // Split filename between base and extension
342        // Add an extension if filename does not have one
343        int dotIndex = filename.indexOf('.');
344        if (dotIndex < 0) {
345            if (mimeType != null) {
346                extension = MimeTypeMap.getSingleton().getExtensionFromMimeType(mimeType);
347                if (extension != null) {
348                    extension = "." + extension;
349                }
350            }
351            if (extension == null) {
352                if (mimeType != null && mimeType.toLowerCase(Locale.ROOT).startsWith("text/")) {
353                    if (mimeType.equalsIgnoreCase("text/html")) {
354                        extension = ".html";
355                    } else {
356                        extension = ".txt";
357                    }
358                } else {
359                    extension = ".bin";
360                }
361            }
362        } else {
363            if (mimeType != null) {
364                // Compare the last segment of the extension against the mime type.
365                // If there's a mismatch, discard the entire extension.
366                int lastDotIndex = filename.lastIndexOf('.');
367                String typeFromExt = MimeTypeMap.getSingleton().getMimeTypeFromExtension(
368                        filename.substring(lastDotIndex + 1));
369                if (typeFromExt != null && !typeFromExt.equalsIgnoreCase(mimeType)) {
370                    extension = MimeTypeMap.getSingleton().getExtensionFromMimeType(mimeType);
371                    if (extension != null) {
372                        extension = "." + extension;
373                    }
374                }
375            }
376            if (extension == null) {
377                extension = filename.substring(dotIndex);
378            }
379            filename = filename.substring(0, dotIndex);
380        }
381
382        return filename + extension;
383    }
384
385    /** Regex used to parse content-disposition headers */
386    private static final Pattern CONTENT_DISPOSITION_PATTERN =
387            Pattern.compile("attachment;\\s*filename\\s*=\\s*(\"?)([^\"]*)\\1\\s*$",
388            Pattern.CASE_INSENSITIVE);
389
390    /*
391     * Parse the Content-Disposition HTTP Header. The format of the header
392     * is defined here: http://www.w3.org/Protocols/rfc2616/rfc2616-sec19.html
393     * This header provides a filename for content that is going to be
394     * downloaded to the file system. We only support the attachment type.
395     * Note that RFC 2616 specifies the filename value must be double-quoted.
396     * Unfortunately some servers do not quote the value so to maintain
397     * consistent behaviour with other browsers, we allow unquoted values too.
398     */
399    static String parseContentDisposition(String contentDisposition) {
400        try {
401            Matcher m = CONTENT_DISPOSITION_PATTERN.matcher(contentDisposition);
402            if (m.find()) {
403                return m.group(2);
404            }
405        } catch (IllegalStateException ex) {
406             // This function is defined as returning null when it can't parse the header
407        }
408        return null;
409    }
410}
411