1/*
2 * Copyright (C) 2006 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package android.webkit;
18
19import android.annotation.Nullable;
20import android.net.ParseException;
21import android.net.Uri;
22import android.net.WebAddress;
23import android.util.Log;
24
25import java.io.UnsupportedEncodingException;
26import java.util.Locale;
27import java.util.regex.Matcher;
28import java.util.regex.Pattern;
29
30public final class URLUtil {
31
    // Tag handed to Log.v() for the trace output produced by this class.
    private static final String LOGTAG = "webkit";
    // Compile-time switch guarding the verbose logging in guessUrl(); disabled here.
    private static final boolean TRACE = false;

    // to refer to bar.png under your package's asset/foo/ directory, use
    // "file:///android_asset/foo/bar.png".
    static final String ASSET_BASE = "file:///android_asset/";
    // to refer to bar.png under your package's res/drawable/ directory, use
    // "file:///android_res/drawable/bar.png". Use "drawable" to refer to
    // "drawable-hdpi" directory as well.
    static final String RESOURCE_BASE = "file:///android_res/";
    // Scheme prefix tested by isFileUrl().
    static final String FILE_BASE = "file:";
    // Prefix tested by the deprecated isCookielessProxyUrl(); isFileUrl() rejects
    // urls that start with it.
    static final String PROXY_BASE = "file:///cookieless_proxy/";
    // Scheme prefix tested by isContentUrl().
    static final String CONTENT_BASE = "content:";
45
46    /**
47     * Cleans up (if possible) user-entered web addresses
48     */
49    public static String guessUrl(String inUrl) {
50
51        String retVal = inUrl;
52        WebAddress webAddress;
53
54        if (TRACE) Log.v(LOGTAG, "guessURL before queueRequest: " + inUrl);
55
56        if (inUrl.length() == 0) return inUrl;
57        if (inUrl.startsWith("about:")) return inUrl;
58        // Do not try to interpret data scheme URLs
59        if (inUrl.startsWith("data:")) return inUrl;
60        // Do not try to interpret file scheme URLs
61        if (inUrl.startsWith("file:")) return inUrl;
62        // Do not try to interpret javascript scheme URLs
63        if (inUrl.startsWith("javascript:")) return inUrl;
64
65        // bug 762454: strip period off end of url
66        if (inUrl.endsWith(".") == true) {
67            inUrl = inUrl.substring(0, inUrl.length() - 1);
68        }
69
70        try {
71            webAddress = new WebAddress(inUrl);
72        } catch (ParseException ex) {
73
74            if (TRACE) {
75                Log.v(LOGTAG, "smartUrlFilter: failed to parse url = " + inUrl);
76            }
77            return retVal;
78        }
79
80        // Check host
81        if (webAddress.getHost().indexOf('.') == -1) {
82            // no dot: user probably entered a bare domain.  try .com
83            webAddress.setHost("www." + webAddress.getHost() + ".com");
84        }
85        return webAddress.toString();
86    }
87
88    public static String composeSearchUrl(String inQuery, String template,
89                                          String queryPlaceHolder) {
90        int placeHolderIndex = template.indexOf(queryPlaceHolder);
91        if (placeHolderIndex < 0) {
92            return null;
93        }
94
95        String query;
96        StringBuilder buffer = new StringBuilder();
97        buffer.append(template.substring(0, placeHolderIndex));
98
99        try {
100            query = java.net.URLEncoder.encode(inQuery, "utf-8");
101            buffer.append(query);
102        } catch (UnsupportedEncodingException ex) {
103            return null;
104        }
105
106        buffer.append(template.substring(
107                placeHolderIndex + queryPlaceHolder.length()));
108
109        return buffer.toString();
110    }
111
112    public static byte[] decode(byte[] url) throws IllegalArgumentException {
113        if (url.length == 0) {
114            return new byte[0];
115        }
116
117        // Create a new byte array with the same length to ensure capacity
118        byte[] tempData = new byte[url.length];
119
120        int tempCount = 0;
121        for (int i = 0; i < url.length; i++) {
122            byte b = url[i];
123            if (b == '%') {
124                if (url.length - i > 2) {
125                    b = (byte) (parseHex(url[i + 1]) * 16
126                            + parseHex(url[i + 2]));
127                    i += 2;
128                } else {
129                    throw new IllegalArgumentException("Invalid format");
130                }
131            }
132            tempData[tempCount++] = b;
133        }
134        byte[] retData = new byte[tempCount];
135        System.arraycopy(tempData, 0, retData, 0, tempCount);
136        return retData;
137    }
138
139    /**
140     * @return {@code true} if the url is correctly URL encoded
141     */
142    static boolean verifyURLEncoding(String url) {
143        int count = url.length();
144        if (count == 0) {
145            return false;
146        }
147
148        int index = url.indexOf('%');
149        while (index >= 0 && index < count) {
150            if (index < count - 2) {
151                try {
152                    parseHex((byte) url.charAt(++index));
153                    parseHex((byte) url.charAt(++index));
154                } catch (IllegalArgumentException e) {
155                    return false;
156                }
157            } else {
158                return false;
159            }
160            index = url.indexOf('%', index + 1);
161        }
162        return true;
163    }
164
165    private static int parseHex(byte b) {
166        if (b >= '0' && b <= '9') return (b - '0');
167        if (b >= 'A' && b <= 'F') return (b - 'A' + 10);
168        if (b >= 'a' && b <= 'f') return (b - 'a' + 10);
169
170        throw new IllegalArgumentException("Invalid hex char '" + b + "'");
171    }
172
173    /**
174     * @return {@code true} if the url is an asset file.
175     */
176    public static boolean isAssetUrl(String url) {
177        return (null != url) && url.startsWith(ASSET_BASE);
178    }
179
180    /**
181     * @return {@code true} if the url is a resource file.
182     * @hide
183     */
184    public static boolean isResourceUrl(String url) {
185        return (null != url) && url.startsWith(RESOURCE_BASE);
186    }
187
188    /**
189     * @return {@code true} if the url is a proxy url to allow cookieless network
190     * requests from a file url.
191     * @deprecated Cookieless proxy is no longer supported.
192     */
193    @Deprecated
194    public static boolean isCookielessProxyUrl(String url) {
195        return (null != url) && url.startsWith(PROXY_BASE);
196    }
197
198    /**
199     * @return {@code true} if the url is a local file.
200     */
201    public static boolean isFileUrl(String url) {
202        return (null != url) && (url.startsWith(FILE_BASE) &&
203                                 !url.startsWith(ASSET_BASE) &&
204                                 !url.startsWith(PROXY_BASE));
205    }
206
207    /**
208     * @return {@code true} if the url is an about: url.
209     */
210    public static boolean isAboutUrl(String url) {
211        return (null != url) && url.startsWith("about:");
212    }
213
214    /**
215     * @return {@code true} if the url is a data: url.
216     */
217    public static boolean isDataUrl(String url) {
218        return (null != url) && url.startsWith("data:");
219    }
220
221    /**
222     * @return {@code true} if the url is a javascript: url.
223     */
224    public static boolean isJavaScriptUrl(String url) {
225        return (null != url) && url.startsWith("javascript:");
226    }
227
228    /**
229     * @return {@code true} if the url is an http: url.
230     */
231    public static boolean isHttpUrl(String url) {
232        return (null != url) &&
233               (url.length() > 6) &&
234               url.substring(0, 7).equalsIgnoreCase("http://");
235    }
236
237    /**
238     * @return {@code true} if the url is an https: url.
239     */
240    public static boolean isHttpsUrl(String url) {
241        return (null != url) &&
242               (url.length() > 7) &&
243               url.substring(0, 8).equalsIgnoreCase("https://");
244    }
245
246    /**
247     * @return {@code true} if the url is a network url.
248     */
249    public static boolean isNetworkUrl(String url) {
250        if (url == null || url.length() == 0) {
251            return false;
252        }
253        return isHttpUrl(url) || isHttpsUrl(url);
254    }
255
256    /**
257     * @return {@code true} if the url is a content: url.
258     */
259    public static boolean isContentUrl(String url) {
260        return (null != url) && url.startsWith(CONTENT_BASE);
261    }
262
263    /**
264     * @return {@code true} if the url is valid.
265     */
266    public static boolean isValidUrl(String url) {
267        if (url == null || url.length() == 0) {
268            return false;
269        }
270
271        return (isAssetUrl(url) ||
272                isResourceUrl(url) ||
273                isFileUrl(url) ||
274                isAboutUrl(url) ||
275                isHttpUrl(url) ||
276                isHttpsUrl(url) ||
277                isJavaScriptUrl(url) ||
278                isContentUrl(url));
279    }
280
281    /**
282     * Strips the url of the anchor.
283     */
284    public static String stripAnchor(String url) {
285        int anchorIndex = url.indexOf('#');
286        if (anchorIndex != -1) {
287            return url.substring(0, anchorIndex);
288        }
289        return url;
290    }
291
292    /**
293     * Guesses canonical filename that a download would have, using
294     * the URL and contentDisposition. File extension, if not defined,
295     * is added based on the mimetype
296     * @param url Url to the content
297     * @param contentDisposition Content-Disposition HTTP header or {@code null}
298     * @param mimeType Mime-type of the content or {@code null}
299     *
300     * @return suggested filename
301     */
302    public static final String guessFileName(
303            String url,
304            @Nullable String contentDisposition,
305            @Nullable String mimeType) {
306        String filename = null;
307        String extension = null;
308
309        // If we couldn't do anything with the hint, move toward the content disposition
310        if (filename == null && contentDisposition != null) {
311            filename = parseContentDisposition(contentDisposition);
312            if (filename != null) {
313                int index = filename.lastIndexOf('/') + 1;
314                if (index > 0) {
315                    filename = filename.substring(index);
316                }
317            }
318        }
319
320        // If all the other http-related approaches failed, use the plain uri
321        if (filename == null) {
322            String decodedUrl = Uri.decode(url);
323            if (decodedUrl != null) {
324                int queryIndex = decodedUrl.indexOf('?');
325                // If there is a query string strip it, same as desktop browsers
326                if (queryIndex > 0) {
327                    decodedUrl = decodedUrl.substring(0, queryIndex);
328                }
329                if (!decodedUrl.endsWith("/")) {
330                    int index = decodedUrl.lastIndexOf('/') + 1;
331                    if (index > 0) {
332                        filename = decodedUrl.substring(index);
333                    }
334                }
335            }
336        }
337
338        // Finally, if couldn't get filename from URI, get a generic filename
339        if (filename == null) {
340            filename = "downloadfile";
341        }
342
343        // Split filename between base and extension
344        // Add an extension if filename does not have one
345        int dotIndex = filename.indexOf('.');
346        if (dotIndex < 0) {
347            if (mimeType != null) {
348                extension = MimeTypeMap.getSingleton().getExtensionFromMimeType(mimeType);
349                if (extension != null) {
350                    extension = "." + extension;
351                }
352            }
353            if (extension == null) {
354                if (mimeType != null && mimeType.toLowerCase(Locale.ROOT).startsWith("text/")) {
355                    if (mimeType.equalsIgnoreCase("text/html")) {
356                        extension = ".html";
357                    } else {
358                        extension = ".txt";
359                    }
360                } else {
361                    extension = ".bin";
362                }
363            }
364        } else {
365            if (mimeType != null) {
366                // Compare the last segment of the extension against the mime type.
367                // If there's a mismatch, discard the entire extension.
368                int lastDotIndex = filename.lastIndexOf('.');
369                String typeFromExt = MimeTypeMap.getSingleton().getMimeTypeFromExtension(
370                        filename.substring(lastDotIndex + 1));
371                if (typeFromExt != null && !typeFromExt.equalsIgnoreCase(mimeType)) {
372                    extension = MimeTypeMap.getSingleton().getExtensionFromMimeType(mimeType);
373                    if (extension != null) {
374                        extension = "." + extension;
375                    }
376                }
377            }
378            if (extension == null) {
379                extension = filename.substring(dotIndex);
380            }
381            filename = filename.substring(0, dotIndex);
382        }
383
384        return filename + extension;
385    }
386
387    /** Regex used to parse content-disposition headers */
388    private static final Pattern CONTENT_DISPOSITION_PATTERN =
389            Pattern.compile("attachment;\\s*filename\\s*=\\s*(\"?)([^\"]*)\\1\\s*$",
390            Pattern.CASE_INSENSITIVE);
391
392    /**
393     * Parse the Content-Disposition HTTP Header. The format of the header
394     * is defined here: http://www.w3.org/Protocols/rfc2616/rfc2616-sec19.html
395     * This header provides a filename for content that is going to be
396     * downloaded to the file system. We only support the attachment type.
397     * Note that RFC 2616 specifies the filename value must be double-quoted.
398     * Unfortunately some servers do not quote the value so to maintain
399     * consistent behaviour with other browsers, we allow unquoted values too.
400     */
401    static String parseContentDisposition(String contentDisposition) {
402        try {
403            Matcher m = CONTENT_DISPOSITION_PATTERN.matcher(contentDisposition);
404            if (m.find()) {
405                return m.group(2);
406            }
407        } catch (IllegalStateException ex) {
408             // This function is defined as returning null when it can't parse the header
409        }
410        return null;
411    }
412}
413