1/*
2 * Copyright (C) 2006 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package android.webkit;
18
19import java.io.UnsupportedEncodingException;
20import java.util.Locale;
21import java.util.regex.Matcher;
22import java.util.regex.Pattern;
23
24import android.net.Uri;
25import android.net.ParseException;
26import android.net.WebAddress;
27import android.util.Log;
28
29public final class URLUtil {
30
31    private static final String LOGTAG = "webkit";
32    private static final boolean TRACE = false;
33
34    // to refer to bar.png under your package's asset/foo/ directory, use
35    // "file:///android_asset/foo/bar.png".
36    static final String ASSET_BASE = "file:///android_asset/";
37    // to refer to bar.png under your package's res/drawable/ directory, use
38    // "file:///android_res/drawable/bar.png". Use "drawable" to refer to
39    // "drawable-hdpi" directory as well.
40    static final String RESOURCE_BASE = "file:///android_res/";
41    static final String FILE_BASE = "file://";
42    static final String PROXY_BASE = "file:///cookieless_proxy/";
43    static final String CONTENT_BASE = "content:";
44
45    /**
46     * Cleans up (if possible) user-entered web addresses
47     */
48    public static String guessUrl(String inUrl) {
49
50        String retVal = inUrl;
51        WebAddress webAddress;
52
53        if (TRACE) Log.v(LOGTAG, "guessURL before queueRequest: " + inUrl);
54
55        if (inUrl.length() == 0) return inUrl;
56        if (inUrl.startsWith("about:")) return inUrl;
57        // Do not try to interpret data scheme URLs
58        if (inUrl.startsWith("data:")) return inUrl;
59        // Do not try to interpret file scheme URLs
60        if (inUrl.startsWith("file:")) return inUrl;
61        // Do not try to interpret javascript scheme URLs
62        if (inUrl.startsWith("javascript:")) return inUrl;
63
64        // bug 762454: strip period off end of url
65        if (inUrl.endsWith(".") == true) {
66            inUrl = inUrl.substring(0, inUrl.length() - 1);
67        }
68
69        try {
70            webAddress = new WebAddress(inUrl);
71        } catch (ParseException ex) {
72
73            if (TRACE) {
74                Log.v(LOGTAG, "smartUrlFilter: failed to parse url = " + inUrl);
75            }
76            return retVal;
77        }
78
79        // Check host
80        if (webAddress.getHost().indexOf('.') == -1) {
81            // no dot: user probably entered a bare domain.  try .com
82            webAddress.setHost("www." + webAddress.getHost() + ".com");
83        }
84        return webAddress.toString();
85    }
86
87    public static String composeSearchUrl(String inQuery, String template,
88                                          String queryPlaceHolder) {
89        int placeHolderIndex = template.indexOf(queryPlaceHolder);
90        if (placeHolderIndex < 0) {
91            return null;
92        }
93
94        String query;
95        StringBuilder buffer = new StringBuilder();
96        buffer.append(template.substring(0, placeHolderIndex));
97
98        try {
99            query = java.net.URLEncoder.encode(inQuery, "utf-8");
100            buffer.append(query);
101        } catch (UnsupportedEncodingException ex) {
102            return null;
103        }
104
105        buffer.append(template.substring(
106                placeHolderIndex + queryPlaceHolder.length()));
107
108        return buffer.toString();
109    }
110
111    public static byte[] decode(byte[] url) throws IllegalArgumentException {
112        if (url.length == 0) {
113            return new byte[0];
114        }
115
116        // Create a new byte array with the same length to ensure capacity
117        byte[] tempData = new byte[url.length];
118
119        int tempCount = 0;
120        for (int i = 0; i < url.length; i++) {
121            byte b = url[i];
122            if (b == '%') {
123                if (url.length - i > 2) {
124                    b = (byte) (parseHex(url[i + 1]) * 16
125                            + parseHex(url[i + 2]));
126                    i += 2;
127                } else {
128                    throw new IllegalArgumentException("Invalid format");
129                }
130            }
131            tempData[tempCount++] = b;
132        }
133        byte[] retData = new byte[tempCount];
134        System.arraycopy(tempData, 0, retData, 0, tempCount);
135        return retData;
136    }
137
138    /**
139     * @return True iff the url is correctly URL encoded
140     */
141    static boolean verifyURLEncoding(String url) {
142        int count = url.length();
143        if (count == 0) {
144            return false;
145        }
146
147        int index = url.indexOf('%');
148        while (index >= 0 && index < count) {
149            if (index < count - 2) {
150                try {
151                    parseHex((byte) url.charAt(++index));
152                    parseHex((byte) url.charAt(++index));
153                } catch (IllegalArgumentException e) {
154                    return false;
155                }
156            } else {
157                return false;
158            }
159            index = url.indexOf('%', index + 1);
160        }
161        return true;
162    }
163
164    private static int parseHex(byte b) {
165        if (b >= '0' && b <= '9') return (b - '0');
166        if (b >= 'A' && b <= 'F') return (b - 'A' + 10);
167        if (b >= 'a' && b <= 'f') return (b - 'a' + 10);
168
169        throw new IllegalArgumentException("Invalid hex char '" + b + "'");
170    }
171
172    /**
173     * @return True iff the url is an asset file.
174     */
175    public static boolean isAssetUrl(String url) {
176        return (null != url) && url.startsWith(ASSET_BASE);
177    }
178
179    /**
180     * @return True iff the url is a resource file.
181     * @hide
182     */
183    public static boolean isResourceUrl(String url) {
184        return (null != url) && url.startsWith(RESOURCE_BASE);
185    }
186
187    /**
188     * @return True iff the url is a proxy url to allow cookieless network
189     * requests from a file url.
190     * @deprecated Cookieless proxy is no longer supported.
191     */
192    @Deprecated
193    public static boolean isCookielessProxyUrl(String url) {
194        return (null != url) && url.startsWith(PROXY_BASE);
195    }
196
197    /**
198     * @return True iff the url is a local file.
199     */
200    public static boolean isFileUrl(String url) {
201        return (null != url) && (url.startsWith(FILE_BASE) &&
202                                 !url.startsWith(ASSET_BASE) &&
203                                 !url.startsWith(PROXY_BASE));
204    }
205
206    /**
207     * @return True iff the url is an about: url.
208     */
209    public static boolean isAboutUrl(String url) {
210        return (null != url) && url.startsWith("about:");
211    }
212
213    /**
214     * @return True iff the url is a data: url.
215     */
216    public static boolean isDataUrl(String url) {
217        return (null != url) && url.startsWith("data:");
218    }
219
220    /**
221     * @return True iff the url is a javascript: url.
222     */
223    public static boolean isJavaScriptUrl(String url) {
224        return (null != url) && url.startsWith("javascript:");
225    }
226
227    /**
228     * @return True iff the url is an http: url.
229     */
230    public static boolean isHttpUrl(String url) {
231        return (null != url) &&
232               (url.length() > 6) &&
233               url.substring(0, 7).equalsIgnoreCase("http://");
234    }
235
236    /**
237     * @return True iff the url is an https: url.
238     */
239    public static boolean isHttpsUrl(String url) {
240        return (null != url) &&
241               (url.length() > 7) &&
242               url.substring(0, 8).equalsIgnoreCase("https://");
243    }
244
245    /**
246     * @return True iff the url is a network url.
247     */
248    public static boolean isNetworkUrl(String url) {
249        if (url == null || url.length() == 0) {
250            return false;
251        }
252        return isHttpUrl(url) || isHttpsUrl(url);
253    }
254
255    /**
256     * @return True iff the url is a content: url.
257     */
258    public static boolean isContentUrl(String url) {
259        return (null != url) && url.startsWith(CONTENT_BASE);
260    }
261
262    /**
263     * @return True iff the url is valid.
264     */
265    public static boolean isValidUrl(String url) {
266        if (url == null || url.length() == 0) {
267            return false;
268        }
269
270        return (isAssetUrl(url) ||
271                isResourceUrl(url) ||
272                isFileUrl(url) ||
273                isAboutUrl(url) ||
274                isHttpUrl(url) ||
275                isHttpsUrl(url) ||
276                isJavaScriptUrl(url) ||
277                isContentUrl(url));
278    }
279
280    /**
281     * Strips the url of the anchor.
282     */
283    public static String stripAnchor(String url) {
284        int anchorIndex = url.indexOf('#');
285        if (anchorIndex != -1) {
286            return url.substring(0, anchorIndex);
287        }
288        return url;
289    }
290
291    /**
292     * Guesses canonical filename that a download would have, using
293     * the URL and contentDisposition. File extension, if not defined,
294     * is added based on the mimetype
295     * @param url Url to the content
296     * @param contentDisposition Content-Disposition HTTP header or null
297     * @param mimeType Mime-type of the content or null
298     *
299     * @return suggested filename
300     */
301    public static final String guessFileName(
302            String url,
303            String contentDisposition,
304            String mimeType) {
305        String filename = null;
306        String extension = null;
307
308        // If we couldn't do anything with the hint, move toward the content disposition
309        if (filename == null && contentDisposition != null) {
310            filename = parseContentDisposition(contentDisposition);
311            if (filename != null) {
312                int index = filename.lastIndexOf('/') + 1;
313                if (index > 0) {
314                    filename = filename.substring(index);
315                }
316            }
317        }
318
319        // If all the other http-related approaches failed, use the plain uri
320        if (filename == null) {
321            String decodedUrl = Uri.decode(url);
322            if (decodedUrl != null) {
323                int queryIndex = decodedUrl.indexOf('?');
324                // If there is a query string strip it, same as desktop browsers
325                if (queryIndex > 0) {
326                    decodedUrl = decodedUrl.substring(0, queryIndex);
327                }
328                if (!decodedUrl.endsWith("/")) {
329                    int index = decodedUrl.lastIndexOf('/') + 1;
330                    if (index > 0) {
331                        filename = decodedUrl.substring(index);
332                    }
333                }
334            }
335        }
336
337        // Finally, if couldn't get filename from URI, get a generic filename
338        if (filename == null) {
339            filename = "downloadfile";
340        }
341
342        // Split filename between base and extension
343        // Add an extension if filename does not have one
344        int dotIndex = filename.indexOf('.');
345        if (dotIndex < 0) {
346            if (mimeType != null) {
347                extension = MimeTypeMap.getSingleton().getExtensionFromMimeType(mimeType);
348                if (extension != null) {
349                    extension = "." + extension;
350                }
351            }
352            if (extension == null) {
353                if (mimeType != null && mimeType.toLowerCase(Locale.ROOT).startsWith("text/")) {
354                    if (mimeType.equalsIgnoreCase("text/html")) {
355                        extension = ".html";
356                    } else {
357                        extension = ".txt";
358                    }
359                } else {
360                    extension = ".bin";
361                }
362            }
363        } else {
364            if (mimeType != null) {
365                // Compare the last segment of the extension against the mime type.
366                // If there's a mismatch, discard the entire extension.
367                int lastDotIndex = filename.lastIndexOf('.');
368                String typeFromExt = MimeTypeMap.getSingleton().getMimeTypeFromExtension(
369                        filename.substring(lastDotIndex + 1));
370                if (typeFromExt != null && !typeFromExt.equalsIgnoreCase(mimeType)) {
371                    extension = MimeTypeMap.getSingleton().getExtensionFromMimeType(mimeType);
372                    if (extension != null) {
373                        extension = "." + extension;
374                    }
375                }
376            }
377            if (extension == null) {
378                extension = filename.substring(dotIndex);
379            }
380            filename = filename.substring(0, dotIndex);
381        }
382
383        return filename + extension;
384    }
385
386    /** Regex used to parse content-disposition headers */
387    private static final Pattern CONTENT_DISPOSITION_PATTERN =
388            Pattern.compile("attachment;\\s*filename\\s*=\\s*(\"?)([^\"]*)\\1\\s*$",
389            Pattern.CASE_INSENSITIVE);
390
391    /*
392     * Parse the Content-Disposition HTTP Header. The format of the header
393     * is defined here: http://www.w3.org/Protocols/rfc2616/rfc2616-sec19.html
394     * This header provides a filename for content that is going to be
395     * downloaded to the file system. We only support the attachment type.
396     * Note that RFC 2616 specifies the filename value must be double-quoted.
397     * Unfortunately some servers do not quote the value so to maintain
398     * consistent behaviour with other browsers, we allow unquoted values too.
399     */
400    static String parseContentDisposition(String contentDisposition) {
401        try {
402            Matcher m = CONTENT_DISPOSITION_PATTERN.matcher(contentDisposition);
403            if (m.find()) {
404                return m.group(2);
405            }
406        } catch (IllegalStateException ex) {
407             // This function is defined as returning null when it can't parse the header
408        }
409        return null;
410    }
411}
412