1/*
2 * Copyright (C) 2006 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package android.webkit;
18
19import java.io.UnsupportedEncodingException;
20import java.util.regex.Matcher;
21import java.util.regex.Pattern;
22
23import android.net.Uri;
24import android.net.ParseException;
25import android.net.WebAddress;
26import android.util.Log;
27
28public final class URLUtil {
29
    // Tag passed to Log.v() by the debug logging in this class.
    private static final String LOGTAG = "webkit";

    // to refer to bar.png under your package's asset/foo/ directory, use
    // "file:///android_asset/foo/bar.png".
    static final String ASSET_BASE = "file:///android_asset/";
    // to refer to bar.png under your package's res/drawable/ directory, use
    // "file:///android_res/drawable/bar.png". Use "drawable" to refer to
    // "drawable-hdpi" directory as well.
    static final String RESOURCE_BASE = "file:///android_res/";
    // Prefix tested by isFileUrl(). Note that ASSET_BASE, RESOURCE_BASE and
    // PROXY_BASE all start with this prefix as well.
    static final String FILE_BASE = "file://";
    // Prefix tested by isCookielessProxyUrl().
    static final String PROXY_BASE = "file:///cookieless_proxy/";
    // Prefix tested by isContentUrl().
    static final String CONTENT_BASE = "content:";
42
43    /**
44     * Cleans up (if possible) user-entered web addresses
45     */
46    public static String guessUrl(String inUrl) {
47
48        String retVal = inUrl;
49        WebAddress webAddress;
50
51        if (DebugFlags.URL_UTIL) Log.v(LOGTAG, "guessURL before queueRequest: " + inUrl);
52
53        if (inUrl.length() == 0) return inUrl;
54        if (inUrl.startsWith("about:")) return inUrl;
55        // Do not try to interpret data scheme URLs
56        if (inUrl.startsWith("data:")) return inUrl;
57        // Do not try to interpret file scheme URLs
58        if (inUrl.startsWith("file:")) return inUrl;
59        // Do not try to interpret javascript scheme URLs
60        if (inUrl.startsWith("javascript:")) return inUrl;
61
62        // bug 762454: strip period off end of url
63        if (inUrl.endsWith(".") == true) {
64            inUrl = inUrl.substring(0, inUrl.length() - 1);
65        }
66
67        try {
68            webAddress = new WebAddress(inUrl);
69        } catch (ParseException ex) {
70
71            if (DebugFlags.URL_UTIL) {
72                Log.v(LOGTAG, "smartUrlFilter: failed to parse url = " + inUrl);
73            }
74            return retVal;
75        }
76
77        // Check host
78        if (webAddress.getHost().indexOf('.') == -1) {
79            // no dot: user probably entered a bare domain.  try .com
80            webAddress.setHost("www." + webAddress.getHost() + ".com");
81        }
82        return webAddress.toString();
83    }
84
85    public static String composeSearchUrl(String inQuery, String template,
86                                          String queryPlaceHolder) {
87        int placeHolderIndex = template.indexOf(queryPlaceHolder);
88        if (placeHolderIndex < 0) {
89            return null;
90        }
91
92        String query;
93        StringBuilder buffer = new StringBuilder();
94        buffer.append(template.substring(0, placeHolderIndex));
95
96        try {
97            query = java.net.URLEncoder.encode(inQuery, "utf-8");
98            buffer.append(query);
99        } catch (UnsupportedEncodingException ex) {
100            return null;
101        }
102
103        buffer.append(template.substring(
104                placeHolderIndex + queryPlaceHolder.length()));
105
106        return buffer.toString();
107    }
108
109    public static byte[] decode(byte[] url) throws IllegalArgumentException {
110        if (url.length == 0) {
111            return new byte[0];
112        }
113
114        // Create a new byte array with the same length to ensure capacity
115        byte[] tempData = new byte[url.length];
116
117        int tempCount = 0;
118        for (int i = 0; i < url.length; i++) {
119            byte b = url[i];
120            if (b == '%') {
121                if (url.length - i > 2) {
122                    b = (byte) (parseHex(url[i + 1]) * 16
123                            + parseHex(url[i + 2]));
124                    i += 2;
125                } else {
126                    throw new IllegalArgumentException("Invalid format");
127                }
128            }
129            tempData[tempCount++] = b;
130        }
131        byte[] retData = new byte[tempCount];
132        System.arraycopy(tempData, 0, retData, 0, tempCount);
133        return retData;
134    }
135
136    /**
137     * @return True iff the url is correctly URL encoded
138     */
139    static boolean verifyURLEncoding(String url) {
140        int count = url.length();
141        if (count == 0) {
142            return false;
143        }
144
145        int index = url.indexOf('%');
146        while (index >= 0 && index < count) {
147            if (index < count - 2) {
148                try {
149                    parseHex((byte) url.charAt(++index));
150                    parseHex((byte) url.charAt(++index));
151                } catch (IllegalArgumentException e) {
152                    return false;
153                }
154            } else {
155                return false;
156            }
157            index = url.indexOf('%', index + 1);
158        }
159        return true;
160    }
161
162    private static int parseHex(byte b) {
163        if (b >= '0' && b <= '9') return (b - '0');
164        if (b >= 'A' && b <= 'F') return (b - 'A' + 10);
165        if (b >= 'a' && b <= 'f') return (b - 'a' + 10);
166
167        throw new IllegalArgumentException("Invalid hex char '" + b + "'");
168    }
169
170    /**
171     * @return True iff the url is an asset file.
172     */
173    public static boolean isAssetUrl(String url) {
174        return (null != url) && url.startsWith(ASSET_BASE);
175    }
176
177    /**
178     * @return True iff the url is a resource file.
179     * @hide
180     */
181    public static boolean isResourceUrl(String url) {
182        return (null != url) && url.startsWith(RESOURCE_BASE);
183    }
184
185    /**
186     * @return True iff the url is a proxy url to allow cookieless network
187     * requests from a file url.
188     * @deprecated Cookieless proxy is no longer supported.
189     */
190    @Deprecated
191    public static boolean isCookielessProxyUrl(String url) {
192        return (null != url) && url.startsWith(PROXY_BASE);
193    }
194
195    /**
196     * @return True iff the url is a local file.
197     */
198    public static boolean isFileUrl(String url) {
199        return (null != url) && (url.startsWith(FILE_BASE) &&
200                                 !url.startsWith(ASSET_BASE) &&
201                                 !url.startsWith(PROXY_BASE));
202    }
203
204    /**
205     * @return True iff the url is an about: url.
206     */
207    public static boolean isAboutUrl(String url) {
208        return (null != url) && url.startsWith("about:");
209    }
210
211    /**
212     * @return True iff the url is a data: url.
213     */
214    public static boolean isDataUrl(String url) {
215        return (null != url) && url.startsWith("data:");
216    }
217
218    /**
219     * @return True iff the url is a javascript: url.
220     */
221    public static boolean isJavaScriptUrl(String url) {
222        return (null != url) && url.startsWith("javascript:");
223    }
224
225    /**
226     * @return True iff the url is an http: url.
227     */
228    public static boolean isHttpUrl(String url) {
229        return (null != url) &&
230               (url.length() > 6) &&
231               url.substring(0, 7).equalsIgnoreCase("http://");
232    }
233
234    /**
235     * @return True iff the url is an https: url.
236     */
237    public static boolean isHttpsUrl(String url) {
238        return (null != url) &&
239               (url.length() > 7) &&
240               url.substring(0, 8).equalsIgnoreCase("https://");
241    }
242
243    /**
244     * @return True iff the url is a network url.
245     */
246    public static boolean isNetworkUrl(String url) {
247        if (url == null || url.length() == 0) {
248            return false;
249        }
250        return isHttpUrl(url) || isHttpsUrl(url);
251    }
252
253    /**
254     * @return True iff the url is a content: url.
255     */
256    public static boolean isContentUrl(String url) {
257        return (null != url) && url.startsWith(CONTENT_BASE);
258    }
259
260    /**
261     * @return True iff the url is valid.
262     */
263    public static boolean isValidUrl(String url) {
264        if (url == null || url.length() == 0) {
265            return false;
266        }
267
268        return (isAssetUrl(url) ||
269                isResourceUrl(url) ||
270                isFileUrl(url) ||
271                isAboutUrl(url) ||
272                isHttpUrl(url) ||
273                isHttpsUrl(url) ||
274                isJavaScriptUrl(url) ||
275                isContentUrl(url));
276    }
277
278    /**
279     * Strips the url of the anchor.
280     */
281    public static String stripAnchor(String url) {
282        int anchorIndex = url.indexOf('#');
283        if (anchorIndex != -1) {
284            return url.substring(0, anchorIndex);
285        }
286        return url;
287    }
288
289    /**
290     * Guesses canonical filename that a download would have, using
291     * the URL and contentDisposition. File extension, if not defined,
292     * is added based on the mimetype
293     * @param url Url to the content
294     * @param contentDisposition Content-Disposition HTTP header or null
295     * @param mimeType Mime-type of the content or null
296     *
297     * @return suggested filename
298     */
299    public static final String guessFileName(
300            String url,
301            String contentDisposition,
302            String mimeType) {
303        String filename = null;
304        String extension = null;
305
306        // If we couldn't do anything with the hint, move toward the content disposition
307        if (filename == null && contentDisposition != null) {
308            filename = parseContentDisposition(contentDisposition);
309            if (filename != null) {
310                int index = filename.lastIndexOf('/') + 1;
311                if (index > 0) {
312                    filename = filename.substring(index);
313                }
314            }
315        }
316
317        // If all the other http-related approaches failed, use the plain uri
318        if (filename == null) {
319            String decodedUrl = Uri.decode(url);
320            if (decodedUrl != null) {
321                int queryIndex = decodedUrl.indexOf('?');
322                // If there is a query string strip it, same as desktop browsers
323                if (queryIndex > 0) {
324                    decodedUrl = decodedUrl.substring(0, queryIndex);
325                }
326                if (!decodedUrl.endsWith("/")) {
327                    int index = decodedUrl.lastIndexOf('/') + 1;
328                    if (index > 0) {
329                        filename = decodedUrl.substring(index);
330                    }
331                }
332            }
333        }
334
335        // Finally, if couldn't get filename from URI, get a generic filename
336        if (filename == null) {
337            filename = "downloadfile";
338        }
339
340        // Split filename between base and extension
341        // Add an extension if filename does not have one
342        int dotIndex = filename.indexOf('.');
343        if (dotIndex < 0) {
344            if (mimeType != null) {
345                extension = MimeTypeMap.getSingleton().getExtensionFromMimeType(mimeType);
346                if (extension != null) {
347                    extension = "." + extension;
348                }
349            }
350            if (extension == null) {
351                if (mimeType != null && mimeType.toLowerCase().startsWith("text/")) {
352                    if (mimeType.equalsIgnoreCase("text/html")) {
353                        extension = ".html";
354                    } else {
355                        extension = ".txt";
356                    }
357                } else {
358                    extension = ".bin";
359                }
360            }
361        } else {
362            if (mimeType != null) {
363                // Compare the last segment of the extension against the mime type.
364                // If there's a mismatch, discard the entire extension.
365                int lastDotIndex = filename.lastIndexOf('.');
366                String typeFromExt = MimeTypeMap.getSingleton().getMimeTypeFromExtension(
367                        filename.substring(lastDotIndex + 1));
368                if (typeFromExt != null && !typeFromExt.equalsIgnoreCase(mimeType)) {
369                    extension = MimeTypeMap.getSingleton().getExtensionFromMimeType(mimeType);
370                    if (extension != null) {
371                        extension = "." + extension;
372                    }
373                }
374            }
375            if (extension == null) {
376                extension = filename.substring(dotIndex);
377            }
378            filename = filename.substring(0, dotIndex);
379        }
380
381        return filename + extension;
382    }
383
384    /** Regex used to parse content-disposition headers */
385    private static final Pattern CONTENT_DISPOSITION_PATTERN =
386            Pattern.compile("attachment;\\s*filename\\s*=\\s*(\"?)([^\"]*)\\1\\s*$",
387            Pattern.CASE_INSENSITIVE);
388
389    /*
390     * Parse the Content-Disposition HTTP Header. The format of the header
391     * is defined here: http://www.w3.org/Protocols/rfc2616/rfc2616-sec19.html
392     * This header provides a filename for content that is going to be
393     * downloaded to the file system. We only support the attachment type.
394     * Note that RFC 2616 specifies the filename value must be double-quoted.
395     * Unfortunately some servers do not quote the value so to maintain
396     * consistent behaviour with other browsers, we allow unquoted values too.
397     */
398    static String parseContentDisposition(String contentDisposition) {
399        try {
400            Matcher m = CONTENT_DISPOSITION_PATTERN.matcher(contentDisposition);
401            if (m.find()) {
402                return m.group(2);
403            }
404        } catch (IllegalStateException ex) {
405             // This function is defined as returning null when it can't parse the header
406        }
407        return null;
408    }
409}
410