/*
 * Copyright (C) 2010 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
168233facddcc51865d612a919d450db6954aa48e3Michael Kolb
17fb3017ffd8aa3f2342380270cf468e3a68914e69John Reckpackage com.android.browser;
18fb3017ffd8aa3f2342380270cf468e3a68914e69John Reck
import android.net.Uri;
import android.util.Patterns;
import android.webkit.URLUtil;

import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
25fb3017ffd8aa3f2342380270cf468e3a68914e69John Reck
268233facddcc51865d612a919d450db6954aa48e3Michael Kolb/**
278233facddcc51865d612a919d450db6954aa48e3Michael Kolb * Utility methods for Url manipulation
288233facddcc51865d612a919d450db6954aa48e3Michael Kolb */
29fb3017ffd8aa3f2342380270cf468e3a68914e69John Reckpublic class UrlUtils {
30fb3017ffd8aa3f2342380270cf468e3a68914e69John Reck
318233facddcc51865d612a919d450db6954aa48e3Michael Kolb    static final Pattern ACCEPTED_URI_SCHEMA = Pattern.compile(
328233facddcc51865d612a919d450db6954aa48e3Michael Kolb            "(?i)" + // switch on case insensitive matching
338233facddcc51865d612a919d450db6954aa48e3Michael Kolb            "(" +    // begin group for schema
348233facddcc51865d612a919d450db6954aa48e3Michael Kolb            "(?:http|https|file):\\/\\/" +
35b92bbb498f1eeab477d62632c235d0a68a4f4ea2Patrick Scott            "|(?:inline|data|about|javascript):" +
368233facddcc51865d612a919d450db6954aa48e3Michael Kolb            ")" +
378233facddcc51865d612a919d450db6954aa48e3Michael Kolb            "(.*)" );
388233facddcc51865d612a919d450db6954aa48e3Michael Kolb
398233facddcc51865d612a919d450db6954aa48e3Michael Kolb    // Google search
408233facddcc51865d612a919d450db6954aa48e3Michael Kolb    private final static String QUICKSEARCH_G = "http://www.google.com/m?q=%s";
418233facddcc51865d612a919d450db6954aa48e3Michael Kolb    private final static String QUERY_PLACE_HOLDER = "%s";
428233facddcc51865d612a919d450db6954aa48e3Michael Kolb
43fe5b94d91cc026aa5e8e9476ed77ad59df6f9ddfJohn Reck    // Regular expression to strip http:// and optionally
44434e9f83e13c0758dcdefe214357fc9cc9f104d5John Reck    // the trailing slash
45434e9f83e13c0758dcdefe214357fc9cc9f104d5John Reck    private static final Pattern STRIP_URL_PATTERN =
46fe5b94d91cc026aa5e8e9476ed77ad59df6f9ddfJohn Reck            Pattern.compile("^http://(.*?)/?$");
47fb3017ffd8aa3f2342380270cf468e3a68914e69John Reck
48fb3017ffd8aa3f2342380270cf468e3a68914e69John Reck    private UrlUtils() { /* cannot be instantiated */ }
49fb3017ffd8aa3f2342380270cf468e3a68914e69John Reck
50fb3017ffd8aa3f2342380270cf468e3a68914e69John Reck    /**
51fe5b94d91cc026aa5e8e9476ed77ad59df6f9ddfJohn Reck     * Strips the provided url of preceding "http://" and any trailing "/". Does not
52fb3017ffd8aa3f2342380270cf468e3a68914e69John Reck     * strip "https://". If the provided string cannot be stripped, the original string
53fb3017ffd8aa3f2342380270cf468e3a68914e69John Reck     * is returned.
54fb3017ffd8aa3f2342380270cf468e3a68914e69John Reck     *
55fb3017ffd8aa3f2342380270cf468e3a68914e69John Reck     * TODO: Put this in TextUtils to be used by other packages doing something similar.
56fb3017ffd8aa3f2342380270cf468e3a68914e69John Reck     *
57fb3017ffd8aa3f2342380270cf468e3a68914e69John Reck     * @param url a url to strip, like "http://www.google.com/"
58fe5b94d91cc026aa5e8e9476ed77ad59df6f9ddfJohn Reck     * @return a stripped url like "www.google.com", or the original string if it could
59fb3017ffd8aa3f2342380270cf468e3a68914e69John Reck     *         not be stripped
60fb3017ffd8aa3f2342380270cf468e3a68914e69John Reck     */
612cb9dc2db14154753d906ff4cce87c333df54214John Reck    public static String stripUrl(String url) {
62fb3017ffd8aa3f2342380270cf468e3a68914e69John Reck        if (url == null) return null;
63fb3017ffd8aa3f2342380270cf468e3a68914e69John Reck        Matcher m = STRIP_URL_PATTERN.matcher(url);
64434e9f83e13c0758dcdefe214357fc9cc9f104d5John Reck        if (m.matches()) {
65434e9f83e13c0758dcdefe214357fc9cc9f104d5John Reck            return m.group(1);
66fb3017ffd8aa3f2342380270cf468e3a68914e69John Reck        } else {
67fb3017ffd8aa3f2342380270cf468e3a68914e69John Reck            return url;
68fb3017ffd8aa3f2342380270cf468e3a68914e69John Reck        }
69fb3017ffd8aa3f2342380270cf468e3a68914e69John Reck    }
708233facddcc51865d612a919d450db6954aa48e3Michael Kolb
718233facddcc51865d612a919d450db6954aa48e3Michael Kolb    protected static String smartUrlFilter(Uri inUri) {
728233facddcc51865d612a919d450db6954aa48e3Michael Kolb        if (inUri != null) {
738233facddcc51865d612a919d450db6954aa48e3Michael Kolb            return smartUrlFilter(inUri.toString());
748233facddcc51865d612a919d450db6954aa48e3Michael Kolb        }
758233facddcc51865d612a919d450db6954aa48e3Michael Kolb        return null;
768233facddcc51865d612a919d450db6954aa48e3Michael Kolb    }
778233facddcc51865d612a919d450db6954aa48e3Michael Kolb
788233facddcc51865d612a919d450db6954aa48e3Michael Kolb    /**
798233facddcc51865d612a919d450db6954aa48e3Michael Kolb     * Attempts to determine whether user input is a URL or search
808233facddcc51865d612a919d450db6954aa48e3Michael Kolb     * terms.  Anything with a space is passed to search.
818233facddcc51865d612a919d450db6954aa48e3Michael Kolb     *
828233facddcc51865d612a919d450db6954aa48e3Michael Kolb     * Converts to lowercase any mistakenly uppercased schema (i.e.,
838233facddcc51865d612a919d450db6954aa48e3Michael Kolb     * "Http://" converts to "http://"
848233facddcc51865d612a919d450db6954aa48e3Michael Kolb     *
858233facddcc51865d612a919d450db6954aa48e3Michael Kolb     * @return Original or modified URL
868233facddcc51865d612a919d450db6954aa48e3Michael Kolb     *
878233facddcc51865d612a919d450db6954aa48e3Michael Kolb     */
88961d35d6e9cd58d930d8572b91d7e0c9f928f1e8John Reck    public static String smartUrlFilter(String url) {
8971e5142e4237a0f9d51a93504da36b778e8b79e4John Reck        return smartUrlFilter(url, true);
9071e5142e4237a0f9d51a93504da36b778e8b79e4John Reck    }
918233facddcc51865d612a919d450db6954aa48e3Michael Kolb
9271e5142e4237a0f9d51a93504da36b778e8b79e4John Reck    /**
9371e5142e4237a0f9d51a93504da36b778e8b79e4John Reck     * Attempts to determine whether user input is a URL or search
9471e5142e4237a0f9d51a93504da36b778e8b79e4John Reck     * terms.  Anything with a space is passed to search if canBeSearch is true.
9571e5142e4237a0f9d51a93504da36b778e8b79e4John Reck     *
9671e5142e4237a0f9d51a93504da36b778e8b79e4John Reck     * Converts to lowercase any mistakenly uppercased schema (i.e.,
9771e5142e4237a0f9d51a93504da36b778e8b79e4John Reck     * "Http://" converts to "http://"
9871e5142e4237a0f9d51a93504da36b778e8b79e4John Reck     *
9971e5142e4237a0f9d51a93504da36b778e8b79e4John Reck     * @param canBeSearch If true, will return a search url if it isn't a valid
10071e5142e4237a0f9d51a93504da36b778e8b79e4John Reck     *                    URL. If false, invalid URLs will return null
10171e5142e4237a0f9d51a93504da36b778e8b79e4John Reck     * @return Original or modified URL
10271e5142e4237a0f9d51a93504da36b778e8b79e4John Reck     *
10371e5142e4237a0f9d51a93504da36b778e8b79e4John Reck     */
10471e5142e4237a0f9d51a93504da36b778e8b79e4John Reck    public static String smartUrlFilter(String url, boolean canBeSearch) {
1058233facddcc51865d612a919d450db6954aa48e3Michael Kolb        String inUrl = url.trim();
1068233facddcc51865d612a919d450db6954aa48e3Michael Kolb        boolean hasSpace = inUrl.indexOf(' ') != -1;
1078233facddcc51865d612a919d450db6954aa48e3Michael Kolb
1088233facddcc51865d612a919d450db6954aa48e3Michael Kolb        Matcher matcher = ACCEPTED_URI_SCHEMA.matcher(inUrl);
1098233facddcc51865d612a919d450db6954aa48e3Michael Kolb        if (matcher.matches()) {
1108233facddcc51865d612a919d450db6954aa48e3Michael Kolb            // force scheme to lowercase
1118233facddcc51865d612a919d450db6954aa48e3Michael Kolb            String scheme = matcher.group(1);
1128233facddcc51865d612a919d450db6954aa48e3Michael Kolb            String lcScheme = scheme.toLowerCase();
1138233facddcc51865d612a919d450db6954aa48e3Michael Kolb            if (!lcScheme.equals(scheme)) {
1148233facddcc51865d612a919d450db6954aa48e3Michael Kolb                inUrl = lcScheme + matcher.group(2);
1158233facddcc51865d612a919d450db6954aa48e3Michael Kolb            }
11671e5142e4237a0f9d51a93504da36b778e8b79e4John Reck            if (hasSpace && Patterns.WEB_URL.matcher(inUrl).matches()) {
1178233facddcc51865d612a919d450db6954aa48e3Michael Kolb                inUrl = inUrl.replace(" ", "%20");
1188233facddcc51865d612a919d450db6954aa48e3Michael Kolb            }
1198233facddcc51865d612a919d450db6954aa48e3Michael Kolb            return inUrl;
1208233facddcc51865d612a919d450db6954aa48e3Michael Kolb        }
1218233facddcc51865d612a919d450db6954aa48e3Michael Kolb        if (!hasSpace) {
1228233facddcc51865d612a919d450db6954aa48e3Michael Kolb            if (Patterns.WEB_URL.matcher(inUrl).matches()) {
1238233facddcc51865d612a919d450db6954aa48e3Michael Kolb                return URLUtil.guessUrl(inUrl);
1248233facddcc51865d612a919d450db6954aa48e3Michael Kolb            }
1258233facddcc51865d612a919d450db6954aa48e3Michael Kolb        }
12671e5142e4237a0f9d51a93504da36b778e8b79e4John Reck        if (canBeSearch) {
12771e5142e4237a0f9d51a93504da36b778e8b79e4John Reck            return URLUtil.composeSearchUrl(inUrl,
12871e5142e4237a0f9d51a93504da36b778e8b79e4John Reck                    QUICKSEARCH_G, QUERY_PLACE_HOLDER);
12971e5142e4237a0f9d51a93504da36b778e8b79e4John Reck        }
13071e5142e4237a0f9d51a93504da36b778e8b79e4John Reck        return null;
1318233facddcc51865d612a919d450db6954aa48e3Michael Kolb    }
1328233facddcc51865d612a919d450db6954aa48e3Michael Kolb
1338233facddcc51865d612a919d450db6954aa48e3Michael Kolb    /* package */ static String fixUrl(String inUrl) {
1348233facddcc51865d612a919d450db6954aa48e3Michael Kolb        // FIXME: Converting the url to lower case
1358233facddcc51865d612a919d450db6954aa48e3Michael Kolb        // duplicates functionality in smartUrlFilter().
1368233facddcc51865d612a919d450db6954aa48e3Michael Kolb        // However, changing all current callers of fixUrl to
1378233facddcc51865d612a919d450db6954aa48e3Michael Kolb        // call smartUrlFilter in addition may have unwanted
1388233facddcc51865d612a919d450db6954aa48e3Michael Kolb        // consequences, and is deferred for now.
1398233facddcc51865d612a919d450db6954aa48e3Michael Kolb        int colon = inUrl.indexOf(':');
1408233facddcc51865d612a919d450db6954aa48e3Michael Kolb        boolean allLower = true;
1418233facddcc51865d612a919d450db6954aa48e3Michael Kolb        for (int index = 0; index < colon; index++) {
1428233facddcc51865d612a919d450db6954aa48e3Michael Kolb            char ch = inUrl.charAt(index);
1438233facddcc51865d612a919d450db6954aa48e3Michael Kolb            if (!Character.isLetter(ch)) {
1448233facddcc51865d612a919d450db6954aa48e3Michael Kolb                break;
1458233facddcc51865d612a919d450db6954aa48e3Michael Kolb            }
1468233facddcc51865d612a919d450db6954aa48e3Michael Kolb            allLower &= Character.isLowerCase(ch);
1478233facddcc51865d612a919d450db6954aa48e3Michael Kolb            if (index == colon - 1 && !allLower) {
1488233facddcc51865d612a919d450db6954aa48e3Michael Kolb                inUrl = inUrl.substring(0, colon).toLowerCase()
1498233facddcc51865d612a919d450db6954aa48e3Michael Kolb                        + inUrl.substring(colon);
1508233facddcc51865d612a919d450db6954aa48e3Michael Kolb            }
1518233facddcc51865d612a919d450db6954aa48e3Michael Kolb        }
1528233facddcc51865d612a919d450db6954aa48e3Michael Kolb        if (inUrl.startsWith("http://") || inUrl.startsWith("https://"))
1538233facddcc51865d612a919d450db6954aa48e3Michael Kolb            return inUrl;
1548233facddcc51865d612a919d450db6954aa48e3Michael Kolb        if (inUrl.startsWith("http:") ||
1558233facddcc51865d612a919d450db6954aa48e3Michael Kolb                inUrl.startsWith("https:")) {
1568233facddcc51865d612a919d450db6954aa48e3Michael Kolb            if (inUrl.startsWith("http:/") || inUrl.startsWith("https:/")) {
1578233facddcc51865d612a919d450db6954aa48e3Michael Kolb                inUrl = inUrl.replaceFirst("/", "//");
1588233facddcc51865d612a919d450db6954aa48e3Michael Kolb            } else inUrl = inUrl.replaceFirst(":", "://");
1598233facddcc51865d612a919d450db6954aa48e3Michael Kolb        }
1608233facddcc51865d612a919d450db6954aa48e3Michael Kolb        return inUrl;
1618233facddcc51865d612a919d450db6954aa48e3Michael Kolb    }
1628233facddcc51865d612a919d450db6954aa48e3Michael Kolb
163324d440ea7dc44da8beef8a6c2e6f7a33b20a41dJohn Reck    // Returns the filtered URL. Cannot return null, but can return an empty string
164324d440ea7dc44da8beef8a6c2e6f7a33b20a41dJohn Reck    /* package */ static String filteredUrl(String inUrl) {
165324d440ea7dc44da8beef8a6c2e6f7a33b20a41dJohn Reck        if (inUrl == null) {
166324d440ea7dc44da8beef8a6c2e6f7a33b20a41dJohn Reck            return "";
167324d440ea7dc44da8beef8a6c2e6f7a33b20a41dJohn Reck        }
168e44e5622a593cacb20d0c4b1a82d4aeed6a9f4c1John Reck        if (inUrl.startsWith("content:")
169e44e5622a593cacb20d0c4b1a82d4aeed6a9f4c1John Reck                || inUrl.startsWith("browser:")) {
170324d440ea7dc44da8beef8a6c2e6f7a33b20a41dJohn Reck            return "";
171324d440ea7dc44da8beef8a6c2e6f7a33b20a41dJohn Reck        }
172324d440ea7dc44da8beef8a6c2e6f7a33b20a41dJohn Reck        return inUrl;
173324d440ea7dc44da8beef8a6c2e6f7a33b20a41dJohn Reck    }
174324d440ea7dc44da8beef8a6c2e6f7a33b20a41dJohn Reck
175fb3017ffd8aa3f2342380270cf468e3a68914e69John Reck}
176