1/*
2 * Copyright (C) 2010 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.browser;
18
import android.net.Uri;
import android.util.Patterns;
import android.webkit.URLUtil;

import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
25
26/**
27 * Utility methods for Url manipulation
28 */
29public class UrlUtils {
30
31    static final Pattern ACCEPTED_URI_SCHEMA = Pattern.compile(
32            "(?i)" + // switch on case insensitive matching
33            "(" +    // begin group for schema
34            "(?:http|https|file):\\/\\/" +
35            "|(?:data|about|javascript):" +
36            "|(?:.*:.*@)" +
37            ")" +
38            "(.*)" );
39
40    // Google search
41    private final static String QUICKSEARCH_G = "http://www.google.com/m?q=%s";
42    private final static String QUERY_PLACE_HOLDER = "%s";
43
44    // Regular expression to strip http:// and optionally
45    // the trailing slash
46    private static final Pattern STRIP_URL_PATTERN =
47            Pattern.compile("^http://(.*?)/?$");
48
49    private UrlUtils() { /* cannot be instantiated */ }
50
51    /**
52     * Strips the provided url of preceding "http://" and any trailing "/". Does not
53     * strip "https://". If the provided string cannot be stripped, the original string
54     * is returned.
55     *
56     * TODO: Put this in TextUtils to be used by other packages doing something similar.
57     *
58     * @param url a url to strip, like "http://www.google.com/"
59     * @return a stripped url like "www.google.com", or the original string if it could
60     *         not be stripped
61     */
62    public static String stripUrl(String url) {
63        if (url == null) return null;
64        Matcher m = STRIP_URL_PATTERN.matcher(url);
65        if (m.matches()) {
66            return m.group(1);
67        } else {
68            return url;
69        }
70    }
71
72    protected static String smartUrlFilter(Uri inUri) {
73        if (inUri != null) {
74            return smartUrlFilter(inUri.toString());
75        }
76        return null;
77    }
78
79    /**
80     * Attempts to determine whether user input is a URL or search
81     * terms.  Anything with a space is passed to search.
82     *
83     * Converts to lowercase any mistakenly uppercased schema (i.e.,
84     * "Http://" converts to "http://"
85     *
86     * @return Original or modified URL
87     *
88     */
89    public static String smartUrlFilter(String url) {
90        return smartUrlFilter(url, true);
91    }
92
93    /**
94     * Attempts to determine whether user input is a URL or search
95     * terms.  Anything with a space is passed to search if canBeSearch is true.
96     *
97     * Converts to lowercase any mistakenly uppercased schema (i.e.,
98     * "Http://" converts to "http://"
99     *
100     * @param canBeSearch If true, will return a search url if it isn't a valid
101     *                    URL. If false, invalid URLs will return null
102     * @return Original or modified URL
103     *
104     */
105    public static String smartUrlFilter(String url, boolean canBeSearch) {
106        String inUrl = url.trim();
107        boolean hasSpace = inUrl.indexOf(' ') != -1;
108
109        Matcher matcher = ACCEPTED_URI_SCHEMA.matcher(inUrl);
110        if (matcher.matches()) {
111            // force scheme to lowercase
112            String scheme = matcher.group(1);
113            String lcScheme = scheme.toLowerCase();
114            if (!lcScheme.equals(scheme)) {
115                inUrl = lcScheme + matcher.group(2);
116            }
117            if (hasSpace && Patterns.WEB_URL.matcher(inUrl).matches()) {
118                inUrl = inUrl.replace(" ", "%20");
119            }
120            return inUrl;
121        }
122        if (!hasSpace) {
123            if (Patterns.WEB_URL.matcher(inUrl).matches()) {
124                return URLUtil.guessUrl(inUrl);
125            }
126        }
127        if (canBeSearch) {
128            return URLUtil.composeSearchUrl(inUrl,
129                    QUICKSEARCH_G, QUERY_PLACE_HOLDER);
130        }
131        return null;
132    }
133
134    /* package */ static String fixUrl(String inUrl) {
135        // FIXME: Converting the url to lower case
136        // duplicates functionality in smartUrlFilter().
137        // However, changing all current callers of fixUrl to
138        // call smartUrlFilter in addition may have unwanted
139        // consequences, and is deferred for now.
140        int colon = inUrl.indexOf(':');
141        boolean allLower = true;
142        for (int index = 0; index < colon; index++) {
143            char ch = inUrl.charAt(index);
144            if (!Character.isLetter(ch)) {
145                break;
146            }
147            allLower &= Character.isLowerCase(ch);
148            if (index == colon - 1 && !allLower) {
149                inUrl = inUrl.substring(0, colon).toLowerCase()
150                        + inUrl.substring(colon);
151            }
152        }
153        if (inUrl.startsWith("http://") || inUrl.startsWith("https://"))
154            return inUrl;
155        if (inUrl.startsWith("http:") ||
156                inUrl.startsWith("https:")) {
157            if (inUrl.startsWith("http:/") || inUrl.startsWith("https:/")) {
158                inUrl = inUrl.replaceFirst("/", "//");
159            } else inUrl = inUrl.replaceFirst(":", "://");
160        }
161        return inUrl;
162    }
163
164    // Returns the filtered URL. Cannot return null, but can return an empty string
165    /* package */ static String filteredUrl(String inUrl) {
166        if (inUrl == null) {
167            return "";
168        }
169        if (inUrl.startsWith("content:")
170                || inUrl.startsWith("browser:")) {
171            return "";
172        }
173        return inUrl;
174    }
175
176}
177