1/*
2 * Copyright (C) 2010 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.browser;
18
import android.net.Uri;
import android.util.Patterns;
import android.webkit.URLUtil;

import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
25
26/**
27 * Utility methods for Url manipulation
28 */
29public class UrlUtils {
30
31    static final Pattern ACCEPTED_URI_SCHEMA = Pattern.compile(
32            "(?i)" + // switch on case insensitive matching
33            "(" +    // begin group for schema
34            "(?:http|https|file):\\/\\/" +
35            "|(?:inline|data|about|javascript):" +
36            ")" +
37            "(.*)" );
38
39    // Google search
40    private final static String QUICKSEARCH_G = "http://www.google.com/m?q=%s";
41    private final static String QUERY_PLACE_HOLDER = "%s";
42
43    // Regular expression to strip http:// and optionally
44    // the trailing slash
45    private static final Pattern STRIP_URL_PATTERN =
46            Pattern.compile("^http://(.*?)/?$");
47
48    private UrlUtils() { /* cannot be instantiated */ }
49
50    /**
51     * Strips the provided url of preceding "http://" and any trailing "/". Does not
52     * strip "https://". If the provided string cannot be stripped, the original string
53     * is returned.
54     *
55     * TODO: Put this in TextUtils to be used by other packages doing something similar.
56     *
57     * @param url a url to strip, like "http://www.google.com/"
58     * @return a stripped url like "www.google.com", or the original string if it could
59     *         not be stripped
60     */
61    public static String stripUrl(String url) {
62        if (url == null) return null;
63        Matcher m = STRIP_URL_PATTERN.matcher(url);
64        if (m.matches()) {
65            return m.group(1);
66        } else {
67            return url;
68        }
69    }
70
71    protected static String smartUrlFilter(Uri inUri) {
72        if (inUri != null) {
73            return smartUrlFilter(inUri.toString());
74        }
75        return null;
76    }
77
78    /**
79     * Attempts to determine whether user input is a URL or search
80     * terms.  Anything with a space is passed to search.
81     *
82     * Converts to lowercase any mistakenly uppercased schema (i.e.,
83     * "Http://" converts to "http://"
84     *
85     * @return Original or modified URL
86     *
87     */
88    public static String smartUrlFilter(String url) {
89        return smartUrlFilter(url, true);
90    }
91
92    /**
93     * Attempts to determine whether user input is a URL or search
94     * terms.  Anything with a space is passed to search if canBeSearch is true.
95     *
96     * Converts to lowercase any mistakenly uppercased schema (i.e.,
97     * "Http://" converts to "http://"
98     *
99     * @param canBeSearch If true, will return a search url if it isn't a valid
100     *                    URL. If false, invalid URLs will return null
101     * @return Original or modified URL
102     *
103     */
104    public static String smartUrlFilter(String url, boolean canBeSearch) {
105        String inUrl = url.trim();
106        boolean hasSpace = inUrl.indexOf(' ') != -1;
107
108        Matcher matcher = ACCEPTED_URI_SCHEMA.matcher(inUrl);
109        if (matcher.matches()) {
110            // force scheme to lowercase
111            String scheme = matcher.group(1);
112            String lcScheme = scheme.toLowerCase();
113            if (!lcScheme.equals(scheme)) {
114                inUrl = lcScheme + matcher.group(2);
115            }
116            if (hasSpace && Patterns.WEB_URL.matcher(inUrl).matches()) {
117                inUrl = inUrl.replace(" ", "%20");
118            }
119            return inUrl;
120        }
121        if (!hasSpace) {
122            if (Patterns.WEB_URL.matcher(inUrl).matches()) {
123                return URLUtil.guessUrl(inUrl);
124            }
125        }
126        if (canBeSearch) {
127            return URLUtil.composeSearchUrl(inUrl,
128                    QUICKSEARCH_G, QUERY_PLACE_HOLDER);
129        }
130        return null;
131    }
132
133    /* package */ static String fixUrl(String inUrl) {
134        // FIXME: Converting the url to lower case
135        // duplicates functionality in smartUrlFilter().
136        // However, changing all current callers of fixUrl to
137        // call smartUrlFilter in addition may have unwanted
138        // consequences, and is deferred for now.
139        int colon = inUrl.indexOf(':');
140        boolean allLower = true;
141        for (int index = 0; index < colon; index++) {
142            char ch = inUrl.charAt(index);
143            if (!Character.isLetter(ch)) {
144                break;
145            }
146            allLower &= Character.isLowerCase(ch);
147            if (index == colon - 1 && !allLower) {
148                inUrl = inUrl.substring(0, colon).toLowerCase()
149                        + inUrl.substring(colon);
150            }
151        }
152        if (inUrl.startsWith("http://") || inUrl.startsWith("https://"))
153            return inUrl;
154        if (inUrl.startsWith("http:") ||
155                inUrl.startsWith("https:")) {
156            if (inUrl.startsWith("http:/") || inUrl.startsWith("https:/")) {
157                inUrl = inUrl.replaceFirst("/", "//");
158            } else inUrl = inUrl.replaceFirst(":", "://");
159        }
160        return inUrl;
161    }
162
163    // Returns the filtered URL. Cannot return null, but can return an empty string
164    /* package */ static String filteredUrl(String inUrl) {
165        if (inUrl == null) {
166            return "";
167        }
168        if (inUrl.startsWith("content:")
169                || inUrl.startsWith("browser:")) {
170            return "";
171        }
172        return inUrl;
173    }
174
175}
176