/*
 * Copyright (C) 2010 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.browser;

import android.net.Uri;
import android.util.Patterns;
import android.webkit.URLUtil;

import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Utility methods for Url manipulation
 */
public class UrlUtils {

    /**
     * Matches input that starts with one of the accepted schemes. Group 1 is
     * the scheme including its separator ("://" or ":"), group 2 is the
     * remainder of the input.
     */
    static final Pattern ACCEPTED_URI_SCHEMA = Pattern.compile(
            "(?i)" + // switch on case insensitive matching
            "(" +    // begin group for schema
            "(?:http|https|file):\\/\\/" +
            "|(?:inline|data|about|javascript):" +
            ")" +
            "(.*)" );

    // Google search
    private static final String QUICKSEARCH_G = "http://www.google.com/m?q=%s";
    // Place holder inside QUICKSEARCH_G that is replaced by the search terms
    private static final String QUERY_PLACE_HOLDER = "%s";

    // Regular expression to strip http:// and optionally
    // the trailing slash
    private static final Pattern STRIP_URL_PATTERN =
            Pattern.compile("^http://(.*?)/?$");

    private UrlUtils() { /* cannot be instantiated */ }

    /**
     * Strips the provided url of preceding "http://" and any trailing "/". Does not
     * strip "https://". If the provided string cannot be stripped, the original string
     * is returned.
     *
     * TODO: Put this in TextUtils to be used by other packages doing something similar.
     *
     * @param url a url to strip, like "http://www.google.com/"
     * @return a stripped url like "www.google.com", or the original string if it could
     *         not be stripped; null if url is null
     */
    public static String stripUrl(String url) {
        if (url == null) return null;
        Matcher m = STRIP_URL_PATTERN.matcher(url);
        if (m.matches()) {
            return m.group(1);
        } else {
            return url;
        }
    }

    /**
     * Convenience overload that filters the string form of the given Uri.
     *
     * @param inUri the Uri to filter, may be null
     * @return the result of {@link #smartUrlFilter(String)} applied to
     *         inUri.toString(), or null if inUri is null
     */
    protected static String smartUrlFilter(Uri inUri) {
        if (inUri != null) {
            return smartUrlFilter(inUri.toString());
        }
        return null;
    }

    /**
     * Attempts to determine whether user input is a URL or search
     * terms. Equivalent to calling
     * {@link #smartUrlFilter(String, boolean)} with canBeSearch set to true.
     *
     * Converts to lowercase any mistakenly uppercased schema (i.e.,
     * "Http://" converts to "http://")
     *
     * @param url the user input; must not be null
     * @return Original or modified URL
     *
     */
    public static String smartUrlFilter(String url) {
        return smartUrlFilter(url, true);
    }

    /**
     * Attempts to determine whether user input is a URL or search
     * terms.
     *
     * Input starting with a scheme accepted by {@link #ACCEPTED_URI_SCHEMA}
     * is returned as a URL with the scheme forced to lowercase (i.e.,
     * "Http://" converts to "http://"); spaces are replaced by "%20" only
     * if the input also matches Patterns.WEB_URL. Input without an accepted
     * scheme and without spaces that matches Patterns.WEB_URL is passed to
     * URLUtil.guessUrl(). Everything else is treated as search terms if
     * canBeSearch is true.
     *
     * @param url the user input; must not be null (it is trimmed first, so a
     *            null value raises a NullPointerException)
     * @param canBeSearch If true, will return a search url if it isn't a valid
     *                    URL. If false, invalid URLs will return null
     * @return Original or modified URL
     *
     */
    public static String smartUrlFilter(String url, boolean canBeSearch) {
        String inUrl = url.trim();
        boolean hasSpace = inUrl.indexOf(' ') != -1;

        Matcher matcher = ACCEPTED_URI_SCHEMA.matcher(inUrl);
        if (matcher.matches()) {
            // force scheme to lowercase
            String scheme = matcher.group(1);
            // Locale.ROOT keeps the result independent of the device locale;
            // with the default locale, Turkish would turn "FILE" into "fıle".
            String lcScheme = scheme.toLowerCase(Locale.ROOT);
            if (!lcScheme.equals(scheme)) {
                inUrl = lcScheme + matcher.group(2);
            }
            if (hasSpace && Patterns.WEB_URL.matcher(inUrl).matches()) {
                inUrl = inUrl.replace(" ", "%20");
            }
            return inUrl;
        }
        if (!hasSpace) {
            if (Patterns.WEB_URL.matcher(inUrl).matches()) {
                return URLUtil.guessUrl(inUrl);
            }
        }
        if (canBeSearch) {
            return URLUtil.composeSearchUrl(inUrl,
                    QUICKSEARCH_G, QUERY_PLACE_HOLDER);
        }
        return null;
    }

    /**
     * Normalizes the scheme part of a url. If every character before the
     * first ':' is a letter and at least one of them is not lowercase, that
     * prefix is converted to lowercase. Afterwards "http:" / "https:" urls
     * that are missing one or both slashes ("http:/host", "http:host") are
     * rewritten to the "scheme://" form. Any other input is returned as is.
     *
     * @param inUrl the url to fix; must not be null
     * @return the fixed url
     */
    /* package */ static String fixUrl(String inUrl) {
        // FIXME: Converting the url to lower case
        // duplicates functionality in smartUrlFilter().
        // However, changing all current callers of fixUrl to
        // call smartUrlFilter in addition may have unwanted
        // consequences, and is deferred for now.
        int colon = inUrl.indexOf(':');
        boolean allLower = true;
        for (int index = 0; index < colon; index++) {
            char ch = inUrl.charAt(index);
            if (!Character.isLetter(ch)) {
                break;
            }
            allLower &= Character.isLowerCase(ch);
            // Only reached on the last character before the colon when all
            // preceding characters were letters.
            if (index == colon - 1 && !allLower) {
                // Locale.ROOT: the scheme must not be lowercased with
                // locale specific rules (e.g. Turkish dotless i).
                inUrl = inUrl.substring(0, colon).toLowerCase(Locale.ROOT)
                        + inUrl.substring(colon);
            }
        }
        if (inUrl.startsWith("http://") || inUrl.startsWith("https://")) {
            return inUrl;
        }
        if (inUrl.startsWith("http:") ||
                inUrl.startsWith("https:")) {
            if (inUrl.startsWith("http:/") || inUrl.startsWith("https:/")) {
                // Single slash after the scheme: double it
                inUrl = inUrl.replaceFirst("/", "//");
            } else {
                // No slash after the scheme: insert both
                inUrl = inUrl.replaceFirst(":", "://");
            }
        }
        return inUrl;
    }

    /**
     * Returns the filtered URL. Cannot return null, but can return an empty string.
     *
     * @param inUrl the url to filter, may be null
     * @return an empty string if inUrl is null or starts with "content:" or
     *         "browser:", otherwise inUrl unchanged
     */
    /* package */ static String filteredUrl(String inUrl) {
        if (inUrl == null) {
            return "";
        }
        if (inUrl.startsWith("content:")
                || inUrl.startsWith("browser:")) {
            return "";
        }
        return inUrl;
    }

}