158537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)// Copyright 2013 The Chromium Authors. All rights reserved. 258537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be 358537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)// found in the LICENSE file. 458537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) 558537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)package org.chromium.chrome.browser; 658537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) 758537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)import android.text.TextUtils; 858537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) 958537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)import org.chromium.base.CollectionUtil; 1058537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) 1158537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)import java.net.URI; 1258537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)import java.net.URISyntaxException; 1358537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)import java.util.HashSet; 1458537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) 1558537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)/** 1658537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) * Utilities for working with URIs (and URLs). These methods may be used in security-sensitive 1758537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) * contexts (after all, origins are the security boundary on the web), and so the correctness bar 1858537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) * must be high. 1958537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) */ 2058537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)public class UrlUtilities { 2158537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) /** 2258537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) * URI schemes that ContentView can handle. 2358537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) */ 2458537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) private static final HashSet<String> ACCEPTED_SCHEMES = CollectionUtil.newHashSet( 2558537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) "about", "data", "file", "http", "https", "inline", "javascript"); 2658537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) 2758537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) /** 2858537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) * URI schemes that Chrome can download. 2958537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) */ 3058537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) private static final HashSet<String> DOWNLOADABLE_SCHEMES = CollectionUtil.newHashSet( 3158537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) "data", "filesystem", "http", "https"); 3258537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) 3358537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) /** 3458537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) * @param uri A URI. 3558537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) * 3658537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) * @return True if the URI's scheme is one that ContentView can handle. 3758537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) */ 3858537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) public static boolean isAcceptedScheme(URI uri) { 3958537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) return ACCEPTED_SCHEMES.contains(uri.getScheme()); 4058537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) } 4158537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) 4258537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) /** 4358537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) * @param uri A URI. 4458537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) * 4558537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) * @return True if the URI's scheme is one that ContentView can handle. 4658537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) */ 4758537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) public static boolean isAcceptedScheme(String uri) { 4858537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) try { 4958537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) return isAcceptedScheme(new URI(uri)); 5058537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) } catch (URISyntaxException e) { 5158537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) return false; 5258537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) } 5358537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) } 5458537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) 5558537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) /** 5658537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) * @param uri A URI. 5758537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) * 5858537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) * @return True if the URI's scheme is one that Chrome can download. 5958537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) */ 6058537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) public static boolean isDownloadableScheme(URI uri) { 6158537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) return DOWNLOADABLE_SCHEMES.contains(uri.getScheme()); 6258537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) } 6358537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) 6458537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) /** 6558537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) * @param uri A URI. 6658537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) * 6758537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) * @return True if the URI's scheme is one that Chrome can download. 6858537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) */ 6958537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) public static boolean isDownloadableScheme(String uri) { 7058537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) try { 7158537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) return isDownloadableScheme(new URI(uri)); 7258537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) } catch (URISyntaxException e) { 7358537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) return false; 7458537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) } 7558537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) } 7658537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) 7758537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) /** 7858537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) * @param uri A URI to repair. 7958537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) * 8058537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) * @return A String representation of a URI that will be valid for loading in a ContentView. 8158537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) */ 8258537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) public static String fixUrl(String uri) { 8358537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) if (uri == null) return null; 8458537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) 8558537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) try { 8658537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) String fixedUri = uri.trim(); 8758537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) if (fixedUri.indexOf("://") == 0) { 8858537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) return "http" + fixedUri; 8958537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) } 9058537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) if (fixedUri.indexOf(":") == -1) { 9158537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) return "http://" + fixedUri; 9258537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) } 9358537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) 9458537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) URI parsed = new URI(fixedUri); 9558537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) if (parsed.getScheme() == null) { 9658537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) parsed = new URI( 9758537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) "http", 9858537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) null, // userInfo 9958537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) parsed.getHost(), 10058537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) parsed.getPort(), 10158537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) parsed.getRawPath(), 10258537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) parsed.getRawQuery(), 10358537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) parsed.getRawFragment()); 10458537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) } 10558537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) return parsed.toString(); 10658537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) } catch (URISyntaxException e) { 10758537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) // Can't do anything. 10858537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) return uri; 10958537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) } 11058537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) } 11158537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) 11258537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) /** 113f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) * Refer to UrlFixerUpper::FixupURL. 114f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) * 115f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) * Compare to {@link #fixUrl(String)}, This fixes URL more aggressively including Chrome 116f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) * specific cases. For example, "about:" becomes "chrome://version/". However, this is not a 117f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) * superset of {@link #fixUrl(String)} either. For example, this function doesn't do anything 118f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) * with "://mail.google.com:/", while the other one prepends "http". Also, for 119f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) * "//mail.google.com:/", this function prepends "file" while the other one prepends "http". 120f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) */ 121f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) public static String fixupUrl(String uri) { 122f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) return nativeFixupUrl(uri, null); 123f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) } 124f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 125f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) /** 12658537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) * Builds a String that strips down the URL to the its scheme, host, and port. 12758537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) * @param uri URI to break down. 12868043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles) * @param showScheme Whether or not to show the scheme. If the URL can't be parsed, this value 12968043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles) * is ignored. 13068043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles) * @return Stripped-down String containing the essential bits of the URL, or the original URL if 13168043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles) * it fails to parse it. 13258537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) */ 13368043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles) public static String getOriginForDisplay(URI uri, boolean showScheme) { 13458537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) String scheme = uri.getScheme(); 13558537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) String host = uri.getHost(); 13658537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) int port = uri.getPort(); 13758537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) 13868043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles) String displayUrl; 13968043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles) if (TextUtils.isEmpty(scheme) || TextUtils.isEmpty(host)) { 14068043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles) displayUrl = uri.toString(); 14158537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) } else { 14268043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles) if (showScheme) { 14368043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles) scheme += "://"; 14468043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles) } else { 14568043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles) scheme = ""; 14668043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles) } 14768043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles) 14868043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles) if (port == -1 || (port == 80 && "http".equals(scheme)) 14968043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles) || (port == 443 && "https".equals(scheme))) { 15068043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles) displayUrl = scheme + host; 15168043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles) } else { 15268043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles) displayUrl = scheme + host + ":" + port; 15368043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles) } 15458537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) } 15568043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles) 15668043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles) return displayUrl; 15758537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) } 15858537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) 15958537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) /** 16058537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) * Determines whether or not the given URLs belong to the same broad domain or host. 16158537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) * "Broad domain" is defined as the TLD + 1 or the host. 16258537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) * 16358537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) * For example, the TLD + 1 for http://news.google.com would be "google.com" and would be shared 16458537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) * with other Google properties like http://finance.google.com. 16558537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) * 16658537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) * If {@code includePrivateRegistries} is marked as true, then private domain registries (like 16758537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) * appspot.com) are considered "effective TLDs" -- all subdomains of appspot.com would be 16858537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) * considered distinct (effective TLD = ".appspot.com" + 1). 16958537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) * This means that http://chromiumreview.appspot.com and http://example.appspot.com would not 17058537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) * belong to the same host. 17158537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) * If {@code includePrivateRegistries} is false, all subdomains of appspot.com 17258537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) * would be considered to be the same domain (TLD = ".com" + 1). 17358537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) * 17458537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) * @param primaryUrl First URL 17558537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) * @param secondaryUrl Second URL 17658537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) * @param includePrivateRegistries Whether or not to consider private registries. 17758537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) * @return True iff the two URIs belong to the same domain or host. 17858537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) */ 17958537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) public static boolean sameDomainOrHost(String primaryUrl, String secondaryUrl, 18058537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) boolean includePrivateRegistries) { 18158537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) return nativeSameDomainOrHost(primaryUrl, secondaryUrl, includePrivateRegistries); 18258537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) } 18358537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) 18458537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) /** 18558537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) * This function works by calling net::registry_controlled_domains::GetDomainAndRegistry 18658537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) * 18758537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) * @param uri A URI 18858537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) * @param includePrivateRegistries Whether or not to consider private registries. 18958537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) * 19058537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) * @return The registered, organization-identifying host and all its registry information, but 19158537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) * no subdomains, from the given URI. Returns an empty string if the URI is invalid, has no host 19258537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) * (e.g. a file: URI), has multiple trailing dots, is an IP address, has only one subcomponent 19358537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) * (i.e. no dots other than leading/trailing ones), or is itself a recognized registry 19458537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) * identifier. 19558537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) */ 19658537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) public static String getDomainAndRegistry(String uri, boolean includePrivateRegistries) { 19758537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) return nativeGetDomainAndRegistry(uri, includePrivateRegistries); 19858537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) } 19958537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) 200116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch /** 201116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch * @param url A URL. 202116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch * @return Whether a given URL is one of [...]google.TLD or [...]youtube.TLD URLs. 203116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch */ 204116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch public static boolean isGooglePropertyUrl(String url) { 205116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch if (TextUtils.isEmpty(url)) return false; 206116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch return nativeIsGooglePropertyUrl(url); 207116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch } 208116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch 20958537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) private static native boolean nativeSameDomainOrHost(String primaryUrl, String secondaryUrl, 21058537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) boolean includePrivateRegistries); 21158537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) private static native String nativeGetDomainAndRegistry(String url, 21258537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) boolean includePrivateRegistries); 21358537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) public static native boolean nativeIsGoogleSearchUrl(String url); 2140f1bc08d4cfcc34181b0b5cbf065c40f687bf740Torne (Richard Coles) public static native boolean nativeIsGoogleHomePageUrl(String url); 215f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) public static native String nativeFixupUrl(String url, String desiredTld); 216116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch private static native boolean nativeIsGooglePropertyUrl(String url); 21758537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)} 218