URLUtil.java revision 9066cfe9886ac131c34d59ed0e2d287b0e3c0087
/*
 * Copyright (C) 2006 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package android.webkit;

import java.io.UnsupportedEncodingException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import android.net.Uri;
import android.net.ParseException;
import android.net.WebAddress;
import android.util.Config;
import android.util.Log;

/**
 * Static helpers for classifying, cleaning up and decoding URL strings, and
 * for guessing a download file name from a URL and HTTP headers.
 */
public final class URLUtil {

    private static final String LOGTAG = "webkit";

    /** Prefix shared by all asset URLs. */
    static final String ASSET_BASE = "file:///android_asset/";
    /** Prefix shared by all file URLs (asset and proxy URLs start with it too). */
    static final String FILE_BASE = "file://";
    /** Prefix shared by all cookieless-proxy URLs. */
    static final String PROXY_BASE = "file:///cookieless_proxy/";

    /**
     * Cleans up (if possible) user-entered web addresses.
     *
     * <p>Empty strings and {@code about:}, {@code data:}, {@code file:} and
     * {@code javascript:} URLs are returned untouched. Otherwise a single
     * trailing period is removed, the text is parsed as a {@link WebAddress},
     * and a host without any dot is expanded to {@code www.<host>.com}.
     *
     * @param inUrl the text entered by the user; must not be null
     * @return the cleaned-up address, or the original {@code inUrl} if it
     *         could not be parsed
     */
    public static String guessUrl(String inUrl) {

        // Remember the untouched input: it is what we hand back on parse failure.
        String retVal = inUrl;
        WebAddress webAddress;

        // Guarded like the other log statement in this method so that the
        // message string is not built (and the URL not logged) when verbose
        // logging is off.
        if (Config.LOGV) {
            Log.v(LOGTAG, "guessURL before queueRequest: " + inUrl);
        }

        if (inUrl.length() == 0) return inUrl;
        if (inUrl.startsWith("about:")) return inUrl;
        // Do not try to interpret data scheme URLs
        if (inUrl.startsWith("data:")) return inUrl;
        // Do not try to interpret file scheme URLs
        if (inUrl.startsWith("file:")) return inUrl;
        // Do not try to interpret javascript scheme URLs
        if (inUrl.startsWith("javascript:")) return inUrl;

        // bug 762454: strip period off end of url
        if (inUrl.endsWith(".")) {
            inUrl = inUrl.substring(0, inUrl.length() - 1);
        }

        try {
            webAddress = new WebAddress(inUrl);
        } catch (ParseException ex) {

            if (Config.LOGV) {
                Log.v(LOGTAG, "smartUrlFilter: failed to parse url = " + inUrl);
            }
            return retVal;
        }

        // Check host
        if (webAddress.mHost.indexOf('.') == -1) {
            // no dot: user probably entered a bare domain.  try .com
            webAddress.mHost = "www." + webAddress.mHost + ".com";
        }
        return webAddress.toString();
    }

    /**
     * Builds a search URL by substituting the URL-encoded query for the first
     * occurrence of a placeholder in a template.
     *
     * @param inQuery          the raw query text; it is encoded as UTF-8
     * @param template         the URL template containing the placeholder
     * @param queryPlaceHolder the placeholder text to replace
     * @return the composed URL, or null if the template does not contain the
     *         placeholder or the query could not be encoded
     */
    public static String composeSearchUrl(String inQuery, String template,
                                          String queryPlaceHolder) {
        int placeHolderIndex = template.indexOf(queryPlaceHolder);
        if (placeHolderIndex < 0) {
            return null;
        }

        String query;
        try {
            query = java.net.URLEncoder.encode(inQuery, "utf-8");
        } catch (UnsupportedEncodingException ex) {
            return null;
        }

        // Ranged appends copy straight from the template without creating
        // temporary substrings.
        StringBuilder buffer = new StringBuilder();
        buffer.append(template, 0, placeHolderIndex);
        buffer.append(query);
        buffer.append(template,
                placeHolderIndex + queryPlaceHolder.length(), template.length());

        return buffer.toString();
    }

    /**
     * Decodes percent-escapes ({@code %XX}) in a byte sequence.
     *
     * @param url the bytes to decode; must not be null
     * @return a new array in which every {@code %XX} triple has been replaced
     *         by the single byte it encodes; all other bytes are copied as-is
     * @throws IllegalArgumentException if a {@code %} is not followed by two
     *         bytes, or if either of them is not a hexadecimal digit
     */
    public static byte[] decode(byte[] url) throws IllegalArgumentException {
        if (url.length == 0) {
            return new byte[0];
        }

        // Create a new byte array with the same length to ensure capacity
        // (the decoded form is never longer than the encoded one).
        byte[] tempData = new byte[url.length];

        int tempCount = 0;
        for (int i = 0; i < url.length; i++) {
            byte b = url[i];
            if (b == '%') {
                if (url.length - i > 2) {
                    b = (byte) (parseHex(url[i + 1]) * 16
                            + parseHex(url[i + 2]));
                    // Skip the two hex digits that were just consumed.
                    i += 2;
                } else {
                    throw new IllegalArgumentException("Invalid format");
                }
            }
            tempData[tempCount++] = b;
        }
        byte[] retData = new byte[tempCount];
        System.arraycopy(tempData, 0, retData, 0, tempCount);
        return retData;
    }

    /**
     * Converts one ASCII hexadecimal digit to its numeric value.
     *
     * @param b the ASCII code of the digit ({@code 0-9}, {@code A-F} or
     *          {@code a-f})
     * @return the value in the range 0..15
     * @throws IllegalArgumentException if {@code b} is not a hex digit
     */
    private static int parseHex(byte b) {
        if (b >= '0' && b <= '9') return (b - '0');
        if (b >= 'A' && b <= 'F') return (b - 'A' + 10);
        if (b >= 'a' && b <= 'f') return (b - 'a' + 10);

        // Show the offending character itself; concatenating the raw byte
        // would print its numeric value inside the quotes instead.
        throw new IllegalArgumentException(
                "Invalid hex char '" + (char) (b & 0xFF) + "'");
    }

    /**
     * @param url the URL to test; may be null
     * @return True iff the url is an asset file.
     */
    public static boolean isAssetUrl(String url) {
        return (null != url) && url.startsWith(ASSET_BASE);
    }

    /**
     * @param url the URL to test; may be null
     * @return True iff the url is a proxy url to allow cookieless network
     *         requests from a file url.
     * @deprecated Cookieless proxy is no longer supported.
     */
    @Deprecated
    public static boolean isCookielessProxyUrl(String url) {
        return (null != url) && url.startsWith(PROXY_BASE);
    }

    /**
     * @param url the URL to test; may be null
     * @return True iff the url is a local file, i.e. a {@code file://} url
     *         that is neither an asset url nor a cookieless-proxy url.
     */
    public static boolean isFileUrl(String url) {
        return (null != url) && (url.startsWith(FILE_BASE) &&
                                 !url.startsWith(ASSET_BASE) &&
                                 !url.startsWith(PROXY_BASE));
    }

    /**
     * @param url the URL to test; may be null
     * @return True iff the url is an about: url.
     */
    public static boolean isAboutUrl(String url) {
        return (null != url) && url.startsWith("about:");
    }

    /**
     * @param url the URL to test; may be null
     * @return True iff the url is a data: url.
     */
    public static boolean isDataUrl(String url) {
        return (null != url) && url.startsWith("data:");
    }

    /**
     * @param url the URL to test; may be null
     * @return True iff the url is a javascript: url.
     */
    public static boolean isJavaScriptUrl(String url) {
        return (null != url) && url.startsWith("javascript:");
    }

    /**
     * @param url the URL to test; may be null
     * @return True iff the url is an http: url (scheme compared ignoring case).
     */
    public static boolean isHttpUrl(String url) {
        // regionMatches returns false when url is shorter than the prefix,
        // so no separate length check is required.
        return (null != url) && url.regionMatches(true, 0, "http://", 0, 7);
    }

    /**
     * @param url the URL to test; may be null
     * @return True iff the url is an https: url (scheme compared ignoring case).
     */
    public static boolean isHttpsUrl(String url) {
        return (null != url) && url.regionMatches(true, 0, "https://", 0, 8);
    }

    /**
     * @param url the URL to test; may be null
     * @return True iff the url is a network url, i.e. an http: or https: url.
     */
    public static boolean isNetworkUrl(String url) {
        if (url == null || url.length() == 0) {
            return false;
        }
        return isHttpUrl(url) || isHttpsUrl(url);
    }

    /**
     * @param url the URL to test; may be null
     * @return True iff the url is a content: url.
     */
    public static boolean isContentUrl(String url) {
        return (null != url) && url.startsWith("content:");
    }

    /**
     * @param url the URL to test; may be null
     * @return True iff the url is valid, i.e. it is an asset, file, about,
     *         http, https, javascript or content url.
     */
    public static boolean isValidUrl(String url) {
        if (url == null || url.length() == 0) {
            return false;
        }

        return (isAssetUrl(url) ||
                isFileUrl(url) ||
                isAboutUrl(url) ||
                isHttpUrl(url) ||
                isHttpsUrl(url) ||
                isJavaScriptUrl(url) ||
                isContentUrl(url));
    }

    /**
     * Strips the url of the anchor.
     *
     * @param url the URL to process; must not be null
     * @return the part of {@code url} before the first {@code '#'}, or
     *         {@code url} itself if it contains none
     */
    public static String stripAnchor(String url) {
        int anchorIndex = url.indexOf('#');
        if (anchorIndex != -1) {
            return url.substring(0, anchorIndex);
        }
        return url;
    }

    /**
     * Guesses canonical filename that a download would have, using
     * the URL and contentDisposition. File extension, if not defined,
     * is added based on the mimetype.
     *
     * <p>The name is taken from the Content-Disposition header when it yields
     * a non-empty name, otherwise from the last path segment of the decoded
     * URL (query string removed), otherwise it is {@code "downloadfile"}.
     *
     * @param url Url to the content
     * @param contentDisposition Content-Disposition HTTP header or null
     * @param mimeType Mime-type of the content or null
     *
     * @return suggested filename
     */
    public static final String guessFileName(
            String url,
            String contentDisposition,
            String mimeType) {
        String filename = null;
        String extension = null;

        // Start with the content disposition, if there is one
        if (contentDisposition != null) {
            filename = parseContentDisposition(contentDisposition);
            if (filename != null) {
                // Keep only the last path segment of the advertised name.
                int index = filename.lastIndexOf('/') + 1;
                if (index > 0) {
                    filename = filename.substring(index);
                }
                // An empty name (filename="" or a value ending in '/') is no
                // name at all: fall through to the URL-based guess rather
                // than returning a bare extension such as ".bin".
                if (filename.length() == 0) {
                    filename = null;
                }
            }
        }

        // If all the other http-related approaches failed, use the plain uri
        if (filename == null) {
            String decodedUrl = Uri.decode(url);
            if (decodedUrl != null) {
                int queryIndex = decodedUrl.indexOf('?');
                // If there is a query string strip it, same as desktop browsers
                if (queryIndex > 0) {
                    decodedUrl = decodedUrl.substring(0, queryIndex);
                }
                if (!decodedUrl.endsWith("/")) {
                    int index = decodedUrl.lastIndexOf('/') + 1;
                    if (index > 0) {
                        filename = decodedUrl.substring(index);
                    }
                }
            }
        }

        // Finally, if couldn't get filename from URI, get a generic filename
        if (filename == null) {
            filename = "downloadfile";
        }

        // Split filename between base and extension
        // Add an extension if filename does not have one
        int dotIndex = filename.indexOf('.');
        if (dotIndex < 0) {
            if (mimeType != null) {
                extension = MimeTypeMap.getSingleton().getExtensionFromMimeType(mimeType);
                if (extension != null) {
                    extension = "." + extension;
                }
            }
            if (extension == null) {
                // Case-insensitive prefix test that does not depend on the
                // default locale and does not allocate a lowered copy.
                if (mimeType != null && mimeType.regionMatches(true, 0, "text/", 0, 5)) {
                    if (mimeType.equalsIgnoreCase("text/html")) {
                        extension = ".html";
                    } else {
                        extension = ".txt";
                    }
                } else {
                    extension = ".bin";
                }
            }
        } else {
            if (mimeType != null) {
                // Compare the last segment of the extension against the mime type.
                // If there's a mismatch, discard the entire extension.
                int lastDotIndex = filename.lastIndexOf('.');
                String typeFromExt = MimeTypeMap.getSingleton().getMimeTypeFromExtension(
                        filename.substring(lastDotIndex + 1));
                if (typeFromExt != null && !typeFromExt.equalsIgnoreCase(mimeType)) {
                    extension = MimeTypeMap.getSingleton().getExtensionFromMimeType(mimeType);
                    if (extension != null) {
                        extension = "." + extension;
                    }
                }
            }
            if (extension == null) {
                // Keep everything from the first dot on (e.g. ".tar.gz").
                extension = filename.substring(dotIndex);
            }
            filename = filename.substring(0, dotIndex);
        }

        return filename + extension;
    }

    /**
     * Regex used to parse content-disposition headers. Matching ignores case
     * because HTTP header parameter names are case-insensitive.
     */
    private static final Pattern CONTENT_DISPOSITION_PATTERN =
            Pattern.compile("attachment;\\s*filename\\s*=\\s*\"([^\"]*)\"",
                    Pattern.CASE_INSENSITIVE);

    /*
     * Parse the Content-Disposition HTTP Header. The format of the header
     * is defined here: http://www.w3.org/Protocols/rfc2616/rfc2616-sec19.html
     * This header provides a filename for content that is going to be
     * downloaded to the file system. We only support the attachment type,
     * and only a double-quoted filename value.
     *
     * Returns the text between the quotes, or null if the header does not match.
     */
    private static String parseContentDisposition(String contentDisposition) {
        try {
            Matcher m = CONTENT_DISPOSITION_PATTERN.matcher(contentDisposition);
            if (m.find()) {
                return m.group(1);
            }
        } catch (IllegalStateException ex) {
            // This function is defined as returning null when it can't parse the header
        }
        return null;
    }
}