1/* 2 * Copyright (C) 2006 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17package android.webkit; 18 19import android.annotation.Nullable; 20import android.net.ParseException; 21import android.net.Uri; 22import android.net.WebAddress; 23import android.util.Log; 24 25import java.io.UnsupportedEncodingException; 26import java.util.Locale; 27import java.util.regex.Matcher; 28import java.util.regex.Pattern; 29 30public final class URLUtil { 31 32 private static final String LOGTAG = "webkit"; 33 private static final boolean TRACE = false; 34 35 // to refer to bar.png under your package's asset/foo/ directory, use 36 // "file:///android_asset/foo/bar.png". 37 static final String ASSET_BASE = "file:///android_asset/"; 38 // to refer to bar.png under your package's res/drawable/ directory, use 39 // "file:///android_res/drawable/bar.png". Use "drawable" to refer to 40 // "drawable-hdpi" directory as well. 41 static final String RESOURCE_BASE = "file:///android_res/"; 42 static final String FILE_BASE = "file:"; 43 static final String PROXY_BASE = "file:///cookieless_proxy/"; 44 static final String CONTENT_BASE = "content:"; 45 46 /** 47 * Cleans up (if possible) user-entered web addresses 48 */ 49 public static String guessUrl(String inUrl) { 50 51 String retVal = inUrl; 52 WebAddress webAddress; 53 54 if (TRACE) Log.v(LOGTAG, "guessURL before queueRequest: " + inUrl); 55 56 if (inUrl.length() == 0) return inUrl; 57 if (inUrl.startsWith("about:")) return inUrl; 58 // Do not try to interpret data scheme URLs 59 if (inUrl.startsWith("data:")) return inUrl; 60 // Do not try to interpret file scheme URLs 61 if (inUrl.startsWith("file:")) return inUrl; 62 // Do not try to interpret javascript scheme URLs 63 if (inUrl.startsWith("javascript:")) return inUrl; 64 65 // bug 762454: strip period off end of url 66 if (inUrl.endsWith(".") == true) { 67 inUrl = inUrl.substring(0, inUrl.length() - 1); 68 } 69 70 try { 71 webAddress = new WebAddress(inUrl); 72 } catch (ParseException ex) { 73 74 if (TRACE) { 75 Log.v(LOGTAG, "smartUrlFilter: failed to parse url = " + inUrl); 76 } 77 return retVal; 78 } 79 80 // Check host 81 if (webAddress.getHost().indexOf('.') == -1) { 82 // no dot: user probably entered a bare domain. try .com 83 webAddress.setHost("www." + webAddress.getHost() + ".com"); 84 } 85 return webAddress.toString(); 86 } 87 88 public static String composeSearchUrl(String inQuery, String template, 89 String queryPlaceHolder) { 90 int placeHolderIndex = template.indexOf(queryPlaceHolder); 91 if (placeHolderIndex < 0) { 92 return null; 93 } 94 95 String query; 96 StringBuilder buffer = new StringBuilder(); 97 buffer.append(template.substring(0, placeHolderIndex)); 98 99 try { 100 query = java.net.URLEncoder.encode(inQuery, "utf-8"); 101 buffer.append(query); 102 } catch (UnsupportedEncodingException ex) { 103 return null; 104 } 105 106 buffer.append(template.substring( 107 placeHolderIndex + queryPlaceHolder.length())); 108 109 return buffer.toString(); 110 } 111 112 public static byte[] decode(byte[] url) throws IllegalArgumentException { 113 if (url.length == 0) { 114 return new byte[0]; 115 } 116 117 // Create a new byte array with the same length to ensure capacity 118 byte[] tempData = new byte[url.length]; 119 120 int tempCount = 0; 121 for (int i = 0; i < url.length; i++) { 122 byte b = url[i]; 123 if (b == '%') { 124 if (url.length - i > 2) { 125 b = (byte) (parseHex(url[i + 1]) * 16 126 + parseHex(url[i + 2])); 127 i += 2; 128 } else { 129 throw new IllegalArgumentException("Invalid format"); 130 } 131 } 132 tempData[tempCount++] = b; 133 } 134 byte[] retData = new byte[tempCount]; 135 System.arraycopy(tempData, 0, retData, 0, tempCount); 136 return retData; 137 } 138 139 /** 140 * @return {@code true} if the url is correctly URL encoded 141 */ 142 static boolean verifyURLEncoding(String url) { 143 int count = url.length(); 144 if (count == 0) { 145 return false; 146 } 147 148 int index = url.indexOf('%'); 149 while (index >= 0 && index < count) { 150 if (index < count - 2) { 151 try { 152 parseHex((byte) url.charAt(++index)); 153 parseHex((byte) url.charAt(++index)); 154 } catch (IllegalArgumentException e) { 155 return false; 156 } 157 } else { 158 return false; 159 } 160 index = url.indexOf('%', index + 1); 161 } 162 return true; 163 } 164 165 private static int parseHex(byte b) { 166 if (b >= '0' && b <= '9') return (b - '0'); 167 if (b >= 'A' && b <= 'F') return (b - 'A' + 10); 168 if (b >= 'a' && b <= 'f') return (b - 'a' + 10); 169 170 throw new IllegalArgumentException("Invalid hex char '" + b + "'"); 171 } 172 173 /** 174 * @return {@code true} if the url is an asset file. 175 */ 176 public static boolean isAssetUrl(String url) { 177 return (null != url) && url.startsWith(ASSET_BASE); 178 } 179 180 /** 181 * @return {@code true} if the url is a resource file. 182 * @hide 183 */ 184 public static boolean isResourceUrl(String url) { 185 return (null != url) && url.startsWith(RESOURCE_BASE); 186 } 187 188 /** 189 * @return {@code true} if the url is a proxy url to allow cookieless network 190 * requests from a file url. 191 * @deprecated Cookieless proxy is no longer supported. 192 */ 193 @Deprecated 194 public static boolean isCookielessProxyUrl(String url) { 195 return (null != url) && url.startsWith(PROXY_BASE); 196 } 197 198 /** 199 * @return {@code true} if the url is a local file. 200 */ 201 public static boolean isFileUrl(String url) { 202 return (null != url) && (url.startsWith(FILE_BASE) && 203 !url.startsWith(ASSET_BASE) && 204 !url.startsWith(PROXY_BASE)); 205 } 206 207 /** 208 * @return {@code true} if the url is an about: url. 209 */ 210 public static boolean isAboutUrl(String url) { 211 return (null != url) && url.startsWith("about:"); 212 } 213 214 /** 215 * @return {@code true} if the url is a data: url. 216 */ 217 public static boolean isDataUrl(String url) { 218 return (null != url) && url.startsWith("data:"); 219 } 220 221 /** 222 * @return {@code true} if the url is a javascript: url. 223 */ 224 public static boolean isJavaScriptUrl(String url) { 225 return (null != url) && url.startsWith("javascript:"); 226 } 227 228 /** 229 * @return {@code true} if the url is an http: url. 230 */ 231 public static boolean isHttpUrl(String url) { 232 return (null != url) && 233 (url.length() > 6) && 234 url.substring(0, 7).equalsIgnoreCase("http://"); 235 } 236 237 /** 238 * @return {@code true} if the url is an https: url. 239 */ 240 public static boolean isHttpsUrl(String url) { 241 return (null != url) && 242 (url.length() > 7) && 243 url.substring(0, 8).equalsIgnoreCase("https://"); 244 } 245 246 /** 247 * @return {@code true} if the url is a network url. 248 */ 249 public static boolean isNetworkUrl(String url) { 250 if (url == null || url.length() == 0) { 251 return false; 252 } 253 return isHttpUrl(url) || isHttpsUrl(url); 254 } 255 256 /** 257 * @return {@code true} if the url is a content: url. 258 */ 259 public static boolean isContentUrl(String url) { 260 return (null != url) && url.startsWith(CONTENT_BASE); 261 } 262 263 /** 264 * @return {@code true} if the url is valid. 265 */ 266 public static boolean isValidUrl(String url) { 267 if (url == null || url.length() == 0) { 268 return false; 269 } 270 271 return (isAssetUrl(url) || 272 isResourceUrl(url) || 273 isFileUrl(url) || 274 isAboutUrl(url) || 275 isHttpUrl(url) || 276 isHttpsUrl(url) || 277 isJavaScriptUrl(url) || 278 isContentUrl(url)); 279 } 280 281 /** 282 * Strips the url of the anchor. 283 */ 284 public static String stripAnchor(String url) { 285 int anchorIndex = url.indexOf('#'); 286 if (anchorIndex != -1) { 287 return url.substring(0, anchorIndex); 288 } 289 return url; 290 } 291 292 /** 293 * Guesses canonical filename that a download would have, using 294 * the URL and contentDisposition. File extension, if not defined, 295 * is added based on the mimetype 296 * @param url Url to the content 297 * @param contentDisposition Content-Disposition HTTP header or {@code null} 298 * @param mimeType Mime-type of the content or {@code null} 299 * 300 * @return suggested filename 301 */ 302 public static final String guessFileName( 303 String url, 304 @Nullable String contentDisposition, 305 @Nullable String mimeType) { 306 String filename = null; 307 String extension = null; 308 309 // If we couldn't do anything with the hint, move toward the content disposition 310 if (filename == null && contentDisposition != null) { 311 filename = parseContentDisposition(contentDisposition); 312 if (filename != null) { 313 int index = filename.lastIndexOf('/') + 1; 314 if (index > 0) { 315 filename = filename.substring(index); 316 } 317 } 318 } 319 320 // If all the other http-related approaches failed, use the plain uri 321 if (filename == null) { 322 String decodedUrl = Uri.decode(url); 323 if (decodedUrl != null) { 324 int queryIndex = decodedUrl.indexOf('?'); 325 // If there is a query string strip it, same as desktop browsers 326 if (queryIndex > 0) { 327 decodedUrl = decodedUrl.substring(0, queryIndex); 328 } 329 if (!decodedUrl.endsWith("/")) { 330 int index = decodedUrl.lastIndexOf('/') + 1; 331 if (index > 0) { 332 filename = decodedUrl.substring(index); 333 } 334 } 335 } 336 } 337 338 // Finally, if couldn't get filename from URI, get a generic filename 339 if (filename == null) { 340 filename = "downloadfile"; 341 } 342 343 // Split filename between base and extension 344 // Add an extension if filename does not have one 345 int dotIndex = filename.indexOf('.'); 346 if (dotIndex < 0) { 347 if (mimeType != null) { 348 extension = MimeTypeMap.getSingleton().getExtensionFromMimeType(mimeType); 349 if (extension != null) { 350 extension = "." + extension; 351 } 352 } 353 if (extension == null) { 354 if (mimeType != null && mimeType.toLowerCase(Locale.ROOT).startsWith("text/")) { 355 if (mimeType.equalsIgnoreCase("text/html")) { 356 extension = ".html"; 357 } else { 358 extension = ".txt"; 359 } 360 } else { 361 extension = ".bin"; 362 } 363 } 364 } else { 365 if (mimeType != null) { 366 // Compare the last segment of the extension against the mime type. 367 // If there's a mismatch, discard the entire extension. 368 int lastDotIndex = filename.lastIndexOf('.'); 369 String typeFromExt = MimeTypeMap.getSingleton().getMimeTypeFromExtension( 370 filename.substring(lastDotIndex + 1)); 371 if (typeFromExt != null && !typeFromExt.equalsIgnoreCase(mimeType)) { 372 extension = MimeTypeMap.getSingleton().getExtensionFromMimeType(mimeType); 373 if (extension != null) { 374 extension = "." + extension; 375 } 376 } 377 } 378 if (extension == null) { 379 extension = filename.substring(dotIndex); 380 } 381 filename = filename.substring(0, dotIndex); 382 } 383 384 return filename + extension; 385 } 386 387 /** Regex used to parse content-disposition headers */ 388 private static final Pattern CONTENT_DISPOSITION_PATTERN = 389 Pattern.compile("attachment;\\s*filename\\s*=\\s*(\"?)([^\"]*)\\1\\s*$", 390 Pattern.CASE_INSENSITIVE); 391 392 /** 393 * Parse the Content-Disposition HTTP Header. The format of the header 394 * is defined here: http://www.w3.org/Protocols/rfc2616/rfc2616-sec19.html 395 * This header provides a filename for content that is going to be 396 * downloaded to the file system. We only support the attachment type. 397 * Note that RFC 2616 specifies the filename value must be double-quoted. 398 * Unfortunately some servers do not quote the value so to maintain 399 * consistent behaviour with other browsers, we allow unquoted values too. 400 */ 401 static String parseContentDisposition(String contentDisposition) { 402 try { 403 Matcher m = CONTENT_DISPOSITION_PATTERN.matcher(contentDisposition); 404 if (m.find()) { 405 return m.group(2); 406 } 407 } catch (IllegalStateException ex) { 408 // This function is defined as returning null when it can't parse the header 409 } 410 return null; 411 } 412} 413