/*
 * Copyright (C) 2007 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package android.net;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.StringTokenizer;

/**
 * Sanitizes the Query portion of a URL. Simple example:
 * <code>
 * UrlQuerySanitizer sanitizer = new UrlQuerySanitizer();
 * sanitizer.setAllowUnregisteredParamaters(true);
 * sanitizer.parseUrl("http://example.com/?name=Joe+User");
 * String name = sanitizer.getValue("name");
 * // name now contains "Joe_User". (The string is first decoded, which
 * // converts the '+' to a ' '. Then the string is sanitized, which
 * // converts the ' ' to an '_'. The ' ' is converted because the default
 * // unregistered parameter sanitizer does not allow any special characters,
 * // and ' ' is a special character.)
 * </code>
 *
 * Register ValueSanitizers to customize the way individual
 * parameters are sanitized:
 * <code>
 * UrlQuerySanitizer sanitizer = new UrlQuerySanitizer();
 * sanitizer.registerParameter("name", UrlQuerySanitizer.getSpaceLegal());
 * sanitizer.parseUrl("http://example.com/?name=Joe+User");
 * String name = sanitizer.getValue("name");
 * // name now contains "Joe User". (The '+' is decoded to a ' ', and the
 * // space-legal sanitizer leaves the ' ' alone.)
 * </code>
 *
 * There are several ways to create ValueSanitizers. In order of increasing
 * sophistication:
 * <ol>
 * <li>Call one of the UrlQuerySanitizer.getXXX() methods.
 * <li>Construct your own instance of
 * UrlQuerySanitizer.IllegalCharacterValueSanitizer.
 * <li>Implement UrlQuerySanitizer.ValueSanitizer to define your own value
 * sanitizer.
 * </ol>
 *
 */
public class UrlQuerySanitizer {

    /**
     * A simple tuple that holds parameter-value pairs.
     *
     */
    public class ParameterValuePair {
        /**
         * Construct a parameter-value tuple.
         * @param parameter an unencoded parameter
         * @param value an unencoded value
         */
        public ParameterValuePair(String parameter,
                String value) {
            mParameter = parameter;
            mValue = value;
        }
        /**
         * The unencoded parameter
         */
        public String mParameter;
        /**
         * The unencoded value
         */
        public String mValue;
    }

    /** Registered sanitizers, keyed by unencoded parameter name. */
    private final HashMap<String, ValueSanitizer> mSanitizers =
        new HashMap<String, ValueSanitizer>();
    /** Preferred sanitized value for each parameter of the current query. */
    private final HashMap<String, String> mEntries =
        new HashMap<String, String>();
    /** Every accepted parameter-value pair of the current query, in order. */
    private final ArrayList<ParameterValuePair> mEntriesList =
        new ArrayList<ParameterValuePair>();
    private boolean mAllowUnregisteredParamaters;
    private boolean mPreferFirstRepeatedParameter;
    private ValueSanitizer mUnregisteredParameterValueSanitizer =
        getAllIllegal();

    /**
     * A functor used to sanitize a single query value.
     *
     */
    public static interface ValueSanitizer {
        /**
         * Sanitize an unencoded value.
         * @param value the unencoded value to sanitize
         * @return the sanitized unencoded value
         */
        public String sanitize(String value);
    }

    /**
     * Sanitize values based on which characters they contain. Illegal
     * characters are replaced with either space or '_', depending upon
     * whether space is a legal character or not.
     */
    public static class IllegalCharacterValueSanitizer implements
        ValueSanitizer {
        private final int mFlags;

        /**
         * Allow space (' ') characters.
         */
        public final static int SPACE_OK =              1 << 0;
        /**
         * Allow whitespace characters other than space. The
         * other whitespace characters are
         * '\t' '\f' '\n' '\r' and vertical tab (character code 11).
         */
        public final static int OTHER_WHITESPACE_OK =  1 << 1;
        /**
         * Allow characters with character codes 128 and above.
         */
        public final static int NON_7_BIT_ASCII_OK =    1 << 2;
        /**
         * Allow double quote characters. ('"')
         */
        public final static int DQUOTE_OK =             1 << 3;
        /**
         * Allow single quote characters. ('\'')
         */
        public final static int SQUOTE_OK =             1 << 4;
        /**
         * Allow less-than characters. ('&lt;')
         */
        public final static int LT_OK =                 1 << 5;
        /**
         * Allow greater-than characters. ('&gt;')
         */
        public final static int GT_OK =                 1 << 6;
        /**
         * Allow ampersand characters ('&amp;')
         */
        public final static int AMP_OK =                1 << 7;
        /**
         * Allow percent-sign characters ('%')
         */
        public final static int PCT_OK =                1 << 8;
        /**
         * Allow nul characters ('\0')
         */
        public final static int NUL_OK =                1 << 9;
        /**
         * Allow text to start with a script URL
         * such as "javascript:" or "vbscript:"
         */
        public final static int SCRIPT_URL_OK =         1 << 10;

        /**
         * Mask with all fields set to OK
         */
        public final static int ALL_OK =                0x7ff;

        /**
         * Mask with both regular space and other whitespace OK
         */
        public final static int ALL_WHITESPACE_OK =
            SPACE_OK | OTHER_WHITESPACE_OK;


        // Common flag combinations:

        /**
         * <ul>
         * <li>Deny all special characters.
         * <li>Deny script URLs.
         * </ul>
         */
        public final static int ALL_ILLEGAL =
            0;
        /**
         * <ul>
         * <li>Allow all special characters except Nul. ('\0').
         * <li>Allow script URLs.
         * </ul>
         */
        public final static int ALL_BUT_NUL_LEGAL =
            ALL_OK & ~NUL_OK;
        /**
         * <ul>
         * <li>Allow all special characters except for:
         * <ul>
         *  <li>whitespace characters
         *  <li>Nul ('\0')
         * </ul>
         * <li>Allow script URLs.
         * </ul>
         */
        public final static int ALL_BUT_WHITESPACE_LEGAL =
            ALL_OK & ~(ALL_WHITESPACE_OK | NUL_OK);
        /**
         * <ul>
         * <li>Allow characters used by encoded URLs.
         * <li>Deny script URLs.
         * </ul>
         */
        public final static int URL_LEGAL =
            NON_7_BIT_ASCII_OK | SQUOTE_OK | AMP_OK | PCT_OK;
        /**
         * <ul>
         * <li>Allow characters used by encoded URLs.
         * <li>Allow spaces.
         * <li>Deny script URLs.
         * </ul>
         */
        public final static int URL_AND_SPACE_LEGAL =
            URL_LEGAL | SPACE_OK;
        /**
         * <ul>
         * <li>Allow ampersand.
         * <li>Deny script URLs.
         * </ul>
         */
        public final static int AMP_LEGAL =
            AMP_OK;
        /**
         * <ul>
         * <li>Allow ampersand.
         * <li>Allow space.
         * <li>Deny script URLs.
         * </ul>
         */
        public final static int AMP_AND_SPACE_LEGAL =
            AMP_OK | SPACE_OK;
        /**
         * <ul>
         * <li>Allow space.
         * <li>Deny script URLs.
         * </ul>
         */
        public final static int SPACE_LEGAL =
            SPACE_OK;
        /**
         * <ul>
         * <li>Allow all but.
         * <ul>
         *  <li>Nul ('\0')
         *  <li>Angle brackets ('&lt;', '&gt;')
         * </ul>
         * <li>Deny script URLs.
         * </ul>
         */
        public final static int ALL_BUT_NUL_AND_ANGLE_BRACKETS_LEGAL =
            ALL_OK & ~(NUL_OK | LT_OK | GT_OK);

        /*
         *  Script URL definitions
         */

        private final static String JAVASCRIPT_PREFIX = "javascript:";

        private final static String VBSCRIPT_PREFIX = "vbscript:";

        private final static int MIN_SCRIPT_PREFIX_LENGTH = Math.min(
                JAVASCRIPT_PREFIX.length(), VBSCRIPT_PREFIX.length());

        /**
         * Construct a sanitizer. The parameters set the behavior of the
         * sanitizer.
         * @param flags some combination of the XXX_OK flags.
         */
        public IllegalCharacterValueSanitizer(
            int flags) {
            mFlags = flags;
        }

        /**
         * Sanitize a value.
         * <ol>
         * <li>If neither spaces nor other white space is OK, then
         * white space will be trimmed from the beginning and end of
         * the value. (Just the actual white space characters are trimmed, not
         * other control codes.)
         * <li>If script URLs are not OK and the (possibly trimmed) value
         * starts with "javascript:" or "vbscript:" in any letter case, the
         * whole value is replaced by the empty string.
         * <li>Illegal characters will be replaced with
         * either ' ' or '_', depending on whether a space is itself a
         * legal character.
         * </ol>
         * @param value the unencoded value to sanitize, may be null
         * @return the sanitized value, or null if {@code value} is null
         */
        @Override
        public String sanitize(String value) {
            if (value == null) {
                return null;
            }

            // If whitespace isn't OK, get rid of whitespace at beginning
            // and end of value. This happens before the script URL check so
            // that leading whitespace cannot hide a script prefix which
            // would then be exposed at the start of the trimmed result.
            if ((mFlags & ALL_WHITESPACE_OK) == 0) {
                value = trimWhitespace(value);
            }
            int length = value.length();

            // Script URLs are rejected when the SCRIPT_URL_OK bit is clear.
            if ((mFlags & SCRIPT_URL_OK) == 0
                    && length >= MIN_SCRIPT_PREFIX_LENGTH) {
                // Locale.ROOT keeps the case mapping independent of the
                // default locale (e.g. Turkish dotless-i rules).
                String asLower = value.toLowerCase(Locale.ROOT);
                if (asLower.startsWith(JAVASCRIPT_PREFIX) ||
                        asLower.startsWith(VBSCRIPT_PREFIX)) {
                    return "";
                }
            }

            // Replacement for illegal characters: a space if spaces are
            // themselves legal, otherwise an underscore.
            char replacement = ((mFlags & SPACE_OK) != 0) ? ' ' : '_';
            StringBuilder stringBuilder = new StringBuilder(length);
            for (int i = 0; i < length; i++) {
                char c = value.charAt(i);
                stringBuilder.append(characterIsLegal(c) ? c : replacement);
            }
            return stringBuilder.toString();
        }

        /**
         * Trim whitespace from the beginning and end of a string.
         * <p>
         * Note: can't use {@link String#trim} because {@link String#trim} has a
         * different definition of whitespace than we want.
         * @param value the string to trim
         * @return the trimmed string; the same instance if nothing was trimmed
         */
        private static String trimWhitespace(String value) {
            int start = 0;
            int last = value.length() - 1;
            int end = last;
            while (start <= end && isWhitespace(value.charAt(start))) {
                start++;
            }
            while (end >= start && isWhitespace(value.charAt(end))) {
                end--;
            }
            if (start == 0 && end == last) {
                return value;
            }
            return value.substring(start, end + 1);
        }

        /**
         * Check if c is whitespace.
         * @param c character to test
         * @return true if c is a whitespace character
         */
        private static boolean isWhitespace(char c) {
            switch (c) {
            case ' ':
            case '\t':
            case '\f':
            case '\n':
            case '\r':
            case 11: /* VT */
                return true;
            default:
                return false;
            }
        }

        /**
         * Check whether an individual character is legal. Uses the
         * flag bit-set passed into the constructor.
         * @param c the character to test
         * @return true if c is a legal character
         */
        private boolean characterIsLegal(char c) {
            switch (c) {
            case ' ' : return (mFlags & SPACE_OK) != 0;
            case '\t': case '\f': case '\n': case '\r': case 11: /* VT */
              return (mFlags & OTHER_WHITESPACE_OK) != 0;
            case '\"': return (mFlags & DQUOTE_OK) != 0;
            case '\'': return (mFlags & SQUOTE_OK) != 0;
            case '<' : return (mFlags & LT_OK) != 0;
            case '>' : return (mFlags & GT_OK) != 0;
            case '&' : return (mFlags & AMP_OK) != 0;
            case '%' : return (mFlags & PCT_OK) != 0;
            case '\0': return (mFlags & NUL_OK) != 0;
            // Printable 7-bit ASCII is always legal; other control codes and
            // DEL (127) never are; 128 and above depends on the flag.
            default  : return (c >= 32 && c < 127) ||
                ((c >= 128) && ((mFlags & NON_7_BIT_ASCII_OK) != 0));
            }
        }
    }

    /**
     * Get the current value sanitizer used when processing
     * unregistered parameter values.
     * <p>
     * <b>Note:</b> The default unregistered parameter value sanitizer is
     * one that doesn't allow any special characters, similar to what
     * is returned by calling {@link #getAllIllegal()}.
     *
     * @return the current ValueSanitizer used to sanitize unregistered
     * parameter values.
     */
    public ValueSanitizer getUnregisteredParameterValueSanitizer() {
        return mUnregisteredParameterValueSanitizer;
    }

    /**
     * Set the value sanitizer used when processing unregistered
     * parameter values.
     * @param sanitizer set the ValueSanitizer used to sanitize unregistered
     * parameter values.
     */
    public void setUnregisteredParameterValueSanitizer(
            ValueSanitizer sanitizer) {
        mUnregisteredParameterValueSanitizer = sanitizer;
    }


    // Private fields for singleton sanitizers:

    private static final ValueSanitizer sAllIllegal =
        new IllegalCharacterValueSanitizer(
                IllegalCharacterValueSanitizer.ALL_ILLEGAL);

    private static final ValueSanitizer sAllButNulLegal =
        new IllegalCharacterValueSanitizer(
                IllegalCharacterValueSanitizer.ALL_BUT_NUL_LEGAL);

    private static final ValueSanitizer sAllButWhitespaceLegal =
        new IllegalCharacterValueSanitizer(
                IllegalCharacterValueSanitizer.ALL_BUT_WHITESPACE_LEGAL);

    private static final ValueSanitizer sURLLegal =
        new IllegalCharacterValueSanitizer(
                IllegalCharacterValueSanitizer.URL_LEGAL);

    private static final ValueSanitizer sUrlAndSpaceLegal =
        new IllegalCharacterValueSanitizer(
                IllegalCharacterValueSanitizer.URL_AND_SPACE_LEGAL);

    private static final ValueSanitizer sAmpLegal =
        new IllegalCharacterValueSanitizer(
                IllegalCharacterValueSanitizer.AMP_LEGAL);

    private static final ValueSanitizer sAmpAndSpaceLegal =
        new IllegalCharacterValueSanitizer(
                IllegalCharacterValueSanitizer.AMP_AND_SPACE_LEGAL);

    private static final ValueSanitizer sSpaceLegal =
        new IllegalCharacterValueSanitizer(
                IllegalCharacterValueSanitizer.SPACE_LEGAL);

    private static final ValueSanitizer sAllButNulAndAngleBracketsLegal =
        new IllegalCharacterValueSanitizer(
                IllegalCharacterValueSanitizer.ALL_BUT_NUL_AND_ANGLE_BRACKETS_LEGAL);

    /**
     * Return a value sanitizer that does not allow any special characters,
     * and also does not allow script URLs.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getAllIllegal() {
        return sAllIllegal;
    }

    /**
     * Return a value sanitizer that allows everything except Nul ('\0')
     * characters. Script URLs are allowed.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getAllButNulLegal() {
        return sAllButNulLegal;
    }

    /**
     * Return a value sanitizer that allows everything except Nul ('\0')
     * characters, space (' '), and other whitespace characters.
     * Script URLs are allowed.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getAllButWhitespaceLegal() {
        return sAllButWhitespaceLegal;
    }

    /**
     * Return a value sanitizer that allows all the characters used by
     * encoded URLs. Does not allow script URLs.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getUrlLegal() {
        return sURLLegal;
    }

    /**
     * Return a value sanitizer that allows all the characters used by
     * encoded URLs and allows spaces, which are not technically legal
     * in encoded URLs, but commonly appear anyway.
     * Does not allow script URLs.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getUrlAndSpaceLegal() {
        return sUrlAndSpaceLegal;
    }

    /**
     * Return a value sanitizer that does not allow any special characters
     * except ampersand ('&amp;'). Does not allow script URLs.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getAmpLegal() {
        return sAmpLegal;
    }

    /**
     * Return a value sanitizer that does not allow any special characters
     * except ampersand ('&amp;') and space (' '). Does not allow script URLs.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getAmpAndSpaceLegal() {
        return sAmpAndSpaceLegal;
    }

    /**
     * Return a value sanitizer that does not allow any special characters
     * except space (' '). Does not allow script URLs.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getSpaceLegal() {
        return sSpaceLegal;
    }

    /**
     * Return a value sanitizer that allows any special characters
     * except angle brackets ('&lt;' and '&gt;') and Nul ('\0').
     * Allows script URLs.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getAllButNulAndAngleBracketsLegal() {
        return sAllButNulAndAngleBracketsLegal;
    }

    /**
     * Constructs a UrlQuerySanitizer.
     * <p>
     * Defaults:
     * <ul>
     * <li>unregistered parameters are not allowed.
     * <li>the last instance of a repeated parameter is preferred.
     * <li>The default value sanitizer is an AllIllegal value sanitizer.
     * </ul>
     */
    public UrlQuerySanitizer() {
    }

    /**
     * Constructs a UrlQuerySanitizer and parse a URL.
     * This constructor is provided for convenience when the
     * default parsing behavior is acceptable.
     * <p>
     * Because the URL is parsed before the constructor returns, there isn't
     * a chance to configure the sanitizer to change the parsing behavior.
     * <p>
     * <code>
     * UrlQuerySanitizer sanitizer = new UrlQuerySanitizer(myUrl);
     * String name = sanitizer.getValue("name");
     * </code>
     * <p>
     * Defaults:
     * <ul>
     * <li>unregistered parameters <em>are</em> allowed.
     * <li>the last instance of a repeated parameter is preferred.
     * <li>The default value sanitizer is an AllIllegal value sanitizer.
     * </ul>
     * @param url the encoded URL to parse; must not be null.
     */
    public UrlQuerySanitizer(String url) {
        setAllowUnregisteredParamaters(true);
        parseUrl(url);
    }

    /**
     * Parse the query parameters out of an encoded URL.
     * Works by extracting the query portion from the URL and then
     * calling parseQuery(). If there is no query portion it is
     * treated as if the query portion is an empty string.
     * @param url the encoded URL to parse; must not be null.
     */
    public void parseUrl(String url) {
        int queryIndex = url.indexOf('?');
        String query;
        if (queryIndex >= 0) {
            query = url.substring(queryIndex + 1);
        }
        else {
            query = "";
        }
        parseQuery(query);
    }

    /**
     * Parse a query. A query string is any number of parameter-value clauses
     * separated by any non-zero number of ampersands. A parameter-value clause
     * is a parameter followed by an equal sign, followed by a value. If the
     * equal sign is missing, the value is assumed to be the empty string.
     * Any entries from a previously parsed query are discarded first.
     * @param query the query to parse; must not be null.
     */
    public void parseQuery(String query) {
        clear();
        // Split by '&'
        StringTokenizer tokenizer = new StringTokenizer(query, "&");
        while (tokenizer.hasMoreTokens()) {
            String attributeValuePair = tokenizer.nextToken();
            if (attributeValuePair.length() > 0) {
                int assignmentIndex = attributeValuePair.indexOf('=');
                if (assignmentIndex < 0) {
                    // No assignment found, treat as if empty value
                    parseEntry(attributeValuePair, "");
                }
                else {
                    parseEntry(attributeValuePair.substring(0, assignmentIndex),
                            attributeValuePair.substring(assignmentIndex + 1));
                }
            }
        }
    }

    /**
     * Get a set of all of the parameters found in the sanitized query.
     * <p>
     * Note: Do not modify this set. Treat it as a read-only set.
     * @return all the parameters found in the current query.
     */
    public Set<String> getParameterSet() {
        return mEntries.keySet();
    }

    /**
     * An array list of all of the parameter value pairs in the sanitized
     * query, in the order they appeared in the query. May contain duplicate
     * parameters.
     * <p class="note"><b>Note:</b> Do not modify this list. Treat it as a read-only list.</p>
     * @return the parameter-value pairs of the current query.
     */
    public List<ParameterValuePair> getParameterList() {
        return mEntriesList;
    }

    /**
     * Check if a parameter exists in the current sanitized query.
     * @param parameter the unencoded name of a parameter.
     * @return true if the parameter exists in the current sanitized query.
     */
    public boolean hasParameter(String parameter) {
        return mEntries.containsKey(parameter);
    }

    /**
     * Get the value for a parameter in the current sanitized query.
     * Returns null if the parameter does not
     * exist.
     * @param parameter the unencoded name of a parameter.
     * @return the sanitized unencoded value of the parameter,
     * or null if the parameter does not exist.
     */
    public String getValue(String parameter) {
        return mEntries.get(parameter);
    }

    /**
     * Register a value sanitizer for a particular parameter. Can also be used
     * to replace or remove an already-set value sanitizer.
     * <p>
     * Registering a non-null value sanitizer for a particular parameter
     * makes that parameter a registered parameter.
     * @param parameter an unencoded parameter name
     * @param valueSanitizer the value sanitizer to use for a particular
     * parameter. May be null in order to unregister that parameter.
     * @see #getAllowUnregisteredParamaters()
     */
    public void registerParameter(String parameter,
            ValueSanitizer valueSanitizer) {
        putSanitizer(parameter, valueSanitizer);
    }

    /**
     * Register a value sanitizer for an array of parameters.
     * @param parameters An array of unencoded parameter names.
     * @param valueSanitizer the value sanitizer to use for every listed
     * parameter. May be null in order to unregister those parameters.
     * @see #registerParameter
     */
    public void registerParameters(String[] parameters,
            ValueSanitizer valueSanitizer) {
        int length = parameters.length;
        for (int i = 0; i < length; i++) {
            putSanitizer(parameters[i], valueSanitizer);
        }
    }

    /**
     * Store or remove the sanitizer for one parameter. A null sanitizer
     * removes the mapping instead of storing a null value in the map.
     */
    private void putSanitizer(String parameter, ValueSanitizer valueSanitizer) {
        if (valueSanitizer == null) {
            mSanitizers.remove(parameter);
        } else {
            mSanitizers.put(parameter, valueSanitizer);
        }
    }

    /**
     * Set whether or not unregistered parameters are allowed. If they
     * are not allowed, then they will be dropped when a query is sanitized.
     * <p>
     * Defaults to false.
     * @param allowUnregisteredParamaters true to allow unregistered parameters.
     * @see #getAllowUnregisteredParamaters()
     */
    public void setAllowUnregisteredParamaters(
            boolean allowUnregisteredParamaters) {
        mAllowUnregisteredParamaters = allowUnregisteredParamaters;
    }

    /**
     * Get whether or not unregistered parameters are allowed. If not
     * allowed, they will be dropped when a query is parsed.
     * @return true if unregistered parameters are allowed.
     * @see #setAllowUnregisteredParamaters(boolean)
     */
    public boolean getAllowUnregisteredParamaters() {
        return mAllowUnregisteredParamaters;
    }

    /**
     * Set whether or not the first occurrence of a repeated parameter is
     * preferred. True means the first repeated parameter is preferred.
     * False means that the last repeated parameter is preferred.
     * <p>
     * The preferred parameter is the one that is returned when
     * {@link #getValue(String)} is called.
     * <p>
     * defaults to false.
     * @param preferFirstRepeatedParameter True if the first repeated
     * parameter is preferred.
     * @see #getPreferFirstRepeatedParameter()
     */
    public void setPreferFirstRepeatedParameter(
            boolean preferFirstRepeatedParameter) {
        mPreferFirstRepeatedParameter = preferFirstRepeatedParameter;
    }

    /**
     * Get whether or not the first occurrence of a repeated parameter is
     * preferred.
     * @return true if the first occurrence of a repeated parameter is
     * preferred.
     * @see #setPreferFirstRepeatedParameter(boolean)
     */
    public boolean getPreferFirstRepeatedParameter() {
        return mPreferFirstRepeatedParameter;
    }

    /**
     * Parse an escaped parameter-value pair. The default implementation
     * unescapes both the parameter and the value, then looks up the
     * effective value sanitizer for the parameter and uses it to sanitize
     * the value. If all goes well then addSanitizedEntry is called with
     * the unescaped parameter and the sanitized unescaped value.
     * If there is no effective value sanitizer the pair is dropped.
     * @param parameter an escaped parameter
     * @param value an unsanitized escaped value
     */
    protected void parseEntry(String parameter, String value) {
        String unescapedParameter = unescape(parameter);
        ValueSanitizer valueSanitizer =
            getEffectiveValueSanitizer(unescapedParameter);

        if (valueSanitizer == null) {
            return;
        }
        String unescapedValue = unescape(value);
        String sanitizedValue = valueSanitizer.sanitize(unescapedValue);
        addSanitizedEntry(unescapedParameter, sanitizedValue);
    }

    /**
     * Record a sanitized parameter-value pair. Override if you want to
     * do additional filtering or validation.
     * @param parameter an unescaped parameter
     * @param value a sanitized unescaped value
     */
    protected void addSanitizedEntry(String parameter, String value) {
        // The ordered list always records the pair, even when a repeated
        // parameter does not replace the preferred value below.
        mEntriesList.add(
                new ParameterValuePair(parameter, value));
        if (mPreferFirstRepeatedParameter) {
            if (mEntries.containsKey(parameter)) {
                return;
            }
        }
        mEntries.put(parameter, value);
    }

    /**
     * Get the value sanitizer for a parameter. Returns null if there
     * is no value sanitizer registered for the parameter.
     * @param parameter the unescaped parameter
     * @return the currently registered value sanitizer for this parameter.
     * @see #registerParameter(String, android.net.UrlQuerySanitizer.ValueSanitizer)
     */
    public ValueSanitizer getValueSanitizer(String parameter) {
        return mSanitizers.get(parameter);
    }

    /**
     * Get the effective value sanitizer for a parameter. Like getValueSanitizer,
     * except if there is no value sanitizer registered for a parameter, and
     * unregistered parameters are allowed, then the default value sanitizer is
     * returned.
     * @param parameter an unescaped parameter
     * @return the effective value sanitizer for a parameter.
     */
    public ValueSanitizer getEffectiveValueSanitizer(String parameter) {
        ValueSanitizer sanitizer = getValueSanitizer(parameter);
        if (sanitizer == null && mAllowUnregisteredParamaters) {
            sanitizer = getUnregisteredParameterValueSanitizer();
        }
        return sanitizer;
    }

    /**
     * Unescape an escaped string.
     * <ul>
     * <li>'+' characters are replaced by
     * ' ' characters.
     * <li>Valid "%xx" escape sequences are replaced by the
     * corresponding unescaped character.
     * <li>Invalid escape sequences such as "%1z", are passed through unchanged.
     * </ul>
     * @param string the escaped string; must not be null.
     * @return the unescaped string.
     */
    public String unescape(String string) {
        // Find the earliest character that needs decoding. Both '%' and '+'
        // must be considered: starting at the first '%' alone would copy any
        // earlier '+' through without converting it to a space.
        int firstPercent = string.indexOf('%');
        int firstPlus = string.indexOf('+');
        int firstEscape;
        if (firstPercent < 0) {
            firstEscape = firstPlus;
        } else if (firstPlus < 0) {
            firstEscape = firstPercent;
        } else {
            firstEscape = Math.min(firstPercent, firstPlus);
        }
        // Early exit if no escaped characters.
        if (firstEscape < 0) {
            return string;
        }

        int length = string.length();

        StringBuilder stringBuilder = new StringBuilder(length);
        // Everything before the first escape is copied verbatim.
        stringBuilder.append(string, 0, firstEscape);
        for (int i = firstEscape; i < length; i++) {
            char c = string.charAt(i);
            if (c == '+') {
                c = ' ';
            }
            else if (c == '%' && i + 2 < length) {
                char c1 = string.charAt(i + 1);
                char c2 = string.charAt(i + 2);
                if (isHexDigit(c1) && isHexDigit(c2)) {
                    c = (char) (decodeHexDigit(c1) * 16 + decodeHexDigit(c2));
                    // Skip the two hex digits that were just consumed.
                    i += 2;
                }
            }
            stringBuilder.append(c);
        }
        return stringBuilder.toString();
    }

    /**
     * Test if a character is a hexadecimal digit. Both upper case and lower
     * case hex digits are allowed.
     * @param c the character to test
     * @return true if c is a hex digit.
     */
    protected boolean isHexDigit(char c) {
        return decodeHexDigit(c) >= 0;
    }

    /**
     * Convert a character that represents a hexadecimal digit into an integer.
     * If the character is not a hexadecimal digit, then -1 is returned.
     * Both upper case and lower case hex digits are allowed.
     * @param c the hexadecimal digit.
     * @return the integer value of the hexadecimal digit.
     */
    protected int decodeHexDigit(char c) {
        if (c >= '0' && c <= '9') {
            return c - '0';
        }
        else if (c >= 'A' && c <= 'F') {
            return c - 'A' + 10;
        }
        else if (c >= 'a' && c <= 'f') {
            return c - 'a' + 10;
        }
        else {
            return -1;
        }
    }

    /**
     * Clear the existing entries. Called to get ready to parse a new
     * query string.
     */
    protected void clear() {
        mEntries.clear();
        mEntriesList.clear();
    }
}