1/*
2 * Copyright (C) 2007 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package android.net;
18
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.StringTokenizer;
24
/**
 *
 * Sanitizes the Query portion of a URL. Simple example:
 * <code>
 * UrlQuerySanitizer sanitizer = new UrlQuerySanitizer();
 * sanitizer.setAllowUnregisteredParamaters(true);
 * sanitizer.parseUrl("http://example.com/?name=Joe+User");
 * String name = sanitizer.getValue("name");
 * // name now contains "Joe_User". (The string is first decoded, which
 * // converts the '+' to a ' '. Then the string is sanitized, which
 * // converts the ' ' to an '_'. The ' ' is converted because the default
 * // unregistered parameter sanitizer does not allow any special characters,
 * // and ' ' is a special character.)
 * </code>
 *
 * Register ValueSanitizers to customize the way individual
 * parameters are sanitized:
 * <code>
 * UrlQuerySanitizer sanitizer = new UrlQuerySanitizer();
 * sanitizer.registerParameter("name", UrlQuerySanitizer.getSpaceLegal());
 * sanitizer.parseUrl("http://example.com/?name=Joe+User");
 * String name = sanitizer.getValue("name");
 * // name now contains "Joe User". (The '+' is decoded to a ' ', and the
 * // space-legal sanitizer leaves the ' ' alone.)
 * </code>
 *
 * There are several ways to create ValueSanitizers. In order of increasing
 * sophistication:
 * <ol>
 * <li>Call one of the UrlQuerySanitizer.getXXX() methods.
 * <li>Construct your own instance of
 * UrlQuerySanitizer.IllegalCharacterValueSanitizer.
 * <li>Implement UrlQuerySanitizer.ValueSanitizer to define your own value
 * sanitizer.
 * </ol>
 *
 */
public class UrlQuerySanitizer {

    /**
     * A simple tuple that holds parameter-value pairs.
     *
     */
    public class ParameterValuePair {
        /**
         * Construct a parameter-value tuple.
         * @param parameter an unencoded parameter
         * @param value an unencoded value
         */
        public ParameterValuePair(String parameter,
                String value) {
            mParameter = parameter;
            mValue = value;
        }
        /**
         * The unencoded parameter
         */
        public String mParameter;
        /**
         * The unencoded value
         */
        public String mValue;
    }

    /** Sanitizers keyed by unencoded parameter name; never holds null values. */
    private final HashMap<String, ValueSanitizer> mSanitizers =
        new HashMap<String, ValueSanitizer>();
    /** Preferred sanitized value for each unencoded parameter name. */
    private final HashMap<String, String> mEntries =
        new HashMap<String, String>();
    /** Every sanitized pair, in query order, including repeated parameters. */
    private final ArrayList<ParameterValuePair> mEntriesList =
        new ArrayList<ParameterValuePair>();
    private boolean mAllowUnregisteredParamaters;
    private boolean mPreferFirstRepeatedParameter;
    private ValueSanitizer mUnregisteredParameterValueSanitizer =
        getAllIllegal();

    /**
     * A functor used to sanitize a single query value.
     *
     */
    public static interface ValueSanitizer {
        /**
         * Sanitize an unencoded value.
         * @param value the unencoded value to sanitize
         * @return the sanitized unencoded value
         */
        public String sanitize(String value);
    }

    /**
     * Sanitize values based on which characters they contain. Illegal
     * characters are replaced with either space or '_', depending upon
     * whether space is a legal character or not.
     */
    public static class IllegalCharacterValueSanitizer implements
        ValueSanitizer {
        private int mFlags;

        /**
         * Allow space (' ') characters.
         */
        public final static int SPACE_OK =              1 << 0;
        /**
         * Allow whitespace characters other than space. The
         * other whitespace characters are
         * '\t' '\f' '\n' '\r' and U+000B (vertical tab)
         */
        public final static int OTHER_WHITESPACE_OK =  1 << 1;
        /**
         * Allow characters with character codes 128 and above. (Percent
         * escapes decode to codes 128 to 255; the implementation does not
         * enforce an upper bound.)
         */
        public final static int NON_7_BIT_ASCII_OK =    1 << 2;
        /**
         * Allow double quote characters. ('"')
         */
        public final static int DQUOTE_OK =             1 << 3;
        /**
         * Allow single quote characters. ('\'')
         */
        public final static int SQUOTE_OK =             1 << 4;
        /**
         * Allow less-than characters. ('<')
         */
        public final static int LT_OK =                 1 << 5;
        /**
         * Allow greater-than characters. ('>')
         */
        public final static int GT_OK =                 1 << 6;
        /**
         * Allow ampersand characters ('&')
         */
        public final static int AMP_OK =                1 << 7;
        /**
         * Allow percent-sign characters ('%')
         */
        public final static int PCT_OK =                1 << 8;
        /**
         * Allow nul characters ('\0')
         */
        public final static int NUL_OK =                1 << 9;
        /**
         * Allow text to start with a script URL
         * such as "javascript:" or "vbscript:"
         */
        public final static int SCRIPT_URL_OK =         1 << 10;

        /**
         * Mask with all fields set to OK
         */
        public final static int ALL_OK =                0x7ff;

        /**
         * Mask with both regular space and other whitespace OK
         */
        public final static int ALL_WHITESPACE_OK =
            SPACE_OK | OTHER_WHITESPACE_OK;


        // Common flag combinations:

        /**
         * <ul>
         * <li>Deny all special characters.
         * <li>Deny script URLs.
         * </ul>
         */
        public final static int ALL_ILLEGAL =
            0;
        /**
         * <ul>
         * <li>Allow all special characters except Nul. ('\0').
         * <li>Allow script URLs.
         * </ul>
         */
        public final static int ALL_BUT_NUL_LEGAL =
            ALL_OK & ~NUL_OK;
        /**
         * <ul>
         * <li>Allow all special characters except for:
         * <ul>
         *  <li>whitespace characters
         *  <li>Nul ('\0')
         * </ul>
         * <li>Allow script URLs.
         * </ul>
         */
        public final static int ALL_BUT_WHITESPACE_LEGAL =
            ALL_OK & ~(ALL_WHITESPACE_OK | NUL_OK);
        /**
         * <ul>
         * <li>Allow characters used by encoded URLs.
         * <li>Deny script URLs.
         * </ul>
         */
        public final static int URL_LEGAL =
            NON_7_BIT_ASCII_OK | SQUOTE_OK | AMP_OK | PCT_OK;
        /**
         * <ul>
         * <li>Allow characters used by encoded URLs.
         * <li>Allow spaces.
         * <li>Deny script URLs.
         * </ul>
         */
        public final static int URL_AND_SPACE_LEGAL =
            URL_LEGAL | SPACE_OK;
        /**
         * <ul>
         * <li>Allow ampersand.
         * <li>Deny script URLs.
         * </ul>
         */
        public final static int AMP_LEGAL =
            AMP_OK;
        /**
         * <ul>
         * <li>Allow ampersand.
         * <li>Allow space.
         * <li>Deny script URLs.
         * </ul>
         */
        public final static int AMP_AND_SPACE_LEGAL =
            AMP_OK | SPACE_OK;
        /**
         * <ul>
         * <li>Allow space.
         * <li>Deny script URLs.
         * </ul>
         */
        public final static int SPACE_LEGAL =
            SPACE_OK;
        /**
         * <ul>
         * <li>Allow all but.
         * <ul>
         *  <li>Nul ('\0')
         *  <li>Angle brackets ('<', '>')
         * </ul>
         * <li>Allow script URLs. (The mask is derived from ALL_OK, so the
         * SCRIPT_URL_OK bit is set.)
         * </ul>
         */
        public final static int ALL_BUT_NUL_AND_ANGLE_BRACKETS_LEGAL =
            ALL_OK & ~(NUL_OK | LT_OK | GT_OK);

        // Script URL definitions. Prefixes are lower case; candidates are
        // lower-cased before comparison.

        private final static String JAVASCRIPT_PREFIX = "javascript:";

        private final static String VBSCRIPT_PREFIX = "vbscript:";

        private final static int MIN_SCRIPT_PREFIX_LENGTH = Math.min(
                JAVASCRIPT_PREFIX.length(), VBSCRIPT_PREFIX.length());

        /**
         * Construct a sanitizer. The parameters set the behavior of the
         * sanitizer.
         * @param flags some combination of the XXX_OK flags.
         */
        public IllegalCharacterValueSanitizer(
            int flags) {
            mFlags = flags;
        }

        /**
         * Sanitize a value.
         * <ol>
         * <li>If script URLs are not OK and the value (ignoring leading
         * white space) starts with a script URL prefix, the whole value is
         * replaced by the empty string.
         * <li>If neither spaces nor other white space is OK, then
         * white space will be trimmed from the beginning and end of
         * the value. (Just the actual white space characters are trimmed, not
         * other control codes.)
         * <li>Illegal characters will be replaced with
         * either ' ' or '_', depending on whether a space is itself a
         * legal character.
         * </ol>
         * @param value the unencoded value to sanitize; may be null
         * @return the sanitized value, or null if value was null
         */
        public String sanitize(String value) {
            if (value == null) {
                return null;
            }

            // Detect script URLs on the trimmed text so that leading
            // whitespace cannot be used to hide the prefix.
            String trimmed = trimWhitespace(value);
            if ((mFlags & SCRIPT_URL_OK) == 0 && startsWithScriptPrefix(trimmed)) {
                return "";
            }

            // If whitespace isn't OK, get rid of whitespace at beginning
            // and end of value.
            if ((mFlags & ALL_WHITESPACE_OK) == 0) {
                value = trimmed;
            }

            char replacement = ((mFlags & SPACE_OK) != 0) ? ' ' : '_';
            int length = value.length();
            StringBuilder stringBuilder = new StringBuilder(length);
            for (int i = 0; i < length; i++) {
                char c = value.charAt(i);
                stringBuilder.append(characterIsLegal(c) ? c : replacement);
            }
            return stringBuilder.toString();
        }

        /**
         * Check whether a value begins with "javascript:" or "vbscript:",
         * ignoring case.
         * @param value the string to test
         * @return true if value starts with a script URL prefix
         */
        private boolean startsWithScriptPrefix(String value) {
            if (value.length() < MIN_SCRIPT_PREFIX_LENGTH) {
                return false;
            }
            // Locale.ROOT keeps the comparison independent of the default
            // locale (e.g. Turkish lower-cases 'I' to a dotless i).
            String asLower = value.toLowerCase(Locale.ROOT);
            return asLower.startsWith(JAVASCRIPT_PREFIX)
                    || asLower.startsWith(VBSCRIPT_PREFIX);
        }

        /**
         * Trim whitespace from the beginning and end of a string.
         * <p>
         * Note: can't use {@link String#trim} because {@link String#trim} has a
         * different definition of whitespace than we want.
         * @param value the string to trim
         * @return the trimmed string
         */
        private String trimWhitespace(String value) {
            int start = 0;
            int last = value.length() - 1;
            int end = last;
            while (start <= end && isWhitespace(value.charAt(start))) {
                start++;
            }
            while (end >= start && isWhitespace(value.charAt(end))) {
                end--;
            }
            if (start == 0 && end == last) {
                // Nothing to trim; avoid allocating a copy.
                return value;
            }
            return value.substring(start, end + 1);
        }

        /**
         * Check if c is whitespace.
         * @param c character to test
         * @return true if c is a whitespace character
         */
        private boolean isWhitespace(char c) {
            switch(c) {
            case ' ':
            case '\t':
            case '\f':
            case '\n':
            case '\r':
            case 11: /* VT */
                return true;
            default:
                return false;
            }
        }

        /**
         * Check whether an individual character is legal. Uses the
         * flag bit-set passed into the constructor. Characters without a
         * dedicated flag are legal when they are printable 7-bit ASCII
         * (32 to 126), or when they are 128 or above and
         * NON_7_BIT_ASCII_OK is set; remaining control characters and
         * DEL (127) are always illegal.
         * @param c the character to test
         * @return true if c is a legal character
         */
        private boolean characterIsLegal(char c) {
            switch(c) {
            case ' ' : return (mFlags & SPACE_OK) != 0;
            case '\t': case '\f': case '\n': case '\r': case 11: /* VT */
              return (mFlags & OTHER_WHITESPACE_OK) != 0;
            case '\"': return (mFlags & DQUOTE_OK) != 0;
            case '\'': return (mFlags & SQUOTE_OK) != 0;
            case '<' : return (mFlags & LT_OK) != 0;
            case '>' : return (mFlags & GT_OK) != 0;
            case '&' : return (mFlags & AMP_OK) != 0;
            case '%' : return (mFlags & PCT_OK) != 0;
            case '\0': return (mFlags & NUL_OK) != 0;
            default  : return (c >= 32 && c < 127) ||
                ((c >= 128) && ((mFlags & NON_7_BIT_ASCII_OK) != 0));
            }
        }
    }

    /**
     * Get the current value sanitizer used when processing
     * unregistered parameter values.
     * <p>
     * <b>Note:</b> The default unregistered parameter value sanitizer is
     * one that doesn't allow any special characters, the same one that
     * is returned by calling getAllIllegal.
     *
     * @return the current ValueSanitizer used to sanitize unregistered
     * parameter values.
     */
    public ValueSanitizer getUnregisteredParameterValueSanitizer() {
        return mUnregisteredParameterValueSanitizer;
    }

    /**
     * Set the value sanitizer used when processing unregistered
     * parameter values.
     * @param sanitizer set the ValueSanitizer used to sanitize unregistered
     * parameter values.
     */
    public void setUnregisteredParameterValueSanitizer(
            ValueSanitizer sanitizer) {
        mUnregisteredParameterValueSanitizer = sanitizer;
    }


    // Private fields for singleton sanitizers:

    private static final ValueSanitizer sAllIllegal =
        new IllegalCharacterValueSanitizer(
                IllegalCharacterValueSanitizer.ALL_ILLEGAL);

    private static final ValueSanitizer sAllButNulLegal =
        new IllegalCharacterValueSanitizer(
                IllegalCharacterValueSanitizer.ALL_BUT_NUL_LEGAL);

    private static final ValueSanitizer sAllButWhitespaceLegal =
        new IllegalCharacterValueSanitizer(
                IllegalCharacterValueSanitizer.ALL_BUT_WHITESPACE_LEGAL);

    private static final ValueSanitizer sURLLegal =
        new IllegalCharacterValueSanitizer(
                IllegalCharacterValueSanitizer.URL_LEGAL);

    private static final ValueSanitizer sUrlAndSpaceLegal =
        new IllegalCharacterValueSanitizer(
                IllegalCharacterValueSanitizer.URL_AND_SPACE_LEGAL);

    private static final ValueSanitizer sAmpLegal =
        new IllegalCharacterValueSanitizer(
                IllegalCharacterValueSanitizer.AMP_LEGAL);

    private static final ValueSanitizer sAmpAndSpaceLegal =
        new IllegalCharacterValueSanitizer(
                IllegalCharacterValueSanitizer.AMP_AND_SPACE_LEGAL);

    private static final ValueSanitizer sSpaceLegal =
        new IllegalCharacterValueSanitizer(
                IllegalCharacterValueSanitizer.SPACE_LEGAL);

    private static final ValueSanitizer sAllButNulAndAngleBracketsLegal =
        new IllegalCharacterValueSanitizer(
                IllegalCharacterValueSanitizer.ALL_BUT_NUL_AND_ANGLE_BRACKETS_LEGAL);

    /**
     * Return a value sanitizer that does not allow any special characters,
     * and also does not allow script URLs.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getAllIllegal() {
        return sAllIllegal;
    }

    /**
     * Return a value sanitizer that allows everything except Nul ('\0')
     * characters. Script URLs are allowed.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getAllButNulLegal() {
        return sAllButNulLegal;
    }
    /**
     * Return a value sanitizer that allows everything except Nul ('\0')
     * characters, space (' '), and other whitespace characters.
     * Script URLs are allowed.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getAllButWhitespaceLegal() {
        return sAllButWhitespaceLegal;
    }
    /**
     * Return a value sanitizer that allows all the characters used by
     * encoded URLs. Does not allow script URLs.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getUrlLegal() {
        return sURLLegal;
    }
    /**
     * Return a value sanitizer that allows all the characters used by
     * encoded URLs and allows spaces, which are not technically legal
     * in encoded URLs, but commonly appear anyway.
     * Does not allow script URLs.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getUrlAndSpaceLegal() {
        return sUrlAndSpaceLegal;
    }
    /**
     * Return a value sanitizer that does not allow any special characters
     * except ampersand ('&'). Does not allow script URLs.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getAmpLegal() {
        return sAmpLegal;
    }
    /**
     * Return a value sanitizer that does not allow any special characters
     * except ampersand ('&') and space (' '). Does not allow script URLs.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getAmpAndSpaceLegal() {
        return sAmpAndSpaceLegal;
    }
    /**
     * Return a value sanitizer that does not allow any special characters
     * except space (' '). Does not allow script URLs.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getSpaceLegal() {
        return sSpaceLegal;
    }
    /**
     * Return a value sanitizer that allows any special characters
     * except angle brackets ('<' and '>') and Nul ('\0').
     * Allows script URLs.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getAllButNulAndAngleBracketsLegal() {
        return sAllButNulAndAngleBracketsLegal;
    }

    /**
     * Constructs a UrlQuerySanitizer.
     * <p>
     * Defaults:
     * <ul>
     * <li>unregistered parameters are not allowed.
     * <li>the last instance of a repeated parameter is preferred.
     * <li>The default value sanitizer is an AllIllegal value sanitizer.
     * </ul>
     */
    public UrlQuerySanitizer() {
    }

    /**
     * Constructs a UrlQuerySanitizer and parse a URL.
     * This constructor is provided for convenience when the
     * default parsing behavior is acceptable.
     * <p>
     * Because the URL is parsed before the constructor returns, there isn't
     * a chance to configure the sanitizer to change the parsing behavior.
     * <p>
     * <code>
     * UrlQuerySanitizer sanitizer = new UrlQuerySanitizer(myUrl);
     * String name = sanitizer.getValue("name");
     * </code>
     * <p>
     * Defaults:
     * <ul>
     * <li>unregistered parameters <em>are</em> allowed.
     * <li>the last instance of a repeated parameter is preferred.
     * <li>The default value sanitizer is an AllIllegal value sanitizer.
     * </ul>
     * @param url the encoded URL to parse; must not be null.
     */
    public UrlQuerySanitizer(String url) {
        setAllowUnregisteredParamaters(true);
        parseUrl(url);
    }

    /**
     * Parse the query parameters out of an encoded URL.
     * Works by extracting the query portion from the URL (everything after
     * the first '?') and then calling parseQuery(). If there is no query
     * portion it is treated as if the query portion is an empty string.
     * @param url the encoded URL to parse; must not be null.
     */
    public void parseUrl(String url) {
        int queryIndex = url.indexOf('?');
        String query = (queryIndex >= 0) ? url.substring(queryIndex + 1) : "";
        parseQuery(query);
    }

    /**
     * Parse a query. A query string is any number of parameter-value clauses
     * separated by any non-zero number of ampersands. A parameter-value clause
     * is a parameter followed by an equal sign, followed by a value. If the
     * equal sign is missing, the value is assumed to be the empty string.
     * Any entries from a previous parse are discarded first.
     * @param query the query to parse; must not be null.
     */
    public void parseQuery(String query) {
        clear();
        // Split by '&'. StringTokenizer collapses runs of delimiters, so it
        // never hands back an empty clause.
        StringTokenizer tokenizer = new StringTokenizer(query, "&");
        while (tokenizer.hasMoreTokens()) {
            String attributeValuePair = tokenizer.nextToken();
            int assignmentIndex = attributeValuePair.indexOf('=');
            if (assignmentIndex < 0) {
                // No assignment found, treat as if empty value
                parseEntry(attributeValuePair, "");
            } else {
                parseEntry(attributeValuePair.substring(0, assignmentIndex),
                        attributeValuePair.substring(assignmentIndex + 1));
            }
        }
    }

    /**
     * Get a set of all of the parameters found in the sanitized query.
     * <p>
     * Note: Do not modify this set. Treat it as a read-only set.
     * @return all the parameters found in the current query.
     */
    public Set<String> getParameterSet() {
        return mEntries.keySet();
    }

    /**
     * An array list of all of the parameter value pairs in the sanitized
     * query, in the order they appeared in the query. May contain duplicate
     * parameters.
     * <p class="note"><b>Note:</b> Do not modify this list. Treat it as a read-only list.</p>
     * @return the parameter-value pairs of the current query, in query order.
     */
    public List<ParameterValuePair> getParameterList() {
        return mEntriesList;
    }

    /**
     * Check if a parameter exists in the current sanitized query.
     * @param parameter the unencoded name of a parameter.
     * @return true if the parameter exists in the current sanitized query.
     */
    public boolean hasParameter(String parameter) {
        return mEntries.containsKey(parameter);
    }

    /**
     * Get the value for a parameter in the current sanitized query.
     * Returns null if the parameter does not
     * exist.
     * @param parameter the unencoded name of a parameter.
     * @return the sanitized unencoded value of the parameter,
     * or null if the parameter does not exist.
     */
    public String getValue(String parameter) {
        return mEntries.get(parameter);
    }

    /**
     * Register a value sanitizer for a particular parameter. Can also be used
     * to replace or remove an already-set value sanitizer.
     * <p>
     * Registering a non-null value sanitizer for a particular parameter
     * makes that parameter a registered parameter.
     * @param parameter an unencoded parameter name
     * @param valueSanitizer the value sanitizer to use for a particular
     * parameter. May be null in order to unregister that parameter.
     * @see #getAllowUnregisteredParamaters()
     */
    public void registerParameter(String parameter,
            ValueSanitizer valueSanitizer) {
        if (valueSanitizer == null) {
            // Unregister: drop the entry instead of storing a null value.
            mSanitizers.remove(parameter);
            return;
        }
        mSanitizers.put(parameter, valueSanitizer);
    }

    /**
     * Register a value sanitizer for an array of parameters.
     * @param parameters An array of unencoded parameter names.
     * @param valueSanitizer the value sanitizer to use for every listed
     * parameter. May be null in order to unregister those parameters.
     * @see #registerParameter
     */
    public void registerParameters(String[] parameters,
            ValueSanitizer valueSanitizer) {
        int length = parameters.length;
        for (int i = 0; i < length; i++) {
            if (valueSanitizer == null) {
                mSanitizers.remove(parameters[i]);
            } else {
                mSanitizers.put(parameters[i], valueSanitizer);
            }
        }
    }

    /**
     * Set whether or not unregistered parameters are allowed. If they
     * are not allowed, then they will be dropped when a query is sanitized.
     * <p>
     * Defaults to false.
     * @param allowUnregisteredParamaters true to allow unregistered parameters.
     * @see #getAllowUnregisteredParamaters()
     */
    public void setAllowUnregisteredParamaters(
            boolean allowUnregisteredParamaters) {
        mAllowUnregisteredParamaters = allowUnregisteredParamaters;
    }

    /**
     * Get whether or not unregistered parameters are allowed. If not
     * allowed, they will be dropped when a query is parsed.
     * @return true if unregistered parameters are allowed.
     * @see #setAllowUnregisteredParamaters(boolean)
     */
    public boolean getAllowUnregisteredParamaters() {
        return mAllowUnregisteredParamaters;
    }

    /**
     * Set whether or not the first occurrence of a repeated parameter is
     * preferred. True means the first repeated parameter is preferred.
     * False means that the last repeated parameter is preferred.
     * <p>
     * The preferred parameter is the one that is returned when getValue
     * is called.
     * <p>
     * defaults to false.
     * @param preferFirstRepeatedParameter True if the first repeated
     * parameter is preferred.
     * @see #getPreferFirstRepeatedParameter()
     */
    public void setPreferFirstRepeatedParameter(
            boolean preferFirstRepeatedParameter) {
        mPreferFirstRepeatedParameter = preferFirstRepeatedParameter;
    }

    /**
     * Get whether or not the first occurrence of a repeated parameter is
     * preferred.
     * @return true if the first occurrence of a repeated parameter is
     * preferred.
     * @see #setPreferFirstRepeatedParameter(boolean)
     */
    public boolean getPreferFirstRepeatedParameter() {
        return mPreferFirstRepeatedParameter;
    }

    /**
     * Parse an escaped parameter-value pair. The default implementation
     * unescapes both the parameter and the value, then looks up the
     * effective value sanitizer for the parameter and uses it to sanitize
     * the value. If all goes well then addSanitizedEntry is called with
     * the unescaped parameter and the sanitized unescaped value. If the
     * parameter has no effective value sanitizer, the pair is dropped.
     * @param parameter an escaped parameter
     * @param value an unsanitized escaped value
     */
    protected void parseEntry(String parameter, String value) {
        String unescapedParameter = unescape(parameter);
        ValueSanitizer valueSanitizer =
            getEffectiveValueSanitizer(unescapedParameter);

        if (valueSanitizer == null) {
            return;
        }
        String unescapedValue = unescape(value);
        String sanitizedValue = valueSanitizer.sanitize(unescapedValue);
        addSanitizedEntry(unescapedParameter, sanitizedValue);
    }

    /**
     * Record a sanitized parameter-value pair. Override if you want to
     * do additional filtering or validation.
     * <p>
     * The pair is always appended to the parameter list; the per-parameter
     * value is only overwritten when the last occurrence is preferred.
     * @param parameter an unescaped parameter
     * @param value a sanitized unescaped value
     */
    protected void addSanitizedEntry(String parameter, String value) {
        mEntriesList.add(
                new ParameterValuePair(parameter, value));
        if (mPreferFirstRepeatedParameter && mEntries.containsKey(parameter)) {
            return;
        }
        mEntries.put(parameter, value);
    }

    /**
     * Get the value sanitizer for a parameter. Returns null if there
     * is no value sanitizer registered for the parameter.
     * @param parameter the unescaped parameter
     * @return the currently registered value sanitizer for this parameter.
     * @see #registerParameter(String, android.net.UrlQuerySanitizer.ValueSanitizer)
     */
    public ValueSanitizer getValueSanitizer(String parameter) {
        return mSanitizers.get(parameter);
    }

    /**
     * Get the effective value sanitizer for a parameter. Like getValueSanitizer,
     * except if there is no value sanitizer registered for a parameter, and
     * unregistered parameters are allowed, then the default value sanitizer is
     * returned.
     * @param parameter an unescaped parameter
     * @return the effective value sanitizer for a parameter, or null if the
     * parameter is unregistered and unregistered parameters are not allowed.
     */
    public ValueSanitizer getEffectiveValueSanitizer(String parameter) {
        ValueSanitizer sanitizer = getValueSanitizer(parameter);
        if (sanitizer == null && mAllowUnregisteredParamaters) {
            sanitizer = getUnregisteredParameterValueSanitizer();
        }
        return sanitizer;
    }

    /**
     * Unescape an escaped string.
     * <ul>
     * <li>'+' characters are replaced by
     * ' ' characters.
     * <li>Valid "%xx" escape sequences are replaced by the
     * corresponding unescaped character (each escape decodes to a single
     * character with code 0 to 255).
     * <li>Invalid escape sequences such as "%1z", are passed through unchanged.
     * </ul>
     * @param string the escaped string; must not be null
     * @return the unescaped string.
     */
    public String unescape(String string) {
        // Early exit if no escaped characters. Decoding has to start at
        // whichever of '%' or '+' comes first; starting at the first '%'
        // alone would copy any earlier '+' through undecoded.
        int firstPercent = string.indexOf('%');
        int firstPlus = string.indexOf('+');
        int firstEscape;
        if (firstPercent < 0) {
            firstEscape = firstPlus;
        } else if (firstPlus < 0) {
            firstEscape = firstPercent;
        } else {
            firstEscape = Math.min(firstPercent, firstPlus);
        }
        if (firstEscape < 0) {
            return string;
        }

        int length = string.length();

        StringBuilder stringBuilder = new StringBuilder(length);
        stringBuilder.append(string, 0, firstEscape);
        for (int i = firstEscape; i < length; i++) {
            char c = string.charAt(i);
            if (c == '+') {
                c = ' ';
            }
            else if (c == '%' && i + 2 < length) {
                char c1 = string.charAt(i + 1);
                char c2 = string.charAt(i + 2);
                if (isHexDigit(c1) && isHexDigit(c2)) {
                    c = (char) (decodeHexDigit(c1) * 16 + decodeHexDigit(c2));
                    // Skip the two hex digits just consumed.
                    i += 2;
                }
            }
            stringBuilder.append(c);
        }
        return stringBuilder.toString();
    }

    /**
     * Test if a character is a hexadecimal digit. Both upper case and lower
     * case hex digits are allowed.
     * @param c the character to test
     * @return true if c is a hex digit.
     */
    protected boolean isHexDigit(char c) {
        return decodeHexDigit(c) >= 0;
    }

    /**
     * Convert a character that represents a hexadecimal digit into an integer.
     * If the character is not a hexadecimal digit, then -1 is returned.
     * Both upper case and lower case hex digits are allowed.
     * @param c the hexadecimal digit.
     * @return the integer value of the hexadecimal digit, or -1.
     */
    protected int decodeHexDigit(char c) {
        if (c >= '0' && c <= '9') {
            return c - '0';
        }
        else if (c >= 'A' && c <= 'F') {
            return c - 'A' + 10;
        }
        else if (c >= 'a' && c <= 'f') {
            return c - 'a' + 10;
        }
        else {
            return -1;
        }
    }

    /**
     * Clear the existing entries. Called to get ready to parse a new
     * query string.
     */
    protected void clear() {
        mEntries.clear();
        mEntriesList.clear();
    }
}
913
914