1/*
2 * Copyright (C) 2007 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package android.net;
18
19import java.util.ArrayList;
20import java.util.HashMap;
21import java.util.List;
22import java.util.Locale;
23import java.util.Set;
24import java.util.StringTokenizer;
25
/**
 * Sanitizes the Query portion of a URL. Simple example:
 * <code>
 * UrlQuerySanitizer sanitizer = new UrlQuerySanitizer();
 * sanitizer.setAllowUnregisteredParamaters(true);
 * sanitizer.parseUrl("http://example.com/?name=Joe+User");
 * String name = sanitizer.getValue("name");
 * // name now contains "Joe_User". (The string is first decoded, which
 * // converts the '+' to a ' '. Then the string is sanitized, which
 * // converts the ' ' to an '_'. The ' ' is converted because the default
 * // unregistered parameter sanitizer does not allow any special characters,
 * // and ' ' is a special character.)
 * </code>
 *
 * Register ValueSanitizers to customize the way individual
 * parameters are sanitized:
 * <code>
 * UrlQuerySanitizer sanitizer = new UrlQuerySanitizer();
 * sanitizer.registerParameter("name", UrlQuerySanitizer.getSpaceLegal());
 * sanitizer.parseUrl("http://example.com/?name=Joe+User");
 * String name = sanitizer.getValue("name");
 * // name now contains "Joe User". (The '+' is decoded to a ' ', and the
 * // space-legal sanitizer leaves the ' ' alone.)
 * </code>
 *
 * There are several ways to create ValueSanitizers. In order of increasing
 * sophistication:
 * <ol>
 * <li>Call one of the UrlQuerySanitizer.getXXX() methods.
 * <li>Construct your own instance of
 * UrlQuerySanitizer.IllegalCharacterValueSanitizer.
 * <li>Implement UrlQuerySanitizer.ValueSanitizer to define your own value
 * sanitizer.
 * </ol>
 *
 */
public class UrlQuerySanitizer {

    /**
     * A simple tuple that holds parameter-value pairs.
     * <p>
     * This is a (non-static) inner class with public mutable fields; both
     * properties are kept as they are because they are part of the public
     * interface of this class.
     */
    public class ParameterValuePair {
        /**
         * Construct a parameter-value tuple.
         * @param parameter an unencoded parameter
         * @param value an unencoded value
         */
        public ParameterValuePair(String parameter,
                String value) {
            mParameter = parameter;
            mValue = value;
        }
        /**
         * The unencoded parameter
         */
        public String mParameter;
        /**
         * The unencoded value
         */
        public String mValue;
    }

    /** Sanitizers registered per (unencoded) parameter name. */
    private final HashMap<String, ValueSanitizer> mSanitizers =
        new HashMap<String, ValueSanitizer>();
    /** Preferred sanitized value for each parameter of the last parsed query. */
    private final HashMap<String, String> mEntries =
        new HashMap<String, String>();
    /** Every sanitized pair of the last parsed query, in query order. */
    private final ArrayList<ParameterValuePair> mEntriesList =
        new ArrayList<ParameterValuePair>();
    private boolean mAllowUnregisteredParamaters;
    private boolean mPreferFirstRepeatedParameter;
    private ValueSanitizer mUnregisteredParameterValueSanitizer =
        getAllIllegal();

    /**
     * A functor used to sanitize a single query value.
     *
     */
    public static interface ValueSanitizer {
        /**
         * Sanitize an unencoded value.
         * @param value the unencoded value to sanitize
         * @return the sanitized unencoded value
         */
        public String sanitize(String value);
    }

    /**
     * Sanitize values based on which characters they contain. Illegal
     * characters are replaced with either space or '_', depending upon
     * whether space is a legal character or not.
     */
    public static class IllegalCharacterValueSanitizer implements
        ValueSanitizer {
        /** Bit-set of the XXX_OK flags that this sanitizer accepts. */
        private int mFlags;

        /**
         * Allow space (' ') characters.
         */
        public static final int SPACE_OK =              1 << 0;
        /**
         * Allow whitespace characters other than space. The
         * other whitespace characters are
         * '\t' '\f' '\n' '\r' and vertical tab (character code 11).
         */
        public static final int OTHER_WHITESPACE_OK =  1 << 1;
        /**
         * Allow characters with character codes of 128 and above.
         */
        public static final int NON_7_BIT_ASCII_OK =    1 << 2;
        /**
         * Allow double quote characters. ('"')
         */
        public static final int DQUOTE_OK =             1 << 3;
        /**
         * Allow single quote characters. ('\'')
         */
        public static final int SQUOTE_OK =             1 << 4;
        /**
         * Allow less-than characters. ('<')
         */
        public static final int LT_OK =                 1 << 5;
        /**
         * Allow greater-than characters. ('>')
         */
        public static final int GT_OK =                 1 << 6;
        /**
         * Allow ampersand characters ('&')
         */
        public static final int AMP_OK =                1 << 7;
        /**
         * Allow percent-sign characters ('%')
         */
        public static final int PCT_OK =                1 << 8;
        /**
         * Allow nul characters ('\0')
         */
        public static final int NUL_OK =                1 << 9;
        /**
         * Allow text to start with a script URL
         * such as "javascript:" or "vbscript:"
         */
        public static final int SCRIPT_URL_OK =         1 << 10;

        /**
         * Mask with all fields set to OK
         */
        public static final int ALL_OK =                0x7ff;

        /**
         * Mask with both regular space and other whitespace OK
         */
        public static final int ALL_WHITESPACE_OK =
            SPACE_OK | OTHER_WHITESPACE_OK;


        // Common flag combinations:

        /**
         * <ul>
         * <li>Deny all special characters.
         * <li>Deny script URLs.
         * </ul>
         */
        public static final int ALL_ILLEGAL =
            0;
        /**
         * <ul>
         * <li>Allow all special characters except Nul. ('\0').
         * <li>Allow script URLs.
         * </ul>
         */
        public static final int ALL_BUT_NUL_LEGAL =
            ALL_OK & ~NUL_OK;
        /**
         * <ul>
         * <li>Allow all special characters except for:
         * <ul>
         *  <li>whitespace characters
         *  <li>Nul ('\0')
         * </ul>
         * <li>Allow script URLs.
         * </ul>
         */
        public static final int ALL_BUT_WHITESPACE_LEGAL =
            ALL_OK & ~(ALL_WHITESPACE_OK | NUL_OK);
        /**
         * <ul>
         * <li>Allow characters used by encoded URLs.
         * <li>Deny script URLs.
         * </ul>
         */
        public static final int URL_LEGAL =
            NON_7_BIT_ASCII_OK | SQUOTE_OK | AMP_OK | PCT_OK;
        /**
         * <ul>
         * <li>Allow characters used by encoded URLs.
         * <li>Allow spaces.
         * <li>Deny script URLs.
         * </ul>
         */
        public static final int URL_AND_SPACE_LEGAL =
            URL_LEGAL | SPACE_OK;
        /**
         * <ul>
         * <li>Allow ampersand.
         * <li>Deny script URLs.
         * </ul>
         */
        public static final int AMP_LEGAL =
            AMP_OK;
        /**
         * <ul>
         * <li>Allow ampersand.
         * <li>Allow space.
         * <li>Deny script URLs.
         * </ul>
         */
        public static final int AMP_AND_SPACE_LEGAL =
            AMP_OK | SPACE_OK;
        /**
         * <ul>
         * <li>Allow space.
         * <li>Deny script URLs.
         * </ul>
         */
        public static final int SPACE_LEGAL =
            SPACE_OK;
        /**
         * <ul>
         * <li>Allow all but.
         * <ul>
         *  <li>Nul ('\0')
         *  <li>Angle brackets ('<', '>')
         * </ul>
         * <li>Allow script URLs. (The mask keeps the SCRIPT_URL_OK bit.)
         * </ul>
         */
        public static final int ALL_BUT_NUL_AND_ANGLE_BRACKETS_LEGAL =
            ALL_OK & ~(NUL_OK | LT_OK | GT_OK);

        /**
         *  Script URL definitions
         */

        private static final String JAVASCRIPT_PREFIX = "javascript:";

        private static final String VBSCRIPT_PREFIX = "vbscript:";

        // Values shorter than the shortest prefix cannot be script URLs,
        // so the lower-casing work can be skipped for them.
        private static final int MIN_SCRIPT_PREFIX_LENGTH = Math.min(
                JAVASCRIPT_PREFIX.length(), VBSCRIPT_PREFIX.length());

        /**
         * Construct a sanitizer. The parameters set the behavior of the
         * sanitizer.
         * @param flags some combination of the XXX_OK flags.
         */
        public IllegalCharacterValueSanitizer(
            int flags) {
            mFlags = flags;
        }

        /**
         * Sanitize a value.
         * <ol>
         * <li>If script URLs are not OK, a value that starts with
         * "javascript:" or "vbscript:" (compared case-insensitively) is
         * replaced by the empty string. The check looks at the value as
         * passed in, before any whitespace is trimmed, so a value with
         * leading whitespace is not recognized as a script URL.
         * <li>If neither spaces nor other white space is OK, then
         * white space will be trimmed from the beginning and end of
         * the URL. (Just the actual white space characters are trimmed, not
         * other control codes.)
         * <li> Illegal characters will be replaced with
         * either ' ' or '_', depending on whether a space is itself a
         * legal character.
         * </ol>
         * @param value the unencoded value to sanitize, may be null
         * @return the sanitized value, or null if value is null
         */
        @Override
        public String sanitize(String value) {
            if (value == null) {
                return null;
            }
            int length = value.length();
            // Script URLs are rejected only when the SCRIPT_URL_OK bit is
            // NOT set.
            if ((mFlags & SCRIPT_URL_OK) == 0) {
                if (length >= MIN_SCRIPT_PREFIX_LENGTH) {
                    // Locale.ROOT keeps the comparison locale independent.
                    String asLower = value.toLowerCase(Locale.ROOT);
                    if (asLower.startsWith(JAVASCRIPT_PREFIX)
                            || asLower.startsWith(VBSCRIPT_PREFIX)) {
                        return "";
                    }
                }
            }

            // If whitespace isn't OK, get rid of whitespace at beginning
            // and end of value.
            if ((mFlags & ALL_WHITESPACE_OK) == 0) {
                value = trimWhitespace(value);
                // The length could have changed, so we need to correct
                // the length variable.
                length = value.length();
            }

            // The replacement character does not depend on the character
            // being replaced, so pick it once.
            final char replacement = ((mFlags & SPACE_OK) != 0) ? ' ' : '_';
            StringBuilder stringBuilder = new StringBuilder(length);
            for (int i = 0; i < length; i++) {
                char c = value.charAt(i);
                stringBuilder.append(characterIsLegal(c) ? c : replacement);
            }
            return stringBuilder.toString();
        }

        /**
         * Trim whitespace from the beginning and end of a string.
         * <p>
         * Note: can't use {@link String#trim} because {@link String#trim} has a
         * different definition of whitespace than we want.
         * @param value the string to trim
         * @return the trimmed string; the same instance if nothing was trimmed
         */
        private String trimWhitespace(String value) {
            int start = 0;
            int last = value.length() - 1;
            int end = last;
            while (start <= end && isWhitespace(value.charAt(start))) {
                start++;
            }
            while (end >= start && isWhitespace(value.charAt(end))) {
                end--;
            }
            if (start == 0 && end == last) {
                return value;
            }
            // For an all-whitespace value start == end + 1, which yields "".
            return value.substring(start, end + 1);
        }

        /**
         * Check if c is whitespace.
         * @param c character to test
         * @return true if c is a whitespace character
         */
        private boolean isWhitespace(char c) {
            switch(c) {
            case ' ':
            case '\t':
            case '\f':
            case '\n':
            case '\r':
            case 11: /* VT */
                return true;
            default:
                return false;
            }
        }

        /**
         * Check whether an individual character is legal. Uses the
         * flag bit-set passed into the constructor.
         * <p>
         * Characters that have no flag of their own are legal when they are
         * printable 7-bit ASCII (codes 32 to 126). Codes of 128 and above are
         * legal only with NON_7_BIT_ASCII_OK. The remaining control codes
         * (below 32 without a flag of their own, and 127) are never legal.
         * @param c the character to test
         * @return true if c is a legal character
         */
        private boolean characterIsLegal(char c) {
            switch(c) {
            case ' ' : return (mFlags & SPACE_OK) != 0;
            case '\t': case '\f': case '\n': case '\r': case 11: /* VT */
              return (mFlags & OTHER_WHITESPACE_OK) != 0;
            case '\"': return (mFlags & DQUOTE_OK) != 0;
            case '\'': return (mFlags & SQUOTE_OK) != 0;
            case '<' : return (mFlags & LT_OK) != 0;
            case '>' : return (mFlags & GT_OK) != 0;
            case '&' : return (mFlags & AMP_OK) != 0;
            case '%' : return (mFlags & PCT_OK) != 0;
            case '\0': return (mFlags & NUL_OK) != 0;
            default  : return (c >= 32 && c < 127) ||
                ((c >= 128) && ((mFlags & NON_7_BIT_ASCII_OK) != 0));
            }
        }
    }

    /**
     * Get the current value sanitizer used when processing
     * unregistered parameter values.
     * <p>
     * <b>Note:</b> The default unregistered parameter value sanitizer is
     * the one returned by {@link #getAllIllegal()}, which doesn't allow any
     * special characters.
     *
     * @return the current ValueSanitizer used to sanitize unregistered
     * parameter values.
     */
    public ValueSanitizer getUnregisteredParameterValueSanitizer() {
        return mUnregisteredParameterValueSanitizer;
    }

    /**
     * Set the value sanitizer used when processing unregistered
     * parameter values.
     * @param sanitizer set the ValueSanitizer used to sanitize unregistered
     * parameter values.
     */
    public void setUnregisteredParameterValueSanitizer(
            ValueSanitizer sanitizer) {
        mUnregisteredParameterValueSanitizer = sanitizer;
    }


    // Private fields for singleton sanitizers:

    private static final ValueSanitizer sAllIllegal =
        new IllegalCharacterValueSanitizer(
                IllegalCharacterValueSanitizer.ALL_ILLEGAL);

    private static final ValueSanitizer sAllButNulLegal =
        new IllegalCharacterValueSanitizer(
                IllegalCharacterValueSanitizer.ALL_BUT_NUL_LEGAL);

    private static final ValueSanitizer sAllButWhitespaceLegal =
        new IllegalCharacterValueSanitizer(
                IllegalCharacterValueSanitizer.ALL_BUT_WHITESPACE_LEGAL);

    private static final ValueSanitizer sURLLegal =
        new IllegalCharacterValueSanitizer(
                IllegalCharacterValueSanitizer.URL_LEGAL);

    private static final ValueSanitizer sUrlAndSpaceLegal =
        new IllegalCharacterValueSanitizer(
                IllegalCharacterValueSanitizer.URL_AND_SPACE_LEGAL);

    private static final ValueSanitizer sAmpLegal =
        new IllegalCharacterValueSanitizer(
                IllegalCharacterValueSanitizer.AMP_LEGAL);

    private static final ValueSanitizer sAmpAndSpaceLegal =
        new IllegalCharacterValueSanitizer(
                IllegalCharacterValueSanitizer.AMP_AND_SPACE_LEGAL);

    private static final ValueSanitizer sSpaceLegal =
        new IllegalCharacterValueSanitizer(
                IllegalCharacterValueSanitizer.SPACE_LEGAL);

    private static final ValueSanitizer sAllButNulAndAngleBracketsLegal =
        new IllegalCharacterValueSanitizer(
                IllegalCharacterValueSanitizer.ALL_BUT_NUL_AND_ANGLE_BRACKETS_LEGAL);

    /**
     * Return a value sanitizer that does not allow any special characters,
     * and also does not allow script URLs.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getAllIllegal() {
        return sAllIllegal;
    }

    /**
     * Return a value sanitizer that allows everything except Nul ('\0')
     * characters. Script URLs are allowed.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getAllButNulLegal() {
        return sAllButNulLegal;
    }
    /**
     * Return a value sanitizer that allows everything except Nul ('\0')
     * characters, space (' '), and other whitespace characters.
     * Script URLs are allowed.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getAllButWhitespaceLegal() {
        return sAllButWhitespaceLegal;
    }
    /**
     * Return a value sanitizer that allows all the characters used by
     * encoded URLs. Does not allow script URLs.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getUrlLegal() {
        return sURLLegal;
    }
    /**
     * Return a value sanitizer that allows all the characters used by
     * encoded URLs and allows spaces, which are not technically legal
     * in encoded URLs, but commonly appear anyway.
     * Does not allow script URLs.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getUrlAndSpaceLegal() {
        return sUrlAndSpaceLegal;
    }
    /**
     * Return a value sanitizer that does not allow any special characters
     * except ampersand ('&'). Does not allow script URLs.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getAmpLegal() {
        return sAmpLegal;
    }
    /**
     * Return a value sanitizer that does not allow any special characters
     * except ampersand ('&') and space (' '). Does not allow script URLs.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getAmpAndSpaceLegal() {
        return sAmpAndSpaceLegal;
    }
    /**
     * Return a value sanitizer that does not allow any special characters
     * except space (' '). Does not allow script URLs.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getSpaceLegal() {
        return sSpaceLegal;
    }
    /**
     * Return a value sanitizer that allows any special characters
     * except angle brackets ('<' and '>') and Nul ('\0').
     * Allows script URLs.
     * @return a value sanitizer
     */
    public static final ValueSanitizer getAllButNulAndAngleBracketsLegal() {
        return sAllButNulAndAngleBracketsLegal;
    }

    /**
     * Constructs a UrlQuerySanitizer.
     * <p>
     * Defaults:
     * <ul>
     * <li>unregistered parameters are not allowed.
     * <li>the last instance of a repeated parameter is preferred.
     * <li>The default value sanitizer is an AllIllegal value sanitizer.
     * </ul>
     */
    public UrlQuerySanitizer() {
    }

    /**
     * Constructs a UrlQuerySanitizer and parse a URL.
     * This constructor is provided for convenience when the
     * default parsing behavior is acceptable.
     * <p>
     * Because the URL is parsed before the constructor returns, there isn't
     * a chance to configure the sanitizer to change the parsing behavior.
     * <p>
     * <code>
     * UrlQuerySanitizer sanitizer = new UrlQuerySanitizer(myUrl);
     * String name = sanitizer.getValue("name");
     * </code>
     * <p>
     * Defaults:
     * <ul>
     * <li>unregistered parameters <em>are</em> allowed.
     * <li>the last instance of a repeated parameter is preferred.
     * <li>The default value sanitizer is an AllIllegal value sanitizer.
     * </ul>
     * @param url the encoded URL to parse.
     */
    public UrlQuerySanitizer(String url) {
        setAllowUnregisteredParamaters(true);
        parseUrl(url);
    }

    /**
     * Parse the query parameters out of an encoded URL.
     * Works by extracting the query portion from the URL and then
     * calling parseQuery(). The query portion is everything after the
     * first '?'. If there is no query portion it is
     * treated as if the query portion is an empty string.
     * @param url the encoded URL to parse.
     */
    public void parseUrl(String url) {
        int queryIndex = url.indexOf('?');
        String query;
        if (queryIndex >= 0) {
            query = url.substring(queryIndex + 1);
        }
        else {
            query = "";
        }
        parseQuery(query);
    }

    /**
     * Parse a query. A query string is any number of parameter-value clauses
     * separated by any non-zero number of ampersands. A parameter-value clause
     * is a parameter followed by an equal sign, followed by a value. If the
     * equal sign is missing, the value is assumed to be the empty string.
     * <p>
     * Any entries from a previously parsed query are discarded first.
     * @param query the query to parse.
     */
    public void parseQuery(String query) {
        clear();
        // Split by '&'
        StringTokenizer tokenizer = new StringTokenizer(query, "&");
        while (tokenizer.hasMoreTokens()) {
            String attributeValuePair = tokenizer.nextToken();
            if (attributeValuePair.length() > 0) {
                // Only the first '=' separates the parameter from the value.
                int assignmentIndex = attributeValuePair.indexOf('=');
                if (assignmentIndex < 0) {
                    // No assignment found, treat as if empty value
                    parseEntry(attributeValuePair, "");
                }
                else {
                    parseEntry(attributeValuePair.substring(0, assignmentIndex),
                            attributeValuePair.substring(assignmentIndex + 1));
                }
            }
        }
    }

    /**
     * Get a set of all of the parameters found in the sanitized query.
     * <p>
     * Note: Do not modify this set. Treat it as a read-only set.
     * @return all the parameters found in the current query.
     */
    public Set<String> getParameterSet() {
        return mEntries.keySet();
    }

    /**
     * An array list of all of the parameter value pairs in the sanitized
     * query, in the order they appeared in the query. May contain duplicate
     * parameters.
     * <p class="note"><b>Note:</b> Do not modify this list. Treat it as a read-only list.</p>
     * @return all the parameter value pairs found in the current query.
     */
    public List<ParameterValuePair> getParameterList() {
        return mEntriesList;
    }

    /**
     * Check if a parameter exists in the current sanitized query.
     * @param parameter the unencoded name of a parameter.
     * @return true if the parameter exists in the current sanitized query.
     */
    public boolean hasParameter(String parameter) {
        return mEntries.containsKey(parameter);
    }

    /**
     * Get the value for a parameter in the current sanitized query.
     * Returns null if the parameter does not
     * exist.
     * @param parameter the unencoded name of a parameter.
     * @return the sanitized unencoded value of the parameter,
     * or null if the parameter does not exist.
     */
    public String getValue(String parameter) {
        return mEntries.get(parameter);
    }

    /**
     * Register a value sanitizer for a particular parameter. Can also be used
     * to replace or remove an already-set value sanitizer.
     * <p>
     * Registering a non-null value sanitizer for a particular parameter
     * makes that parameter a registered parameter.
     * @param parameter an unencoded parameter name
     * @param valueSanitizer the value sanitizer to use for a particular
     * parameter. May be null in order to unregister that parameter.
     * @see #getAllowUnregisteredParamaters()
     */
    public void registerParameter(String parameter,
            ValueSanitizer valueSanitizer) {
        putOrRemoveSanitizer(parameter, valueSanitizer);
    }

    /**
     * Register a value sanitizer for an array of parameters.
     * @param parameters An array of unencoded parameter names.
     * @param valueSanitizer the value sanitizer to use for each of the
     * parameters. May be null in order to unregister those parameters.
     * @see #registerParameter
     */
    public void registerParameters(String[] parameters,
            ValueSanitizer valueSanitizer) {
        for (String parameter : parameters) {
            putOrRemoveSanitizer(parameter, valueSanitizer);
        }
    }

    /**
     * Store a sanitizer for a parameter, or drop the registration when the
     * sanitizer is null, so that the map never holds a null mapping.
     */
    private void putOrRemoveSanitizer(String parameter,
            ValueSanitizer valueSanitizer) {
        if (valueSanitizer == null) {
            mSanitizers.remove(parameter);
        } else {
            mSanitizers.put(parameter, valueSanitizer);
        }
    }

    /**
     * Set whether or not unregistered parameters are allowed. If they
     * are not allowed, then they will be dropped when a query is sanitized.
     * <p>
     * Defaults to false.
     * @param allowUnregisteredParamaters true to allow unregistered parameters.
     * @see #getAllowUnregisteredParamaters()
     */
    public void setAllowUnregisteredParamaters(
            boolean allowUnregisteredParamaters) {
        mAllowUnregisteredParamaters = allowUnregisteredParamaters;
    }

    /**
     * Get whether or not unregistered parameters are allowed. If not
     * allowed, they will be dropped when a query is parsed.
     * @return true if unregistered parameters are allowed.
     * @see #setAllowUnregisteredParamaters(boolean)
     */
    public boolean getAllowUnregisteredParamaters() {
        return mAllowUnregisteredParamaters;
    }

    /**
     * Set whether or not the first occurrence of a repeated parameter is
     * preferred. True means the first repeated parameter is preferred.
     * False means that the last repeated parameter is preferred.
     * <p>
     * The preferred parameter is the one that is returned when
     * {@link #getValue(String)} is called.
     * <p>
     * defaults to false.
     * @param preferFirstRepeatedParameter True if the first repeated
     * parameter is preferred.
     * @see #getPreferFirstRepeatedParameter()
     */
    public void setPreferFirstRepeatedParameter(
            boolean preferFirstRepeatedParameter) {
        mPreferFirstRepeatedParameter = preferFirstRepeatedParameter;
    }

    /**
     * Get whether or not the first occurrence of a repeated parameter is
     * preferred.
     * @return true if the first occurrence of a repeated parameter is
     * preferred.
     * @see #setPreferFirstRepeatedParameter(boolean)
     */
    public boolean getPreferFirstRepeatedParameter() {
        return mPreferFirstRepeatedParameter;
    }

    /**
     * Parse an escaped parameter-value pair. The default implementation
     * unescapes both the parameter and the value, then looks up the
     * effective value sanitizer for the parameter and uses it to sanitize
     * the value. If all goes well then addSanitizedEntry is called with
     * the unescaped parameter and the sanitized unescaped value.
     * <p>
     * The pair is silently dropped when there is no effective value
     * sanitizer for the parameter.
     * @param parameter an escaped parameter
     * @param value an unsanitized escaped value
     */
    protected void parseEntry(String parameter, String value) {
        String unescapedParameter = unescape(parameter);
        ValueSanitizer valueSanitizer =
            getEffectiveValueSanitizer(unescapedParameter);

        if (valueSanitizer == null) {
            return;
        }
        String unescapedValue = unescape(value);
        String sanitizedValue = valueSanitizer.sanitize(unescapedValue);
        addSanitizedEntry(unescapedParameter, sanitizedValue);
    }

    /**
     * Record a sanitized parameter-value pair. Override if you want to
     * do additional filtering or validation.
     * <p>
     * The pair is always appended to the parameter list. The per-parameter
     * value is only overwritten when the last repeated parameter is
     * preferred or the parameter has not been seen yet.
     * @param parameter an unescaped parameter
     * @param value a sanitized unescaped value
     */
    protected void addSanitizedEntry(String parameter, String value) {
        mEntriesList.add(
                new ParameterValuePair(parameter, value));
        if (mPreferFirstRepeatedParameter) {
            if (mEntries.containsKey(parameter)) {
                return;
            }
        }
        mEntries.put(parameter, value);
    }

    /**
     * Get the value sanitizer for a parameter. Returns null if there
     * is no value sanitizer registered for the parameter.
     * @param parameter the unescaped parameter
     * @return the currently registered value sanitizer for this parameter.
     * @see #registerParameter(String, ValueSanitizer)
     */
    public ValueSanitizer getValueSanitizer(String parameter) {
        return mSanitizers.get(parameter);
    }

    /**
     * Get the effective value sanitizer for a parameter. Like getValueSanitizer,
     * except if there is no value sanitizer registered for a parameter, and
     * unregistered parameters are allowed, then the default value sanitizer is
     * returned.
     * @param parameter an unescaped parameter
     * @return the effective value sanitizer for a parameter, or null if the
     * parameter is unregistered and unregistered parameters are not allowed.
     */
    public ValueSanitizer getEffectiveValueSanitizer(String parameter) {
        ValueSanitizer sanitizer = getValueSanitizer(parameter);
        if (sanitizer == null && mAllowUnregisteredParamaters) {
            sanitizer = getUnregisteredParameterValueSanitizer();
        }
        return sanitizer;
    }

    /**
     * Unescape an escaped string.
     * <ul>
     * <li>'+' characters are replaced by
     * ' ' characters.
     * <li>Valid "%xx" escape sequences are replaced by the
     * corresponding unescaped character.
     * <li>Invalid escape sequences such as "%1z", are passed through unchanged.
     * </ul>
     * @param string the escaped string
     * @return the unescaped string.
     */
    public String unescape(String string) {
        // Find the earliest escape of either kind. Everything before it can
        // be copied verbatim, so it must not contain a '+' or a '%'.
        int firstPercent = string.indexOf('%');
        int firstPlus = string.indexOf('+');
        int firstEscape;
        if (firstPercent < 0) {
            firstEscape = firstPlus;
        } else if (firstPlus < 0) {
            firstEscape = firstPercent;
        } else {
            firstEscape = Math.min(firstPercent, firstPlus);
        }

        // Early exit if no escaped characters.
        if (firstEscape < 0) {
            return string;
        }

        int length = string.length();

        StringBuilder stringBuilder = new StringBuilder(length);
        stringBuilder.append(string, 0, firstEscape);
        for (int i = firstEscape; i < length; i++) {
            char c = string.charAt(i);
            if (c == '+') {
                c = ' ';
            }
            else if (c == '%' && i + 2 < length) {
                // i + 2 is the index of the second hex digit, so it has to
                // be inside the string.
                char c1 = string.charAt(i + 1);
                char c2 = string.charAt(i + 2);
                if (isHexDigit(c1) && isHexDigit(c2)) {
                    c = (char) (decodeHexDigit(c1) * 16 + decodeHexDigit(c2));
                    // Skip the two digits that were just consumed.
                    i += 2;
                }
            }
            stringBuilder.append(c);
        }
        return stringBuilder.toString();
    }

    /**
     * Test if a character is a hexadecimal digit. Both upper case and lower
     * case hex digits are allowed.
     * @param c the character to test
     * @return true if c is a hex digit.
     */
    protected boolean isHexDigit(char c) {
        return decodeHexDigit(c) >= 0;
    }

    /**
     * Convert a character that represents a hexadecimal digit into an integer.
     * If the character is not a hexadecimal digit, then -1 is returned.
     * Both upper case and lower case hex digits are allowed.
     * @param c the hexadecimal digit.
     * @return the integer value of the hexadecimal digit.
     */
    protected int decodeHexDigit(char c) {
        if (c >= '0' && c <= '9') {
            return c - '0';
        }
        else if (c >= 'A' && c <= 'F') {
            return c - 'A' + 10;
        }
        else if (c >= 'a' && c <= 'f') {
            return c - 'a' + 10;
        }
        else {
            return -1;
        }
    }

    /**
     * Clear the existing entries. Called to get ready to parse a new
     * query string.
     */
    protected void clear() {
        mEntries.clear();
        mEntriesList.clear();
    }
}
914
915