1/*
2 * Copyright (C) 2014 The Android Open Source Project
3 * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved.
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This code is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 only, as
8 * published by the Free Software Foundation.  Oracle designates this
9 * particular file as subject to the "Classpath" exception as provided
10 * by Oracle in the LICENSE file that accompanied this code.
11 *
12 * This code is distributed in the hope that it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 * version 2 for more details (a copy is included in the LICENSE file that
16 * accompanied this code).
17 *
18 * You should have received a copy of the GNU General Public License version
19 * 2 along with this work; if not, write to the Free Software Foundation,
20 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
21 *
22 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
23 * or visit www.oracle.com if you need additional information or have any
24 * questions.
25 */
26
27package java.net;
28
29import java.io.*;
30
31/**
32 * Utility class for HTML form decoding. This class contains static methods
33 * for decoding a String from the <CODE>application/x-www-form-urlencoded</CODE>
34 * MIME format.
35 * <p>
36 * The conversion process is the reverse of that used by the URLEncoder class. It is assumed
37 * that all characters in the encoded string are one of the following:
38 * &quot;{@code a}&quot; through &quot;{@code z}&quot;,
39 * &quot;{@code A}&quot; through &quot;{@code Z}&quot;,
40 * &quot;{@code 0}&quot; through &quot;{@code 9}&quot;, and
41 * &quot;{@code -}&quot;, &quot;{@code _}&quot;,
42 * &quot;{@code .}&quot;, and &quot;{@code *}&quot;. The
43 * character &quot;{@code %}&quot; is allowed but is interpreted
44 * as the start of a special escaped sequence.
45 * <p>
46 * The following rules are applied in the conversion:
47 *
48 * <ul>
49 * <li>The alphanumeric characters &quot;{@code a}&quot; through
50 *     &quot;{@code z}&quot;, &quot;{@code A}&quot; through
51 *     &quot;{@code Z}&quot; and &quot;{@code 0}&quot;
52 *     through &quot;{@code 9}&quot; remain the same.
53 * <li>The special characters &quot;{@code .}&quot;,
54 *     &quot;{@code -}&quot;, &quot;{@code *}&quot;, and
55 *     &quot;{@code _}&quot; remain the same.
56 * <li>The plus sign &quot;{@code +}&quot; is converted into a
57 *     space character &quot; &nbsp; &quot; .
58 * <li>A sequence of the form "<i>{@code %xy}</i>" will be
59 *     treated as representing a byte where <i>xy</i> is the two-digit
60 *     hexadecimal representation of the 8 bits. Then, all substrings
61 *     that contain one or more of these byte sequences consecutively
62 *     will be replaced by the character(s) whose encoding would result
63 *     in those consecutive bytes.
64 *     The encoding scheme used to decode these characters may be specified,
65 *     or if unspecified, the default encoding of the platform will be used.
66 * </ul>
67 * <p>
68 * There are two possible ways in which this decoder could deal with
69 * illegal strings.  It could either leave illegal characters alone or
70 * it could throw an {@link java.lang.IllegalArgumentException}.
71 * Which approach the decoder takes is left to the
72 * implementation.
73 *
74 * @author  Mark Chamness
75 * @author  Michael McCloskey
76 * @since   1.2
77 */
78
79public class URLDecoder {
80
81    // The platform default encoding
82    static String dfltEncName = URLEncoder.dfltEncName;
83
84    /**
85     * Decodes a {@code x-www-form-urlencoded} string.
86     * The platform's default encoding is used to determine what characters
87     * are represented by any consecutive sequences of the form
88     * "<i>{@code %xy}</i>".
89     * @param s the {@code String} to decode
90     * @deprecated The resulting string may vary depending on the platform's
91     *          default encoding. Instead, use the decode(String,String) method
92     *          to specify the encoding.
93     * @return the newly decoded {@code String}
94     */
95    @Deprecated
96    public static String decode(String s) {
97
98        String str = null;
99
100        try {
101            str = decode(s, dfltEncName);
102        } catch (UnsupportedEncodingException e) {
103            // The system should always have the platform default
104        }
105
106        return str;
107    }
108
109    /**
110     * Decodes a {@code application/x-www-form-urlencoded} string using a specific
111     * encoding scheme.
112     * The supplied encoding is used to determine
113     * what characters are represented by any consecutive sequences of the
114     * form "<i>{@code %xy}</i>".
115     * <p>
116     * <em><strong>Note:</strong> The <a href=
117     * "http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars">
118     * World Wide Web Consortium Recommendation</a> states that
119     * UTF-8 should be used. Not doing so may introduce
120     * incompatibilities.</em>
121     *
122     * @param s the {@code String} to decode
123     * @param enc   The name of a supported
124     *    <a href="../lang/package-summary.html#charenc">character
125     *    encoding</a>.
126     * @return the newly decoded {@code String}
127     * @exception  UnsupportedEncodingException
128     *             If character encoding needs to be consulted, but
129     *             named character encoding is not supported
130     * @see URLEncoder#encode(java.lang.String, java.lang.String)
131     * @since 1.4
132     */
133    public static String decode(String s, String enc)
134        throws UnsupportedEncodingException{
135
136        boolean needToChange = false;
137        int numChars = s.length();
138        StringBuffer sb = new StringBuffer(numChars > 500 ? numChars / 2 : numChars);
139        int i = 0;
140
141        if (enc.length() == 0) {
142            throw new UnsupportedEncodingException ("URLDecoder: empty string enc parameter");
143        }
144
145        char c;
146        byte[] bytes = null;
147        while (i < numChars) {
148            c = s.charAt(i);
149            switch (c) {
150            case '+':
151                sb.append(' ');
152                i++;
153                needToChange = true;
154                break;
155            case '%':
156                /*
157                 * Starting with this instance of %, process all
158                 * consecutive substrings of the form %xy. Each
159                 * substring %xy will yield a byte. Convert all
160                 * consecutive  bytes obtained this way to whatever
161                 * character(s) they represent in the provided
162                 * encoding.
163                 */
164
165                try {
166
167                    // (numChars-i)/3 is an upper bound for the number
168                    // of remaining bytes
169                    if (bytes == null)
170                        bytes = new byte[(numChars-i)/3];
171                    int pos = 0;
172
173                    while ( ((i+2) < numChars) &&
174                            (c=='%')) {
175                        // BEGIN Android-changed
176                        if (!isValidHexChar(s.charAt(i+1)) || !isValidHexChar(s.charAt(i+2))) {
177                            throw new IllegalArgumentException("URLDecoder: Illegal hex characters in escape (%) pattern : "
178                                    + s.substring(i, i + 3));
179                        }
180                        // END Android-changed
181                        int v = Integer.parseInt(s.substring(i+1,i+3),16);
182                        if (v < 0)
183                            throw new IllegalArgumentException("URLDecoder: Illegal hex characters in escape (%) pattern - negative value : "
184                                    + s.substring(i, i + 3));
185                        bytes[pos++] = (byte) v;
186                        i+= 3;
187                        if (i < numChars)
188                            c = s.charAt(i);
189                    }
190
191                    // A trailing, incomplete byte encoding such as
192                    // "%x" will cause an exception to be thrown
193
194                    if ((i < numChars) && (c=='%'))
195                        throw new IllegalArgumentException(
196                         "URLDecoder: Incomplete trailing escape (%) pattern");
197
198                    sb.append(new String(bytes, 0, pos, enc));
199                } catch (NumberFormatException e) {
200                    throw new IllegalArgumentException(
201                    "URLDecoder: Illegal hex characters in escape (%) pattern - "
202                    + e.getMessage());
203                }
204                needToChange = true;
205                break;
206            default:
207                sb.append(c);
208                i++;
209                break;
210            }
211        }
212
213        return (needToChange? sb.toString() : s);
214    }
215
216    // BEGIN Android-changed
217    private static boolean isValidHexChar(char c) {
218        return ('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F');
219    }
220    // END Android-changed
221}
222