1/*
2 * Copyright 2001-2004 The Apache Software Foundation.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package org.apache.commons.codec.net;
18
19import java.io.ByteArrayOutputStream;
20import java.io.UnsupportedEncodingException;
21import java.util.BitSet;
22
23import org.apache.commons.codec.BinaryDecoder;
24import org.apache.commons.codec.BinaryEncoder;
25import org.apache.commons.codec.DecoderException;
26import org.apache.commons.codec.EncoderException;
27import org.apache.commons.codec.StringDecoder;
28import org.apache.commons.codec.StringEncoder;
29
30/**
31 * <p>Implements the 'www-form-urlencoded' encoding scheme,
32 * also misleadingly known as URL encoding.</p>
33 *
34 * <p>For more detailed information please refer to
35 * <a href="http://www.w3.org/TR/html4/interact/forms.html#h-17.13.4.1">
36 * Chapter 17.13.4 'Form content types'</a> of the
37 * <a href="http://www.w3.org/TR/html4/">HTML 4.01 Specification<a></p>
38 *
39 * <p>
40 * This codec is meant to be a replacement for standard Java classes
41 * {@link java.net.URLEncoder} and {@link java.net.URLDecoder}
42 * on older Java platforms, as these classes in Java versions below
43 * 1.4 rely on the platform's default charset encoding.
44 * </p>
45 *
46 * @author Apache Software Foundation
47 * @since 1.2
48 * @version $Id: URLCodec.java,v 1.19 2004/03/29 07:59:00 ggregory Exp $
49 *
50 * @deprecated Please use {@link java.net.URL#openConnection} instead.
51 *     Please visit <a href="http://android-developers.blogspot.com/2011/09/androids-http-clients.html">this webpage</a>
52 *     for further details.
53 */
54@Deprecated
55public class URLCodec implements BinaryEncoder, BinaryDecoder, StringEncoder, StringDecoder {
56
57    /**
58     * The default charset used for string decoding and encoding.
59     */
60    protected String charset = StringEncodings.UTF8;
61
62    protected static byte ESCAPE_CHAR = '%';
63    /**
64     * BitSet of www-form-url safe characters.
65     */
66    protected static final BitSet WWW_FORM_URL = new BitSet(256);
67
68    // Static initializer for www_form_url
69    static {
70        // alpha characters
71        for (int i = 'a'; i <= 'z'; i++) {
72            WWW_FORM_URL.set(i);
73        }
74        for (int i = 'A'; i <= 'Z'; i++) {
75            WWW_FORM_URL.set(i);
76        }
77        // numeric characters
78        for (int i = '0'; i <= '9'; i++) {
79            WWW_FORM_URL.set(i);
80        }
81        // special chars
82        WWW_FORM_URL.set('-');
83        WWW_FORM_URL.set('_');
84        WWW_FORM_URL.set('.');
85        WWW_FORM_URL.set('*');
86        // blank to be replaced with +
87        WWW_FORM_URL.set(' ');
88    }
89
90
91    /**
92     * Default constructor.
93     */
94    public URLCodec() {
95        super();
96    }
97
98    /**
99     * Constructor which allows for the selection of a default charset
100     *
101     * @param charset the default string charset to use.
102     */
103    public URLCodec(String charset) {
104        super();
105        this.charset = charset;
106    }
107
108    /**
109     * Encodes an array of bytes into an array of URL safe 7-bit
110     * characters. Unsafe characters are escaped.
111     *
112     * @param urlsafe bitset of characters deemed URL safe
113     * @param bytes array of bytes to convert to URL safe characters
114     * @return array of bytes containing URL safe characters
115     */
116    public static final byte[] encodeUrl(BitSet urlsafe, byte[] bytes)
117    {
118        if (bytes == null) {
119            return null;
120        }
121        if (urlsafe == null) {
122            urlsafe = WWW_FORM_URL;
123        }
124
125        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
126        for (int i = 0; i < bytes.length; i++) {
127            int b = bytes[i];
128            if (b < 0) {
129                b = 256 + b;
130            }
131            if (urlsafe.get(b)) {
132                if (b == ' ') {
133                    b = '+';
134                }
135                buffer.write(b);
136            } else {
137                buffer.write('%');
138                char hex1 = Character.toUpperCase(
139                  Character.forDigit((b >> 4) & 0xF, 16));
140                char hex2 = Character.toUpperCase(
141                  Character.forDigit(b & 0xF, 16));
142                buffer.write(hex1);
143                buffer.write(hex2);
144            }
145        }
146        return buffer.toByteArray();
147    }
148
149
150    /**
151     * Decodes an array of URL safe 7-bit characters into an array of
152     * original bytes. Escaped characters are converted back to their
153     * original representation.
154     *
155     * @param bytes array of URL safe characters
156     * @return array of original bytes
157     * @throws DecoderException Thrown if URL decoding is unsuccessful
158     */
159    public static final byte[] decodeUrl(byte[] bytes)
160         throws DecoderException
161    {
162        if (bytes == null) {
163            return null;
164        }
165        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
166        for (int i = 0; i < bytes.length; i++) {
167            int b = bytes[i];
168            if (b == '+') {
169                buffer.write(' ');
170            } else if (b == '%') {
171                try {
172                    int u = Character.digit((char)bytes[++i], 16);
173                    int l = Character.digit((char)bytes[++i], 16);
174                    if (u == -1 || l == -1) {
175                        throw new DecoderException("Invalid URL encoding");
176                    }
177                    buffer.write((char)((u << 4) + l));
178                } catch(ArrayIndexOutOfBoundsException e) {
179                    throw new DecoderException("Invalid URL encoding");
180                }
181            } else {
182                buffer.write(b);
183            }
184        }
185        return buffer.toByteArray();
186    }
187
188
189    /**
190     * Encodes an array of bytes into an array of URL safe 7-bit
191     * characters. Unsafe characters are escaped.
192     *
193     * @param bytes array of bytes to convert to URL safe characters
194     * @return array of bytes containing URL safe characters
195     */
196    public byte[] encode(byte[] bytes) {
197        return encodeUrl(WWW_FORM_URL, bytes);
198    }
199
200
201    /**
202     * Decodes an array of URL safe 7-bit characters into an array of
203     * original bytes. Escaped characters are converted back to their
204     * original representation.
205     *
206     * @param bytes array of URL safe characters
207     * @return array of original bytes
208     * @throws DecoderException Thrown if URL decoding is unsuccessful
209     */
210    public byte[] decode(byte[] bytes) throws DecoderException {
211        return decodeUrl(bytes);
212    }
213
214
215    /**
216     * Encodes a string into its URL safe form using the specified
217     * string charset. Unsafe characters are escaped.
218     *
219     * @param pString string to convert to a URL safe form
220     * @param charset the charset for pString
221     * @return URL safe string
222     * @throws UnsupportedEncodingException Thrown if charset is not
223     *                                      supported
224     */
225    public String encode(String pString, String charset)
226        throws UnsupportedEncodingException
227    {
228        if (pString == null) {
229            return null;
230        }
231        return new String(encode(pString.getBytes(charset)), StringEncodings.US_ASCII);
232    }
233
234
235    /**
236     * Encodes a string into its URL safe form using the default string
237     * charset. Unsafe characters are escaped.
238     *
239     * @param pString string to convert to a URL safe form
240     * @return URL safe string
241     * @throws EncoderException Thrown if URL encoding is unsuccessful
242     *
243     * @see #getDefaultCharset()
244     */
245    public String encode(String pString) throws EncoderException {
246        if (pString == null) {
247            return null;
248        }
249        try {
250            return encode(pString, getDefaultCharset());
251        } catch(UnsupportedEncodingException e) {
252            throw new EncoderException(e.getMessage());
253        }
254    }
255
256
257    /**
258     * Decodes a URL safe string into its original form using the
259     * specified encoding. Escaped characters are converted back
260     * to their original representation.
261     *
262     * @param pString URL safe string to convert into its original form
263     * @param charset the original string charset
264     * @return original string
265     * @throws DecoderException Thrown if URL decoding is unsuccessful
266     * @throws UnsupportedEncodingException Thrown if charset is not
267     *                                      supported
268     */
269    public String decode(String pString, String charset)
270        throws DecoderException, UnsupportedEncodingException
271    {
272        if (pString == null) {
273            return null;
274        }
275        return new String(decode(pString.getBytes(StringEncodings.US_ASCII)), charset);
276    }
277
278
279    /**
280     * Decodes a URL safe string into its original form using the default
281     * string charset. Escaped characters are converted back to their
282     * original representation.
283     *
284     * @param pString URL safe string to convert into its original form
285     * @return original string
286     * @throws DecoderException Thrown if URL decoding is unsuccessful
287     *
288     * @see #getDefaultCharset()
289     */
290    public String decode(String pString) throws DecoderException {
291        if (pString == null) {
292            return null;
293        }
294        try {
295            return decode(pString, getDefaultCharset());
296        } catch(UnsupportedEncodingException e) {
297            throw new DecoderException(e.getMessage());
298        }
299    }
300
301    /**
302     * Encodes an object into its URL safe form. Unsafe characters are
303     * escaped.
304     *
305     * @param pObject string to convert to a URL safe form
306     * @return URL safe object
307     * @throws EncoderException Thrown if URL encoding is not
308     *                          applicable to objects of this type or
309     *                          if encoding is unsuccessful
310     */
311    public Object encode(Object pObject) throws EncoderException {
312        if (pObject == null) {
313            return null;
314        } else if (pObject instanceof byte[]) {
315            return encode((byte[])pObject);
316        } else if (pObject instanceof String) {
317            return encode((String)pObject);
318        } else {
319            throw new EncoderException("Objects of type " +
320                pObject.getClass().getName() + " cannot be URL encoded");
321
322        }
323    }
324
325    /**
326     * Decodes a URL safe object into its original form. Escaped
327     * characters are converted back to their original representation.
328     *
329     * @param pObject URL safe object to convert into its original form
330     * @return original object
331     * @throws DecoderException Thrown if URL decoding is not
332     *                          applicable to objects of this type
333     *                          if decoding is unsuccessful
334     */
335    public Object decode(Object pObject) throws DecoderException {
336        if (pObject == null) {
337            return null;
338        } else if (pObject instanceof byte[]) {
339            return decode((byte[])pObject);
340        } else if (pObject instanceof String) {
341            return decode((String)pObject);
342        } else {
343            throw new DecoderException("Objects of type " +
344                pObject.getClass().getName() + " cannot be URL decoded");
345
346        }
347    }
348
349    /**
350     * The <code>String</code> encoding used for decoding and encoding.
351     *
352     * @return Returns the encoding.
353     *
354     * @deprecated use #getDefaultCharset()
355     */
356    public String getEncoding() {
357        return this.charset;
358    }
359
360    /**
361     * The default charset used for string decoding and encoding.
362     *
363     * @return the default string charset.
364     */
365    public String getDefaultCharset() {
366        return this.charset;
367    }
368
369}
370