1/*
2 * Copyright 2001-2004 The Apache Software Foundation.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package org.apache.commons.codec.net;
18
19import java.io.ByteArrayOutputStream;
20import java.io.UnsupportedEncodingException;
21import java.util.BitSet;
22
23import org.apache.commons.codec.BinaryDecoder;
24import org.apache.commons.codec.BinaryEncoder;
25import org.apache.commons.codec.DecoderException;
26import org.apache.commons.codec.EncoderException;
27import org.apache.commons.codec.StringDecoder;
28import org.apache.commons.codec.StringEncoder;
29
30/**
31 * <p>Implements the 'www-form-urlencoded' encoding scheme,
32 * also misleadingly known as URL encoding.</p>
33 *
34 * <p>For more detailed information please refer to
35 * <a href="http://www.w3.org/TR/html4/interact/forms.html#h-17.13.4.1">
36 * Chapter 17.13.4 'Form content types'</a> of the
37 * <a href="http://www.w3.org/TR/html4/">HTML 4.01 Specification<a></p>
38 *
39 * <p>
40 * This codec is meant to be a replacement for standard Java classes
41 * {@link java.net.URLEncoder} and {@link java.net.URLDecoder}
42 * on older Java platforms, as these classes in Java versions below
43 * 1.4 rely on the platform's default charset encoding.
44 * </p>
45 *
46 * @author Apache Software Foundation
47 * @since 1.2
48 * @version $Id: URLCodec.java,v 1.19 2004/03/29 07:59:00 ggregory Exp $
49 */
50public class URLCodec implements BinaryEncoder, BinaryDecoder, StringEncoder, StringDecoder {
51
52    /**
53     * The default charset used for string decoding and encoding.
54     */
55    protected String charset = StringEncodings.UTF8;
56
57    protected static byte ESCAPE_CHAR = '%';
58    /**
59     * BitSet of www-form-url safe characters.
60     */
61    protected static final BitSet WWW_FORM_URL = new BitSet(256);
62
63    // Static initializer for www_form_url
64    static {
65        // alpha characters
66        for (int i = 'a'; i <= 'z'; i++) {
67            WWW_FORM_URL.set(i);
68        }
69        for (int i = 'A'; i <= 'Z'; i++) {
70            WWW_FORM_URL.set(i);
71        }
72        // numeric characters
73        for (int i = '0'; i <= '9'; i++) {
74            WWW_FORM_URL.set(i);
75        }
76        // special chars
77        WWW_FORM_URL.set('-');
78        WWW_FORM_URL.set('_');
79        WWW_FORM_URL.set('.');
80        WWW_FORM_URL.set('*');
81        // blank to be replaced with +
82        WWW_FORM_URL.set(' ');
83    }
84
85
86    /**
87     * Default constructor.
88     */
89    public URLCodec() {
90        super();
91    }
92
93    /**
94     * Constructor which allows for the selection of a default charset
95     *
96     * @param charset the default string charset to use.
97     */
98    public URLCodec(String charset) {
99        super();
100        this.charset = charset;
101    }
102
103    /**
104     * Encodes an array of bytes into an array of URL safe 7-bit
105     * characters. Unsafe characters are escaped.
106     *
107     * @param urlsafe bitset of characters deemed URL safe
108     * @param bytes array of bytes to convert to URL safe characters
109     * @return array of bytes containing URL safe characters
110     */
111    public static final byte[] encodeUrl(BitSet urlsafe, byte[] bytes)
112    {
113        if (bytes == null) {
114            return null;
115        }
116        if (urlsafe == null) {
117            urlsafe = WWW_FORM_URL;
118        }
119
120        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
121        for (int i = 0; i < bytes.length; i++) {
122            int b = bytes[i];
123            if (b < 0) {
124                b = 256 + b;
125            }
126            if (urlsafe.get(b)) {
127                if (b == ' ') {
128                    b = '+';
129                }
130                buffer.write(b);
131            } else {
132                buffer.write('%');
133                char hex1 = Character.toUpperCase(
134                  Character.forDigit((b >> 4) & 0xF, 16));
135                char hex2 = Character.toUpperCase(
136                  Character.forDigit(b & 0xF, 16));
137                buffer.write(hex1);
138                buffer.write(hex2);
139            }
140        }
141        return buffer.toByteArray();
142    }
143
144
145    /**
146     * Decodes an array of URL safe 7-bit characters into an array of
147     * original bytes. Escaped characters are converted back to their
148     * original representation.
149     *
150     * @param bytes array of URL safe characters
151     * @return array of original bytes
152     * @throws DecoderException Thrown if URL decoding is unsuccessful
153     */
154    public static final byte[] decodeUrl(byte[] bytes)
155         throws DecoderException
156    {
157        if (bytes == null) {
158            return null;
159        }
160        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
161        for (int i = 0; i < bytes.length; i++) {
162            int b = bytes[i];
163            if (b == '+') {
164                buffer.write(' ');
165            } else if (b == '%') {
166                try {
167                    int u = Character.digit((char)bytes[++i], 16);
168                    int l = Character.digit((char)bytes[++i], 16);
169                    if (u == -1 || l == -1) {
170                        throw new DecoderException("Invalid URL encoding");
171                    }
172                    buffer.write((char)((u << 4) + l));
173                } catch(ArrayIndexOutOfBoundsException e) {
174                    throw new DecoderException("Invalid URL encoding");
175                }
176            } else {
177                buffer.write(b);
178            }
179        }
180        return buffer.toByteArray();
181    }
182
183
184    /**
185     * Encodes an array of bytes into an array of URL safe 7-bit
186     * characters. Unsafe characters are escaped.
187     *
188     * @param bytes array of bytes to convert to URL safe characters
189     * @return array of bytes containing URL safe characters
190     */
191    public byte[] encode(byte[] bytes) {
192        return encodeUrl(WWW_FORM_URL, bytes);
193    }
194
195
196    /**
197     * Decodes an array of URL safe 7-bit characters into an array of
198     * original bytes. Escaped characters are converted back to their
199     * original representation.
200     *
201     * @param bytes array of URL safe characters
202     * @return array of original bytes
203     * @throws DecoderException Thrown if URL decoding is unsuccessful
204     */
205    public byte[] decode(byte[] bytes) throws DecoderException {
206        return decodeUrl(bytes);
207    }
208
209
210    /**
211     * Encodes a string into its URL safe form using the specified
212     * string charset. Unsafe characters are escaped.
213     *
214     * @param pString string to convert to a URL safe form
215     * @param charset the charset for pString
216     * @return URL safe string
217     * @throws UnsupportedEncodingException Thrown if charset is not
218     *                                      supported
219     */
220    public String encode(String pString, String charset)
221        throws UnsupportedEncodingException
222    {
223        if (pString == null) {
224            return null;
225        }
226        return new String(encode(pString.getBytes(charset)), StringEncodings.US_ASCII);
227    }
228
229
230    /**
231     * Encodes a string into its URL safe form using the default string
232     * charset. Unsafe characters are escaped.
233     *
234     * @param pString string to convert to a URL safe form
235     * @return URL safe string
236     * @throws EncoderException Thrown if URL encoding is unsuccessful
237     *
238     * @see #getDefaultCharset()
239     */
240    public String encode(String pString) throws EncoderException {
241        if (pString == null) {
242            return null;
243        }
244        try {
245            return encode(pString, getDefaultCharset());
246        } catch(UnsupportedEncodingException e) {
247            throw new EncoderException(e.getMessage());
248        }
249    }
250
251
252    /**
253     * Decodes a URL safe string into its original form using the
254     * specified encoding. Escaped characters are converted back
255     * to their original representation.
256     *
257     * @param pString URL safe string to convert into its original form
258     * @param charset the original string charset
259     * @return original string
260     * @throws DecoderException Thrown if URL decoding is unsuccessful
261     * @throws UnsupportedEncodingException Thrown if charset is not
262     *                                      supported
263     */
264    public String decode(String pString, String charset)
265        throws DecoderException, UnsupportedEncodingException
266    {
267        if (pString == null) {
268            return null;
269        }
270        return new String(decode(pString.getBytes(StringEncodings.US_ASCII)), charset);
271    }
272
273
274    /**
275     * Decodes a URL safe string into its original form using the default
276     * string charset. Escaped characters are converted back to their
277     * original representation.
278     *
279     * @param pString URL safe string to convert into its original form
280     * @return original string
281     * @throws DecoderException Thrown if URL decoding is unsuccessful
282     *
283     * @see #getDefaultCharset()
284     */
285    public String decode(String pString) throws DecoderException {
286        if (pString == null) {
287            return null;
288        }
289        try {
290            return decode(pString, getDefaultCharset());
291        } catch(UnsupportedEncodingException e) {
292            throw new DecoderException(e.getMessage());
293        }
294    }
295
296    /**
297     * Encodes an object into its URL safe form. Unsafe characters are
298     * escaped.
299     *
300     * @param pObject string to convert to a URL safe form
301     * @return URL safe object
302     * @throws EncoderException Thrown if URL encoding is not
303     *                          applicable to objects of this type or
304     *                          if encoding is unsuccessful
305     */
306    public Object encode(Object pObject) throws EncoderException {
307        if (pObject == null) {
308            return null;
309        } else if (pObject instanceof byte[]) {
310            return encode((byte[])pObject);
311        } else if (pObject instanceof String) {
312            return encode((String)pObject);
313        } else {
314            throw new EncoderException("Objects of type " +
315                pObject.getClass().getName() + " cannot be URL encoded");
316
317        }
318    }
319
320    /**
321     * Decodes a URL safe object into its original form. Escaped
322     * characters are converted back to their original representation.
323     *
324     * @param pObject URL safe object to convert into its original form
325     * @return original object
326     * @throws DecoderException Thrown if URL decoding is not
327     *                          applicable to objects of this type
328     *                          if decoding is unsuccessful
329     */
330    public Object decode(Object pObject) throws DecoderException {
331        if (pObject == null) {
332            return null;
333        } else if (pObject instanceof byte[]) {
334            return decode((byte[])pObject);
335        } else if (pObject instanceof String) {
336            return decode((String)pObject);
337        } else {
338            throw new DecoderException("Objects of type " +
339                pObject.getClass().getName() + " cannot be URL decoded");
340
341        }
342    }
343
344    /**
345     * The <code>String</code> encoding used for decoding and encoding.
346     *
347     * @return Returns the encoding.
348     *
349     * @deprecated use #getDefaultCharset()
350     */
351    public String getEncoding() {
352        return this.charset;
353    }
354
355    /**
356     * The default charset used for string decoding and encoding.
357     *
358     * @return the default string charset.
359     */
360    public String getDefaultCharset() {
361        return this.charset;
362    }
363
364}
365