QuotedPrintableCodec.java revision 417f3b92ba4549b2f22340e3107d869d2b9c5bb8
1/*
2 * Copyright 2001-2004 The Apache Software Foundation.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package org.apache.commons.codec.net;
18
19import java.io.ByteArrayOutputStream;
20import java.io.UnsupportedEncodingException;
21import java.util.BitSet;
22import org.apache.commons.codec.BinaryDecoder;
23import org.apache.commons.codec.BinaryEncoder;
24import org.apache.commons.codec.DecoderException;
25import org.apache.commons.codec.EncoderException;
26import org.apache.commons.codec.StringDecoder;
27import org.apache.commons.codec.StringEncoder;
28
29/**
30 * <p>
31 * Codec for the Quoted-Printable section of <a href="http://www.ietf.org/rfc/rfc1521.txt">RFC 1521 </a>.
32 * </p>
33 * <p>
34 * The Quoted-Printable encoding is intended to represent data that largely consists of octets that correspond to
35 * printable characters in the ASCII character set. It encodes the data in such a way that the resulting octets are
36 * unlikely to be modified by mail transport. If the data being encoded are mostly ASCII text, the encoded form of the
37 * data remains largely recognizable by humans. A body which is entirely ASCII may also be encoded in Quoted-Printable
38 * to ensure the integrity of the data should the message pass through a character- translating, and/or line-wrapping
39 * gateway.
40 * </p>
41 *
42 * <p>
43 * Note:
44 * </p>
45 * <p>
46 * Rules #3, #4, and #5 of the quoted-printable spec are not implemented yet because the complete quoted-printable spec
47 * does not lend itself well to the byte[] oriented codec framework. Complete the codec once the streamable codec
48 * framework is ready. The motivation behind providing the codec in a partial form is that it can already come in handy
49 * for those applications that do not require quoted-printable line formatting (rules #3, #4, #5), for instance Q codec.
50 * </p>
51 *
52 * @see <a href="http://www.ietf.org/rfc/rfc1521.txt"> RFC 1521 MIME (Multipurpose Internet Mail Extensions) Part One:
53 *          Mechanisms for Specifying and Describing the Format of Internet Message Bodies </a>
54 *
55 * @author Apache Software Foundation
56 * @since 1.3
57 * @version $Id: QuotedPrintableCodec.java,v 1.7 2004/04/09 22:21:07 ggregory Exp $
58 */
59public class QuotedPrintableCodec implements BinaryEncoder, BinaryDecoder, StringEncoder, StringDecoder {
60    /**
61     * The default charset used for string decoding and encoding.
62     */
63    private String charset = StringEncodings.UTF8;
64
65    /**
66     * BitSet of printable characters as defined in RFC 1521.
67     */
68    private static final BitSet PRINTABLE_CHARS = new BitSet(256);
69
70    private static byte ESCAPE_CHAR = '=';
71
72    private static byte TAB = 9;
73
74    private static byte SPACE = 32;
75    // Static initializer for printable chars collection
76    static {
77        // alpha characters
78        for (int i = 33; i <= 60; i++) {
79            PRINTABLE_CHARS.set(i);
80        }
81        for (int i = 62; i <= 126; i++) {
82            PRINTABLE_CHARS.set(i);
83        }
84        PRINTABLE_CHARS.set(TAB);
85        PRINTABLE_CHARS.set(SPACE);
86    }
87
88    /**
89     * Default constructor.
90     */
91    public QuotedPrintableCodec() {
92        super();
93    }
94
95    /**
96     * Constructor which allows for the selection of a default charset
97     *
98     * @param charset
99     *                  the default string charset to use.
100     */
101    public QuotedPrintableCodec(String charset) {
102        super();
103        this.charset = charset;
104    }
105
106    /**
107     * Encodes byte into its quoted-printable representation.
108     *
109     * @param b
110     *                  byte to encode
111     * @param buffer
112     *                  the buffer to write to
113     */
114    private static final void encodeQuotedPrintable(int b, ByteArrayOutputStream buffer) {
115        buffer.write(ESCAPE_CHAR);
116        char hex1 = Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, 16));
117        char hex2 = Character.toUpperCase(Character.forDigit(b & 0xF, 16));
118        buffer.write(hex1);
119        buffer.write(hex2);
120    }
121
122    /**
123     * Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped.
124     *
125     * <p>
126     * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
127     * RFC 1521 and is suitable for encoding binary data and unformatted text.
128     * </p>
129     *
130     * @param printable
131     *                  bitset of characters deemed quoted-printable
132     * @param bytes
133     *                  array of bytes to be encoded
134     * @return array of bytes containing quoted-printable data
135     */
136    public static final byte[] encodeQuotedPrintable(BitSet printable, byte[] bytes) {
137        if (bytes == null) {
138            return null;
139        }
140        if (printable == null) {
141            printable = PRINTABLE_CHARS;
142        }
143        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
144        for (int i = 0; i < bytes.length; i++) {
145            int b = bytes[i];
146            if (b < 0) {
147                b = 256 + b;
148            }
149            if (printable.get(b)) {
150                buffer.write(b);
151            } else {
152                encodeQuotedPrintable(b, buffer);
153            }
154        }
155        return buffer.toByteArray();
156    }
157
158    /**
159     * Decodes an array quoted-printable characters into an array of original bytes. Escaped characters are converted
160     * back to their original representation.
161     *
162     * <p>
163     * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
164     * RFC 1521.
165     * </p>
166     *
167     * @param bytes
168     *                  array of quoted-printable characters
169     * @return array of original bytes
170     * @throws DecoderException
171     *                  Thrown if quoted-printable decoding is unsuccessful
172     */
173    public static final byte[] decodeQuotedPrintable(byte[] bytes) throws DecoderException {
174        if (bytes == null) {
175            return null;
176        }
177        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
178        for (int i = 0; i < bytes.length; i++) {
179            int b = bytes[i];
180            if (b == ESCAPE_CHAR) {
181                try {
182                    int u = Character.digit((char) bytes[++i], 16);
183                    int l = Character.digit((char) bytes[++i], 16);
184                    if (u == -1 || l == -1) {
185                        throw new DecoderException("Invalid quoted-printable encoding");
186                    }
187                    buffer.write((char) ((u << 4) + l));
188                } catch (ArrayIndexOutOfBoundsException e) {
189                    throw new DecoderException("Invalid quoted-printable encoding");
190                }
191            } else {
192                buffer.write(b);
193            }
194        }
195        return buffer.toByteArray();
196    }
197
198    /**
199     * Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped.
200     *
201     * <p>
202     * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
203     * RFC 1521 and is suitable for encoding binary data and unformatted text.
204     * </p>
205     *
206     * @param bytes
207     *                  array of bytes to be encoded
208     * @return array of bytes containing quoted-printable data
209     */
210    public byte[] encode(byte[] bytes) {
211        return encodeQuotedPrintable(PRINTABLE_CHARS, bytes);
212    }
213
214    /**
215     * Decodes an array of quoted-printable characters into an array of original bytes. Escaped characters are converted
216     * back to their original representation.
217     *
218     * <p>
219     * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
220     * RFC 1521.
221     * </p>
222     *
223     * @param bytes
224     *                  array of quoted-printable characters
225     * @return array of original bytes
226     * @throws DecoderException
227     *                  Thrown if quoted-printable decoding is unsuccessful
228     */
229    public byte[] decode(byte[] bytes) throws DecoderException {
230        return decodeQuotedPrintable(bytes);
231    }
232
233    /**
234     * Encodes a string into its quoted-printable form using the default string charset. Unsafe characters are escaped.
235     *
236     * <p>
237     * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
238     * RFC 1521 and is suitable for encoding binary data.
239     * </p>
240     *
241     * @param pString
242     *                  string to convert to quoted-printable form
243     * @return quoted-printable string
244     *
245     * @throws EncoderException
246     *                  Thrown if quoted-printable encoding is unsuccessful
247     *
248     * @see #getDefaultCharset()
249     */
250    public String encode(String pString) throws EncoderException {
251        if (pString == null) {
252            return null;
253        }
254        try {
255            return encode(pString, getDefaultCharset());
256        } catch (UnsupportedEncodingException e) {
257            throw new EncoderException(e.getMessage());
258        }
259    }
260
261    /**
262     * Decodes a quoted-printable string into its original form using the specified string charset. Escaped characters
263     * are converted back to their original representation.
264     *
265     * @param pString
266     *                  quoted-printable string to convert into its original form
267     * @param charset
268     *                  the original string charset
269     * @return original string
270     * @throws DecoderException
271     *                  Thrown if quoted-printable decoding is unsuccessful
272     * @throws UnsupportedEncodingException
273     *                  Thrown if charset is not supported
274     */
275    public String decode(String pString, String charset) throws DecoderException, UnsupportedEncodingException {
276        if (pString == null) {
277            return null;
278        }
279        return new String(decode(pString.getBytes(StringEncodings.US_ASCII)), charset);
280    }
281
282    /**
283     * Decodes a quoted-printable string into its original form using the default string charset. Escaped characters are
284     * converted back to their original representation.
285     *
286     * @param pString
287     *                  quoted-printable string to convert into its original form
288     * @return original string
289     * @throws DecoderException
290     *                  Thrown if quoted-printable decoding is unsuccessful
291     * @throws UnsupportedEncodingException
292     *                  Thrown if charset is not supported
293     * @see #getDefaultCharset()
294     */
295    public String decode(String pString) throws DecoderException {
296        if (pString == null) {
297            return null;
298        }
299        try {
300            return decode(pString, getDefaultCharset());
301        } catch (UnsupportedEncodingException e) {
302            throw new DecoderException(e.getMessage());
303        }
304    }
305
306    /**
307     * Encodes an object into its quoted-printable safe form. Unsafe characters are escaped.
308     *
309     * @param pObject
310     *                  string to convert to a quoted-printable form
311     * @return quoted-printable object
312     * @throws EncoderException
313     *                  Thrown if quoted-printable encoding is not applicable to objects of this type or if encoding is
314     *                  unsuccessful
315     */
316    public Object encode(Object pObject) throws EncoderException {
317        if (pObject == null) {
318            return null;
319        } else if (pObject instanceof byte[]) {
320            return encode((byte[]) pObject);
321        } else if (pObject instanceof String) {
322            return encode((String) pObject);
323        } else {
324            throw new EncoderException("Objects of type "
325                + pObject.getClass().getName()
326                + " cannot be quoted-printable encoded");
327        }
328    }
329
330    /**
331     * Decodes a quoted-printable object into its original form. Escaped characters are converted back to their original
332     * representation.
333     *
334     * @param pObject
335     *                  quoted-printable object to convert into its original form
336     * @return original object
337     * @throws DecoderException
338     *                  Thrown if quoted-printable decoding is not applicable to objects of this type if decoding is
339     *                  unsuccessful
340     */
341    public Object decode(Object pObject) throws DecoderException {
342        if (pObject == null) {
343            return null;
344        } else if (pObject instanceof byte[]) {
345            return decode((byte[]) pObject);
346        } else if (pObject instanceof String) {
347            return decode((String) pObject);
348        } else {
349            throw new DecoderException("Objects of type "
350                + pObject.getClass().getName()
351                + " cannot be quoted-printable decoded");
352        }
353    }
354
355    /**
356     * Returns the default charset used for string decoding and encoding.
357     *
358     * @return the default string charset.
359     */
360    public String getDefaultCharset() {
361        return this.charset;
362    }
363
364    /**
365     * Encodes a string into its quoted-printable form using the specified charset. Unsafe characters are escaped.
366     *
367     * <p>
368     * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
369     * RFC 1521 and is suitable for encoding binary data and unformatted text.
370     * </p>
371     *
372     * @param pString
373     *                  string to convert to quoted-printable form
374     * @param charset
375     *                  the charset for pString
376     * @return quoted-printable string
377     *
378     * @throws UnsupportedEncodingException
379     *                  Thrown if the charset is not supported
380     */
381    public String encode(String pString, String charset) throws UnsupportedEncodingException {
382        if (pString == null) {
383            return null;
384        }
385        return new String(encode(pString.getBytes(charset)), StringEncodings.US_ASCII);
386    }
387}
388