1/****************************************************************
2 * Licensed to the Apache Software Foundation (ASF) under one   *
3 * or more contributor license agreements.  See the NOTICE file *
4 * distributed with this work for additional information        *
5 * regarding copyright ownership.  The ASF licenses this file   *
6 * to you under the Apache License, Version 2.0 (the            *
7 * "License"); you may not use this file except in compliance   *
8 * with the License.  You may obtain a copy of the License at   *
9 *                                                              *
10 *   http://www.apache.org/licenses/LICENSE-2.0                 *
11 *                                                              *
12 * Unless required by applicable law or agreed to in writing,   *
13 * software distributed under the License is distributed on an  *
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
15 * KIND, either express or implied.  See the License for the    *
16 * specific language governing permissions and limitations      *
17 * under the License.                                           *
18 ****************************************************************/
19
20package org.apache.james.mime4j.decoder;
21
22//BEGIN android-changed: Stubbing out logging
23import org.apache.james.mime4j.Log;
24import org.apache.james.mime4j.LogFactory;
25//END android-changed
26import org.apache.james.mime4j.util.CharsetUtil;
27
28import java.io.ByteArrayInputStream;
29import java.io.ByteArrayOutputStream;
30import java.io.IOException;
31import java.io.UnsupportedEncodingException;
32
33/**
34 * Static methods for decoding strings, byte arrays and encoded words.
35 *
36 *
37 * @version $Id: DecoderUtil.java,v 1.3 2005/02/07 15:33:59 ntherning Exp $
38 */
39public class DecoderUtil {
40    private static Log log = LogFactory.getLog(DecoderUtil.class);
41
42    /**
43     * Decodes a string containing quoted-printable encoded data.
44     *
45     * @param s the string to decode.
46     * @return the decoded bytes.
47     */
48    public static byte[] decodeBaseQuotedPrintable(String s) {
49        ByteArrayOutputStream baos = new ByteArrayOutputStream();
50
51        try {
52            byte[] bytes = s.getBytes("US-ASCII");
53
54            QuotedPrintableInputStream is = new QuotedPrintableInputStream(
55                                               new ByteArrayInputStream(bytes));
56
57            int b = 0;
58            while ((b = is.read()) != -1) {
59                baos.write(b);
60            }
61        } catch (IOException e) {
62            /*
63             * This should never happen!
64             */
65            log.error(e);
66        }
67
68        return baos.toByteArray();
69    }
70
71    /**
72     * Decodes a string containing base64 encoded data.
73     *
74     * @param s the string to decode.
75     * @return the decoded bytes.
76     */
77    public static byte[] decodeBase64(String s) {
78        ByteArrayOutputStream baos = new ByteArrayOutputStream();
79
80        try {
81            byte[] bytes = s.getBytes("US-ASCII");
82
83            Base64InputStream is = new Base64InputStream(
84                                        new ByteArrayInputStream(bytes));
85
86            int b = 0;
87            while ((b = is.read()) != -1) {
88                baos.write(b);
89            }
90        } catch (IOException e) {
91            /*
92             * This should never happen!
93             */
94            log.error(e);
95        }
96
97        return baos.toByteArray();
98    }
99
100    /**
101     * Decodes an encoded word encoded with the 'B' encoding (described in
102     * RFC 2047) found in a header field body.
103     *
104     * @param encodedWord the encoded word to decode.
105     * @param charset the Java charset to use.
106     * @return the decoded string.
107     * @throws UnsupportedEncodingException if the given Java charset isn't
108     *         supported.
109     */
110    public static String decodeB(String encodedWord, String charset)
111            throws UnsupportedEncodingException {
112
113        return new String(decodeBase64(encodedWord), charset);
114    }
115
116    /**
117     * Decodes an encoded word encoded with the 'Q' encoding (described in
118     * RFC 2047) found in a header field body.
119     *
120     * @param encodedWord the encoded word to decode.
121     * @param charset the Java charset to use.
122     * @return the decoded string.
123     * @throws UnsupportedEncodingException if the given Java charset isn't
124     *         supported.
125     */
126    public static String decodeQ(String encodedWord, String charset)
127            throws UnsupportedEncodingException {
128
129        /*
130         * Replace _ with =20
131         */
132        StringBuffer sb = new StringBuffer();
133        for (int i = 0; i < encodedWord.length(); i++) {
134            char c = encodedWord.charAt(i);
135            if (c == '_') {
136                sb.append("=20");
137            } else {
138                sb.append(c);
139            }
140        }
141
142        return new String(decodeBaseQuotedPrintable(sb.toString()), charset);
143    }
144
145    /**
146     * Decodes a string containing encoded words as defined by RFC 2047.
147     * Encoded words in have the form
148     * =?charset?enc?Encoded word?= where enc is either 'Q' or 'q' for
149     * quoted-printable and 'B' or 'b' for Base64.
150     *
151     * ANDROID:  COPIED FROM A NEWER VERSION OF MIME4J
152     *
153     * @param body the string to decode.
154     * @return the decoded string.
155     */
156    public static String decodeEncodedWords(String body) {
157
158        // ANDROID:  Most strings will not include "=?" so a quick test can prevent unneeded
159        // object creation.  This could also be handled via lazy creation of the StringBuilder.
160        if (body.indexOf("=?") == -1) {
161            return body;
162        }
163
164        int previousEnd = 0;
165        boolean previousWasEncoded = false;
166
167        StringBuilder sb = new StringBuilder();
168
169        while (true) {
170            int begin = body.indexOf("=?", previousEnd);
171
172            // ANDROID:  The mime4j original version has an error here.  It gets confused if
173            // the encoded string begins with an '=' (just after "?Q?").  This patch seeks forward
174            // to find the two '?' in the "header", before looking for the final "?=".
175            if (begin == -1) {
176                break;
177            }
178            int qm1 = body.indexOf('?', begin + 2);
179            if (qm1 == -1) {
180                break;
181            }
182            int qm2 = body.indexOf('?', qm1 + 1);
183            if (qm2 == -1) {
184                break;
185            }
186            int end = body.indexOf("?=", qm2 + 1);
187            if (end == -1) {
188                break;
189            }
190            end += 2;
191
192            String sep = body.substring(previousEnd, begin);
193
194            String decoded = decodeEncodedWord(body, begin, end);
195            if (decoded == null) {
196                sb.append(sep);
197                sb.append(body.substring(begin, end));
198            } else {
199                if (!previousWasEncoded || !CharsetUtil.isWhitespace(sep)) {
200                    sb.append(sep);
201                }
202                sb.append(decoded);
203            }
204
205            previousEnd = end;
206            previousWasEncoded = decoded != null;
207        }
208
209        if (previousEnd == 0)
210            return body;
211
212        sb.append(body.substring(previousEnd));
213        return sb.toString();
214    }
215
216    // return null on error. Begin is index of '=?' in body.
217    public static String decodeEncodedWord(String body, int begin, int end) {
218        // Skip the '?=' chars in body and scan forward from there for next '?'
219        int qm1 = body.indexOf('?', begin + 2);
220        if (qm1 == -1 || qm1 == end - 2)
221            return null;
222
223        int qm2 = body.indexOf('?', qm1 + 1);
224        if (qm2 == -1 || qm2 == end - 2)
225            return null;
226
227        String mimeCharset = body.substring(begin + 2, qm1);
228        String encoding = body.substring(qm1 + 1, qm2);
229        String encodedText = body.substring(qm2 + 1, end - 2);
230
231        String charset = CharsetUtil.toJavaCharset(mimeCharset);
232        if (charset == null) {
233            if (log.isWarnEnabled()) {
234                log.warn("MIME charset '" + mimeCharset + "' in encoded word '"
235                        + body.substring(begin, end) + "' doesn't have a "
236                        + "corresponding Java charset");
237            }
238            return null;
239        } else if (!CharsetUtil.isDecodingSupported(charset)) {
240            if (log.isWarnEnabled()) {
241                log.warn("Current JDK doesn't support decoding of charset '"
242                        + charset + "' (MIME charset '" + mimeCharset
243                        + "' in encoded word '" + body.substring(begin, end)
244                        + "')");
245            }
246            return null;
247        }
248
249        if (encodedText.length() == 0) {
250            if (log.isWarnEnabled()) {
251                log.warn("Missing encoded text in encoded word: '"
252                        + body.substring(begin, end) + "'");
253            }
254            return null;
255        }
256
257        try {
258            if (encoding.equalsIgnoreCase("Q")) {
259                return DecoderUtil.decodeQ(encodedText, charset);
260            } else if (encoding.equalsIgnoreCase("B")) {
261                return DecoderUtil.decodeB(encodedText, charset);
262            } else {
263                if (log.isWarnEnabled()) {
264                    log.warn("Warning: Unknown encoding in encoded word '"
265                            + body.substring(begin, end) + "'");
266                }
267                return null;
268            }
269        } catch (UnsupportedEncodingException e) {
270            // should not happen because of isDecodingSupported check above
271            if (log.isWarnEnabled()) {
272                log.warn("Unsupported encoding in encoded word '"
273                        + body.substring(begin, end) + "'", e);
274            }
275            return null;
276        } catch (RuntimeException e) {
277            if (log.isWarnEnabled()) {
278                log.warn("Could not decode encoded word '"
279                        + body.substring(begin, end) + "'", e);
280            }
281            return null;
282        }
283    }
284}
285