1/****************************************************************
2 * Licensed to the Apache Software Foundation (ASF) under one   *
3 * or more contributor license agreements.  See the NOTICE file *
4 * distributed with this work for additional information        *
5 * regarding copyright ownership.  The ASF licenses this file   *
6 * to you under the Apache License, Version 2.0 (the            *
7 * "License"); you may not use this file except in compliance   *
8 * with the License.  You may obtain a copy of the License at   *
9 *                                                              *
10 *   http://www.apache.org/licenses/LICENSE-2.0                 *
11 *                                                              *
12 * Unless required by applicable law or agreed to in writing,   *
13 * software distributed under the License is distributed on an  *
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
15 * KIND, either express or implied.  See the License for the    *
16 * specific language governing permissions and limitations      *
17 * under the License.                                           *
18 ****************************************************************/
19
20package org.apache.james.mime4j.decoder;
21
22//BEGIN android-changed: Stubbing out logging
23import org.apache.james.mime4j.Log;
24import org.apache.james.mime4j.LogFactory;
25//END android-changed
26import org.apache.james.mime4j.util.CharsetUtil;
27
28import java.io.ByteArrayInputStream;
29import java.io.ByteArrayOutputStream;
30import java.io.IOException;
31import java.io.UnsupportedEncodingException;
32
33/**
34 * Static methods for decoding strings, byte arrays and encoded words.
35 *
36 *
37 * @version $Id: DecoderUtil.java,v 1.3 2005/02/07 15:33:59 ntherning Exp $
38 */
39public class DecoderUtil {
40    private static Log log = LogFactory.getLog(DecoderUtil.class);
41
42    /**
43     * Decodes a string containing quoted-printable encoded data.
44     *
45     * @param s the string to decode.
46     * @return the decoded bytes.
47     */
48    public static byte[] decodeBaseQuotedPrintable(String s) {
49        ByteArrayOutputStream baos = new ByteArrayOutputStream();
50
51        try {
52            byte[] bytes = s.getBytes("US-ASCII");
53
54            QuotedPrintableInputStream is = new QuotedPrintableInputStream(
55                                               new ByteArrayInputStream(bytes));
56
57            int b = 0;
58            while ((b = is.read()) != -1) {
59                baos.write(b);
60            }
61        } catch (IOException e) {
62            /*
63             * This should never happen!
64             */
65            log.error(e);
66        }
67
68        return baos.toByteArray();
69    }
70
71    /**
72     * Decodes a string containing base64 encoded data.
73     *
74     * @param s the string to decode.
75     * @return the decoded bytes.
76     */
77    public static byte[] decodeBase64(String s) {
78        ByteArrayOutputStream baos = new ByteArrayOutputStream();
79
80        try {
81            byte[] bytes = s.getBytes("US-ASCII");
82
83            Base64InputStream is = new Base64InputStream(
84                                        new ByteArrayInputStream(bytes));
85
86            int b = 0;
87            while ((b = is.read()) != -1) {
88                baos.write(b);
89            }
90        } catch (IOException e) {
91            /*
92             * This should never happen!
93             */
94            log.error(e);
95        }
96
97        return baos.toByteArray();
98    }
99
100    /**
101     * Decodes an encoded word encoded with the 'B' encoding (described in
102     * RFC 2047) found in a header field body.
103     *
104     * @param encodedWord the encoded word to decode.
105     * @param charset the Java charset to use.
106     * @return the decoded string.
107     * @throws UnsupportedEncodingException if the given Java charset isn't
108     *         supported.
109     */
110    public static String decodeB(String encodedWord, String charset)
111            throws UnsupportedEncodingException {
112
113        return new String(decodeBase64(encodedWord), charset);
114    }
115
116    /**
117     * Decodes an encoded word encoded with the 'Q' encoding (described in
118     * RFC 2047) found in a header field body.
119     *
120     * @param encodedWord the encoded word to decode.
121     * @param charset the Java charset to use.
122     * @return the decoded string.
123     * @throws UnsupportedEncodingException if the given Java charset isn't
124     *         supported.
125     */
126    public static String decodeQ(String encodedWord, String charset)
127            throws UnsupportedEncodingException {
128
129        /*
130         * Replace _ with =20
131         */
132        StringBuffer sb = new StringBuffer();
133        for (int i = 0; i < encodedWord.length(); i++) {
134            char c = encodedWord.charAt(i);
135            if (c == '_') {
136                sb.append("=20");
137            } else {
138                sb.append(c);
139            }
140        }
141
142        return new String(decodeBaseQuotedPrintable(sb.toString()), charset);
143    }
144
145    /**
146     * Decodes a string containing encoded words as defined by RFC 2047.
147     * Encoded words in have the form
148     * =?charset?enc?Encoded word?= where enc is either 'Q' or 'q' for
149     * quoted-printable and 'B' or 'b' for Base64.
150     *
151     * ANDROID:  COPIED FROM A NEWER VERSION OF MIME4J
152     *
153     * @param body the string to decode.
154     * @return the decoded string.
155     */
156    public static String decodeEncodedWords(String body) {
157
158        // ANDROID:  Most strings will not include "=?" so a quick test can prevent unneeded
159        // object creation.  This could also be handled via lazy creation of the StringBuilder.
160        if (body.indexOf("=?") == -1) {
161            return body;
162        }
163
164        int previousEnd = 0;
165        boolean previousWasEncoded = false;
166
167        StringBuilder sb = new StringBuilder();
168
169        while (true) {
170            int begin = body.indexOf("=?", previousEnd);
171
172            // ANDROID:  The mime4j original version has an error here.  It gets confused if
173            // the encoded string begins with an '=' (just after "?Q?").  This patch seeks forward
174            // to find the two '?' in the "header", before looking for the final "?=".
175            int endScan = begin + 2;
176            if (begin != -1) {
177                int qm1 = body.indexOf('?', endScan + 2);
178                int qm2 = body.indexOf('?', qm1 + 1);
179                if (qm2 != -1) {
180                    endScan = qm2 + 1;
181                }
182            }
183
184            int end = begin == -1 ? -1 : body.indexOf("?=", endScan);
185            if (end == -1) {
186                if (previousEnd == 0)
187                    return body;
188
189                sb.append(body.substring(previousEnd));
190                return sb.toString();
191            }
192            end += 2;
193
194            String sep = body.substring(previousEnd, begin);
195
196            String decoded = decodeEncodedWord(body, begin, end);
197            if (decoded == null) {
198                sb.append(sep);
199                sb.append(body.substring(begin, end));
200            } else {
201                if (!previousWasEncoded || !CharsetUtil.isWhitespace(sep)) {
202                    sb.append(sep);
203                }
204                sb.append(decoded);
205            }
206
207            previousEnd = end;
208            previousWasEncoded = decoded != null;
209        }
210    }
211
212    // return null on error
213    private static String decodeEncodedWord(String body, int begin, int end) {
214        int qm1 = body.indexOf('?', begin + 2);
215        if (qm1 == end - 2)
216            return null;
217
218        int qm2 = body.indexOf('?', qm1 + 1);
219        if (qm2 == end - 2)
220            return null;
221
222        String mimeCharset = body.substring(begin + 2, qm1);
223        String encoding = body.substring(qm1 + 1, qm2);
224        String encodedText = body.substring(qm2 + 1, end - 2);
225
226        String charset = CharsetUtil.toJavaCharset(mimeCharset);
227        if (charset == null) {
228            if (log.isWarnEnabled()) {
229                log.warn("MIME charset '" + mimeCharset + "' in encoded word '"
230                        + body.substring(begin, end) + "' doesn't have a "
231                        + "corresponding Java charset");
232            }
233            return null;
234        } else if (!CharsetUtil.isDecodingSupported(charset)) {
235            if (log.isWarnEnabled()) {
236                log.warn("Current JDK doesn't support decoding of charset '"
237                        + charset + "' (MIME charset '" + mimeCharset
238                        + "' in encoded word '" + body.substring(begin, end)
239                        + "')");
240            }
241            return null;
242        }
243
244        if (encodedText.length() == 0) {
245            if (log.isWarnEnabled()) {
246                log.warn("Missing encoded text in encoded word: '"
247                        + body.substring(begin, end) + "'");
248            }
249            return null;
250        }
251
252        try {
253            if (encoding.equalsIgnoreCase("Q")) {
254                return DecoderUtil.decodeQ(encodedText, charset);
255            } else if (encoding.equalsIgnoreCase("B")) {
256                return DecoderUtil.decodeB(encodedText, charset);
257            } else {
258                if (log.isWarnEnabled()) {
259                    log.warn("Warning: Unknown encoding in encoded word '"
260                            + body.substring(begin, end) + "'");
261                }
262                return null;
263            }
264        } catch (UnsupportedEncodingException e) {
265            // should not happen because of isDecodingSupported check above
266            if (log.isWarnEnabled()) {
267                log.warn("Unsupported encoding in encoded word '"
268                        + body.substring(begin, end) + "'", e);
269            }
270            return null;
271        } catch (RuntimeException e) {
272            if (log.isWarnEnabled()) {
273                log.warn("Could not decode encoded word '"
274                        + body.substring(begin, end) + "'", e);
275            }
276            return null;
277        }
278    }
279}
280