1/*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.emailcommon.internet;
18
19import android.text.TextUtils;
20import android.util.Base64;
21import android.util.Base64DataException;
22import android.util.Base64InputStream;
23import android.util.Log;
24
25import com.android.emailcommon.mail.Body;
26import com.android.emailcommon.mail.BodyPart;
27import com.android.emailcommon.mail.Message;
28import com.android.emailcommon.mail.MessagingException;
29import com.android.emailcommon.mail.Multipart;
30import com.android.emailcommon.mail.Part;
31
32import org.apache.commons.io.IOUtils;
33import org.apache.james.mime4j.codec.EncoderUtil;
34import org.apache.james.mime4j.decoder.DecoderUtil;
35import org.apache.james.mime4j.decoder.QuotedPrintableInputStream;
36import org.apache.james.mime4j.util.CharsetUtil;
37
38import java.io.ByteArrayOutputStream;
39import java.io.IOException;
40import java.io.InputStream;
41import java.io.OutputStream;
42import java.util.ArrayList;
43import java.util.regex.Matcher;
44import java.util.regex.Pattern;
45
46public class MimeUtility {
    // Tag used for the android.util.Log calls made by this class.
    private static final String LOG_TAG = "Email";

    /** The "message/rfc822" MIME type string. */
    public static final String MIME_TYPE_RFC822 = "message/rfc822";
    // Matches a single CR or a single LF character; unfold() uses it to strip line breaks.
    private final static Pattern PATTERN_CR_OR_LF = Pattern.compile("\r|\n");
51
52    /**
53     * Replace sequences of CRLF+WSP with WSP.  Tries to preserve original string
54     * object whenever possible.
55     */
56    public static String unfold(String s) {
57        if (s == null) {
58            return null;
59        }
60        Matcher patternMatcher = PATTERN_CR_OR_LF.matcher(s);
61        if (patternMatcher.find()) {
62            patternMatcher.reset();
63            s = patternMatcher.replaceAll("");
64        }
65        return s;
66    }
67
68    public static String decode(String s) {
69        if (s == null) {
70            return null;
71        }
72        return DecoderUtil.decodeEncodedWords(s);
73    }
74
75    public static String unfoldAndDecode(String s) {
76        return decode(unfold(s));
77    }
78
79    // TODO implement proper foldAndEncode
80    // NOTE: When this really works, we *must* remove all calls to foldAndEncode2() to prevent
81    // duplication of encoding.
82    public static String foldAndEncode(String s) {
83        return s;
84    }
85
86    /**
87     * INTERIM version of foldAndEncode that will be used only by Subject: headers.
88     * This is safer than implementing foldAndEncode() (see above) and risking unknown damage
89     * to other headers.
90     *
91     * TODO: Copy this code to foldAndEncode(), get rid of this function, confirm all working OK.
92     *
93     * @param s original string to encode and fold
94     * @param usedCharacters number of characters already used up by header name
95
96     * @return the String ready to be transmitted
97     */
98    public static String foldAndEncode2(String s, int usedCharacters) {
99        // james.mime4j.codec.EncoderUtil.java
100        // encode:  encodeIfNecessary(text, usage, numUsedInHeaderName)
101        // Usage.TEXT_TOKENlooks like the right thing for subjects
102        // use WORD_ENTITY for address/names
103
104        String encoded = EncoderUtil.encodeIfNecessary(s, EncoderUtil.Usage.TEXT_TOKEN,
105                usedCharacters);
106
107        return fold(encoded, usedCharacters);
108    }
109
110    /**
111     * INTERIM:  From newer version of org.apache.james (but we don't want to import
112     * the entire MimeUtil class).
113     *
114     * Splits the specified string into a multiple-line representation with
115     * lines no longer than 76 characters (because the line might contain
116     * encoded words; see <a href='http://www.faqs.org/rfcs/rfc2047.html'>RFC
117     * 2047</a> section 2). If the string contains non-whitespace sequences
118     * longer than 76 characters a line break is inserted at the whitespace
119     * character following the sequence resulting in a line longer than 76
120     * characters.
121     *
122     * @param s
123     *            string to split.
124     * @param usedCharacters
125     *            number of characters already used up. Usually the number of
126     *            characters for header field name plus colon and one space.
127     * @return a multiple-line representation of the given string.
128     */
129    public static String fold(String s, int usedCharacters) {
130        final int maxCharacters = 76;
131
132        final int length = s.length();
133        if (usedCharacters + length <= maxCharacters)
134            return s;
135
136        StringBuilder sb = new StringBuilder();
137
138        int lastLineBreak = -usedCharacters;
139        int wspIdx = indexOfWsp(s, 0);
140        while (true) {
141            if (wspIdx == length) {
142                sb.append(s.substring(Math.max(0, lastLineBreak)));
143                return sb.toString();
144            }
145
146            int nextWspIdx = indexOfWsp(s, wspIdx + 1);
147
148            if (nextWspIdx - lastLineBreak > maxCharacters) {
149                sb.append(s.substring(Math.max(0, lastLineBreak), wspIdx));
150                sb.append("\r\n");
151                lastLineBreak = wspIdx;
152            }
153
154            wspIdx = nextWspIdx;
155        }
156    }
157
158    /**
159     * INTERIM:  From newer version of org.apache.james (but we don't want to import
160     * the entire MimeUtil class).
161     *
162     * Search for whitespace.
163     */
164    private static int indexOfWsp(String s, int fromIndex) {
165        final int len = s.length();
166        for (int index = fromIndex; index < len; index++) {
167            char c = s.charAt(index);
168            if (c == ' ' || c == '\t')
169                return index;
170        }
171        return len;
172    }
173
174    /**
175     * Returns the named parameter of a header field. If name is null the first
176     * parameter is returned, or if there are no additional parameters in the
177     * field the entire field is returned. Otherwise the named parameter is
178     * searched for in a case insensitive fashion and returned. If the parameter
179     * cannot be found the method returns null.
180     *
181     * TODO: quite inefficient with the inner trimming & splitting.
182     * TODO: Also has a latent bug: uses "startsWith" to match the name, which can false-positive.
183     * TODO: The doc says that for a null name you get the first param, but you get the header.
184     *    Should probably just fix the doc, but if other code assumes that behavior, fix the code.
185     * TODO: Need to decode %-escaped strings, as in: filename="ab%22d".
186     *       ('+' -> ' ' conversion too? check RFC)
187     *
188     * @param header
189     * @param name
190     * @return the entire header (if name=null), the found parameter, or null
191     */
192    public static String getHeaderParameter(String header, String name) {
193        if (header == null) {
194            return null;
195        }
196        String[] parts = unfold(header).split(";");
197        if (name == null) {
198            return parts[0].trim();
199        }
200        String lowerCaseName = name.toLowerCase();
201        for (String part : parts) {
202            if (part.trim().toLowerCase().startsWith(lowerCaseName)) {
203                String[] parameterParts = part.split("=", 2);
204                if (parameterParts.length < 2) {
205                    return null;
206                }
207                String parameter = parameterParts[1].trim();
208                if (parameter.startsWith("\"") && parameter.endsWith("\"")) {
209                    return parameter.substring(1, parameter.length() - 1);
210                } else {
211                    return parameter;
212                }
213            }
214        }
215        return null;
216    }
217
218    /**
219     * Reads the Part's body and returns a String based on any charset conversion that needed
220     * to be done.
221     * @param part The part containing a body
222     * @return a String containing the converted text in the body, or null if there was no text
223     * or an error during conversion.
224     */
225    public static String getTextFromPart(Part part) {
226        try {
227            if (part != null && part.getBody() != null) {
228                InputStream in = part.getBody().getInputStream();
229                String mimeType = part.getMimeType();
230                if (mimeType != null && MimeUtility.mimeTypeMatches(mimeType, "text/*")) {
231                    /*
232                     * Now we read the part into a buffer for further processing. Because
233                     * the stream is now wrapped we'll remove any transfer encoding at this point.
234                     */
235                    ByteArrayOutputStream out = new ByteArrayOutputStream();
236                    IOUtils.copy(in, out);
237                    in.close();
238                    in = null;      // we want all of our memory back, and close might not release
239
240                    /*
241                     * We've got a text part, so let's see if it needs to be processed further.
242                     */
243                    String charset = getHeaderParameter(part.getContentType(), "charset");
244                    if (charset != null) {
245                        /*
246                         * See if there is conversion from the MIME charset to the Java one.
247                         */
248                        charset = CharsetUtil.toJavaCharset(charset);
249                    }
250                    /*
251                     * No encoding, so use us-ascii, which is the standard.
252                     */
253                    if (charset == null) {
254                        charset = "ASCII";
255                    }
256                    /*
257                     * Convert and return as new String
258                     */
259                    String result = out.toString(charset);
260                    out.close();
261                    return result;
262                }
263            }
264
265        }
266        catch (OutOfMemoryError oom) {
267            /*
268             * If we are not able to process the body there's nothing we can do about it. Return
269             * null and let the upper layers handle the missing content.
270             */
271            Log.e(LOG_TAG, "Unable to getTextFromPart " + oom.toString());
272        }
273        catch (Exception e) {
274            /*
275             * If we are not able to process the body there's nothing we can do about it. Return
276             * null and let the upper layers handle the missing content.
277             */
278            Log.e(LOG_TAG, "Unable to getTextFromPart " + e.toString());
279        }
280        return null;
281    }
282
283    /**
284     * Returns true if the given mimeType matches the matchAgainst specification.  The comparison
285     * ignores case and the matchAgainst string may include "*" for a wildcard (e.g. "image/*").
286     *
287     * @param mimeType A MIME type to check.
288     * @param matchAgainst A MIME type to check against. May include wildcards.
289     * @return true if the mimeType matches
290     */
291    public static boolean mimeTypeMatches(String mimeType, String matchAgainst) {
292        Pattern p = Pattern.compile(matchAgainst.replaceAll("\\*", "\\.\\*"),
293                Pattern.CASE_INSENSITIVE);
294        return p.matcher(mimeType).matches();
295    }
296
297    /**
298     * Returns true if the given mimeType matches any of the matchAgainst specifications.  The
299     * comparison ignores case and the matchAgainst strings may include "*" for a wildcard
300     * (e.g. "image/*").
301     *
302     * @param mimeType A MIME type to check.
303     * @param matchAgainst An array of MIME types to check against. May include wildcards.
304     * @return true if the mimeType matches any of the matchAgainst strings
305     */
306    public static boolean mimeTypeMatches(String mimeType, String[] matchAgainst) {
307        for (String matchType : matchAgainst) {
308            if (mimeTypeMatches(mimeType, matchType)) {
309                return true;
310            }
311        }
312        return false;
313    }
314
315    /**
316     * Given an input stream and a transfer encoding, return a wrapped input stream for that
317     * encoding (or the original if none is required)
318     * @param in the input stream
319     * @param contentTransferEncoding the content transfer encoding
320     * @return a properly wrapped stream
321     */
322    public static InputStream getInputStreamForContentTransferEncoding(InputStream in,
323            String contentTransferEncoding) {
324        if (contentTransferEncoding != null) {
325            contentTransferEncoding =
326                MimeUtility.getHeaderParameter(contentTransferEncoding, null);
327            if ("quoted-printable".equalsIgnoreCase(contentTransferEncoding)) {
328                in = new QuotedPrintableInputStream(in);
329            }
330            else if ("base64".equalsIgnoreCase(contentTransferEncoding)) {
331                in = new Base64InputStream(in, Base64.DEFAULT);
332            }
333        }
334        return in;
335    }
336
337    /**
338     * Removes any content transfer encoding from the stream and returns a Body.
339     */
340    public static Body decodeBody(InputStream in, String contentTransferEncoding)
341            throws IOException {
342        /*
343         * We'll remove any transfer encoding by wrapping the stream.
344         */
345        in = getInputStreamForContentTransferEncoding(in, contentTransferEncoding);
346        BinaryTempFileBody tempBody = new BinaryTempFileBody();
347        OutputStream out = tempBody.getOutputStream();
348        try {
349            IOUtils.copy(in, out);
350        } catch (Base64DataException bde) {
351            // TODO Need to fix this somehow
352            //String warning = "\n\n" + Email.getMessageDecodeErrorString();
353            //out.write(warning.getBytes());
354        } finally {
355            out.close();
356        }
357        return tempBody;
358    }
359
360    /**
361     * Recursively scan a Part (usually a Message) and sort out which of its children will be
362     * "viewable" and which will be attachments.
363     *
364     * @param part The part to be broken down
365     * @param viewables This arraylist will be populated with all parts that appear to be
366     * the "message" (e.g. text/plain & text/html)
367     * @param attachments This arraylist will be populated with all parts that appear to be
368     * attachments (including inlines)
369     * @throws MessagingException
370     */
371    public static void collectParts(Part part, ArrayList<Part> viewables,
372            ArrayList<Part> attachments) throws MessagingException {
373        String disposition = part.getDisposition();
374        String dispositionType = MimeUtility.getHeaderParameter(disposition, null);
375        // If a disposition is not specified, default to "inline"
376        boolean inline =
377                TextUtils.isEmpty(dispositionType) || "inline".equalsIgnoreCase(dispositionType);
378        // The lower-case mime type
379        String mimeType = part.getMimeType().toLowerCase();
380
381        if (part.getBody() instanceof Multipart) {
382            // If the part is Multipart but not alternative it's either mixed or
383            // something we don't know about, which means we treat it as mixed
384            // per the spec. We just process its pieces recursively.
385            MimeMultipart mp = (MimeMultipart)part.getBody();
386            boolean foundHtml = false;
387            if (mp.getSubTypeForTest().equals("alternative")) {
388                for (int i = 0; i < mp.getCount(); i++) {
389                    if (mp.getBodyPart(i).isMimeType("text/html")) {
390                        foundHtml = true;
391                        break;
392                    }
393                }
394            }
395            for (int i = 0; i < mp.getCount(); i++) {
396                // See if we have text and html
397                BodyPart bp = mp.getBodyPart(i);
398                // If there's html, don't bother loading text
399                if (foundHtml && bp.isMimeType("text/plain")) {
400                    continue;
401                }
402                collectParts(bp, viewables, attachments);
403            }
404        } else if (part.getBody() instanceof Message) {
405            // If the part is an embedded message we just continue to process
406            // it, pulling any viewables or attachments into the running list.
407            Message message = (Message)part.getBody();
408            collectParts(message, viewables, attachments);
409        } else if (inline && (mimeType.startsWith("text") || (mimeType.startsWith("image")))) {
410            // We'll treat text and images as viewables
411            viewables.add(part);
412        } else {
413            // Everything else is an attachment.
414            attachments.add(part);
415        }
416    }
417}
418