1/*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.emailcommon.internet;
18
19import android.util.Base64;
20import android.util.Base64DataException;
21import android.util.Base64InputStream;
22import android.util.Log;
23
24import com.android.emailcommon.Logging;
25import com.android.emailcommon.mail.Body;
26import com.android.emailcommon.mail.BodyPart;
27import com.android.emailcommon.mail.Message;
28import com.android.emailcommon.mail.MessagingException;
29import com.android.emailcommon.mail.Multipart;
30import com.android.emailcommon.mail.Part;
31
32import org.apache.commons.io.IOUtils;
33import org.apache.james.mime4j.codec.EncoderUtil;
34import org.apache.james.mime4j.decoder.DecoderUtil;
35import org.apache.james.mime4j.decoder.QuotedPrintableInputStream;
36import org.apache.james.mime4j.util.CharsetUtil;
37
38import java.io.ByteArrayOutputStream;
39import java.io.IOException;
40import java.io.InputStream;
41import java.io.OutputStream;
42import java.util.ArrayList;
43import java.util.regex.Matcher;
44import java.util.regex.Pattern;
45
46public class MimeUtility {
47
48    public static final String MIME_TYPE_RFC822 = "message/rfc822";
49    private final static Pattern PATTERN_CR_OR_LF = Pattern.compile("\r|\n");
50
51    /**
52     * Replace sequences of CRLF+WSP with WSP.  Tries to preserve original string
53     * object whenever possible.
54     */
55    public static String unfold(String s) {
56        if (s == null) {
57            return null;
58        }
59        Matcher patternMatcher = PATTERN_CR_OR_LF.matcher(s);
60        if (patternMatcher.find()) {
61            patternMatcher.reset();
62            s = patternMatcher.replaceAll("");
63        }
64        return s;
65    }
66
67    public static String decode(String s) {
68        if (s == null) {
69            return null;
70        }
71        return DecoderUtil.decodeEncodedWords(s);
72    }
73
74    public static String unfoldAndDecode(String s) {
75        return decode(unfold(s));
76    }
77
78    // TODO implement proper foldAndEncode
79    // NOTE: When this really works, we *must* remove all calls to foldAndEncode2() to prevent
80    // duplication of encoding.
81    public static String foldAndEncode(String s) {
82        return s;
83    }
84
85    /**
86     * INTERIM version of foldAndEncode that will be used only by Subject: headers.
87     * This is safer than implementing foldAndEncode() (see above) and risking unknown damage
88     * to other headers.
89     *
90     * TODO: Copy this code to foldAndEncode(), get rid of this function, confirm all working OK.
91     *
92     * @param s original string to encode and fold
93     * @param usedCharacters number of characters already used up by header name
94
95     * @return the String ready to be transmitted
96     */
97    public static String foldAndEncode2(String s, int usedCharacters) {
98        // james.mime4j.codec.EncoderUtil.java
99        // encode:  encodeIfNecessary(text, usage, numUsedInHeaderName)
100        // Usage.TEXT_TOKENlooks like the right thing for subjects
101        // use WORD_ENTITY for address/names
102
103        String encoded = EncoderUtil.encodeIfNecessary(s, EncoderUtil.Usage.TEXT_TOKEN,
104                usedCharacters);
105
106        return fold(encoded, usedCharacters);
107    }
108
109    /**
110     * INTERIM:  From newer version of org.apache.james (but we don't want to import
111     * the entire MimeUtil class).
112     *
113     * Splits the specified string into a multiple-line representation with
114     * lines no longer than 76 characters (because the line might contain
115     * encoded words; see <a href='http://www.faqs.org/rfcs/rfc2047.html'>RFC
116     * 2047</a> section 2). If the string contains non-whitespace sequences
117     * longer than 76 characters a line break is inserted at the whitespace
118     * character following the sequence resulting in a line longer than 76
119     * characters.
120     *
121     * @param s
122     *            string to split.
123     * @param usedCharacters
124     *            number of characters already used up. Usually the number of
125     *            characters for header field name plus colon and one space.
126     * @return a multiple-line representation of the given string.
127     */
128    public static String fold(String s, int usedCharacters) {
129        final int maxCharacters = 76;
130
131        final int length = s.length();
132        if (usedCharacters + length <= maxCharacters)
133            return s;
134
135        StringBuilder sb = new StringBuilder();
136
137        int lastLineBreak = -usedCharacters;
138        int wspIdx = indexOfWsp(s, 0);
139        while (true) {
140            if (wspIdx == length) {
141                sb.append(s.substring(Math.max(0, lastLineBreak)));
142                return sb.toString();
143            }
144
145            int nextWspIdx = indexOfWsp(s, wspIdx + 1);
146
147            if (nextWspIdx - lastLineBreak > maxCharacters) {
148                sb.append(s.substring(Math.max(0, lastLineBreak), wspIdx));
149                sb.append("\r\n");
150                lastLineBreak = wspIdx;
151            }
152
153            wspIdx = nextWspIdx;
154        }
155    }
156
157    /**
158     * INTERIM:  From newer version of org.apache.james (but we don't want to import
159     * the entire MimeUtil class).
160     *
161     * Search for whitespace.
162     */
163    private static int indexOfWsp(String s, int fromIndex) {
164        final int len = s.length();
165        for (int index = fromIndex; index < len; index++) {
166            char c = s.charAt(index);
167            if (c == ' ' || c == '\t')
168                return index;
169        }
170        return len;
171    }
172
173    /**
174     * Returns the named parameter of a header field. If name is null the first
175     * parameter is returned, or if there are no additional parameters in the
176     * field the entire field is returned. Otherwise the named parameter is
177     * searched for in a case insensitive fashion and returned. If the parameter
178     * cannot be found the method returns null.
179     *
180     * TODO: quite inefficient with the inner trimming & splitting.
181     * TODO: Also has a latent bug: uses "startsWith" to match the name, which can false-positive.
182     * TODO: The doc says that for a null name you get the first param, but you get the header.
183     *    Should probably just fix the doc, but if other code assumes that behavior, fix the code.
184     * TODO: Need to decode %-escaped strings, as in: filename="ab%22d".
185     *       ('+' -> ' ' conversion too? check RFC)
186     *
187     * @param header
188     * @param name
189     * @return the entire header (if name=null), the found parameter, or null
190     */
191    public static String getHeaderParameter(String header, String name) {
192        if (header == null) {
193            return null;
194        }
195        String[] parts = unfold(header).split(";");
196        if (name == null) {
197            return parts[0].trim();
198        }
199        String lowerCaseName = name.toLowerCase();
200        for (String part : parts) {
201            if (part.trim().toLowerCase().startsWith(lowerCaseName)) {
202                String[] parameterParts = part.split("=", 2);
203                if (parameterParts.length < 2) {
204                    return null;
205                }
206                String parameter = parameterParts[1].trim();
207                if (parameter.startsWith("\"") && parameter.endsWith("\"")) {
208                    return parameter.substring(1, parameter.length() - 1);
209                } else {
210                    return parameter;
211                }
212            }
213        }
214        return null;
215    }
216
217    public static Part findFirstPartByMimeType(Part part, String mimeType)
218            throws MessagingException {
219        if (part.getBody() instanceof Multipart) {
220            Multipart multipart = (Multipart)part.getBody();
221            for (int i = 0, count = multipart.getCount(); i < count; i++) {
222                BodyPart bodyPart = multipart.getBodyPart(i);
223                Part ret = findFirstPartByMimeType(bodyPart, mimeType);
224                if (ret != null) {
225                    return ret;
226                }
227            }
228        }
229        else if (part.getMimeType().equalsIgnoreCase(mimeType)) {
230            return part;
231        }
232        return null;
233    }
234
235    public static Part findPartByContentId(Part part, String contentId) throws Exception {
236        if (part.getBody() instanceof Multipart) {
237            Multipart multipart = (Multipart)part.getBody();
238            for (int i = 0, count = multipart.getCount(); i < count; i++) {
239                BodyPart bodyPart = multipart.getBodyPart(i);
240                Part ret = findPartByContentId(bodyPart, contentId);
241                if (ret != null) {
242                    return ret;
243                }
244            }
245        }
246        String cid = part.getContentId();
247        if (contentId.equals(cid)) {
248            return part;
249        }
250        return null;
251    }
252
253    /**
254     * Reads the Part's body and returns a String based on any charset conversion that needed
255     * to be done.
256     * @param part The part containing a body
257     * @return a String containing the converted text in the body, or null if there was no text
258     * or an error during conversion.
259     */
260    public static String getTextFromPart(Part part) {
261        try {
262            if (part != null && part.getBody() != null) {
263                InputStream in = part.getBody().getInputStream();
264                String mimeType = part.getMimeType();
265                if (mimeType != null && MimeUtility.mimeTypeMatches(mimeType, "text/*")) {
266                    /*
267                     * Now we read the part into a buffer for further processing. Because
268                     * the stream is now wrapped we'll remove any transfer encoding at this point.
269                     */
270                    ByteArrayOutputStream out = new ByteArrayOutputStream();
271                    IOUtils.copy(in, out);
272                    in.close();
273                    in = null;      // we want all of our memory back, and close might not release
274
275                    /*
276                     * We've got a text part, so let's see if it needs to be processed further.
277                     */
278                    String charset = getHeaderParameter(part.getContentType(), "charset");
279                    if (charset != null) {
280                        /*
281                         * See if there is conversion from the MIME charset to the Java one.
282                         */
283                        charset = CharsetUtil.toJavaCharset(charset);
284                    }
285                    /*
286                     * No encoding, so use us-ascii, which is the standard.
287                     */
288                    if (charset == null) {
289                        charset = "ASCII";
290                    }
291                    /*
292                     * Convert and return as new String
293                     */
294                    String result = out.toString(charset);
295                    out.close();
296                    return result;
297                }
298            }
299
300        }
301        catch (OutOfMemoryError oom) {
302            /*
303             * If we are not able to process the body there's nothing we can do about it. Return
304             * null and let the upper layers handle the missing content.
305             */
306            Log.e(Logging.LOG_TAG, "Unable to getTextFromPart " + oom.toString());
307        }
308        catch (Exception e) {
309            /*
310             * If we are not able to process the body there's nothing we can do about it. Return
311             * null and let the upper layers handle the missing content.
312             */
313            Log.e(Logging.LOG_TAG, "Unable to getTextFromPart " + e.toString());
314        }
315        return null;
316    }
317
318    /**
319     * Returns true if the given mimeType matches the matchAgainst specification.  The comparison
320     * ignores case and the matchAgainst string may include "*" for a wildcard (e.g. "image/*").
321     *
322     * @param mimeType A MIME type to check.
323     * @param matchAgainst A MIME type to check against. May include wildcards.
324     * @return true if the mimeType matches
325     */
326    public static boolean mimeTypeMatches(String mimeType, String matchAgainst) {
327        Pattern p = Pattern.compile(matchAgainst.replaceAll("\\*", "\\.\\*"),
328                Pattern.CASE_INSENSITIVE);
329        return p.matcher(mimeType).matches();
330    }
331
332    /**
333     * Returns true if the given mimeType matches any of the matchAgainst specifications.  The
334     * comparison ignores case and the matchAgainst strings may include "*" for a wildcard
335     * (e.g. "image/*").
336     *
337     * @param mimeType A MIME type to check.
338     * @param matchAgainst An array of MIME types to check against. May include wildcards.
339     * @return true if the mimeType matches any of the matchAgainst strings
340     */
341    public static boolean mimeTypeMatches(String mimeType, String[] matchAgainst) {
342        for (String matchType : matchAgainst) {
343            if (mimeTypeMatches(mimeType, matchType)) {
344                return true;
345            }
346        }
347        return false;
348    }
349
350    /**
351     * Given an input stream and a transfer encoding, return a wrapped input stream for that
352     * encoding (or the original if none is required)
353     * @param in the input stream
354     * @param contentTransferEncoding the content transfer encoding
355     * @return a properly wrapped stream
356     */
357    public static InputStream getInputStreamForContentTransferEncoding(InputStream in,
358            String contentTransferEncoding) {
359        if (contentTransferEncoding != null) {
360            contentTransferEncoding =
361                MimeUtility.getHeaderParameter(contentTransferEncoding, null);
362            if ("quoted-printable".equalsIgnoreCase(contentTransferEncoding)) {
363                in = new QuotedPrintableInputStream(in);
364            }
365            else if ("base64".equalsIgnoreCase(contentTransferEncoding)) {
366                in = new Base64InputStream(in, Base64.DEFAULT);
367            }
368        }
369        return in;
370    }
371
372    /**
373     * Removes any content transfer encoding from the stream and returns a Body.
374     */
375    public static Body decodeBody(InputStream in, String contentTransferEncoding)
376            throws IOException {
377        /*
378         * We'll remove any transfer encoding by wrapping the stream.
379         */
380        in = getInputStreamForContentTransferEncoding(in, contentTransferEncoding);
381        BinaryTempFileBody tempBody = new BinaryTempFileBody();
382        OutputStream out = tempBody.getOutputStream();
383        try {
384            IOUtils.copy(in, out);
385        } catch (Base64DataException bde) {
386            // TODO Need to fix this somehow
387            //String warning = "\n\n" + Email.getMessageDecodeErrorString();
388            //out.write(warning.getBytes());
389        } finally {
390            out.close();
391        }
392        return tempBody;
393    }
394
395    /**
396     * Recursively scan a Part (usually a Message) and sort out which of its children will be
397     * "viewable" and which will be attachments.
398     *
399     * @param part The part to be broken down
400     * @param viewables This arraylist will be populated with all parts that appear to be
401     * the "message" (e.g. text/plain & text/html)
402     * @param attachments This arraylist will be populated with all parts that appear to be
403     * attachments (including inlines)
404     * @throws MessagingException
405     */
406    public static void collectParts(Part part, ArrayList<Part> viewables,
407            ArrayList<Part> attachments) throws MessagingException {
408        String disposition = part.getDisposition();
409        String dispositionType = null;
410        String dispositionFilename = null;
411        if (disposition != null) {
412            dispositionType = MimeUtility.getHeaderParameter(disposition, null);
413            dispositionFilename = MimeUtility.getHeaderParameter(disposition, "filename");
414        }
415        // An attachment filename can be defined in either the Content-Disposition header
416        // or the Content-Type header. Content-Disposition is preferred, so we only try
417        // the Content-Type header as a last resort.
418        if (dispositionFilename == null) {
419            String contentType = part.getContentType();
420            dispositionFilename = MimeUtility.getHeaderParameter(contentType, "name");
421        }
422        boolean attachmentDisposition = "attachment".equalsIgnoreCase(dispositionType);
423        // If a disposition is not specified, default to "inline"
424        boolean inlineDisposition = dispositionType == null
425                || "inline".equalsIgnoreCase(dispositionType);
426
427        // A guess that this part is intended to be an attachment
428        boolean attachment = attachmentDisposition
429                || (dispositionFilename != null && !inlineDisposition);
430
431        // A guess that this part is intended to be an inline.
432        boolean inline = inlineDisposition && (dispositionFilename != null);
433
434        // One or the other
435        boolean attachmentOrInline = attachment || inline;
436
437        if (part.getBody() instanceof Multipart) {
438            // If the part is Multipart but not alternative it's either mixed or
439            // something we don't know about, which means we treat it as mixed
440            // per the spec. We just process its pieces recursively.
441            MimeMultipart mp = (MimeMultipart)part.getBody();
442            boolean foundHtml = false;
443            if (mp.getSubTypeForTest().equals("alternative")) {
444                for (int i = 0; i < mp.getCount(); i++) {
445                    if (mp.getBodyPart(i).isMimeType("text/html")) {
446                        foundHtml = true;
447                        break;
448                    }
449                }
450            }
451            for (int i = 0; i < mp.getCount(); i++) {
452                // See if we have text and html
453                BodyPart bp = mp.getBodyPart(i);
454                // If there's html, don't bother loading text
455                if (foundHtml && bp.isMimeType("text/plain")) {
456                    continue;
457                }
458                collectParts(bp, viewables, attachments);
459            }
460        } else if (part.getBody() instanceof Message) {
461            // If the part is an embedded message we just continue to process
462            // it, pulling any viewables or attachments into the running list.
463            Message message = (Message)part.getBody();
464            collectParts(message, viewables, attachments);
465        } else if ((!attachmentOrInline) && ("text/html".equalsIgnoreCase(part.getMimeType()))) {
466            // If the part is HTML and we got this far, it's a viewable part of a mixed
467            viewables.add(part);
468        } else if ((!attachmentOrInline) && ("text/plain".equalsIgnoreCase(part.getMimeType()))) {
469            // If the part is text and we got this far, it's a viewable part of a mixed
470            viewables.add(part);
471        } else if (attachmentOrInline) {
472            // Finally, if it's an attachment or an inline we will include it as an attachment.
473            attachments.add(part);
474        }
475    }
476}
477