1/*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16package com.android.voicemail.impl.mail.internet;
17
18import android.text.TextUtils;
19import android.util.Base64;
20import android.util.Base64DataException;
21import android.util.Base64InputStream;
22import com.android.voicemail.impl.VvmLog;
23import com.android.voicemail.impl.mail.Body;
24import com.android.voicemail.impl.mail.BodyPart;
25import com.android.voicemail.impl.mail.Message;
26import com.android.voicemail.impl.mail.MessagingException;
27import com.android.voicemail.impl.mail.Multipart;
28import com.android.voicemail.impl.mail.Part;
29import java.io.ByteArrayOutputStream;
30import java.io.IOException;
31import java.io.InputStream;
32import java.io.OutputStream;
33import java.util.ArrayList;
34import java.util.regex.Matcher;
35import java.util.regex.Pattern;
36import org.apache.commons.io.IOUtils;
37import org.apache.james.mime4j.codec.DecodeMonitor;
38import org.apache.james.mime4j.codec.DecoderUtil;
39import org.apache.james.mime4j.codec.EncoderUtil;
40import org.apache.james.mime4j.codec.QuotedPrintableInputStream;
41import org.apache.james.mime4j.util.CharsetUtil;
42
43public class MimeUtility {
44  private static final String LOG_TAG = "Email";
45
46  public static final String MIME_TYPE_RFC822 = "message/rfc822";
47  private static final Pattern PATTERN_CR_OR_LF = Pattern.compile("\r|\n");
48
49  /**
50   * Replace sequences of CRLF+WSP with WSP. Tries to preserve original string object whenever
51   * possible.
52   */
53  public static String unfold(String s) {
54    if (s == null) {
55      return null;
56    }
57    Matcher patternMatcher = PATTERN_CR_OR_LF.matcher(s);
58    if (patternMatcher.find()) {
59      patternMatcher.reset();
60      s = patternMatcher.replaceAll("");
61    }
62    return s;
63  }
64
65  public static String decode(String s) {
66    if (s == null) {
67      return null;
68    }
69    return DecoderUtil.decodeEncodedWords(s, DecodeMonitor.STRICT);
70  }
71
72  public static String unfoldAndDecode(String s) {
73    return decode(unfold(s));
74  }
75
76  // TODO implement proper foldAndEncode
77  // NOTE: When this really works, we *must* remove all calls to foldAndEncode2() to prevent
78  // duplication of encoding.
79  public static String foldAndEncode(String s) {
80    return s;
81  }
82
83  /**
84   * INTERIM version of foldAndEncode that will be used only by Subject: headers. This is safer than
85   * implementing foldAndEncode() (see above) and risking unknown damage to other headers.
86   *
87   * <p>TODO: Copy this code to foldAndEncode(), get rid of this function, confirm all working OK.
88   *
89   * @param s original string to encode and fold
90   * @param usedCharacters number of characters already used up by header name
91   * @return the String ready to be transmitted
92   */
93  public static String foldAndEncode2(String s, int usedCharacters) {
94    // james.mime4j.codec.EncoderUtil.java
95    // encode:  encodeIfNecessary(text, usage, numUsedInHeaderName)
96    // Usage.TEXT_TOKENlooks like the right thing for subjects
97    // use WORD_ENTITY for address/names
98
99    String encoded = EncoderUtil.encodeIfNecessary(s, EncoderUtil.Usage.TEXT_TOKEN, usedCharacters);
100
101    return fold(encoded, usedCharacters);
102  }
103
104  /**
105   * INTERIM: From newer version of org.apache.james (but we don't want to import the entire
106   * MimeUtil class).
107   *
108   * <p>Splits the specified string into a multiple-line representation with lines no longer than 76
109   * characters (because the line might contain encoded words; see <a
110   * href='http://www.faqs.org/rfcs/rfc2047.html'>RFC 2047</a> section 2). If the string contains
111   * non-whitespace sequences longer than 76 characters a line break is inserted at the whitespace
112   * character following the sequence resulting in a line longer than 76 characters.
113   *
114   * @param s string to split.
115   * @param usedCharacters number of characters already used up. Usually the number of characters
116   *     for header field name plus colon and one space.
117   * @return a multiple-line representation of the given string.
118   */
119  public static String fold(String s, int usedCharacters) {
120    final int maxCharacters = 76;
121
122    final int length = s.length();
123    if (usedCharacters + length <= maxCharacters) {
124      return s;
125    }
126
127    StringBuilder sb = new StringBuilder();
128
129    int lastLineBreak = -usedCharacters;
130    int wspIdx = indexOfWsp(s, 0);
131    while (true) {
132      if (wspIdx == length) {
133        sb.append(s.substring(Math.max(0, lastLineBreak)));
134        return sb.toString();
135      }
136
137      int nextWspIdx = indexOfWsp(s, wspIdx + 1);
138
139      if (nextWspIdx - lastLineBreak > maxCharacters) {
140        sb.append(s.substring(Math.max(0, lastLineBreak), wspIdx));
141        sb.append("\r\n");
142        lastLineBreak = wspIdx;
143      }
144
145      wspIdx = nextWspIdx;
146    }
147  }
148
149  /**
150   * INTERIM: From newer version of org.apache.james (but we don't want to import the entire
151   * MimeUtil class).
152   *
153   * <p>Search for whitespace.
154   */
155  private static int indexOfWsp(String s, int fromIndex) {
156    final int len = s.length();
157    for (int index = fromIndex; index < len; index++) {
158      char c = s.charAt(index);
159      if (c == ' ' || c == '\t') {
160        return index;
161      }
162    }
163    return len;
164  }
165
166  /**
167   * Returns the named parameter of a header field. If name is null the first parameter is returned,
168   * or if there are no additional parameters in the field the entire field is returned. Otherwise
169   * the named parameter is searched for in a case insensitive fashion and returned. If the
170   * parameter cannot be found the method returns null.
171   *
172   * <p>TODO: quite inefficient with the inner trimming & splitting. TODO: Also has a latent bug:
173   * uses "startsWith" to match the name, which can false-positive. TODO: The doc says that for a
174   * null name you get the first param, but you get the header. Should probably just fix the doc,
175   * but if other code assumes that behavior, fix the code. TODO: Need to decode %-escaped strings,
176   * as in: filename="ab%22d". ('+' -> ' ' conversion too? check RFC)
177   *
178   * @param header
179   * @param name
180   * @return the entire header (if name=null), the found parameter, or null
181   */
182  public static String getHeaderParameter(String header, String name) {
183    if (header == null) {
184      return null;
185    }
186    String[] parts = unfold(header).split(";");
187    if (name == null) {
188      return parts[0].trim();
189    }
190    String lowerCaseName = name.toLowerCase();
191    for (String part : parts) {
192      if (part.trim().toLowerCase().startsWith(lowerCaseName)) {
193        String[] parameterParts = part.split("=", 2);
194        if (parameterParts.length < 2) {
195          return null;
196        }
197        String parameter = parameterParts[1].trim();
198        if (parameter.startsWith("\"") && parameter.endsWith("\"")) {
199          return parameter.substring(1, parameter.length() - 1);
200        } else {
201          return parameter;
202        }
203      }
204    }
205    return null;
206  }
207
208  /**
209   * Reads the Part's body and returns a String based on any charset conversion that needed to be
210   * done.
211   *
212   * @param part The part containing a body
213   * @return a String containing the converted text in the body, or null if there was no text or an
214   *     error during conversion.
215   */
216  public static String getTextFromPart(Part part) {
217    try {
218      if (part != null && part.getBody() != null) {
219        InputStream in = part.getBody().getInputStream();
220        String mimeType = part.getMimeType();
221        if (mimeType != null && MimeUtility.mimeTypeMatches(mimeType, "text/*")) {
222          /*
223           * Now we read the part into a buffer for further processing. Because
224           * the stream is now wrapped we'll remove any transfer encoding at this point.
225           */
226          ByteArrayOutputStream out = new ByteArrayOutputStream();
227          IOUtils.copy(in, out);
228          in.close();
229          in = null; // we want all of our memory back, and close might not release
230
231          /*
232           * We've got a text part, so let's see if it needs to be processed further.
233           */
234          String charset = getHeaderParameter(part.getContentType(), "charset");
235          if (charset != null) {
236            /*
237             * See if there is conversion from the MIME charset to the Java one.
238             */
239            charset = CharsetUtil.lookup(charset).name();
240          }
241          /*
242           * No encoding, so use us-ascii, which is the standard.
243           */
244          if (charset == null) {
245            charset = "ASCII";
246          }
247          /*
248           * Convert and return as new String
249           */
250          String result = out.toString(charset);
251          out.close();
252          return result;
253        }
254      }
255
256    } catch (OutOfMemoryError oom) {
257      /*
258       * If we are not able to process the body there's nothing we can do about it. Return
259       * null and let the upper layers handle the missing content.
260       */
261      VvmLog.e(LOG_TAG, "Unable to getTextFromPart " + oom.toString());
262    } catch (Exception e) {
263      /*
264       * If we are not able to process the body there's nothing we can do about it. Return
265       * null and let the upper layers handle the missing content.
266       */
267      VvmLog.e(LOG_TAG, "Unable to getTextFromPart " + e.toString());
268    }
269    return null;
270  }
271
272  /**
273   * Returns true if the given mimeType matches the matchAgainst specification. The comparison
274   * ignores case and the matchAgainst string may include "*" for a wildcard (e.g. "image/*").
275   *
276   * @param mimeType A MIME type to check.
277   * @param matchAgainst A MIME type to check against. May include wildcards.
278   * @return true if the mimeType matches
279   */
280  public static boolean mimeTypeMatches(String mimeType, String matchAgainst) {
281    Pattern p = Pattern.compile(matchAgainst.replaceAll("\\*", "\\.\\*"), Pattern.CASE_INSENSITIVE);
282    return p.matcher(mimeType).matches();
283  }
284
285  /**
286   * Returns true if the given mimeType matches any of the matchAgainst specifications. The
287   * comparison ignores case and the matchAgainst strings may include "*" for a wildcard (e.g.
288   * "image/*").
289   *
290   * @param mimeType A MIME type to check.
291   * @param matchAgainst An array of MIME types to check against. May include wildcards.
292   * @return true if the mimeType matches any of the matchAgainst strings
293   */
294  public static boolean mimeTypeMatches(String mimeType, String[] matchAgainst) {
295    for (String matchType : matchAgainst) {
296      if (mimeTypeMatches(mimeType, matchType)) {
297        return true;
298      }
299    }
300    return false;
301  }
302
303  /**
304   * Given an input stream and a transfer encoding, return a wrapped input stream for that encoding
305   * (or the original if none is required)
306   *
307   * @param in the input stream
308   * @param contentTransferEncoding the content transfer encoding
309   * @return a properly wrapped stream
310   */
311  public static InputStream getInputStreamForContentTransferEncoding(
312      InputStream in, String contentTransferEncoding) {
313    if (contentTransferEncoding != null) {
314      contentTransferEncoding = MimeUtility.getHeaderParameter(contentTransferEncoding, null);
315      if ("quoted-printable".equalsIgnoreCase(contentTransferEncoding)) {
316        in = new QuotedPrintableInputStream(in);
317      } else if ("base64".equalsIgnoreCase(contentTransferEncoding)) {
318        in = new Base64InputStream(in, Base64.DEFAULT);
319      }
320    }
321    return in;
322  }
323
324  /** Removes any content transfer encoding from the stream and returns a Body. */
325  public static Body decodeBody(InputStream in, String contentTransferEncoding) throws IOException {
326    /*
327     * We'll remove any transfer encoding by wrapping the stream.
328     */
329    in = getInputStreamForContentTransferEncoding(in, contentTransferEncoding);
330    BinaryTempFileBody tempBody = new BinaryTempFileBody();
331    OutputStream out = tempBody.getOutputStream();
332    try {
333      IOUtils.copy(in, out);
334    } catch (Base64DataException bde) {
335      // TODO Need to fix this somehow
336      //String warning = "\n\n" + Email.getMessageDecodeErrorString();
337      //out.write(warning.getBytes());
338    } finally {
339      out.close();
340    }
341    return tempBody;
342  }
343
344  /**
345   * Recursively scan a Part (usually a Message) and sort out which of its children will be
346   * "viewable" and which will be attachments.
347   *
348   * @param part The part to be broken down
349   * @param viewables This arraylist will be populated with all parts that appear to be the
350   *     "message" (e.g. text/plain & text/html)
351   * @param attachments This arraylist will be populated with all parts that appear to be
352   *     attachments (including inlines)
353   * @throws MessagingException
354   */
355  public static void collectParts(Part part, ArrayList<Part> viewables, ArrayList<Part> attachments)
356      throws MessagingException {
357    String disposition = part.getDisposition();
358    String dispositionType = MimeUtility.getHeaderParameter(disposition, null);
359    // If a disposition is not specified, default to "inline"
360    boolean inline =
361        TextUtils.isEmpty(dispositionType) || "inline".equalsIgnoreCase(dispositionType);
362    // The lower-case mime type
363    String mimeType = part.getMimeType().toLowerCase();
364
365    if (part.getBody() instanceof Multipart) {
366      // If the part is Multipart but not alternative it's either mixed or
367      // something we don't know about, which means we treat it as mixed
368      // per the spec. We just process its pieces recursively.
369      MimeMultipart mp = (MimeMultipart) part.getBody();
370      boolean foundHtml = false;
371      if (mp.getSubTypeForTest().equals("alternative")) {
372        for (int i = 0; i < mp.getCount(); i++) {
373          if (mp.getBodyPart(i).isMimeType("text/html")) {
374            foundHtml = true;
375            break;
376          }
377        }
378      }
379      for (int i = 0; i < mp.getCount(); i++) {
380        // See if we have text and html
381        BodyPart bp = mp.getBodyPart(i);
382        // If there's html, don't bother loading text
383        if (foundHtml && bp.isMimeType("text/plain")) {
384          continue;
385        }
386        collectParts(bp, viewables, attachments);
387      }
388    } else if (part.getBody() instanceof Message) {
389      // If the part is an embedded message we just continue to process
390      // it, pulling any viewables or attachments into the running list.
391      Message message = (Message) part.getBody();
392      collectParts(message, viewables, attachments);
393    } else if (inline && (mimeType.startsWith("text") || (mimeType.startsWith("image")))) {
394      // We'll treat text and images as viewables
395      viewables.add(part);
396    } else {
397      // Everything else is an attachment.
398      attachments.add(part);
399    }
400  }
401}
402