/* * Copyright (C) 2008 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.android.emailcommon.internet; import android.util.Base64; import android.util.Base64DataException; import android.util.Base64InputStream; import android.util.Log; import com.android.emailcommon.Logging; import com.android.emailcommon.mail.Body; import com.android.emailcommon.mail.BodyPart; import com.android.emailcommon.mail.Message; import com.android.emailcommon.mail.MessagingException; import com.android.emailcommon.mail.Multipart; import com.android.emailcommon.mail.Part; import org.apache.commons.io.IOUtils; import org.apache.james.mime4j.codec.EncoderUtil; import org.apache.james.mime4j.decoder.DecoderUtil; import org.apache.james.mime4j.decoder.QuotedPrintableInputStream; import org.apache.james.mime4j.util.CharsetUtil; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.util.ArrayList; import java.util.regex.Matcher; import java.util.regex.Pattern; public class MimeUtility { public static final String MIME_TYPE_RFC822 = "message/rfc822"; private final static Pattern PATTERN_CR_OR_LF = Pattern.compile("\r|\n"); /** * Replace sequences of CRLF+WSP with WSP. Tries to preserve original string * object whenever possible. */ public static String unfold(String s) { if (s == null) { return null; } Matcher patternMatcher = PATTERN_CR_OR_LF.matcher(s); if (patternMatcher.find()) { patternMatcher.reset(); s = patternMatcher.replaceAll(""); } return s; } public static String decode(String s) { if (s == null) { return null; } return DecoderUtil.decodeEncodedWords(s); } public static String unfoldAndDecode(String s) { return decode(unfold(s)); } // TODO implement proper foldAndEncode // NOTE: When this really works, we *must* remove all calls to foldAndEncode2() to prevent // duplication of encoding. public static String foldAndEncode(String s) { return s; } /** * INTERIM version of foldAndEncode that will be used only by Subject: headers. * This is safer than implementing foldAndEncode() (see above) and risking unknown damage * to other headers. * * TODO: Copy this code to foldAndEncode(), get rid of this function, confirm all working OK. * * @param s original string to encode and fold * @param usedCharacters number of characters already used up by header name * @return the String ready to be transmitted */ public static String foldAndEncode2(String s, int usedCharacters) { // james.mime4j.codec.EncoderUtil.java // encode: encodeIfNecessary(text, usage, numUsedInHeaderName) // Usage.TEXT_TOKENlooks like the right thing for subjects // use WORD_ENTITY for address/names String encoded = EncoderUtil.encodeIfNecessary(s, EncoderUtil.Usage.TEXT_TOKEN, usedCharacters); return fold(encoded, usedCharacters); } /** * INTERIM: From newer version of org.apache.james (but we don't want to import * the entire MimeUtil class). * * Splits the specified string into a multiple-line representation with * lines no longer than 76 characters (because the line might contain * encoded words; see RFC * 2047 section 2). If the string contains non-whitespace sequences * longer than 76 characters a line break is inserted at the whitespace * character following the sequence resulting in a line longer than 76 * characters. * * @param s * string to split. * @param usedCharacters * number of characters already used up. Usually the number of * characters for header field name plus colon and one space. * @return a multiple-line representation of the given string. */ public static String fold(String s, int usedCharacters) { final int maxCharacters = 76; final int length = s.length(); if (usedCharacters + length <= maxCharacters) return s; StringBuilder sb = new StringBuilder(); int lastLineBreak = -usedCharacters; int wspIdx = indexOfWsp(s, 0); while (true) { if (wspIdx == length) { sb.append(s.substring(Math.max(0, lastLineBreak))); return sb.toString(); } int nextWspIdx = indexOfWsp(s, wspIdx + 1); if (nextWspIdx - lastLineBreak > maxCharacters) { sb.append(s.substring(Math.max(0, lastLineBreak), wspIdx)); sb.append("\r\n"); lastLineBreak = wspIdx; } wspIdx = nextWspIdx; } } /** * INTERIM: From newer version of org.apache.james (but we don't want to import * the entire MimeUtil class). * * Search for whitespace. */ private static int indexOfWsp(String s, int fromIndex) { final int len = s.length(); for (int index = fromIndex; index < len; index++) { char c = s.charAt(index); if (c == ' ' || c == '\t') return index; } return len; } /** * Returns the named parameter of a header field. If name is null the first * parameter is returned, or if there are no additional parameters in the * field the entire field is returned. Otherwise the named parameter is * searched for in a case insensitive fashion and returned. If the parameter * cannot be found the method returns null. * * TODO: quite inefficient with the inner trimming & splitting. * TODO: Also has a latent bug: uses "startsWith" to match the name, which can false-positive. * TODO: The doc says that for a null name you get the first param, but you get the header. * Should probably just fix the doc, but if other code assumes that behavior, fix the code. * TODO: Need to decode %-escaped strings, as in: filename="ab%22d". * ('+' -> ' ' conversion too? check RFC) * * @param header * @param name * @return the entire header (if name=null), the found parameter, or null */ public static String getHeaderParameter(String header, String name) { if (header == null) { return null; } String[] parts = unfold(header).split(";"); if (name == null) { return parts[0].trim(); } String lowerCaseName = name.toLowerCase(); for (String part : parts) { if (part.trim().toLowerCase().startsWith(lowerCaseName)) { String[] parameterParts = part.split("=", 2); if (parameterParts.length < 2) { return null; } String parameter = parameterParts[1].trim(); if (parameter.startsWith("\"") && parameter.endsWith("\"")) { return parameter.substring(1, parameter.length() - 1); } else { return parameter; } } } return null; } public static Part findFirstPartByMimeType(Part part, String mimeType) throws MessagingException { if (part.getBody() instanceof Multipart) { Multipart multipart = (Multipart)part.getBody(); for (int i = 0, count = multipart.getCount(); i < count; i++) { BodyPart bodyPart = multipart.getBodyPart(i); Part ret = findFirstPartByMimeType(bodyPart, mimeType); if (ret != null) { return ret; } } } else if (part.getMimeType().equalsIgnoreCase(mimeType)) { return part; } return null; } public static Part findPartByContentId(Part part, String contentId) throws Exception { if (part.getBody() instanceof Multipart) { Multipart multipart = (Multipart)part.getBody(); for (int i = 0, count = multipart.getCount(); i < count; i++) { BodyPart bodyPart = multipart.getBodyPart(i); Part ret = findPartByContentId(bodyPart, contentId); if (ret != null) { return ret; } } } String cid = part.getContentId(); if (contentId.equals(cid)) { return part; } return null; } /** * Reads the Part's body and returns a String based on any charset conversion that needed * to be done. * @param part The part containing a body * @return a String containing the converted text in the body, or null if there was no text * or an error during conversion. */ public static String getTextFromPart(Part part) { try { if (part != null && part.getBody() != null) { InputStream in = part.getBody().getInputStream(); String mimeType = part.getMimeType(); if (mimeType != null && MimeUtility.mimeTypeMatches(mimeType, "text/*")) { /* * Now we read the part into a buffer for further processing. Because * the stream is now wrapped we'll remove any transfer encoding at this point. */ ByteArrayOutputStream out = new ByteArrayOutputStream(); IOUtils.copy(in, out); in.close(); in = null; // we want all of our memory back, and close might not release /* * We've got a text part, so let's see if it needs to be processed further. */ String charset = getHeaderParameter(part.getContentType(), "charset"); if (charset != null) { /* * See if there is conversion from the MIME charset to the Java one. */ charset = CharsetUtil.toJavaCharset(charset); } /* * No encoding, so use us-ascii, which is the standard. */ if (charset == null) { charset = "ASCII"; } /* * Convert and return as new String */ String result = out.toString(charset); out.close(); return result; } } } catch (OutOfMemoryError oom) { /* * If we are not able to process the body there's nothing we can do about it. Return * null and let the upper layers handle the missing content. */ Log.e(Logging.LOG_TAG, "Unable to getTextFromPart " + oom.toString()); } catch (Exception e) { /* * If we are not able to process the body there's nothing we can do about it. Return * null and let the upper layers handle the missing content. */ Log.e(Logging.LOG_TAG, "Unable to getTextFromPart " + e.toString()); } return null; } /** * Returns true if the given mimeType matches the matchAgainst specification. The comparison * ignores case and the matchAgainst string may include "*" for a wildcard (e.g. "image/*"). * * @param mimeType A MIME type to check. * @param matchAgainst A MIME type to check against. May include wildcards. * @return true if the mimeType matches */ public static boolean mimeTypeMatches(String mimeType, String matchAgainst) { Pattern p = Pattern.compile(matchAgainst.replaceAll("\\*", "\\.\\*"), Pattern.CASE_INSENSITIVE); return p.matcher(mimeType).matches(); } /** * Returns true if the given mimeType matches any of the matchAgainst specifications. The * comparison ignores case and the matchAgainst strings may include "*" for a wildcard * (e.g. "image/*"). * * @param mimeType A MIME type to check. * @param matchAgainst An array of MIME types to check against. May include wildcards. * @return true if the mimeType matches any of the matchAgainst strings */ public static boolean mimeTypeMatches(String mimeType, String[] matchAgainst) { for (String matchType : matchAgainst) { if (mimeTypeMatches(mimeType, matchType)) { return true; } } return false; } /** * Given an input stream and a transfer encoding, return a wrapped input stream for that * encoding (or the original if none is required) * @param in the input stream * @param contentTransferEncoding the content transfer encoding * @return a properly wrapped stream */ public static InputStream getInputStreamForContentTransferEncoding(InputStream in, String contentTransferEncoding) { if (contentTransferEncoding != null) { contentTransferEncoding = MimeUtility.getHeaderParameter(contentTransferEncoding, null); if ("quoted-printable".equalsIgnoreCase(contentTransferEncoding)) { in = new QuotedPrintableInputStream(in); } else if ("base64".equalsIgnoreCase(contentTransferEncoding)) { in = new Base64InputStream(in, Base64.DEFAULT); } } return in; } /** * Removes any content transfer encoding from the stream and returns a Body. */ public static Body decodeBody(InputStream in, String contentTransferEncoding) throws IOException { /* * We'll remove any transfer encoding by wrapping the stream. */ in = getInputStreamForContentTransferEncoding(in, contentTransferEncoding); BinaryTempFileBody tempBody = new BinaryTempFileBody(); OutputStream out = tempBody.getOutputStream(); try { IOUtils.copy(in, out); } catch (Base64DataException bde) { // TODO Need to fix this somehow //String warning = "\n\n" + Email.getMessageDecodeErrorString(); //out.write(warning.getBytes()); } finally { out.close(); } return tempBody; } /** * Recursively scan a Part (usually a Message) and sort out which of its children will be * "viewable" and which will be attachments. * * @param part The part to be broken down * @param viewables This arraylist will be populated with all parts that appear to be * the "message" (e.g. text/plain & text/html) * @param attachments This arraylist will be populated with all parts that appear to be * attachments (including inlines) * @throws MessagingException */ public static void collectParts(Part part, ArrayList viewables, ArrayList attachments) throws MessagingException { String disposition = part.getDisposition(); String dispositionType = null; String dispositionFilename = null; if (disposition != null) { dispositionType = MimeUtility.getHeaderParameter(disposition, null); dispositionFilename = MimeUtility.getHeaderParameter(disposition, "filename"); } // An attachment filename can be defined in either the Content-Disposition header // or the Content-Type header. Content-Disposition is preferred, so we only try // the Content-Type header as a last resort. if (dispositionFilename == null) { String contentType = part.getContentType(); dispositionFilename = MimeUtility.getHeaderParameter(contentType, "name"); } boolean attachmentDisposition = "attachment".equalsIgnoreCase(dispositionType); // If a disposition is not specified, default to "inline" boolean inlineDisposition = dispositionType == null || "inline".equalsIgnoreCase(dispositionType); // A guess that this part is intended to be an attachment boolean attachment = attachmentDisposition || (dispositionFilename != null && !inlineDisposition); // A guess that this part is intended to be an inline. boolean inline = inlineDisposition && (dispositionFilename != null); // One or the other boolean attachmentOrInline = attachment || inline; if (part.getBody() instanceof Multipart) { // If the part is Multipart but not alternative it's either mixed or // something we don't know about, which means we treat it as mixed // per the spec. We just process its pieces recursively. MimeMultipart mp = (MimeMultipart)part.getBody(); boolean foundHtml = false; if (mp.getSubTypeForTest().equals("alternative")) { for (int i = 0; i < mp.getCount(); i++) { if (mp.getBodyPart(i).isMimeType("text/html")) { foundHtml = true; break; } } } for (int i = 0; i < mp.getCount(); i++) { // See if we have text and html BodyPart bp = mp.getBodyPart(i); // If there's html, don't bother loading text if (foundHtml && bp.isMimeType("text/plain")) { continue; } collectParts(bp, viewables, attachments); } } else if (part.getBody() instanceof Message) { // If the part is an embedded message we just continue to process // it, pulling any viewables or attachments into the running list. Message message = (Message)part.getBody(); collectParts(message, viewables, attachments); } else if ((!attachmentOrInline) && ("text/html".equalsIgnoreCase(part.getMimeType()))) { // If the part is HTML and we got this far, it's a viewable part of a mixed viewables.add(part); } else if ((!attachmentOrInline) && ("text/plain".equalsIgnoreCase(part.getMimeType()))) { // If the part is text and we got this far, it's a viewable part of a mixed viewables.add(part); } else if (attachmentOrInline) { // Finally, if it's an attachment or an inline we will include it as an attachment. attachments.add(part); } } }