/****************************************************************
 * Licensed to the Apache Software Foundation (ASF) under one   *
 * or more contributor license agreements.  See the NOTICE file *
 * distributed with this work for additional information        *
 * regarding copyright ownership.  The ASF licenses this file   *
 * to you under the Apache License, Version 2.0 (the            *
 * "License"); you may not use this file except in compliance   *
 * with the License.  You may obtain a copy of the License at   *
 *                                                              *
 *   http://www.apache.org/licenses/LICENSE-2.0                 *
 *                                                              *
 * Unless required by applicable law or agreed to in writing,   *
 * software distributed under the License is distributed on an  *
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
 * KIND, either express or implied.  See the License for the    *
 * specific language governing permissions and limitations      *
 * under the License.                                           *
 ****************************************************************/

package org.apache.james.mime4j;

import org.apache.james.mime4j.decoder.Base64InputStream;
import org.apache.james.mime4j.decoder.QuotedPrintableInputStream;

import java.io.IOException;
import java.io.InputStream;
import java.util.BitSet;
import java.util.LinkedList;

/**
 * <p>
 * Parses MIME (or RFC822) message streams of bytes or characters and reports
 * parsing events to a <code>ContentHandler</code> instance.
 * </p>
 * <p>
 * Typical usage:<br/>
 * <pre>
 *      ContentHandler handler = new MyHandler();
 *      MimeStreamParser parser = new MimeStreamParser();
 *      parser.setContentHandler(handler);
 *      parser.parse(new BufferedInputStream(new FileInputStream("mime.msg")));
 * </pre>
 * <strong>NOTE:</strong> All lines must end with CRLF
 * (<code>\r\n</code>). If you are unsure of the line endings in your stream
 * you should wrap it in a {@link org.apache.james.mime4j.EOLConvertingInputStream} instance.
 *
 *
 * @version $Id: MimeStreamParser.java,v 1.8 2005/02/11 10:12:02 ntherning Exp $
 */
public class MimeStreamParser {
    private static final Log log = LogFactory.getLog(MimeStreamParser.class);

    /**
     * Characters accepted in a header field name: 0x21-0x7e except
     * <code>':'</code> (0x3a).
     */
    private static final BitSet fieldChars = new BitSet();

    private RootInputStream rootStream = null;
    /** Stack of enclosing body descriptors; the innermost one is first. */
    private final LinkedList<BodyDescriptor> bodyDescriptors = new LinkedList<BodyDescriptor>();
    private ContentHandler handler = null;
    private boolean raw = false;
    private boolean prematureEof = false;

    static {
        for (int i = 0x21; i <= 0x39; i++) {
            fieldChars.set(i);
        }
        for (int i = 0x3b; i <= 0x7e; i++) {
            fieldChars.set(i);
        }
    }

    /**
     * Creates a new <code>MimeStreamParser</code> instance.
     */
    public MimeStreamParser() {
    }

    /**
     * Parses a stream of bytes containing a MIME message.
     *
     * @param is the stream to parse.
     * @throws IOException on I/O errors.
     */
    public void parse(InputStream is) throws IOException {
        // Start from a clean slate so that state left behind by an earlier
        // (possibly aborted) parse on this instance cannot leak into this one.
        bodyDescriptors.clear();
        prematureEof = false;

        rootStream = new RootInputStream(is);
        parseMessage(rootStream);
    }

    /**
     * Determines if this parser is currently in raw mode.
     *
     * @return <code>true</code> if in raw mode, <code>false</code>
     *         otherwise.
     * @see #setRaw(boolean)
     */
    public boolean isRaw() {
        return raw;
    }

    /**
     * Enables or disables raw mode. In raw mode all future entities
     * (messages or body parts) in the stream will be reported to the
     * {@link ContentHandler#raw(InputStream)} handler method only.
     * The stream will contain the entire unparsed entity contents
     * including header fields and whatever is in the body.
     *
     * @param raw <code>true</code> enables raw mode, <code>false</code>
     *        disables it.
     */
    public void setRaw(boolean raw) {
        this.raw = raw;
    }

    /**
     * Finishes the parsing and stops reading lines.
     * NOTE: No more lines will be parsed but the parser
     * will still call
     * {@link ContentHandler#endMultipart()},
     * {@link ContentHandler#endBodyPart()},
     * {@link ContentHandler#endMessage()}, etc to match previous calls
     * to
     * {@link ContentHandler#startMultipart(BodyDescriptor)},
     * {@link ContentHandler#startBodyPart()},
     * {@link ContentHandler#startMessage()}, etc.
     * <p>
     * Calling this method before {@link #parse(InputStream)} has been
     * invoked has no effect.
     * </p>
     */
    public void stop() {
        // rootStream is only assigned by parse(); there is nothing to truncate
        // before that.
        if (rootStream != null) {
            rootStream.truncate();
        }
    }

    /**
     * Parses an entity which consists of a header followed by a body containing
     * arbitrary data, body parts or an embedded message.
     *
     * @param is the stream to parse.
     * @throws IOException on I/O errors.
     */
    private void parseEntity(InputStream is) throws IOException {
        BodyDescriptor bd = parseHeader(is);

        if (bd.isMultipart()) {
            bodyDescriptors.addFirst(bd);

            handler.startMultipart(bd);

            MimeBoundaryInputStream tempIs =
                new MimeBoundaryInputStream(is, bd.getBoundary());
            handler.preamble(new CloseShieldInputStream(tempIs));
            tempIs.consume();

            while (tempIs.hasMoreParts()) {
                tempIs = new MimeBoundaryInputStream(is, bd.getBoundary());
                parseBodyPart(tempIs);
                tempIs.consume();
                if (tempIs.parentEOF()) {
                    prematureEof = true;
//                    if (log.isWarnEnabled()) {
//                        log.warn("Line " + rootStream.getLineNumber()
//                                + ": Body part ended prematurely. "
//                                + "Higher level boundary detected or "
//                                + "EOF reached.");
//                    }
                    break;
                }
            }

            handler.epilogue(new CloseShieldInputStream(is));

            handler.endMultipart();

            bodyDescriptors.removeFirst();

        } else if (bd.isMessage()) {
            if (bd.isBase64Encoded()) {
                log.warn("base64 encoded message/rfc822 detected");
                is = new EOLConvertingInputStream(
                        new Base64InputStream(is));
            } else if (bd.isQuotedPrintableEncoded()) {
                log.warn("quoted-printable encoded message/rfc822 detected");
                is = new EOLConvertingInputStream(
                        new QuotedPrintableInputStream(is));
            }
            bodyDescriptors.addFirst(bd);
            parseMessage(is);
            bodyDescriptors.removeFirst();
        } else {
            handler.body(bd, new CloseShieldInputStream(is));
        }

        /*
         * Make sure the stream has been consumed.
         */
        while (is.read() != -1) {
        }
    }

    /**
     * Parses a message. In raw mode the stream is handed unparsed to
     * {@link ContentHandler#raw(InputStream)}; otherwise the entity is parsed
     * between calls to {@link ContentHandler#startMessage()} and
     * {@link ContentHandler#endMessage()}.
     *
     * @param is the stream to parse.
     * @throws IOException on I/O errors.
     */
    private void parseMessage(InputStream is) throws IOException {
        if (raw) {
            handler.raw(new CloseShieldInputStream(is));
        } else {
            handler.startMessage();
            parseEntity(is);
            handler.endMessage();
        }
    }

    /**
     * Reports whether the most recent parse saw a multipart body part whose
     * boundary stream signalled that its parent stream had reached EOF.
     *
     * @return <code>true</code> if such a premature end was detected since
     *         the last call to {@link #parse(InputStream)},
     *         <code>false</code> otherwise.
     */
    public boolean getPrematureEof() {
        return prematureEof;
    }

    /**
     * Parses a body part. In raw mode the stream is handed unparsed to
     * {@link ContentHandler#raw(InputStream)}; otherwise the entity is parsed
     * between calls to {@link ContentHandler#startBodyPart()} and
     * {@link ContentHandler#endBodyPart()}.
     *
     * @param is the stream to parse.
     * @throws IOException on I/O errors.
     */
    private void parseBodyPart(InputStream is) throws IOException {
        if (raw) {
            handler.raw(new CloseShieldInputStream(is));
        } else {
            handler.startBodyPart();
            parseEntity(is);
            handler.endBodyPart();
        }
    }

    /**
     * Parses a header.
     *
     * @param is the stream to parse.
     * @return a <code>BodyDescriptor</code> describing the body following
     *         the header.
     * @throws IOException on I/O errors.
     */
    private BodyDescriptor parseHeader(InputStream is) throws IOException {
        BodyDescriptor bd = new BodyDescriptor(
                bodyDescriptors.isEmpty() ? null : bodyDescriptors.getFirst());

        handler.startHeader();

        // Captured before the header is read so that it refers to the header's
        // first line.
        int lineNumber = rootStream.getLineNumber();

        StringBuilder sb = readHeaderBlock(is);

        int start = 0;
        int pos = 0;
        int startLineNumber = lineNumber;
        while (pos < sb.length()) {
            while (pos < sb.length() && sb.charAt(pos) != '\r') {
                pos++;
            }
            if (pos < sb.length() - 1 && sb.charAt(pos + 1) != '\n') {
                // Lone \r inside a line; keep scanning.
                pos++;
                continue;
            }

            // A field ends here unless the next line is a folded continuation,
            // i.e. starts with something that is not a field-name character.
            if (pos >= sb.length() - 2 || fieldChars.get(sb.charAt(pos + 2))) {

                /*
                 * field should be the complete field data excluding the
                 * trailing \r\n.
                 */
                String field = sb.substring(start, pos);
                start = pos + 2;

                /*
                 * Check for a valid field.
                 */
                int index = field.indexOf(':');
                boolean valid = false;
                if (index != -1 && fieldChars.get(field.charAt(0))) {
                    valid = true;
                    String fieldName = field.substring(0, index).trim();
                    for (int i = 0; i < fieldName.length(); i++) {
                        if (!fieldChars.get(fieldName.charAt(i))) {
                            valid = false;
                            break;
                        }
                    }

                    if (valid) {
                        handler.field(field);
                        bd.addField(fieldName, field.substring(index + 1));
                    }
                }

                if (!valid && log.isWarnEnabled()) {
                    log.warn("Line " + startLineNumber
                            + ": Ignoring invalid field: '" + field.trim() + "'");
                }

                // The next field begins on the line after the one that just
                // ended (lineNumber is only advanced below).
                startLineNumber = lineNumber + 1;
            }

            pos += 2;
            lineNumber++;
        }

        handler.endHeader();

        return bd;
    }

    /**
     * Reads the raw header block from the stream, up to and including the
     * empty line that terminates it (or up to the end of the stream).
     *
     * @param is the stream to read from.
     * @return the header text without the terminating empty line.
     * @throws IOException on I/O errors.
     */
    private StringBuilder readHeaderBlock(InputStream is) throws IOException {
        StringBuilder sb = new StringBuilder();
        int curr;
        int prev = 0;
        while ((curr = is.read()) != -1) {
            if (curr == '\n' && (prev == '\n' || prev == 0)) {
                /*
                 * [\r]\n[\r]\n or an immediate \r\n have been seen.
                 * Drop the character preceding this \n. The buffer is empty
                 * when the stream starts with a bare \n, so guard the delete.
                 */
                if (sb.length() > 0) {
                    sb.deleteCharAt(sb.length() - 1);
                }
                break;
            }
            sb.append((char) curr);
            // \r is transparent here so that \r\n\r\n is seen as \n\n.
            prev = curr == '\r' ? prev : curr;
        }

//        if (curr == -1 && log.isWarnEnabled()) {
//            log.warn("Line " + rootStream.getLineNumber()
//                    + ": Unexpected end of headers detected. "
//                    + "Boundary detected in header or EOF reached.");
//        }

        return sb;
    }

    /**
     * Sets the <code>ContentHandler</code> to use when reporting
     * parsing events.
     *
     * @param h the <code>ContentHandler</code>.
     */
    public void setContentHandler(ContentHandler h) {
        this.handler = h;
    }

}