1bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook/**************************************************************** 2bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * Licensed to the Apache Software Foundation (ASF) under one * 3bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * or more contributor license agreements. See the NOTICE file * 4bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * distributed with this work for additional information * 5bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * regarding copyright ownership. The ASF licenses this file * 6bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * to you under the Apache License, Version 2.0 (the * 7bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * "License"); you may not use this file except in compliance * 8bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * with the License. You may obtain a copy of the License at * 9bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * * 10bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * http://www.apache.org/licenses/LICENSE-2.0 * 11bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * * 12bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * Unless required by applicable law or agreed to in writing, * 13bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * software distributed under the License is distributed on an * 14bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * 15bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * KIND, either express or implied. See the License for the * 16bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * specific language governing permissions and limitations * 17bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * under the License. * 18bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook ****************************************************************/ 19bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook 20bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrookpackage org.apache.james.mime4j; 21bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook 22bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrookimport com.android.mail.utils.LoggingInputStream; 23bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook 24bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrookimport org.apache.james.mime4j.decoder.Base64InputStream; 25bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrookimport org.apache.james.mime4j.decoder.QuotedPrintableInputStream; 26bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook 27bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrookimport java.io.IOException; 28bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrookimport java.io.InputStream; 29bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrookimport java.util.BitSet; 30bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrookimport java.util.LinkedList; 31bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook 32bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook/** 33bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * <p> 34bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * Parses MIME (or RFC822) message streams of bytes or characters and reports 35bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * parsing events to a <code>ContentHandler</code> instance. 36bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * </p> 37bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * <p> 38bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * Typical usage:<br/> 39bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * <pre> 40bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * ContentHandler handler = new MyHandler(); 41bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * MimeStreamParser parser = new MimeStreamParser(); 42bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * parser.setContentHandler(handler); 43bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * parser.parse(new BufferedInputStream(new FileInputStream("mime.msg"))); 44bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * </pre> 45bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * <strong>NOTE:</strong> All lines must end with CRLF 46bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * (<code>\r\n</code>). If you are unsure of the line endings in your stream 47bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * you should wrap it in a {@link org.apache.james.mime4j.EOLConvertingInputStream} instance. 48bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * 49bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * 50bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * @version $Id: MimeStreamParser.java,v 1.8 2005/02/11 10:12:02 ntherning Exp $ 51bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook */ 52bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrookpublic class MimeStreamParser { 53bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook private static final Log log = LogFactory.getLog(MimeStreamParser.class); 54bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook 55bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook private static final boolean DEBUG_LOG_MESSAGE = false; //DO NOT RELEASE AS 'TRUE' 56bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook 57bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook private static BitSet fieldChars = null; 58bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook 59bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook private RootInputStream rootStream = null; 60bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook private LinkedList<BodyDescriptor> bodyDescriptors = new LinkedList<BodyDescriptor>(); 61bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook private ContentHandler handler = null; 62bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook private boolean raw = false; 63bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook 64bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook static { 65bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook fieldChars = new BitSet(); 66bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook for (int i = 0x21; i <= 0x39; i++) { 67bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook fieldChars.set(i); 68bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook } 69bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook for (int i = 0x3b; i <= 0x7e; i++) { 70bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook fieldChars.set(i); 71bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook } 72bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook } 73bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook 74bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook /** 75bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * Creates a new <code>MimeStreamParser</code> instance. 76bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook */ 77bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook public MimeStreamParser() { 78bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook } 79bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook 80bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook /** 81bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * Parses a stream of bytes containing a MIME message. 82bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * 83bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * @param is the stream to parse. 84bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * @throws IOException on I/O errors. 85bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook */ 86bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook public void parse(InputStream is) throws IOException { 87bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook if (DEBUG_LOG_MESSAGE) { 88bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook is = new LoggingInputStream(is, "MIME", true); 89bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook } 90bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook rootStream = new RootInputStream(is); 91bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook parseMessage(rootStream); 92bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook } 93bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook 94bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook /** 95bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * Determines if this parser is currently in raw mode. 96bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * 97bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * @return <code>true</code> if in raw mode, <code>false</code> 98bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * otherwise. 99bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * @see #setRaw(boolean) 100bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook */ 101bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook public boolean isRaw() { 102bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook return raw; 103bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook } 104bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook 105bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook /** 106bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * Enables or disables raw mode. In raw mode all future entities 107bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * (messages or body parts) in the stream will be reported to the 108bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * {@link ContentHandler#raw(InputStream)} handler method only. 109bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * The stream will contain the entire unparsed entity contents 110bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * including header fields and whatever is in the body. 111bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * 112bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * @param raw <code>true</code> enables raw mode, <code>false</code> 113bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * disables it. 114bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook */ 115bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook public void setRaw(boolean raw) { 116bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook this.raw = raw; 117bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook } 118bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook 119bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook /** 120bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * Finishes the parsing and stops reading lines. 121bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * NOTE: No more lines will be parsed but the parser 122bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * will still call 123bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * {@link ContentHandler#endMultipart()}, 124bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * {@link ContentHandler#endBodyPart()}, 125bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * {@link ContentHandler#endMessage()}, etc to match previous calls 126bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * to 127bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * {@link ContentHandler#startMultipart(BodyDescriptor)}, 128bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * {@link ContentHandler#startBodyPart()}, 129bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * {@link ContentHandler#startMessage()}, etc. 130bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook */ 131bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook public void stop() { 132bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook rootStream.truncate(); 133bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook } 134bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook 135bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook /** 136bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * Parses an entity which consists of a header followed by a body containing 137bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * arbitrary data, body parts or an embedded message. 138bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * 139bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * @param is the stream to parse. 140bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * @throws IOException on I/O errors. 141bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook */ 142bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook private void parseEntity(InputStream is) throws IOException { 143bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook BodyDescriptor bd = parseHeader(is); 144bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook 145bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook if (bd.isMultipart()) { 146bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook bodyDescriptors.addFirst(bd); 147bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook 148bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook handler.startMultipart(bd); 149bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook 150bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook MimeBoundaryInputStream tempIs = 151bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook new MimeBoundaryInputStream(is, bd.getBoundary()); 152bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook handler.preamble(new CloseShieldInputStream(tempIs)); 153bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook tempIs.consume(); 154bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook 155bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook while (tempIs.hasMoreParts()) { 156bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook tempIs = new MimeBoundaryInputStream(is, bd.getBoundary()); 157bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook parseBodyPart(tempIs); 158bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook tempIs.consume(); 159bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook if (tempIs.parentEOF()) { 160bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook// if (log.isWarnEnabled()) { 161bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook// log.warn("Line " + rootStream.getLineNumber() 162bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook// + ": Body part ended prematurely. " 163bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook// + "Higher level boundary detected or " 164bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook// + "EOF reached."); 165bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook// } 166bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook break; 167bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook } 168bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook } 169bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook 170bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook handler.epilogue(new CloseShieldInputStream(is)); 171bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook 172bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook handler.endMultipart(); 173bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook 174bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook bodyDescriptors.removeFirst(); 175bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook 176bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook } else if (bd.isMessage()) { 177bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook if (bd.isBase64Encoded()) { 178bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook log.warn("base64 encoded message/rfc822 detected"); 179bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook is = new EOLConvertingInputStream( 180bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook new Base64InputStream(is)); 181bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook } else if (bd.isQuotedPrintableEncoded()) { 182bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook log.warn("quoted-printable encoded message/rfc822 detected"); 183bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook is = new EOLConvertingInputStream( 184bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook new QuotedPrintableInputStream(is)); 185bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook } 186bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook bodyDescriptors.addFirst(bd); 187bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook parseMessage(is); 188bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook bodyDescriptors.removeFirst(); 189bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook } else { 190bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook handler.body(bd, new CloseShieldInputStream(is)); 191bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook } 192bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook 193bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook /* 194bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * Make sure the stream has been consumed. 195bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook */ 196bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook while (is.read() != -1) { 197bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook } 198bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook } 199bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook 200bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook private void parseMessage(InputStream is) throws IOException { 201bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook if (raw) { 202bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook handler.raw(new CloseShieldInputStream(is)); 203bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook } else { 204bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook handler.startMessage(); 205bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook parseEntity(is); 206bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook handler.endMessage(); 207bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook } 208bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook } 209bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook 210bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook private void parseBodyPart(InputStream is) throws IOException { 211bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook if (raw) { 212bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook handler.raw(new CloseShieldInputStream(is)); 213bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook } else { 214bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook handler.startBodyPart(); 215bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook parseEntity(is); 216bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook handler.endBodyPart(); 217bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook } 218bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook } 219bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook 220bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook /** 221bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * Parses a header. 222bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * 223bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * @param is the stream to parse. 224bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * @return a <code>BodyDescriptor</code> describing the body following 225bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * the header. 226bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook */ 227bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook private BodyDescriptor parseHeader(InputStream is) throws IOException { 228bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook BodyDescriptor bd = new BodyDescriptor(bodyDescriptors.isEmpty() 229bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook ? null : (BodyDescriptor) bodyDescriptors.getFirst()); 230bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook 231bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook handler.startHeader(); 232bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook 233bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook int lineNumber = rootStream.getLineNumber(); 234bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook 235bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook StringBuffer sb = new StringBuffer(); 236bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook int curr = 0; 237bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook int prev = 0; 238bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook while ((curr = is.read()) != -1) { 239bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook if (curr == '\n' && (prev == '\n' || prev == 0)) { 240bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook /* 241bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * [\r]\n[\r]\n or an immediate \r\n have been seen. 242bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook */ 243bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook sb.deleteCharAt(sb.length() - 1); 244bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook break; 245bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook } 246bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook sb.append((char) curr); 247bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook prev = curr == '\r' ? prev : curr; 248bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook } 249bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook 250bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook// if (curr == -1 && log.isWarnEnabled()) { 251bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook// log.warn("Line " + rootStream.getLineNumber() 252bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook// + ": Unexpected end of headers detected. " 253bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook// + "Boundary detected in header or EOF reached."); 254bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook// } 255bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook 256bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook int start = 0; 257bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook int pos = 0; 258bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook int startLineNumber = lineNumber; 259bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook while (pos < sb.length()) { 260bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook while (pos < sb.length() && sb.charAt(pos) != '\r') { 261bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook pos++; 262bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook } 263bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook if (pos < sb.length() - 1 && sb.charAt(pos + 1) != '\n') { 264bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook pos++; 265bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook continue; 266bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook } 267bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook 268bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook if (pos >= sb.length() - 2 || fieldChars.get(sb.charAt(pos + 2))) { 269bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook 270bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook /* 271bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * field should be the complete field data excluding the 272bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * trailing \r\n. 273bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook */ 274bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook String field = sb.substring(start, pos); 275bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook start = pos + 2; 276bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook 277bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook /* 278bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * Check for a valid field. 279bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook */ 280bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook int index = field.indexOf(':'); 281bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook boolean valid = false; 282bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook if (index != -1 && fieldChars.get(field.charAt(0))) { 283bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook valid = true; 284bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook String fieldName = field.substring(0, index).trim(); 285bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook for (int i = 0; i < fieldName.length(); i++) { 286bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook if (!fieldChars.get(fieldName.charAt(i))) { 287bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook valid = false; 288bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook break; 289bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook } 290bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook } 291bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook 292bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook if (valid) { 293bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook handler.field(field); 294bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook bd.addField(fieldName, field.substring(index + 1)); 295bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook } 296bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook } 297bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook 298bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook if (!valid && log.isWarnEnabled()) { 299bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook log.warn("Line " + startLineNumber 300bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook + ": Ignoring invalid field: '" + field.trim() + "'"); 301bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook } 302bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook 303bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook startLineNumber = lineNumber; 304bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook } 305bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook 306bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook pos += 2; 307bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook lineNumber++; 308bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook } 309bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook 310bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook handler.endHeader(); 311bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook 312bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook return bd; 313bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook } 314bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook 315bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook /** 316bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * Sets the <code>ContentHandler</code> to use when reporting 317bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * parsing events. 318bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * 319bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * @param h the <code>ContentHandler</code>. 320bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook */ 321bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook public void setContentHandler(ContentHandler h) { 322bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook this.handler = h; 323bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook } 324bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook 325bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook} 326