/****************************************************************
 * Licensed to the Apache Software Foundation (ASF) under one   *
 * or more contributor license agreements.  See the NOTICE file *
 * distributed with this work for additional information        *
 * regarding copyright ownership.  The ASF licenses this file   *
 * to you under the Apache License, Version 2.0 (the            *
 * "License"); you may not use this file except in compliance   *
 * with the License.  You may obtain a copy of the License at   *
 *                                                              *
 *   http://www.apache.org/licenses/LICENSE-2.0                 *
 *                                                              *
 * Unless required by applicable law or agreed to in writing,   *
 * software distributed under the License is distributed on an  *
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
 * KIND, either express or implied.  See the License for the    *
 * specific language governing permissions and limitations      *
 * under the License.                                           *
 ****************************************************************/
194fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy
204fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedypackage org.apache.james.mime4j;
214fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy
224fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedyimport org.apache.james.mime4j.decoder.Base64InputStream;
234fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedyimport org.apache.james.mime4j.decoder.QuotedPrintableInputStream;
244fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy
254fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedyimport java.io.IOException;
264fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedyimport java.io.InputStream;
274fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedyimport java.util.BitSet;
284fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedyimport java.util.LinkedList;
294fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy
/**
 * <p>
 * Parses MIME (or RFC822) message streams of bytes or characters and reports
 * parsing events to a <code>ContentHandler</code> instance.
 * </p>
 * <p>
 * Typical usage:<br/>
 * <pre>
 *      ContentHandler handler = new MyHandler();
 *      MimeStreamParser parser = new MimeStreamParser();
 *      parser.setContentHandler(handler);
 *      parser.parse(new BufferedInputStream(new FileInputStream("mime.msg")));
 * </pre>
 * <strong>NOTE:</strong> All lines must end with CRLF
 * (<code>\r\n</code>). If you are unsure of the line endings in your stream
 * you should wrap it in a {@link org.apache.james.mime4j.EOLConvertingInputStream} instance.
 *
 *
 * @version $Id: MimeStreamParser.java,v 1.8 2005/02/11 10:12:02 ntherning Exp $
 */
504fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedypublic class MimeStreamParser {
514fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy    private static final Log log = LogFactory.getLog(MimeStreamParser.class);
524fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy
534fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy    private static BitSet fieldChars = null;
544fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy
554fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy    private RootInputStream rootStream = null;
564fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy    private LinkedList<BodyDescriptor> bodyDescriptors = new LinkedList<BodyDescriptor>();
574fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy    private ContentHandler handler = null;
584fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy    private boolean raw = false;
594fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy    private boolean prematureEof = false;
604fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy
614fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy    static {
624fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy        fieldChars = new BitSet();
634fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy        for (int i = 0x21; i <= 0x39; i++) {
644fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy            fieldChars.set(i);
654fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy        }
664fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy        for (int i = 0x3b; i <= 0x7e; i++) {
674fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy            fieldChars.set(i);
684fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy        }
694fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy    }
704fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy
714fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy    /**
724fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy     * Creates a new <code>MimeStreamParser</code> instance.
734fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy     */
744fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy    public MimeStreamParser() {
754fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy    }
764fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy
774fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy    /**
784fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy     * Parses a stream of bytes containing a MIME message.
794fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy     *
804fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy     * @param is the stream to parse.
814fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy     * @throws IOException on I/O errors.
824fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy     */
834fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy    public void parse(InputStream is) throws IOException {
844fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy        rootStream = new RootInputStream(is);
854fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy        parseMessage(rootStream);
864fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy    }
874fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy
884fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy    /**
894fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy     * Determines if this parser is currently in raw mode.
904fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy     *
914fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy     * @return <code>true</code> if in raw mode, <code>false</code>
924fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy     *         otherwise.
934fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy     * @see #setRaw(boolean)
944fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy     */
954fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy    public boolean isRaw() {
964fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy        return raw;
974fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy    }
984fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy
994fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy    /**
1004fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy     * Enables or disables raw mode. In raw mode all future entities
1014fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy     * (messages or body parts) in the stream will be reported to the
1024fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy     * {@link ContentHandler#raw(InputStream)} handler method only.
1034fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy     * The stream will contain the entire unparsed entity contents
1044fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy     * including header fields and whatever is in the body.
1054fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy     *
1064fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy     * @param raw <code>true</code> enables raw mode, <code>false</code>
1074fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy     *        disables it.
1084fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy     */
1094fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy    public void setRaw(boolean raw) {
1104fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy        this.raw = raw;
1114fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy    }
1124fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy
1134fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy    /**
1144fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy     * Finishes the parsing and stops reading lines.
1154fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy     * NOTE: No more lines will be parsed but the parser
1164fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy     * will still call
1174fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy     * {@link ContentHandler#endMultipart()},
1184fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy     * {@link ContentHandler#endBodyPart()},
1194fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy     * {@link ContentHandler#endMessage()}, etc to match previous calls
1204fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy     * to
1214fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy     * {@link ContentHandler#startMultipart(BodyDescriptor)},
1224fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy     * {@link ContentHandler#startBodyPart()},
1234fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy     * {@link ContentHandler#startMessage()}, etc.
1244fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy     */
1254fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy    public void stop() {
1264fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy        rootStream.truncate();
1274fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy    }
1284fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy
1294fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy    /**
1304fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy     * Parses an entity which consists of a header followed by a body containing
1314fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy     * arbitrary data, body parts or an embedded message.
1324fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy     *
1334fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy     * @param is the stream to parse.
1344fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy     * @throws IOException on I/O errors.
1354fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy     */
1364fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy    private void parseEntity(InputStream is) throws IOException {
1374fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy        BodyDescriptor bd = parseHeader(is);
1384fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy
1394fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy        if (bd.isMultipart()) {
1404fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy            bodyDescriptors.addFirst(bd);
1414fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy
1424fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy            handler.startMultipart(bd);
1434fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy
1444fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy            MimeBoundaryInputStream tempIs =
1454fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy                new MimeBoundaryInputStream(is, bd.getBoundary());
1464fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy            handler.preamble(new CloseShieldInputStream(tempIs));
1474fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy            tempIs.consume();
1484fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy
1494fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy            while (tempIs.hasMoreParts()) {
1504fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy                tempIs = new MimeBoundaryInputStream(is, bd.getBoundary());
1514fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy                parseBodyPart(tempIs);
1524fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy                tempIs.consume();
1534fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy                if (tempIs.parentEOF()) {
1544fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy                    prematureEof = true;
1554fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy//                    if (log.isWarnEnabled()) {
1564fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy//                        log.warn("Line " + rootStream.getLineNumber()
1574fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy//                                + ": Body part ended prematurely. "
1584fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy//                                + "Higher level boundary detected or "
1594fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy//                                + "EOF reached.");
1604fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy//                    }
1614fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy                    break;
1624fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy                }
1634fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy            }
1644fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy
1654fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy            handler.epilogue(new CloseShieldInputStream(is));
1664fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy
1674fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy            handler.endMultipart();
1684fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy
1694fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy            bodyDescriptors.removeFirst();
1704fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy
1714fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy        } else if (bd.isMessage()) {
1724fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy            if (bd.isBase64Encoded()) {
1734fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy                log.warn("base64 encoded message/rfc822 detected");
1744fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy                is = new EOLConvertingInputStream(
1754fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy                        new Base64InputStream(is));
1764fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy            } else if (bd.isQuotedPrintableEncoded()) {
1774fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy                log.warn("quoted-printable encoded message/rfc822 detected");
1784fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy                is = new EOLConvertingInputStream(
1794fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy                        new QuotedPrintableInputStream(is));
1804fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy            }
1814fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy            bodyDescriptors.addFirst(bd);
1824fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy            parseMessage(is);
1834fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy            bodyDescriptors.removeFirst();
1844fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy        } else {
1854fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy            handler.body(bd, new CloseShieldInputStream(is));
1864fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy        }
1874fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy
1884fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy        /*
1894fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy         * Make sure the stream has been consumed.
1904fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy         */
1914fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy        while (is.read() != -1) {
1924fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy        }
1934fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy    }
1944fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy
1954fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy    private void parseMessage(InputStream is) throws IOException {
1964fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy        if (raw) {
1974fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy            handler.raw(new CloseShieldInputStream(is));
1984fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy        } else {
1994fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy            handler.startMessage();
2004fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy            parseEntity(is);
2014fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy            handler.endMessage();
2024fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy        }
2034fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy    }
2044fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy
2054fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy    public boolean getPrematureEof() {
2064fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy        return prematureEof;
2074fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy    }
2084fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy
2094fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy    private void parseBodyPart(InputStream is) throws IOException {
2104fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy        if (raw) {
2114fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy            handler.raw(new CloseShieldInputStream(is));
2124fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy        } else {
2134fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy            handler.startBodyPart();
2144fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy            parseEntity(is);
2154fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy            handler.endBodyPart();
2164fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy        }
2174fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy    }
2184fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy
2194fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy    /**
2204fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy     * Parses a header.
2214fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy     *
2224fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy     * @param is the stream to parse.
2234fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy     * @return a <code>BodyDescriptor</code> describing the body following
2244fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy     *         the header.
2254fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy     */
2264fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy    private BodyDescriptor parseHeader(InputStream is) throws IOException {
2274fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy        BodyDescriptor bd = new BodyDescriptor(bodyDescriptors.isEmpty()
2284fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy                        ? null : (BodyDescriptor) bodyDescriptors.getFirst());
2294fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy
2304fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy        handler.startHeader();
2314fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy
2324fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy        int lineNumber = rootStream.getLineNumber();
2334fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy
2344fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy        StringBuffer sb = new StringBuffer();
2354fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy        int curr = 0;
2364fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy        int prev = 0;
2374fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy        while ((curr = is.read()) != -1) {
2384fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy            if (curr == '\n' && (prev == '\n' || prev == 0)) {
2394fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy                /*
2404fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy                 * [\r]\n[\r]\n or an immediate \r\n have been seen.
2414fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy                 */
2424fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy                sb.deleteCharAt(sb.length() - 1);
2434fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy                break;
2444fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy            }
2454fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy            sb.append((char) curr);
2464fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy            prev = curr == '\r' ? prev : curr;
2474fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy        }
2484fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy
2494fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy//        if (curr == -1 && log.isWarnEnabled()) {
2504fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy//            log.warn("Line " + rootStream.getLineNumber()
2514fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy//                    + ": Unexpected end of headers detected. "
2524fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy//                    + "Boundary detected in header or EOF reached.");
2534fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy//        }
2544fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy
2554fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy        int start = 0;
2564fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy        int pos = 0;
2574fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy        int startLineNumber = lineNumber;
2584fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy        while (pos < sb.length()) {
2594fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy            while (pos < sb.length() && sb.charAt(pos) != '\r') {
2604fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy                pos++;
2614fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy            }
2624fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy            if (pos < sb.length() - 1 && sb.charAt(pos + 1) != '\n') {
2634fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy                pos++;
2644fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy                continue;
2654fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy            }
2664fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy
2674fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy            if (pos >= sb.length() - 2 || fieldChars.get(sb.charAt(pos + 2))) {
2684fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy
2694fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy                /*
2704fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy                 * field should be the complete field data excluding the
2714fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy                 * trailing \r\n.
2724fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy                 */
2734fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy                String field = sb.substring(start, pos);
2744fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy                start = pos + 2;
2754fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy
2764fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy                /*
2774fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy                 * Check for a valid field.
2784fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy                 */
2794fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy                int index = field.indexOf(':');
2804fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy                boolean valid = false;
2814fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy                if (index != -1 && fieldChars.get(field.charAt(0))) {
2824fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy                    valid = true;
2834fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy                    String fieldName = field.substring(0, index).trim();
2844fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy                    for (int i = 0; i < fieldName.length(); i++) {
2854fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy                        if (!fieldChars.get(fieldName.charAt(i))) {
2864fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy                            valid = false;
2874fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy                            break;
2884fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy                        }
2894fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy                    }
2904fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy
2914fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy                    if (valid) {
2924fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy                        handler.field(field);
2934fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy                        bd.addField(fieldName, field.substring(index + 1));
2944fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy                    }
2954fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy                }
2964fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy
2974fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy                if (!valid && log.isWarnEnabled()) {
2984fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy                    log.warn("Line " + startLineNumber
2994fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy                            + ": Ignoring invalid field: '" + field.trim() + "'");
3004fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy                }
3014fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy
3024fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy                startLineNumber = lineNumber;
3034fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy            }
3044fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy
3054fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy            pos += 2;
3064fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy            lineNumber++;
3074fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy        }
3084fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy
3094fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy        handler.endHeader();
3104fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy
3114fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy        return bd;
3124fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy    }
3134fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy
3144fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy    /**
3154fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy     * Sets the <code>ContentHandler</code> to use when reporting
3164fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy     * parsing events.
3174fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy     *
3184fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy     * @param h the <code>ContentHandler</code>.
3194fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy     */
3204fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy    public void setContentHandler(ContentHandler h) {
3214fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy        this.handler = h;
3224fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy    }
3234fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy
3244fa0a3295bcacbdcd6a9e7709cf17aa5adb90356Scott Kennedy}
325