/****************************************************************
 * Licensed to the Apache Software Foundation (ASF) under one   *
 * or more contributor license agreements.  See the NOTICE file *
 * distributed with this work for additional information        *
 * regarding copyright ownership.  The ASF licenses this file   *
 * to you under the Apache License, Version 2.0 (the            *
 * "License"); you may not use this file except in compliance   *
 * with the License.  You may obtain a copy of the License at   *
 *                                                              *
 *   http://www.apache.org/licenses/LICENSE-2.0                 *
 *                                                              *
 * Unless required by applicable law or agreed to in writing,   *
 * software distributed under the License is distributed on an  *
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
 * KIND, either express or implied.  See the License for the    *
 * specific language governing permissions and limitations      *
 * under the License.                                           *
 ****************************************************************/
19bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook
20bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrookpackage org.apache.james.mime4j;
21bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook
22bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrookimport com.android.mail.utils.LoggingInputStream;
23bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook
24bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrookimport org.apache.james.mime4j.decoder.Base64InputStream;
25bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrookimport org.apache.james.mime4j.decoder.QuotedPrintableInputStream;
26bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook
27bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrookimport java.io.IOException;
28bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrookimport java.io.InputStream;
29bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrookimport java.util.BitSet;
30bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrookimport java.util.LinkedList;
31bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook
32bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook/**
33bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * <p>
34bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * Parses MIME (or RFC822) message streams of bytes or characters and reports
35bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * parsing events to a <code>ContentHandler</code> instance.
36bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * </p>
37bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * <p>
38bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * Typical usage:<br/>
39bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * <pre>
40bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook *      ContentHandler handler = new MyHandler();
41bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook *      MimeStreamParser parser = new MimeStreamParser();
42bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook *      parser.setContentHandler(handler);
43bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook *      parser.parse(new BufferedInputStream(new FileInputStream("mime.msg")));
44bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * </pre>
45bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * <strong>NOTE:</strong> All lines must end with CRLF
46bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * (<code>\r\n</code>). If you are unsure of the line endings in your stream
47bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * you should wrap it in a {@link org.apache.james.mime4j.EOLConvertingInputStream} instance.
48bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook *
49bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook *
50bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook * @version $Id: MimeStreamParser.java,v 1.8 2005/02/11 10:12:02 ntherning Exp $
51bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook */
52bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrookpublic class MimeStreamParser {
53bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook    private static final Log log = LogFactory.getLog(MimeStreamParser.class);
54bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook
55bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook    private static final boolean DEBUG_LOG_MESSAGE = false; //DO NOT RELEASE AS 'TRUE'
56bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook
57bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook    private static BitSet fieldChars = null;
58bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook
59bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook    private RootInputStream rootStream = null;
60bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook    private LinkedList<BodyDescriptor> bodyDescriptors = new LinkedList<BodyDescriptor>();
61bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook    private ContentHandler handler = null;
62bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook    private boolean raw = false;
63bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook
64bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook    static {
65bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook        fieldChars = new BitSet();
66bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook        for (int i = 0x21; i <= 0x39; i++) {
67bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook            fieldChars.set(i);
68bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook        }
69bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook        for (int i = 0x3b; i <= 0x7e; i++) {
70bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook            fieldChars.set(i);
71bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook        }
72bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook    }
73bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook
74bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook    /**
75bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook     * Creates a new <code>MimeStreamParser</code> instance.
76bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook     */
77bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook    public MimeStreamParser() {
78bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook    }
79bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook
80bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook    /**
81bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook     * Parses a stream of bytes containing a MIME message.
82bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook     *
83bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook     * @param is the stream to parse.
84bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook     * @throws IOException on I/O errors.
85bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook     */
86bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook    public void parse(InputStream is) throws IOException {
87bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook        if (DEBUG_LOG_MESSAGE) {
88bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook            is = new LoggingInputStream(is, "MIME", true);
89bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook        }
90bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook        rootStream = new RootInputStream(is);
91bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook        parseMessage(rootStream);
92bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook    }
93bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook
94bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook    /**
95bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook     * Determines if this parser is currently in raw mode.
96bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook     *
97bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook     * @return <code>true</code> if in raw mode, <code>false</code>
98bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook     *         otherwise.
99bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook     * @see #setRaw(boolean)
100bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook     */
101bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook    public boolean isRaw() {
102bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook        return raw;
103bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook    }
104bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook
105bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook    /**
106bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook     * Enables or disables raw mode. In raw mode all future entities
107bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook     * (messages or body parts) in the stream will be reported to the
108bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook     * {@link ContentHandler#raw(InputStream)} handler method only.
109bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook     * The stream will contain the entire unparsed entity contents
110bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook     * including header fields and whatever is in the body.
111bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook     *
112bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook     * @param raw <code>true</code> enables raw mode, <code>false</code>
113bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook     *        disables it.
114bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook     */
115bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook    public void setRaw(boolean raw) {
116bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook        this.raw = raw;
117bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook    }
118bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook
119bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook    /**
120bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook     * Finishes the parsing and stops reading lines.
121bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook     * NOTE: No more lines will be parsed but the parser
122bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook     * will still call
123bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook     * {@link ContentHandler#endMultipart()},
124bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook     * {@link ContentHandler#endBodyPart()},
125bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook     * {@link ContentHandler#endMessage()}, etc to match previous calls
126bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook     * to
127bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook     * {@link ContentHandler#startMultipart(BodyDescriptor)},
128bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook     * {@link ContentHandler#startBodyPart()},
129bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook     * {@link ContentHandler#startMessage()}, etc.
130bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook     */
131bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook    public void stop() {
132bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook        rootStream.truncate();
133bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook    }
134bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook
135bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook    /**
136bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook     * Parses an entity which consists of a header followed by a body containing
137bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook     * arbitrary data, body parts or an embedded message.
138bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook     *
139bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook     * @param is the stream to parse.
140bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook     * @throws IOException on I/O errors.
141bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook     */
142bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook    private void parseEntity(InputStream is) throws IOException {
143bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook        BodyDescriptor bd = parseHeader(is);
144bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook
145bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook        if (bd.isMultipart()) {
146bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook            bodyDescriptors.addFirst(bd);
147bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook
148bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook            handler.startMultipart(bd);
149bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook
150bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook            MimeBoundaryInputStream tempIs =
151bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook                new MimeBoundaryInputStream(is, bd.getBoundary());
152bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook            handler.preamble(new CloseShieldInputStream(tempIs));
153bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook            tempIs.consume();
154bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook
155bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook            while (tempIs.hasMoreParts()) {
156bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook                tempIs = new MimeBoundaryInputStream(is, bd.getBoundary());
157bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook                parseBodyPart(tempIs);
158bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook                tempIs.consume();
159bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook                if (tempIs.parentEOF()) {
160bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook//                    if (log.isWarnEnabled()) {
161bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook//                        log.warn("Line " + rootStream.getLineNumber()
162bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook//                                + ": Body part ended prematurely. "
163bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook//                                + "Higher level boundary detected or "
164bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook//                                + "EOF reached.");
165bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook//                    }
166bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook                    break;
167bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook                }
168bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook            }
169bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook
170bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook            handler.epilogue(new CloseShieldInputStream(is));
171bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook
172bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook            handler.endMultipart();
173bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook
174bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook            bodyDescriptors.removeFirst();
175bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook
176bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook        } else if (bd.isMessage()) {
177bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook            if (bd.isBase64Encoded()) {
178bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook                log.warn("base64 encoded message/rfc822 detected");
179bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook                is = new EOLConvertingInputStream(
180bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook                        new Base64InputStream(is));
181bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook            } else if (bd.isQuotedPrintableEncoded()) {
182bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook                log.warn("quoted-printable encoded message/rfc822 detected");
183bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook                is = new EOLConvertingInputStream(
184bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook                        new QuotedPrintableInputStream(is));
185bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook            }
186bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook            bodyDescriptors.addFirst(bd);
187bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook            parseMessage(is);
188bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook            bodyDescriptors.removeFirst();
189bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook        } else {
190bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook            handler.body(bd, new CloseShieldInputStream(is));
191bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook        }
192bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook
193bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook        /*
194bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook         * Make sure the stream has been consumed.
195bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook         */
196bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook        while (is.read() != -1) {
197bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook        }
198bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook    }
199bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook
200bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook    private void parseMessage(InputStream is) throws IOException {
201bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook        if (raw) {
202bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook            handler.raw(new CloseShieldInputStream(is));
203bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook        } else {
204bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook            handler.startMessage();
205bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook            parseEntity(is);
206bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook            handler.endMessage();
207bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook        }
208bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook    }
209bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook
210bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook    private void parseBodyPart(InputStream is) throws IOException {
211bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook        if (raw) {
212bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook            handler.raw(new CloseShieldInputStream(is));
213bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook        } else {
214bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook            handler.startBodyPart();
215bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook            parseEntity(is);
216bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook            handler.endBodyPart();
217bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook        }
218bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook    }
219bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook
220bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook    /**
221bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook     * Parses a header.
222bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook     *
223bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook     * @param is the stream to parse.
224bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook     * @return a <code>BodyDescriptor</code> describing the body following
225bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook     *         the header.
226bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook     */
227bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook    private BodyDescriptor parseHeader(InputStream is) throws IOException {
228bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook        BodyDescriptor bd = new BodyDescriptor(bodyDescriptors.isEmpty()
229bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook                        ? null : (BodyDescriptor) bodyDescriptors.getFirst());
230bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook
231bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook        handler.startHeader();
232bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook
233bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook        int lineNumber = rootStream.getLineNumber();
234bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook
235bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook        StringBuffer sb = new StringBuffer();
236bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook        int curr = 0;
237bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook        int prev = 0;
238bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook        while ((curr = is.read()) != -1) {
239bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook            if (curr == '\n' && (prev == '\n' || prev == 0)) {
240bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook                /*
241bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook                 * [\r]\n[\r]\n or an immediate \r\n have been seen.
242bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook                 */
243bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook                sb.deleteCharAt(sb.length() - 1);
244bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook                break;
245bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook            }
246bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook            sb.append((char) curr);
247bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook            prev = curr == '\r' ? prev : curr;
248bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook        }
249bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook
250bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook//        if (curr == -1 && log.isWarnEnabled()) {
251bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook//            log.warn("Line " + rootStream.getLineNumber()
252bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook//                    + ": Unexpected end of headers detected. "
253bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook//                    + "Boundary detected in header or EOF reached.");
254bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook//        }
255bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook
256bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook        int start = 0;
257bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook        int pos = 0;
258bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook        int startLineNumber = lineNumber;
259bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook        while (pos < sb.length()) {
260bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook            while (pos < sb.length() && sb.charAt(pos) != '\r') {
261bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook                pos++;
262bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook            }
263bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook            if (pos < sb.length() - 1 && sb.charAt(pos + 1) != '\n') {
264bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook                pos++;
265bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook                continue;
266bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook            }
267bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook
268bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook            if (pos >= sb.length() - 2 || fieldChars.get(sb.charAt(pos + 2))) {
269bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook
270bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook                /*
271bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook                 * field should be the complete field data excluding the
272bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook                 * trailing \r\n.
273bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook                 */
274bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook                String field = sb.substring(start, pos);
275bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook                start = pos + 2;
276bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook
277bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook                /*
278bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook                 * Check for a valid field.
279bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook                 */
280bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook                int index = field.indexOf(':');
281bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook                boolean valid = false;
282bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook                if (index != -1 && fieldChars.get(field.charAt(0))) {
283bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook                    valid = true;
284bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook                    String fieldName = field.substring(0, index).trim();
285bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook                    for (int i = 0; i < fieldName.length(); i++) {
286bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook                        if (!fieldChars.get(fieldName.charAt(i))) {
287bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook                            valid = false;
288bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook                            break;
289bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook                        }
290bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook                    }
291bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook
292bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook                    if (valid) {
293bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook                        handler.field(field);
294bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook                        bd.addField(fieldName, field.substring(index + 1));
295bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook                    }
296bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook                }
297bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook
298bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook                if (!valid && log.isWarnEnabled()) {
299bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook                    log.warn("Line " + startLineNumber
300bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook                            + ": Ignoring invalid field: '" + field.trim() + "'");
301bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook                }
302bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook
303bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook                startLineNumber = lineNumber;
304bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook            }
305bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook
306bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook            pos += 2;
307bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook            lineNumber++;
308bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook        }
309bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook
310bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook        handler.endHeader();
311bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook
312bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook        return bd;
313bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook    }
314bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook
315bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook    /**
316bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook     * Sets the <code>ContentHandler</code> to use when reporting
317bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook     * parsing events.
318bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook     *
319bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook     * @param h the <code>ContentHandler</code>.
320bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook     */
321bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook    public void setContentHandler(ContentHandler h) {
322bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook        this.handler = h;
323bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook    }
324bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook
325bc47398187c6ffd132435e51d8d61e6ec79a79dbPaul Westbrook}
326