QuotedPrintableInputStream.java revision 96c5af40d639d629267794f4f0338a267ff94ce5
1/****************************************************************
2 * Licensed to the Apache Software Foundation (ASF) under one   *
3 * or more contributor license agreements.  See the NOTICE file *
4 * distributed with this work for additional information        *
5 * regarding copyright ownership.  The ASF licenses this file   *
6 * to you under the Apache License, Version 2.0 (the            *
7 * "License"); you may not use this file except in compliance   *
8 * with the License.  You may obtain a copy of the License at   *
9 *                                                              *
10 *   http://www.apache.org/licenses/LICENSE-2.0                 *
11 *                                                              *
12 * Unless required by applicable law or agreed to in writing,   *
13 * software distributed under the License is distributed on an  *
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
15 * KIND, either express or implied.  See the License for the    *
16 * specific language governing permissions and limitations      *
17 * under the License.                                           *
18 ****************************************************************/
19
20package org.apache.james.mime4j.decoder;
21
22import java.io.IOException;
23import java.io.InputStream;
24
25import org.apache.commons.logging.Log;
26import org.apache.commons.logging.LogFactory;
27
28/**
29 * Performs Quoted-Printable decoding on an underlying stream.
30 *
31 *
32 *
33 * @version $Id: QuotedPrintableInputStream.java,v 1.3 2004/11/29 13:15:47 ntherning Exp $
34 */
35public class QuotedPrintableInputStream extends InputStream {
36    private static Log log = LogFactory.getLog(QuotedPrintableInputStream.class);
37
38    private InputStream stream;
39    ByteQueue byteq = new ByteQueue();
40    ByteQueue pushbackq = new ByteQueue();
41    private byte state = 0;
42
43    public QuotedPrintableInputStream(InputStream stream) {
44        this.stream = stream;
45    }
46
47    /**
48     * Closes the underlying stream.
49     *
50     * @throws IOException on I/O errors.
51     */
52    public void close() throws IOException {
53        stream.close();
54    }
55
56    public int read() throws IOException {
57        fillBuffer();
58        if (byteq.count() == 0)
59            return -1;
60        else {
61            byte val = byteq.dequeue();
62            if (val >= 0)
63                return val;
64            else
65                return val & 0xFF;
66        }
67    }
68
69    /**
70     * Pulls bytes out of the underlying stream and places them in the
71     * pushback queue.  This is necessary (vs. reading from the
72     * underlying stream directly) to detect and filter out "transport
73     * padding" whitespace, i.e., all whitespace that appears immediately
74     * before a CRLF.
75     *
76     * @throws IOException Underlying stream threw IOException.
77     */
78    private void populatePushbackQueue() throws IOException {
79        //Debug.verify(pushbackq.count() == 0, "PopulatePushbackQueue called when pushback queue was not empty!");
80
81        if (pushbackq.count() != 0)
82            return;
83
84        while (true) {
85            int i = stream.read();
86            switch (i) {
87                case -1:
88                    // stream is done
89                    pushbackq.clear();  // discard any whitespace preceding EOF
90                    return;
91                case ' ':
92                case '\t':
93                    pushbackq.enqueue((byte)i);
94                    break;
95                case '\r':
96                case '\n':
97                    pushbackq.clear();  // discard any whitespace preceding EOL
98                    pushbackq.enqueue((byte)i);
99                    return;
100                default:
101                    pushbackq.enqueue((byte)i);
102                    return;
103            }
104        }
105    }
106
107    /**
108     * Causes the pushback queue to get populated if it is empty, then
109     * consumes and decodes bytes out of it until one or more bytes are
110     * in the byte queue.  This decoding step performs the actual QP
111     * decoding.
112     *
113     * @throws IOException Underlying stream threw IOException.
114     */
115    private void fillBuffer() throws IOException {
116        byte msdChar = 0;  // first digit of escaped num
117        while (byteq.count() == 0) {
118            if (pushbackq.count() == 0) {
119                populatePushbackQueue();
120                if (pushbackq.count() == 0)
121                    return;
122            }
123
124            byte b = (byte)pushbackq.dequeue();
125
126            switch (state) {
127                case 0:  // start state, no bytes pending
128                    if (b != '=') {
129                        byteq.enqueue(b);
130                        break;  // state remains 0
131                    } else {
132                        state = 1;
133                        break;
134                    }
135                case 1:  // encountered "=" so far
136                    if (b == '\r') {
137                        state = 2;
138                        break;
139                    } else if ((b >= '0' && b <= '9') || (b >= 'A' && b <= 'F') || (b >= 'a' && b <= 'f')) {
140                        state = 3;
141                        msdChar = b;  // save until next digit encountered
142                        break;
143                    } else if (b == '=') {
144                        /*
145                         * Special case when == is encountered.
146                         * Emit one = and stay in this state.
147                         */
148                        if (log.isWarnEnabled()) {
149                            log.warn("Malformed MIME; got ==");
150                        }
151                        byteq.enqueue((byte)'=');
152                        break;
153                    } else {
154                        if (log.isWarnEnabled()) {
155                            log.warn("Malformed MIME; expected \\r or "
156                                    + "[0-9A-Z], got " + b);
157                        }
158                        state = 0;
159                        byteq.enqueue((byte)'=');
160                        byteq.enqueue(b);
161                        break;
162                    }
163                case 2:  // encountered "=\r" so far
164                    if (b == '\n') {
165                        state = 0;
166                        break;
167                    } else {
168                        if (log.isWarnEnabled()) {
169                            log.warn("Malformed MIME; expected "
170                                    + (int)'\n' + ", got " + b);
171                        }
172                        state = 0;
173                        byteq.enqueue((byte)'=');
174                        byteq.enqueue((byte)'\r');
175                        byteq.enqueue(b);
176                        break;
177                    }
178                case 3:  // encountered =<digit> so far; expecting another <digit> to complete the octet
179                    if ((b >= '0' && b <= '9') || (b >= 'A' && b <= 'F') || (b >= 'a' && b <= 'f')) {
180                        byte msd = asciiCharToNumericValue(msdChar);
181                        byte low = asciiCharToNumericValue(b);
182                        state = 0;
183                        byteq.enqueue((byte)((msd << 4) | low));
184                        break;
185                    } else {
186                        if (log.isWarnEnabled()) {
187                            log.warn("Malformed MIME; expected "
188                                     + "[0-9A-Z], got " + b);
189                        }
190                        state = 0;
191                        byteq.enqueue((byte)'=');
192                        byteq.enqueue(msdChar);
193                        byteq.enqueue(b);
194                        break;
195                    }
196                default:  // should never happen
197                    log.error("Illegal state: " + state);
198                    state = 0;
199                    byteq.enqueue(b);
200                    break;
201            }
202        }
203    }
204
205    /**
206     * Converts '0' => 0, 'A' => 10, etc.
207     * @param c ASCII character value.
208     * @return Numeric value of hexadecimal character.
209     */
210    private byte asciiCharToNumericValue(byte c) {
211        if (c >= '0' && c <= '9') {
212            return (byte)(c - '0');
213        } else if (c >= 'A' && c <= 'Z') {
214            return (byte)(0xA + (c - 'A'));
215        } else if (c >= 'a' && c <= 'z') {
216            return (byte)(0xA + (c - 'a'));
217        } else {
218            /*
219             * This should never happen since all calls to this method
220             * are preceded by a check that c is in [0-9A-Za-z]
221             */
222            throw new IllegalArgumentException((char) c
223                    + " is not a hexadecimal digit");
224        }
225    }
226
227}
228