1/*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package libcore.io;
18
19import java.io.ByteArrayOutputStream;
20import java.io.Closeable;
21import java.io.EOFException;
22import java.io.InputStream;
23import java.io.IOException;
24import java.nio.charset.Charset;
25import java.nio.charset.StandardCharsets;
26
/**
 * Buffers input from an {@link InputStream} for reading lines.
 *
 * <p>For purposes of this class, a line ends with {@code "\n"} or {@code "\r\n"}. End of input is
 * reported by throwing {@code EOFException}. An unterminated line at end of input is invalid and
 * is discarded; the caller may use {@link #hasUnterminatedLine()} to detect it after catching the
 * {@code EOFException}.
 *
 * <p>This class is intended for reading input that strictly consists of lines, such as line-based
 * cache entries or a cache journal. Unlike {@link java.io.BufferedReader}, which in conjunction
 * with {@link java.io.InputStreamReader} provides similar functionality, this class uses
 * different end-of-input reporting and a more restrictive definition of a line.
 *
 * <p>This class supports only charsets that encode '\r' and '\n' as a single byte with value 13
 * and 10, respectively, and in which the representation of no other character contains these
 * values. The constructor currently checks that the charset is one of US-ASCII, UTF-8 and
 * ISO-8859-1. The default charset is US-ASCII.
 *
 * <p>{@link #readLine()} and {@link #close()} synchronize on the underlying stream;
 * {@link #hasUnterminatedLine()} performs no synchronization.
 */
public class StrictLineReader implements Closeable {
    private static final byte CR = (byte) '\r';
    private static final byte LF = (byte) '\n';

    /** Buffer size used by the constructors that do not take an explicit capacity. */
    private static final int DEFAULT_CAPACITY = 8192;

    private final InputStream in;
    private final Charset charset;

    /*
     * Buffered data is stored in {@code buf}. As long as no exception occurs, 0 <= pos <= end
     * and the data in the range [pos, end) is buffered for reading. At end of input, if there is
     * an unterminated line, we set end == -1, otherwise end == pos. If the underlying
     * {@code InputStream} throws an {@code IOException}, end may remain as either pos or -1.
     *
     * A null {@code buf} marks the reader as closed.
     */
    private byte[] buf;
    private int pos;
    private int end;

    /**
     * Constructs a new {@code StrictLineReader} with the default capacity and charset.
     *
     * @param in the {@code InputStream} to read data from.
     * @throws NullPointerException if {@code in} is null.
     */
    public StrictLineReader(InputStream in) {
        this(in, DEFAULT_CAPACITY);
    }

    /**
     * Constructs a new {@code StrictLineReader} with the specified capacity and the default
     * charset.
     *
     * @param in the {@code InputStream} to read data from.
     * @param capacity the capacity of the buffer.
     * @throws NullPointerException if {@code in} is null.
     * @throws IllegalArgumentException for negative or zero {@code capacity}.
     */
    public StrictLineReader(InputStream in, int capacity) {
        this(in, capacity, StandardCharsets.US_ASCII);
    }

    /**
     * Constructs a new {@code StrictLineReader} with the specified charset and the default
     * capacity.
     *
     * @param in the {@code InputStream} to read data from.
     * @param charset the charset used to decode data.
     *         Only US-ASCII, UTF-8 and ISO-8859-1 are supported.
     * @throws NullPointerException if {@code in} or {@code charset} is null.
     * @throws IllegalArgumentException if the specified charset is not supported.
     */
    public StrictLineReader(InputStream in, Charset charset) {
        this(in, DEFAULT_CAPACITY, charset);
    }

    /**
     * Constructs a new {@code StrictLineReader} with the specified capacity and charset.
     *
     * @param in the {@code InputStream} to read data from.
     * @param capacity the capacity of the buffer.
     * @param charset the charset used to decode data.
     *         Only US-ASCII, UTF-8 and ISO-8859-1 are supported.
     * @throws NullPointerException if {@code in} or {@code charset} is null.
     * @throws IllegalArgumentException if {@code capacity} is negative or zero
     *         or the specified charset is not supported.
     */
    public StrictLineReader(InputStream in, int capacity, Charset charset) {
        if (in == null) {
            throw new NullPointerException("in == null");
        } else if (charset == null) {
            throw new NullPointerException("charset == null");
        }
        // A zero-length buffer must be rejected as well: InputStream.read(buf, 0, 0) returns 0
        // rather than -1, so readLine() would never make progress nor observe end of input.
        if (capacity <= 0) {
            throw new IllegalArgumentException("capacity <= 0");
        }
        if (!(charset.equals(StandardCharsets.US_ASCII) || charset.equals(StandardCharsets.UTF_8)
                || charset.equals(StandardCharsets.ISO_8859_1))) {
            throw new IllegalArgumentException("Unsupported encoding");
        }

        this.in = in;
        this.charset = charset;
        buf = new byte[capacity];
    }

    /**
     * Closes the reader by closing the underlying {@code InputStream} and
     * marking this reader as closed. Calling this method on an already closed
     * reader has no effect.
     *
     * @throws IOException for errors when closing the underlying {@code InputStream}.
     */
    @Override
    public void close() throws IOException {
        synchronized (in) {
            if (buf != null) {
                // Drop the buffer first so the reader counts as closed even if in.close() throws.
                buf = null;
                in.close();
            }
        }
    }

    /**
     * Reads the next line. A line ends with {@code "\n"} or {@code "\r\n"},
     * this end of line marker is not included in the result.
     *
     * @return the next line from the input.
     * @throws IOException for underlying {@code InputStream} errors, or if this reader is closed.
     * @throws EOFException for the end of source stream.
     */
    public String readLine() throws IOException {
        synchronized (in) {
            if (buf == null) {
                throw new IOException("LineReader is closed");
            }

            // Read more data if we are at the end of the buffered data.
            // Though it's an error to read after an exception, we will let {@code fillBuf()}
            // throw again if that happens; thus we need to handle end == -1 as well as end == pos.
            if (pos >= end) {
                fillBuf();
            }
            // Fast path: the whole line is already in the buffer, decode it in place.
            for (int i = pos; i != end; ++i) {
                if (buf[i] == LF) {
                    // Strip a CR directly preceding the LF, if any.
                    int lineEnd = (i != pos && buf[i - 1] == CR) ? i - 1 : i;
                    String res = new String(buf, pos, lineEnd - pos, charset);
                    pos = i + 1;
                    return res;
                }
            }

            // Slow path: the line spans several buffer fills, so accumulate it.
            // Let's anticipate up to 80 characters on top of those already read.
            ByteArrayOutputStream out = new ByteArrayOutputStream(end - pos + 80) {
                @Override
                public String toString() {
                    // Inside this anonymous class, buf and count are the fields of
                    // ByteArrayOutputStream, not those of the enclosing reader.
                    // The LF itself is never written, so a trailing CR is the line terminator's.
                    int length = (count > 0 && buf[count - 1] == CR) ? count - 1 : count;
                    return new String(buf, 0, length, charset);
                }
            };

            while (true) {
                out.write(buf, pos, end - pos);
                // Mark unterminated line in case fillBuf throws EOFException or IOException.
                end = -1;
                fillBuf();
                // Try to find LF in the buffered data and return the line if successful.
                for (int i = pos; i != end; ++i) {
                    if (buf[i] == LF) {
                        if (i != pos) {
                            out.write(buf, pos, i - pos);
                        }
                        pos = i + 1;
                        return out.toString();
                    }
                }
            }
        }
    }

    /**
     * Read an {@code int} from a line containing its decimal representation.
     *
     * @return the value of the {@code int} from the next line.
     * @throws IOException for underlying {@code InputStream} errors or conversion error;
     *         for a conversion error the {@code NumberFormatException} is attached as the cause.
     * @throws EOFException for the end of source stream.
     */
    public int readInt() throws IOException {
        String intString = readLine();
        try {
            return Integer.parseInt(intString);
        } catch (NumberFormatException e) {
            // Keep the original exception as the cause so the parse failure stays diagnosable.
            throw new IOException("expected an int but was \"" + intString + "\"", e);
        }
    }

    /**
     * Check whether there was an unterminated line at end of input after the line reader reported
     * end-of-input with EOFException. The value is meaningless in any other situation.
     *
     * @return true if there was an unterminated line at end of input.
     */
    public boolean hasUnterminatedLine() {
        return end == -1;
    }

    /**
     * Reads new input data into the buffer. Call only with pos == end or end == -1,
     * depending on the desired outcome if the function throws.
     *
     * @throws IOException for underlying {@code InputStream} errors.
     * @throws EOFException for the end of source stream.
     */
    private void fillBuf() throws IOException {
        int result = in.read(buf, 0, buf.length);
        if (result == -1) {
            // pos and end are deliberately left untouched so the caller's marker survives.
            throw new EOFException();
        }
        pos = 0;
        end = result;
    }
}
243