1/*
2 *  Licensed to the Apache Software Foundation (ASF) under one or more
3 *  contributor license agreements.  See the NOTICE file distributed with
4 *  this work for additional information regarding copyright ownership.
5 *  The ASF licenses this file to You under the Apache License, Version 2.0
6 *  (the "License"); you may not use this file except in compliance with
7 *  the License.  You may obtain a copy of the License at
8 *
9 *     http://www.apache.org/licenses/LICENSE-2.0
10 *
11 *  Unless required by applicable law or agreed to in writing, software
12 *  distributed under the License is distributed on an "AS IS" BASIS,
13 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 *  See the License for the specific language governing permissions and
15 *  limitations under the License.
16 */
17
18package java.io;
19
20import java.nio.ByteBuffer;
21import java.nio.CharBuffer;
22import java.nio.charset.Charset;
23import java.nio.charset.CharsetDecoder;
24import java.nio.charset.CoderResult;
25import java.nio.charset.CodingErrorAction;
26import java.util.Arrays;
27
28/**
29 * A class for turning a byte stream into a character stream. Data read from the
30 * source input stream is converted into characters by either a default or a
31 * provided character converter. The default encoding is taken from the
32 * "file.encoding" system property. {@code InputStreamReader} contains a buffer
33 * of bytes read from the source stream and converts these into characters as
34 * needed. The buffer size is 8K.
35 *
36 * @see OutputStreamWriter
37 */
38public class InputStreamReader extends Reader {
39    private InputStream in;
40
41    private boolean endOfInput = false;
42
43    private CharsetDecoder decoder;
44
45    private final ByteBuffer bytes = ByteBuffer.allocate(8192);
46
47    /**
48     * Constructs a new {@code InputStreamReader} on the {@link InputStream}
49     * {@code in}. This constructor sets the character converter to the encoding
50     * specified in the "file.encoding" property and falls back to ISO 8859_1
51     * (ISO-Latin-1) if the property doesn't exist.
52     *
53     * @param in
54     *            the input stream from which to read characters.
55     */
56    public InputStreamReader(InputStream in) {
57        this(in, Charset.defaultCharset());
58    }
59
60    /**
61     * Constructs a new InputStreamReader on the InputStream {@code in}. The
62     * character converter that is used to decode bytes into characters is
63     * identified by name by {@code charsetName}. If the encoding cannot be found, an
64     * UnsupportedEncodingException error is thrown.
65     *
66     * @param in
67     *            the InputStream from which to read characters.
68     * @param charsetName
69     *            identifies the character converter to use.
70     * @throws NullPointerException
71     *             if {@code charsetName} is {@code null}.
72     * @throws UnsupportedEncodingException
73     *             if the encoding specified by {@code charsetName} cannot be found.
74     */
75    public InputStreamReader(InputStream in, final String charsetName)
76            throws UnsupportedEncodingException {
77        super(in);
78        if (charsetName == null) {
79            throw new NullPointerException("charsetName == null");
80        }
81        this.in = in;
82        try {
83            decoder = Charset.forName(charsetName).newDecoder().onMalformedInput(
84                    CodingErrorAction.REPLACE).onUnmappableCharacter(
85                    CodingErrorAction.REPLACE);
86        } catch (IllegalArgumentException e) {
87            throw (UnsupportedEncodingException)
88                    new UnsupportedEncodingException(charsetName).initCause(e);
89        }
90        bytes.limit(0);
91    }
92
93    /**
94     * Constructs a new InputStreamReader on the InputStream {@code in} and
95     * CharsetDecoder {@code dec}.
96     *
97     * @param in
98     *            the source InputStream from which to read characters.
99     * @param dec
100     *            the CharsetDecoder used by the character conversion.
101     */
102    public InputStreamReader(InputStream in, CharsetDecoder dec) {
103        super(in);
104        dec.averageCharsPerByte();
105        this.in = in;
106        decoder = dec;
107        bytes.limit(0);
108    }
109
110    /**
111     * Constructs a new InputStreamReader on the InputStream {@code in} and
112     * Charset {@code charset}.
113     *
114     * @param in
115     *            the source InputStream from which to read characters.
116     * @param charset
117     *            the Charset that defines the character converter
118     */
119    public InputStreamReader(InputStream in, Charset charset) {
120        super(in);
121        this.in = in;
122        decoder = charset.newDecoder().onMalformedInput(
123                CodingErrorAction.REPLACE).onUnmappableCharacter(
124                CodingErrorAction.REPLACE);
125        bytes.limit(0);
126    }
127
128    /**
129     * Closes this reader. This implementation closes the source InputStream and
130     * releases all local storage.
131     *
132     * @throws IOException
133     *             if an error occurs attempting to close this reader.
134     */
135    @Override
136    public void close() throws IOException {
137        synchronized (lock) {
138            if (decoder != null) {
139                decoder.reset();
140            }
141            decoder = null;
142            if (in != null) {
143                in.close();
144                in = null;
145            }
146        }
147    }
148
149    /**
150     * Returns the canonical name of the encoding used by this writer to convert characters to
151     * bytes, or null if this writer has been closed. Most callers should probably keep
152     * track of the String or Charset they passed in; this method may not return the same
153     * name.
154     */
155    public String getEncoding() {
156        if (!isOpen()) {
157            return null;
158        }
159        return decoder.charset().name();
160    }
161
162    /**
163     * Reads a single character from this reader and returns it as an integer
164     * with the two higher-order bytes set to 0. Returns -1 if the end of the
165     * reader has been reached. The byte value is either obtained from
166     * converting bytes in this reader's buffer or by first filling the buffer
167     * from the source InputStream and then reading from the buffer.
168     *
169     * @return the character read or -1 if the end of the reader has been
170     *         reached.
171     * @throws IOException
172     *             if this reader is closed or some other I/O error occurs.
173     */
174    @Override
175    public int read() throws IOException {
176        synchronized (lock) {
177            if (!isOpen()) {
178                throw new IOException("InputStreamReader is closed");
179            }
180            char[] buf = new char[1];
181            return read(buf, 0, 1) != -1 ? buf[0] : -1;
182        }
183    }
184
185    /**
186     * Reads up to {@code count} characters from this reader and stores them
187     * at position {@code offset} in the character array {@code buffer}. Returns
188     * the number of characters actually read or -1 if the end of the reader has
189     * been reached. The bytes are either obtained from converting bytes in this
190     * reader's buffer or by first filling the buffer from the source
191     * InputStream and then reading from the buffer.
192     *
193     * @throws IndexOutOfBoundsException
194     *     if {@code offset < 0 || count < 0 || offset + count > buffer.length}.
195     * @throws IOException
196     *             if this reader is closed or some other I/O error occurs.
197     */
198    @Override
199    public int read(char[] buffer, int offset, int count) throws IOException {
200        synchronized (lock) {
201            if (!isOpen()) {
202                throw new IOException("InputStreamReader is closed");
203            }
204
205            Arrays.checkOffsetAndCount(buffer.length, offset, count);
206            if (count == 0) {
207                return 0;
208            }
209
210            CharBuffer out = CharBuffer.wrap(buffer, offset, count);
211            CoderResult result = CoderResult.UNDERFLOW;
212
213            // bytes.remaining() indicates number of bytes in buffer
214            // when 1-st time entered, it'll be equal to zero
215            boolean needInput = !bytes.hasRemaining();
216
217            while (out.hasRemaining()) {
218                // fill the buffer if needed
219                if (needInput) {
220                    try {
221                        if (in.available() == 0 && out.position() > offset) {
222                            // we could return the result without blocking read
223                            break;
224                        }
225                    } catch (IOException e) {
226                        // available didn't work so just try the read
227                    }
228
229                    int desiredByteCount = bytes.capacity() - bytes.limit();
230                    int off = bytes.arrayOffset() + bytes.limit();
231                    int actualByteCount = in.read(bytes.array(), off, desiredByteCount);
232
233                    if (actualByteCount == -1) {
234                        endOfInput = true;
235                        break;
236                    } else if (actualByteCount == 0) {
237                        break;
238                    }
239                    bytes.limit(bytes.limit() + actualByteCount);
240                    needInput = false;
241                }
242
243                // decode bytes
244                result = decoder.decode(bytes, out, false);
245
246                if (result.isUnderflow()) {
247                    // compact the buffer if no space left
248                    if (bytes.limit() == bytes.capacity()) {
249                        bytes.compact();
250                        bytes.limit(bytes.position());
251                        bytes.position(0);
252                    }
253                    needInput = true;
254                } else {
255                    break;
256                }
257            }
258
259            if (result == CoderResult.UNDERFLOW && endOfInput) {
260                result = decoder.decode(bytes, out, true);
261                if (result == CoderResult.UNDERFLOW) {
262                    result = decoder.flush(out);
263                }
264                decoder.reset();
265            }
266            if (result.isMalformed() || result.isUnmappable()) {
267                result.throwException();
268            }
269
270            return out.position() - offset == 0 ? -1 : out.position() - offset;
271        }
272    }
273
274    private boolean isOpen() {
275        return in != null;
276    }
277
278    /**
279     * Indicates whether this reader is ready to be read without blocking. If
280     * the result is {@code true}, the next {@code read()} will not block. If
281     * the result is {@code false} then this reader may or may not block when
282     * {@code read()} is called. This implementation returns {@code true} if
283     * there are bytes available in the buffer or the source stream has bytes
284     * available.
285     *
286     * @return {@code true} if the receiver will not block when {@code read()}
287     *         is called, {@code false} if unknown or blocking will occur.
288     * @throws IOException
289     *             if this reader is closed or some other I/O error occurs.
290     */
291    @Override
292    public boolean ready() throws IOException {
293        synchronized (lock) {
294            if (in == null) {
295                throw new IOException("InputStreamReader is closed");
296            }
297            try {
298                return bytes.hasRemaining() || in.available() > 0;
299            } catch (IOException e) {
300                return false;
301            }
302        }
303    }
304}
305