1/*
2 * SingleXZInputStream
3 *
4 * Author: Lasse Collin <lasse.collin@tukaani.org>
5 *
6 * This file has been put into the public domain.
7 * You can do whatever you want with this file.
8 */
9
10package org.tukaani.xz;
11
12import java.io.InputStream;
13import java.io.DataInputStream;
14import java.io.IOException;
15import java.io.EOFException;
16import org.tukaani.xz.common.DecoderUtil;
17import org.tukaani.xz.common.StreamFlags;
18import org.tukaani.xz.index.IndexHash;
19import org.tukaani.xz.check.Check;
20
21/**
22 * Decompresses exactly one XZ Stream in streamed mode (no seeking).
23 * The decompression stops after the first XZ Stream has been decompressed,
24 * and the read position in the input stream is left at the first byte
25 * after the end of the XZ Stream. This can be useful when XZ data has
26 * been stored inside some other file format or protocol.
27 * <p>
28 * Unless you know what you are doing, don't use this class to decompress
29 * standalone .xz files. For that purpose, use <code>XZInputStream</code>.
30 *
31 * <h4>When uncompressed size is known beforehand</h4>
32 * <p>
33 * If you are decompressing complete XZ streams and your application knows
34 * exactly how much uncompressed data there should be, it is good to try
35 * reading one more byte by calling <code>read()</code> and checking
36 * that it returns <code>-1</code>. This way the decompressor will parse the
37 * file footers and verify the integrity checks, giving the caller more
38 * confidence that the uncompressed data is valid.
39 *
40 * @see XZInputStream
41 */
42public class SingleXZInputStream extends InputStream {
43    private InputStream in;
44    private final int memoryLimit;
45    private final StreamFlags streamHeaderFlags;
46    private final Check check;
47    private final boolean verifyCheck;
48    private BlockInputStream blockDecoder = null;
49    private final IndexHash indexHash = new IndexHash();
50    private boolean endReached = false;
51    private IOException exception = null;
52
53    private final byte[] tempBuf = new byte[1];
54
55    /**
56     * Reads the Stream Header into a buffer.
57     * This is a helper function for the constructors.
58     */
59    private static byte[] readStreamHeader(InputStream in) throws IOException {
60        byte[] streamHeader = new byte[DecoderUtil.STREAM_HEADER_SIZE];
61        new DataInputStream(in).readFully(streamHeader);
62        return streamHeader;
63    }
64
65    /**
66     * Creates a new XZ decompressor that decompresses exactly one
67     * XZ Stream from <code>in</code> without a memory usage limit.
68     * <p>
69     * This constructor reads and parses the XZ Stream Header (12 bytes)
70     * from <code>in</code>. The header of the first Block is not read
71     * until <code>read</code> is called.
72     *
73     * @param       in          input stream from which XZ-compressed
74     *                          data is read
75     *
76     * @throws      XZFormatException
77     *                          input is not in the XZ format
78     *
79     * @throws      CorruptedInputException
80     *                          XZ header CRC32 doesn't match
81     *
82     * @throws      UnsupportedOptionsException
83     *                          XZ header is valid but specifies options
84     *                          not supported by this implementation
85     *
86     * @throws      EOFException
87     *                          less than 12 bytes of input was available
88     *                          from <code>in</code>
89     *
90     * @throws      IOException may be thrown by <code>in</code>
91     */
92    public SingleXZInputStream(InputStream in) throws IOException {
93        this(in, -1);
94    }
95
96    /**
97     * Creates a new XZ decompressor that decompresses exactly one
98     * XZ Stream from <code>in</code> with an optional memory usage limit.
99     * <p>
100     * This is identical to <code>SingleXZInputStream(InputStream)</code>
101     * except that this takes also the <code>memoryLimit</code> argument.
102     *
103     * @param       in          input stream from which XZ-compressed
104     *                          data is read
105     *
106     * @param       memoryLimit memory usage limit in kibibytes (KiB)
107     *                          or <code>-1</code> to impose no
108     *                          memory usage limit
109     *
110     * @throws      XZFormatException
111     *                          input is not in the XZ format
112     *
113     * @throws      CorruptedInputException
114     *                          XZ header CRC32 doesn't match
115     *
116     * @throws      UnsupportedOptionsException
117     *                          XZ header is valid but specifies options
118     *                          not supported by this implementation
119     *
120     * @throws      EOFException
121     *                          less than 12 bytes of input was available
122     *                          from <code>in</code>
123     *
124     * @throws      IOException may be thrown by <code>in</code>
125     */
126    public SingleXZInputStream(InputStream in, int memoryLimit)
127            throws IOException {
128        this(in, memoryLimit, true, readStreamHeader(in));
129    }
130
131    /**
132     * Creates a new XZ decompressor that decompresses exactly one
133     * XZ Stream from <code>in</code> with an optional memory usage limit
134     * and ability to disable verification of integrity checks.
135     * <p>
136     * This is identical to <code>SingleXZInputStream(InputStream,int)</code>
137     * except that this takes also the <code>verifyCheck</code> argument.
138     * <p>
139     * Note that integrity check verification should almost never be disabled.
140     * Possible reasons to disable integrity check verification:
141     * <ul>
142     *   <li>Trying to recover data from a corrupt .xz file.</li>
143     *   <li>Speeding up decompression. This matters mostly with SHA-256
144     *   or with files that have compressed extremely well. It's recommended
145     *   that integrity checking isn't disabled for performance reasons
146     *   unless the file integrity is verified externally in some other
147     *   way.</li>
148     * </ul>
149     * <p>
150     * <code>verifyCheck</code> only affects the integrity check of
151     * the actual compressed data. The CRC32 fields in the headers
152     * are always verified.
153     *
154     * @param       in          input stream from which XZ-compressed
155     *                          data is read
156     *
157     * @param       memoryLimit memory usage limit in kibibytes (KiB)
158     *                          or <code>-1</code> to impose no
159     *                          memory usage limit
160     *
161     * @param       verifyCheck if <code>true</code>, the integrity checks
162     *                          will be verified; this should almost never
163     *                          be set to <code>false</code>
164     *
165     * @throws      XZFormatException
166     *                          input is not in the XZ format
167     *
168     * @throws      CorruptedInputException
169     *                          XZ header CRC32 doesn't match
170     *
171     * @throws      UnsupportedOptionsException
172     *                          XZ header is valid but specifies options
173     *                          not supported by this implementation
174     *
175     * @throws      EOFException
176     *                          less than 12 bytes of input was available
177     *                          from <code>in</code>
178     *
179     * @throws      IOException may be thrown by <code>in</code>
180     *
181     * @since 1.6
182     */
183    public SingleXZInputStream(InputStream in, int memoryLimit,
184                               boolean verifyCheck) throws IOException {
185        this(in, memoryLimit, verifyCheck, readStreamHeader(in));
186    }
187
188    SingleXZInputStream(InputStream in, int memoryLimit, boolean verifyCheck,
189                        byte[] streamHeader) throws IOException {
190        this.in = in;
191        this.memoryLimit = memoryLimit;
192        this.verifyCheck = verifyCheck;
193        streamHeaderFlags = DecoderUtil.decodeStreamHeader(streamHeader);
194        check = Check.getInstance(streamHeaderFlags.checkType);
195    }
196
197    /**
198     * Gets the ID of the integrity check used in this XZ Stream.
199     *
200     * @return      the Check ID specified in the XZ Stream Header
201     */
202    public int getCheckType() {
203        return streamHeaderFlags.checkType;
204    }
205
206    /**
207     * Gets the name of the integrity check used in this XZ Stream.
208     *
209     * @return      the name of the check specified in the XZ Stream Header
210     */
211    public String getCheckName() {
212        return check.getName();
213    }
214
215    /**
216     * Decompresses the next byte from this input stream.
217     * <p>
218     * Reading lots of data with <code>read()</code> from this input stream
219     * may be inefficient. Wrap it in {@link java.io.BufferedInputStream}
220     * if you need to read lots of data one byte at a time.
221     *
222     * @return      the next decompressed byte, or <code>-1</code>
223     *              to indicate the end of the compressed stream
224     *
225     * @throws      CorruptedInputException
226     * @throws      UnsupportedOptionsException
227     * @throws      MemoryLimitException
228     *
229     * @throws      XZIOException if the stream has been closed
230     *
231     * @throws      EOFException
232     *                          compressed input is truncated or corrupt
233     *
234     * @throws      IOException may be thrown by <code>in</code>
235     */
236    public int read() throws IOException {
237        return read(tempBuf, 0, 1) == -1 ? -1 : (tempBuf[0] & 0xFF);
238    }
239
240    /**
241     * Decompresses into an array of bytes.
242     * <p>
243     * If <code>len</code> is zero, no bytes are read and <code>0</code>
244     * is returned. Otherwise this will try to decompress <code>len</code>
245     * bytes of uncompressed data. Less than <code>len</code> bytes may
246     * be read only in the following situations:
247     * <ul>
248     *   <li>The end of the compressed data was reached successfully.</li>
249     *   <li>An error is detected after at least one but less <code>len</code>
250     *       bytes have already been successfully decompressed.
251     *       The next call with non-zero <code>len</code> will immediately
252     *       throw the pending exception.</li>
253     *   <li>An exception is thrown.</li>
254     * </ul>
255     *
256     * @param       buf         target buffer for uncompressed data
257     * @param       off         start offset in <code>buf</code>
258     * @param       len         maximum number of uncompressed bytes to read
259     *
260     * @return      number of bytes read, or <code>-1</code> to indicate
261     *              the end of the compressed stream
262     *
263     * @throws      CorruptedInputException
264     * @throws      UnsupportedOptionsException
265     * @throws      MemoryLimitException
266     *
267     * @throws      XZIOException if the stream has been closed
268     *
269     * @throws      EOFException
270     *                          compressed input is truncated or corrupt
271     *
272     * @throws      IOException may be thrown by <code>in</code>
273     */
274    public int read(byte[] buf, int off, int len) throws IOException {
275        if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length)
276            throw new IndexOutOfBoundsException();
277
278        if (len == 0)
279            return 0;
280
281        if (in == null)
282            throw new XZIOException("Stream closed");
283
284        if (exception != null)
285            throw exception;
286
287        if (endReached)
288            return -1;
289
290        int size = 0;
291
292        try {
293            while (len > 0) {
294                if (blockDecoder == null) {
295                    try {
296                        blockDecoder = new BlockInputStream(
297                                in, check, verifyCheck, memoryLimit, -1, -1);
298                    } catch (IndexIndicatorException e) {
299                        indexHash.validate(in);
300                        validateStreamFooter();
301                        endReached = true;
302                        return size > 0 ? size : -1;
303                    }
304                }
305
306                int ret = blockDecoder.read(buf, off, len);
307
308                if (ret > 0) {
309                    size += ret;
310                    off += ret;
311                    len -= ret;
312                } else if (ret == -1) {
313                    indexHash.add(blockDecoder.getUnpaddedSize(),
314                                  blockDecoder.getUncompressedSize());
315                    blockDecoder = null;
316                }
317            }
318        } catch (IOException e) {
319            exception = e;
320            if (size == 0)
321                throw e;
322        }
323
324        return size;
325    }
326
327    private void validateStreamFooter() throws IOException {
328        byte[] buf = new byte[DecoderUtil.STREAM_HEADER_SIZE];
329        new DataInputStream(in).readFully(buf);
330        StreamFlags streamFooterFlags = DecoderUtil.decodeStreamFooter(buf);
331
332        if (!DecoderUtil.areStreamFlagsEqual(streamHeaderFlags,
333                                             streamFooterFlags)
334                || indexHash.getIndexSize() != streamFooterFlags.backwardSize)
335            throw new CorruptedInputException(
336                    "XZ Stream Footer does not match Stream Header");
337    }
338
339    /**
340     * Returns the number of uncompressed bytes that can be read
341     * without blocking. The value is returned with an assumption
342     * that the compressed input data will be valid. If the compressed
343     * data is corrupt, <code>CorruptedInputException</code> may get
344     * thrown before the number of bytes claimed to be available have
345     * been read from this input stream.
346     *
347     * @return      the number of uncompressed bytes that can be read
348     *              without blocking
349     */
350    public int available() throws IOException {
351        if (in == null)
352            throw new XZIOException("Stream closed");
353
354        if (exception != null)
355            throw exception;
356
357        return blockDecoder == null ? 0 : blockDecoder.available();
358    }
359
360    /**
361     * Closes the stream and calls <code>in.close()</code>.
362     * If the stream was already closed, this does nothing.
363     *
364     * @throws  IOException if thrown by <code>in.close()</code>
365     */
366    public void close() throws IOException {
367        if (in != null) {
368            try {
369                in.close();
370            } finally {
371                in = null;
372            }
373        }
374    }
375}
376