1/*
2 * XZInputStream
3 *
4 * Author: Lasse Collin <lasse.collin@tukaani.org>
5 *
6 * This file has been put into the public domain.
7 * You can do whatever you want with this file.
8 */
9
10package org.tukaani.xz;
11
12import java.io.InputStream;
13import java.io.DataInputStream;
14import java.io.IOException;
15import java.io.EOFException;
16import org.tukaani.xz.common.DecoderUtil;
17
18/**
19 * Decompresses a .xz file in streamed mode (no seeking).
20 * <p>
21 * Use this to decompress regular standalone .xz files. This reads from
22 * its input stream until the end of the input or until an error occurs.
23 * This supports decompressing concatenated .xz files.
24 *
25 * <h4>Typical use cases</h4>
26 * <p>
27 * Getting an input stream to decompress a .xz file:
28 * <p><blockquote><pre>
29 * InputStream infile = new FileInputStream("foo.xz");
30 * XZInputStream inxz = new XZInputStream(infile);
31 * </pre></blockquote>
32 * <p>
33 * It's important to keep in mind that decompressor memory usage depends
34 * on the settings used to compress the file. The worst-case memory usage
35 * of XZInputStream is currently 1.5&nbsp;GiB. Still, very few files will
36 * require more than about 65&nbsp;MiB because that's how much decompressing
37 * a file created with the highest preset level will need, and only a few
38 * people use settings other than the predefined presets.
39 * <p>
40 * It is possible to specify a memory usage limit for
41 * <code>XZInputStream</code>. If decompression requires more memory than
42 * the specified limit, MemoryLimitException will be thrown when reading
43 * from the stream. For example, the following sets the memory usage limit
44 * to 100&nbsp;MiB:
45 * <p><blockquote><pre>
46 * InputStream infile = new FileInputStream("foo.xz");
47 * XZInputStream inxz = new XZInputStream(infile, 100 * 1024);
48 * </pre></blockquote>
49 *
50 * <h4>When uncompressed size is known beforehand</h4>
51 * <p>
52 * If you are decompressing complete files and your application knows
53 * exactly how much uncompressed data there should be, it is good to try
54 * reading one more byte by calling <code>read()</code> and checking
55 * that it returns <code>-1</code>. This way the decompressor will parse the
56 * file footers and verify the integrity checks, giving the caller more
57 * confidence that the uncompressed data is valid. (This advice seems to
58 * apply to
59 * {@link java.util.zip.GZIPInputStream java.util.zip.GZIPInputStream} too.)
60 *
61 * @see SingleXZInputStream
62 */
63public class XZInputStream extends InputStream {
64    private final int memoryLimit;
65    private InputStream in;
66    private SingleXZInputStream xzIn;
67    private final boolean verifyCheck;
68    private boolean endReached = false;
69    private IOException exception = null;
70
71    private final byte[] tempBuf = new byte[1];
72
73    /**
74     * Creates a new XZ decompressor without a memory usage limit.
75     * <p>
76     * This constructor reads and parses the XZ Stream Header (12 bytes)
77     * from <code>in</code>. The header of the first Block is not read
78     * until <code>read</code> is called.
79     *
80     * @param       in          input stream from which XZ-compressed
81     *                          data is read
82     *
83     * @throws      XZFormatException
84     *                          input is not in the XZ format
85     *
86     * @throws      CorruptedInputException
87     *                          XZ header CRC32 doesn't match
88     *
89     * @throws      UnsupportedOptionsException
90     *                          XZ header is valid but specifies options
91     *                          not supported by this implementation
92     *
93     * @throws      EOFException
94     *                          less than 12 bytes of input was available
95     *                          from <code>in</code>
96     *
97     * @throws      IOException may be thrown by <code>in</code>
98     */
99    public XZInputStream(InputStream in) throws IOException {
100        this(in, -1);
101    }
102
103    /**
104     * Creates a new XZ decompressor with an optional memory usage limit.
105     * <p>
106     * This is identical to <code>XZInputStream(InputStream)</code> except
107     * that this takes also the <code>memoryLimit</code> argument.
108     *
109     * @param       in          input stream from which XZ-compressed
110     *                          data is read
111     *
112     * @param       memoryLimit memory usage limit in kibibytes (KiB)
113     *                          or <code>-1</code> to impose no
114     *                          memory usage limit
115     *
116     * @throws      XZFormatException
117     *                          input is not in the XZ format
118     *
119     * @throws      CorruptedInputException
120     *                          XZ header CRC32 doesn't match
121     *
122     * @throws      UnsupportedOptionsException
123     *                          XZ header is valid but specifies options
124     *                          not supported by this implementation
125     *
126     * @throws      EOFException
127     *                          less than 12 bytes of input was available
128     *                          from <code>in</code>
129     *
130     * @throws      IOException may be thrown by <code>in</code>
131     */
132    public XZInputStream(InputStream in, int memoryLimit) throws IOException {
133        this(in, memoryLimit, true);
134    }
135
136    /**
137     * Creates a new XZ decompressor with an optional memory usage limit
138     * and ability to disable verification of integrity checks.
139     * <p>
140     * This is identical to <code>XZInputStream(InputStream,int)</code> except
141     * that this takes also the <code>verifyCheck</code> argument.
142     * <p>
143     * Note that integrity check verification should almost never be disabled.
144     * Possible reasons to disable integrity check verification:
145     * <ul>
146     *   <li>Trying to recover data from a corrupt .xz file.</li>
147     *   <li>Speeding up decompression. This matters mostly with SHA-256
148     *   or with files that have compressed extremely well. It's recommended
149     *   that integrity checking isn't disabled for performance reasons
150     *   unless the file integrity is verified externally in some other
151     *   way.</li>
152     * </ul>
153     * <p>
154     * <code>verifyCheck</code> only affects the integrity check of
155     * the actual compressed data. The CRC32 fields in the headers
156     * are always verified.
157     *
158     * @param       in          input stream from which XZ-compressed
159     *                          data is read
160     *
161     * @param       memoryLimit memory usage limit in kibibytes (KiB)
162     *                          or <code>-1</code> to impose no
163     *                          memory usage limit
164     *
165     * @param       verifyCheck if <code>true</code>, the integrity checks
166     *                          will be verified; this should almost never
167     *                          be set to <code>false</code>
168     *
169     * @throws      XZFormatException
170     *                          input is not in the XZ format
171     *
172     * @throws      CorruptedInputException
173     *                          XZ header CRC32 doesn't match
174     *
175     * @throws      UnsupportedOptionsException
176     *                          XZ header is valid but specifies options
177     *                          not supported by this implementation
178     *
179     * @throws      EOFException
180     *                          less than 12 bytes of input was available
181     *                          from <code>in</code>
182     *
183     * @throws      IOException may be thrown by <code>in</code>
184     *
185     * @since 1.6
186     */
187    public XZInputStream(InputStream in, int memoryLimit, boolean verifyCheck)
188            throws IOException {
189        this.in = in;
190        this.memoryLimit = memoryLimit;
191        this.verifyCheck = verifyCheck;
192        this.xzIn = new SingleXZInputStream(in, memoryLimit, verifyCheck);
193    }
194
195    /**
196     * Decompresses the next byte from this input stream.
197     * <p>
198     * Reading lots of data with <code>read()</code> from this input stream
199     * may be inefficient. Wrap it in {@link java.io.BufferedInputStream}
200     * if you need to read lots of data one byte at a time.
201     *
202     * @return      the next decompressed byte, or <code>-1</code>
203     *              to indicate the end of the compressed stream
204     *
205     * @throws      CorruptedInputException
206     * @throws      UnsupportedOptionsException
207     * @throws      MemoryLimitException
208     *
209     * @throws      XZIOException if the stream has been closed
210     *
211     * @throws      EOFException
212     *                          compressed input is truncated or corrupt
213     *
214     * @throws      IOException may be thrown by <code>in</code>
215     */
216    public int read() throws IOException {
217        return read(tempBuf, 0, 1) == -1 ? -1 : (tempBuf[0] & 0xFF);
218    }
219
220    /**
221     * Decompresses into an array of bytes.
222     * <p>
223     * If <code>len</code> is zero, no bytes are read and <code>0</code>
224     * is returned. Otherwise this will try to decompress <code>len</code>
225     * bytes of uncompressed data. Less than <code>len</code> bytes may
226     * be read only in the following situations:
227     * <ul>
228     *   <li>The end of the compressed data was reached successfully.</li>
229     *   <li>An error is detected after at least one but less <code>len</code>
230     *       bytes have already been successfully decompressed.
231     *       The next call with non-zero <code>len</code> will immediately
232     *       throw the pending exception.</li>
233     *   <li>An exception is thrown.</li>
234     * </ul>
235     *
236     * @param       buf         target buffer for uncompressed data
237     * @param       off         start offset in <code>buf</code>
238     * @param       len         maximum number of uncompressed bytes to read
239     *
240     * @return      number of bytes read, or <code>-1</code> to indicate
241     *              the end of the compressed stream
242     *
243     * @throws      CorruptedInputException
244     * @throws      UnsupportedOptionsException
245     * @throws      MemoryLimitException
246     *
247     * @throws      XZIOException if the stream has been closed
248     *
249     * @throws      EOFException
250     *                          compressed input is truncated or corrupt
251     *
252     * @throws      IOException may be thrown by <code>in</code>
253     */
254    public int read(byte[] buf, int off, int len) throws IOException {
255        if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length)
256            throw new IndexOutOfBoundsException();
257
258        if (len == 0)
259            return 0;
260
261        if (in == null)
262            throw new XZIOException("Stream closed");
263
264        if (exception != null)
265            throw exception;
266
267        if (endReached)
268            return -1;
269
270        int size = 0;
271
272        try {
273            while (len > 0) {
274                if (xzIn == null) {
275                    prepareNextStream();
276                    if (endReached)
277                        return size == 0 ? -1 : size;
278                }
279
280                int ret = xzIn.read(buf, off, len);
281
282                if (ret > 0) {
283                    size += ret;
284                    off += ret;
285                    len -= ret;
286                } else if (ret == -1) {
287                    xzIn = null;
288                }
289            }
290        } catch (IOException e) {
291            exception = e;
292            if (size == 0)
293                throw e;
294        }
295
296        return size;
297    }
298
299    private void prepareNextStream() throws IOException {
300        DataInputStream inData = new DataInputStream(in);
301        byte[] buf = new byte[DecoderUtil.STREAM_HEADER_SIZE];
302
303        // The size of Stream Padding must be a multiple of four bytes,
304        // all bytes zero.
305        do {
306            // First try to read one byte to see if we have reached the end
307            // of the file.
308            int ret = inData.read(buf, 0, 1);
309            if (ret == -1) {
310                endReached = true;
311                return;
312            }
313
314            // Since we got one byte of input, there must be at least
315            // three more available in a valid file.
316            inData.readFully(buf, 1, 3);
317
318        } while (buf[0] == 0 && buf[1] == 0 && buf[2] == 0 && buf[3] == 0);
319
320        // Not all bytes are zero. In a valid Stream it indicates the
321        // beginning of the next Stream. Read the rest of the Stream Header
322        // and initialize the XZ decoder.
323        inData.readFully(buf, 4, DecoderUtil.STREAM_HEADER_SIZE - 4);
324
325        try {
326            xzIn = new SingleXZInputStream(in, memoryLimit, verifyCheck, buf);
327        } catch (XZFormatException e) {
328            // Since this isn't the first .xz Stream, it is more
329            // logical to tell that the data is corrupt.
330            throw new CorruptedInputException(
331                    "Garbage after a valid XZ Stream");
332        }
333    }
334
335    /**
336     * Returns the number of uncompressed bytes that can be read
337     * without blocking. The value is returned with an assumption
338     * that the compressed input data will be valid. If the compressed
339     * data is corrupt, <code>CorruptedInputException</code> may get
340     * thrown before the number of bytes claimed to be available have
341     * been read from this input stream.
342     *
343     * @return      the number of uncompressed bytes that can be read
344     *              without blocking
345     */
346    public int available() throws IOException {
347        if (in == null)
348            throw new XZIOException("Stream closed");
349
350        if (exception != null)
351            throw exception;
352
353        return xzIn == null ? 0 : xzIn.available();
354    }
355
356    /**
357     * Closes the stream and calls <code>in.close()</code>.
358     * If the stream was already closed, this does nothing.
359     *
360     * @throws  IOException if thrown by <code>in.close()</code>
361     */
362    public void close() throws IOException {
363        if (in != null) {
364            try {
365                in.close();
366            } finally {
367                in = null;
368            }
369        }
370    }
371}
372