1/*
2 * LZMA2InputStream
3 *
4 * Authors: Lasse Collin <lasse.collin@tukaani.org>
5 *          Igor Pavlov <http://7-zip.org/>
6 *
7 * This file has been put into the public domain.
8 * You can do whatever you want with this file.
9 */
10
11package org.tukaani.xz;
12
13import java.io.InputStream;
14import java.io.DataInputStream;
15import java.io.IOException;
16import org.tukaani.xz.lz.LZDecoder;
17import org.tukaani.xz.rangecoder.RangeDecoderFromBuffer;
18import org.tukaani.xz.lzma.LZMADecoder;
19
20/**
21 * Decompresses a raw LZMA2 stream (no XZ headers).
22 */
23public class LZMA2InputStream extends InputStream {
24    /**
25     * Smallest valid LZMA2 dictionary size.
26     * <p>
27     * Very tiny dictionaries would be a performance problem, so
28     * the minimum is 4 KiB.
29     */
30    public static final int DICT_SIZE_MIN = 4096;
31
32    /**
33     * Largest dictionary size supported by this implementation.
34     * <p>
35     * The LZMA2 algorithm allows dictionaries up to one byte less than 4 GiB.
36     * This implementation supports only 16 bytes less than 2 GiB for raw
37     * LZMA2 streams, and for .xz files the maximum is 1.5 GiB. This
38     * limitation is due to Java using signed 32-bit integers for array
39     * indexing. The limitation shouldn't matter much in practice since so
40     * huge dictionaries are not normally used.
41     */
42    public static final int DICT_SIZE_MAX = Integer.MAX_VALUE & ~15;
43
44    private static final int COMPRESSED_SIZE_MAX = 1 << 16;
45
46    private DataInputStream in;
47
48    private final LZDecoder lz;
49    private final RangeDecoderFromBuffer rc
50            = new RangeDecoderFromBuffer(COMPRESSED_SIZE_MAX);
51    private LZMADecoder lzma;
52
53    private int uncompressedSize = 0;
54    private boolean isLZMAChunk;
55
56    private boolean needDictReset = true;
57    private boolean needProps = true;
58    private boolean endReached = false;
59
60    private IOException exception = null;
61
62    private final byte[] tempBuf = new byte[1];
63
64    /**
65     * Gets approximate decompressor memory requirements as kibibytes for
66     * the given dictionary size.
67     *
68     * @param       dictSize    LZMA2 dictionary size as bytes, must be
69     *                          in the range [<code>DICT_SIZE_MIN</code>,
70     *                          <code>DICT_SIZE_MAX</code>]
71     *
72     * @return      approximate memory requirements as kibibytes (KiB)
73     */
74    public static int getMemoryUsage(int dictSize) {
75        // The base state is around 30-40 KiB (probabilities etc.),
76        // range decoder needs COMPRESSED_SIZE_MAX bytes for buffering,
77        // and LZ decoder needs a dictionary buffer.
78        return 40 + COMPRESSED_SIZE_MAX / 1024 + getDictSize(dictSize) / 1024;
79    }
80
81    private static int getDictSize(int dictSize) {
82        if (dictSize < DICT_SIZE_MIN || dictSize > DICT_SIZE_MAX)
83            throw new IllegalArgumentException(
84                    "Unsupported dictionary size " + dictSize);
85
86        // Round dictionary size upward to a multiple of 16. This way LZMA
87        // can use LZDecoder.getPos() for calculating LZMA's posMask.
88        // Note that this check is needed only for raw LZMA2 streams; it is
89        // redundant with .xz.
90        return (dictSize + 15) & ~15;
91    }
92
93    /**
94     * Creates a new input stream that decompresses raw LZMA2 data
95     * from <code>in</code>.
96     * <p>
97     * The caller needs to know the dictionary size used when compressing;
98     * the dictionary size isn't stored as part of a raw LZMA2 stream.
99     * <p>
100     * Specifying a too small dictionary size will prevent decompressing
101     * the stream. Specifying a too big dictionary is waste of memory but
102     * decompression will work.
103     * <p>
104     * There is no need to specify a dictionary bigger than
105     * the uncompressed size of the data even if a bigger dictionary
106     * was used when compressing. If you know the uncompressed size
107     * of the data, this might allow saving some memory.
108     *
109     * @param       in          input stream from which LZMA2-compressed
110     *                          data is read
111     *
112     * @param       dictSize    LZMA2 dictionary size as bytes, must be
113     *                          in the range [<code>DICT_SIZE_MIN</code>,
114     *                          <code>DICT_SIZE_MAX</code>]
115     */
116    public LZMA2InputStream(InputStream in, int dictSize) {
117        this(in, dictSize, null);
118    }
119
120    /**
121     * Creates a new LZMA2 decompressor using a preset dictionary.
122     * <p>
123     * This is like <code>LZMA2InputStream(InputStream, int)</code> except
124     * that the dictionary may be initialized using a preset dictionary.
125     * If a preset dictionary was used when compressing the data, the
126     * same preset dictionary must be provided when decompressing.
127     *
128     * @param       in          input stream from which LZMA2-compressed
129     *                          data is read
130     *
131     * @param       dictSize    LZMA2 dictionary size as bytes, must be
132     *                          in the range [<code>DICT_SIZE_MIN</code>,
133     *                          <code>DICT_SIZE_MAX</code>]
134     *
135     * @param       presetDict  preset dictionary or <code>null</code>
136     *                          to use no preset dictionary
137     */
138    public LZMA2InputStream(InputStream in, int dictSize, byte[] presetDict) {
139        // Check for null because otherwise null isn't detect
140        // in this constructor.
141        if (in == null)
142            throw new NullPointerException();
143
144        this.in = new DataInputStream(in);
145        this.lz = new LZDecoder(getDictSize(dictSize), presetDict);
146
147        if (presetDict != null && presetDict.length > 0)
148            needDictReset = false;
149    }
150
151    /**
152     * Decompresses the next byte from this input stream.
153     * <p>
154     * Reading lots of data with <code>read()</code> from this input stream
155     * may be inefficient. Wrap it in <code>java.io.BufferedInputStream</code>
156     * if you need to read lots of data one byte at a time.
157     *
158     * @return      the next decompressed byte, or <code>-1</code>
159     *              to indicate the end of the compressed stream
160     *
161     * @throws      CorruptedInputException
162     *
163     * @throws      XZIOException if the stream has been closed
164     *
165     * @throws      EOFException
166     *                          compressed input is truncated or corrupt
167     *
168     * @throws      IOException may be thrown by <code>in</code>
169     */
170    public int read() throws IOException {
171        return read(tempBuf, 0, 1) == -1 ? -1 : (tempBuf[0] & 0xFF);
172    }
173
174    /**
175     * Decompresses into an array of bytes.
176     * <p>
177     * If <code>len</code> is zero, no bytes are read and <code>0</code>
178     * is returned. Otherwise this will block until <code>len</code>
179     * bytes have been decompressed, the end of the LZMA2 stream is reached,
180     * or an exception is thrown.
181     *
182     * @param       buf         target buffer for uncompressed data
183     * @param       off         start offset in <code>buf</code>
184     * @param       len         maximum number of uncompressed bytes to read
185     *
186     * @return      number of bytes read, or <code>-1</code> to indicate
187     *              the end of the compressed stream
188     *
189     * @throws      CorruptedInputException
190     *
191     * @throws      XZIOException if the stream has been closed
192     *
193     * @throws      EOFException
194     *                          compressed input is truncated or corrupt
195     *
196     * @throws      IOException may be thrown by <code>in</code>
197     */
198    public int read(byte[] buf, int off, int len) throws IOException {
199        if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length)
200            throw new IndexOutOfBoundsException();
201
202        if (len == 0)
203            return 0;
204
205        if (in == null)
206            throw new XZIOException("Stream closed");
207
208        if (exception != null)
209            throw exception;
210
211        if (endReached)
212            return -1;
213
214        try {
215            int size = 0;
216
217            while (len > 0) {
218                if (uncompressedSize == 0) {
219                    decodeChunkHeader();
220                    if (endReached)
221                        return size == 0 ? -1 : size;
222                }
223
224                int copySizeMax = Math.min(uncompressedSize, len);
225
226                if (!isLZMAChunk) {
227                    lz.copyUncompressed(in, copySizeMax);
228                } else {
229                    lz.setLimit(copySizeMax);
230                    lzma.decode();
231                    if (!rc.isInBufferOK())
232                        throw new CorruptedInputException();
233                }
234
235                int copiedSize = lz.flush(buf, off);
236                off += copiedSize;
237                len -= copiedSize;
238                size += copiedSize;
239                uncompressedSize -= copiedSize;
240
241                if (uncompressedSize == 0)
242                    if (!rc.isFinished() || lz.hasPending())
243                        throw new CorruptedInputException();
244            }
245
246            return size;
247
248        } catch (IOException e) {
249            exception = e;
250            throw e;
251        }
252    }
253
254    private void decodeChunkHeader() throws IOException {
255        int control = in.readUnsignedByte();
256
257        if (control == 0x00) {
258            endReached = true;
259            return;
260        }
261
262        if (control >= 0xE0 || control == 0x01) {
263            needProps = true;
264            needDictReset = false;
265            lz.reset();
266        } else if (needDictReset) {
267            throw new CorruptedInputException();
268        }
269
270        if (control >= 0x80) {
271            isLZMAChunk = true;
272
273            uncompressedSize = (control & 0x1F) << 16;
274            uncompressedSize += in.readUnsignedShort() + 1;
275
276            int compressedSize = in.readUnsignedShort() + 1;
277
278            if (control >= 0xC0) {
279                needProps = false;
280                decodeProps();
281
282            } else if (needProps) {
283                throw new CorruptedInputException();
284
285            } else if (control >= 0xA0) {
286                lzma.reset();
287            }
288
289            rc.prepareInputBuffer(in, compressedSize);
290
291        } else if (control > 0x02) {
292            throw new CorruptedInputException();
293
294        } else {
295            isLZMAChunk = false;
296            uncompressedSize = in.readUnsignedShort() + 1;
297        }
298    }
299
300    private void decodeProps() throws IOException {
301        int props = in.readUnsignedByte();
302
303        if (props > (4 * 5 + 4) * 9 + 8)
304            throw new CorruptedInputException();
305
306        int pb = props / (9 * 5);
307        props -= pb * 9 * 5;
308        int lp = props / 9;
309        int lc = props - lp * 9;
310
311        if (lc + lp > 4)
312            throw new CorruptedInputException();
313
314        lzma = new LZMADecoder(lz, rc, lc, lp, pb);
315    }
316
317    /**
318     * Returns the number of uncompressed bytes that can be read
319     * without blocking. The value is returned with an assumption
320     * that the compressed input data will be valid. If the compressed
321     * data is corrupt, <code>CorruptedInputException</code> may get
322     * thrown before the number of bytes claimed to be available have
323     * been read from this input stream.
324     * <p>
325     * In LZMA2InputStream, the return value will be non-zero when the
326     * decompressor is in the middle of an LZMA2 chunk. The return value
327     * will then be the number of uncompressed bytes remaining from that
328     * chunk.
329     *
330     * @return      the number of uncompressed bytes that can be read
331     *              without blocking
332     */
333    public int available() throws IOException {
334        if (in == null)
335            throw new XZIOException("Stream closed");
336
337        if (exception != null)
338            throw exception;
339
340        return uncompressedSize;
341    }
342
343    /**
344     * Closes the stream and calls <code>in.close()</code>.
345     * If the stream was already closed, this does nothing.
346     *
347     * @throws  IOException if thrown by <code>in.close()</code>
348     */
349    public void close() throws IOException {
350        if (in != null) {
351            try {
352                in.close();
353            } finally {
354                in = null;
355            }
356        }
357    }
358}
359