/*
 * Simple XZ decoder command line tool
 *
 * Author: Lasse Collin <lasse.collin@tukaani.org>
 *
 * This file has been put into the public domain.
 * You can do whatever you want with this file.
 * Modified for toybox by Isaac Dunham
USE_XZCAT(NEWTOY(xzcat, NULL, TOYFLAG_USR|TOYFLAG_BIN))

config XZCAT
  bool "xzcat"
  default n
  help
    usage: xzcat [filename...]

    Decompress listed files to stdout. Use stdin if no files listed.

*/
#define FOR_xzcat
#include "toys.h"
22971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
// BEGIN xz.h
24971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
/**
 * enum xz_ret - Return codes
 * @XZ_OK:                  Everything is OK so far. More input or more
 *                          output space is required to continue.
 * @XZ_STREAM_END:          Operation finished successfully.
 * @XZ_UNSUPPORTED_CHECK:   Integrity check type is not supported. Decoding
 *                          is still possible in multi-call mode by simply
 *                          calling xz_dec_run() again.
 *                          Note that this return value is used only if
 *                          XZ_DEC_ANY_CHECK was defined at build time,
 *                          which is not used in the kernel. Unsupported
 *                          check types return XZ_OPTIONS_ERROR if
 *                          XZ_DEC_ANY_CHECK was not defined at build time.
 * @XZ_MEM_ERROR:           Allocating memory failed. The amount of memory
 *                          that was tried to be allocated was no more than the
 *                          dict_max argument given to xz_dec_init().
 * @XZ_MEMLIMIT_ERROR:      A bigger LZMA2 dictionary would be needed than
 *                          allowed by the dict_max argument given to
 *                          xz_dec_init().
 * @XZ_FORMAT_ERROR:        File format was not recognized (wrong magic
 *                          bytes).
 * @XZ_OPTIONS_ERROR:       This implementation doesn't support the requested
 *                          compression options. In the decoder this means
 *                          that the header CRC32 matches, but the header
 *                          itself specifies something that we don't support.
 * @XZ_DATA_ERROR:          Compressed data is corrupt.
 * @XZ_BUF_ERROR:           Cannot make any progress. Details are slightly
 *                          different between multi-call and single-call
 *                          mode; more information below.
 *
 * XZ_BUF_ERROR is returned when two consecutive calls to XZ code cannot
 * consume any input and cannot produce any new output. This happens when
 * there is no new input available, or the output buffer is full while at
 * least one output byte is still pending. Assuming your code is not buggy,
 * you can get this error only when decoding a compressed stream that is
 * truncated or otherwise corrupt.
 */
enum xz_ret {
  XZ_OK,
  XZ_STREAM_END,
  XZ_UNSUPPORTED_CHECK,
  XZ_MEM_ERROR,
  XZ_MEMLIMIT_ERROR,
  XZ_FORMAT_ERROR,
  XZ_OPTIONS_ERROR,
  XZ_DATA_ERROR,
  XZ_BUF_ERROR
};
73971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
/**
 * struct xz_buf - Passing input and output buffers to XZ code
 * @in:         Beginning of the input buffer. This may be NULL if and only
 *              if in_pos is equal to in_size.
 * @in_pos:     Current position in the input buffer. This must not exceed
 *              in_size.
 * @in_size:    Size of the input buffer
 * @out:        Beginning of the output buffer. This may be NULL if and only
 *              if out_pos is equal to out_size.
 * @out_pos:    Current position in the output buffer. This must not exceed
 *              out_size.
 * @out_size:   Size of the output buffer
 *
 * Only the contents of the output buffer from out[out_pos] onward, and
 * the variables in_pos and out_pos are modified by the XZ code.
 */
struct xz_buf {
  const uint8_t *in;
  size_t in_pos;
  size_t in_size;

  uint8_t *out;
  size_t out_pos;
  size_t out_size;
};
99971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
/**
 * struct xz_dec - Opaque type to hold the XZ decoder state
 */
struct xz_dec;
104971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
/**
 * xz_dec_init() - Allocate and initialize a XZ decoder state
 * @dict_max:   Maximum size of the LZMA2 dictionary (history buffer) for
 *              multi-call decoding. LZMA2 dictionary is always 2^n bytes
 *              or 2^n + 2^(n-1) bytes (the latter sizes are less common
 *              in practice), so other values for dict_max don't make sense.
 *              In the kernel, dictionary sizes of 64 KiB, 128 KiB, 256 KiB,
 *              512 KiB, and 1 MiB are probably the only reasonable values,
 *              except for kernel and initramfs images where a bigger
 *              dictionary can be fine and useful.
 *
 * dict_max specifies the maximum allowed dictionary size that xz_dec_run()
 * may allocate once it has parsed the dictionary size from the stream
 * headers. This way excessive allocations can be avoided while still
 * limiting the maximum memory usage to a sane value to prevent running the
 * system out of memory when decompressing streams from untrusted sources.
 *
 * On success, xz_dec_init() returns a pointer to struct xz_dec, which is
 * ready to be used with xz_dec_run(). If memory allocation fails,
 * xz_dec_init() returns NULL.
 */
struct xz_dec *xz_dec_init(uint32_t dict_max);
128971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
/**
 * xz_dec_run() - Run the XZ decoder
 * @s:          Decoder state allocated using xz_dec_init()
 * @b:          Input and output buffers
 *
 * The possible return values depend on build options and operation mode.
 * See enum xz_ret for details.
 *
 * Note that if an error occurs in single-call mode (return value is not
 * XZ_STREAM_END), b->in_pos and b->out_pos are not modified and the
 * contents of the output buffer from b->out[b->out_pos] onward are
 * undefined. This is true even after XZ_BUF_ERROR, because with some filter
 * chains, there may be a second pass over the output buffer, and this pass
 * cannot be properly done if the output buffer is truncated. Thus, you
 * cannot give the single-call decoder a too small buffer and then expect to
 * get that amount valid data from the beginning of the stream. You must use
 * the multi-call decoder if you don't want to uncompress the whole stream.
 */
enum xz_ret xz_dec_run(struct xz_dec *s, struct xz_buf *b);
148971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
/**
 * xz_dec_reset() - Reset an already allocated decoder state
 * @s:          Decoder state allocated using xz_dec_init()
 *
 * This function can be used to reset the multi-call decoder state without
 * freeing and reallocating memory with xz_dec_end() and xz_dec_init().
 *
 * In single-call mode, xz_dec_reset() is always called in the beginning of
 * xz_dec_run(). Thus, explicit call to xz_dec_reset() is useful only in
 * multi-call mode.
 */
void xz_dec_reset(struct xz_dec *s);
161971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
/**
 * xz_dec_end() - Free the memory allocated for the decoder state
 * @s:          Decoder state allocated using xz_dec_init(). If s is NULL,
 *              this function does nothing.
 */
void xz_dec_end(struct xz_dec *s);
168971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
/*
 * Lookup table used by xz_crc32(), one entry per possible byte value.
 * It must be filled in before xz_crc32() gives meaningful results;
 * do_xzcat() does that through crc_init() before it starts decoding.
 */
static uint32_t xz_crc32_table[256];

/*
 * Update CRC32 value using the polynomial from IEEE-802.3. To start a new
 * calculation, the third argument must be zero. To continue the calculation,
 * the previously returned value is passed as the third argument.
 *
 * buf:  bytes to fold into the checksum (not dereferenced when size is 0)
 * size: number of bytes available at buf
 * crc:  zero, or the value returned by the previous call
 *
 * Returns the updated CRC32 value.
 */
uint32_t xz_crc32(const uint8_t *buf, size_t size, uint32_t crc)
{
  /* The running value is kept inverted between the entry and exit flips. */
  crc = ~crc;

  for (; size != 0; --size)
    crc = xz_crc32_table[*buf++ ^ (crc & 0xFF)] ^ (crc >> 8);

  return ~crc;
}
187971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
/* CRC64 lookup table, one entry per byte value; do_xzcat() fills it in. */
static uint64_t xz_crc64_table[256];
189052491266f02585c7c990100cb7457e56755c336Isaac Dunham
190971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
// END xz.h
192971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
/* Staging buffers used by do_xzcat(): compressed input, decompressed output. */
static uint8_t in[BUFSIZ];
static uint8_t out[BUFSIZ];
195971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
19639af4ae3e61d352c3faa7d1b87e6ac6fdb69add1Isaac Dunhamvoid do_xzcat(int fd, char *name)
197971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley{
19818993e2f3af291eea7a175547862d057b7d83708Rob Landley  struct xz_buf b;
19918993e2f3af291eea7a175547862d057b7d83708Rob Landley  struct xz_dec *s;
20018993e2f3af291eea7a175547862d057b7d83708Rob Landley  enum xz_ret ret;
20118993e2f3af291eea7a175547862d057b7d83708Rob Landley  const char *msg;
20218993e2f3af291eea7a175547862d057b7d83708Rob Landley
20318993e2f3af291eea7a175547862d057b7d83708Rob Landley  crc_init(xz_crc32_table, 1);
204c810f9f80b9db62de09b6cf4c6ca770eed72ce53Isaac Dunham  const uint64_t poly = 0xC96C5795D7870F42ULL;
205c810f9f80b9db62de09b6cf4c6ca770eed72ce53Isaac Dunham  uint32_t i;
206c810f9f80b9db62de09b6cf4c6ca770eed72ce53Isaac Dunham  uint32_t j;
207c810f9f80b9db62de09b6cf4c6ca770eed72ce53Isaac Dunham  uint64_t r;
208c810f9f80b9db62de09b6cf4c6ca770eed72ce53Isaac Dunham
209c810f9f80b9db62de09b6cf4c6ca770eed72ce53Isaac Dunham  /* initialize CRC64 table*/
210c810f9f80b9db62de09b6cf4c6ca770eed72ce53Isaac Dunham  for (i = 0; i < 256; ++i) {
211c810f9f80b9db62de09b6cf4c6ca770eed72ce53Isaac Dunham    r = i;
212c810f9f80b9db62de09b6cf4c6ca770eed72ce53Isaac Dunham    for (j = 0; j < 8; ++j)
213c810f9f80b9db62de09b6cf4c6ca770eed72ce53Isaac Dunham      r = (r >> 1) ^ (poly & ~((r & 1) - 1));
214c810f9f80b9db62de09b6cf4c6ca770eed72ce53Isaac Dunham
215c810f9f80b9db62de09b6cf4c6ca770eed72ce53Isaac Dunham    xz_crc64_table[i] = r;
216c810f9f80b9db62de09b6cf4c6ca770eed72ce53Isaac Dunham  }
21718993e2f3af291eea7a175547862d057b7d83708Rob Landley
21818993e2f3af291eea7a175547862d057b7d83708Rob Landley  /*
21918993e2f3af291eea7a175547862d057b7d83708Rob Landley   * Support up to 64 MiB dictionary. The actually needed memory
22018993e2f3af291eea7a175547862d057b7d83708Rob Landley   * is allocated once the headers have been parsed.
22118993e2f3af291eea7a175547862d057b7d83708Rob Landley   */
2220c2509957b6291fb136eca4a5c278466045ce31dIsaac Dunham  s = xz_dec_init(1 << 26);
22318993e2f3af291eea7a175547862d057b7d83708Rob Landley  if (s == NULL) {
22418993e2f3af291eea7a175547862d057b7d83708Rob Landley    msg = "Memory allocation failed\n";
22518993e2f3af291eea7a175547862d057b7d83708Rob Landley    goto error;
22618993e2f3af291eea7a175547862d057b7d83708Rob Landley  }
22718993e2f3af291eea7a175547862d057b7d83708Rob Landley
22818993e2f3af291eea7a175547862d057b7d83708Rob Landley  b.in = in;
22918993e2f3af291eea7a175547862d057b7d83708Rob Landley  b.in_pos = 0;
23018993e2f3af291eea7a175547862d057b7d83708Rob Landley  b.in_size = 0;
23118993e2f3af291eea7a175547862d057b7d83708Rob Landley  b.out = out;
23218993e2f3af291eea7a175547862d057b7d83708Rob Landley  b.out_pos = 0;
23318993e2f3af291eea7a175547862d057b7d83708Rob Landley  b.out_size = BUFSIZ;
23418993e2f3af291eea7a175547862d057b7d83708Rob Landley
23518993e2f3af291eea7a175547862d057b7d83708Rob Landley  for (;;) {
23618993e2f3af291eea7a175547862d057b7d83708Rob Landley    if (b.in_pos == b.in_size) {
23739af4ae3e61d352c3faa7d1b87e6ac6fdb69add1Isaac Dunham      b.in_size = read(fd, in, sizeof(in));
23818993e2f3af291eea7a175547862d057b7d83708Rob Landley      b.in_pos = 0;
23918993e2f3af291eea7a175547862d057b7d83708Rob Landley    }
24018993e2f3af291eea7a175547862d057b7d83708Rob Landley
24118993e2f3af291eea7a175547862d057b7d83708Rob Landley    ret = xz_dec_run(s, &b);
24218993e2f3af291eea7a175547862d057b7d83708Rob Landley
24318993e2f3af291eea7a175547862d057b7d83708Rob Landley    if (b.out_pos == sizeof(out)) {
24418993e2f3af291eea7a175547862d057b7d83708Rob Landley      if (fwrite(out, 1, b.out_pos, stdout) != b.out_pos) {
24518993e2f3af291eea7a175547862d057b7d83708Rob Landley        msg = "Write error\n";
24618993e2f3af291eea7a175547862d057b7d83708Rob Landley        goto error;
24718993e2f3af291eea7a175547862d057b7d83708Rob Landley      }
24818993e2f3af291eea7a175547862d057b7d83708Rob Landley
24918993e2f3af291eea7a175547862d057b7d83708Rob Landley      b.out_pos = 0;
25018993e2f3af291eea7a175547862d057b7d83708Rob Landley    }
25118993e2f3af291eea7a175547862d057b7d83708Rob Landley
25218993e2f3af291eea7a175547862d057b7d83708Rob Landley    if (ret == XZ_OK)
25318993e2f3af291eea7a175547862d057b7d83708Rob Landley      continue;
25418993e2f3af291eea7a175547862d057b7d83708Rob Landley
25518993e2f3af291eea7a175547862d057b7d83708Rob Landley    if (ret == XZ_UNSUPPORTED_CHECK)
25618993e2f3af291eea7a175547862d057b7d83708Rob Landley      continue;
25718993e2f3af291eea7a175547862d057b7d83708Rob Landley
25839af4ae3e61d352c3faa7d1b87e6ac6fdb69add1Isaac Dunham    if (fwrite(out, 1, b.out_pos, stdout) != b.out_pos) {
25918993e2f3af291eea7a175547862d057b7d83708Rob Landley      msg = "Write error\n";
26018993e2f3af291eea7a175547862d057b7d83708Rob Landley      goto error;
26118993e2f3af291eea7a175547862d057b7d83708Rob Landley    }
26218993e2f3af291eea7a175547862d057b7d83708Rob Landley
26318993e2f3af291eea7a175547862d057b7d83708Rob Landley    switch (ret) {
26418993e2f3af291eea7a175547862d057b7d83708Rob Landley    case XZ_STREAM_END:
26518993e2f3af291eea7a175547862d057b7d83708Rob Landley      xz_dec_end(s);
26618993e2f3af291eea7a175547862d057b7d83708Rob Landley      return;
26718993e2f3af291eea7a175547862d057b7d83708Rob Landley
26818993e2f3af291eea7a175547862d057b7d83708Rob Landley    case XZ_MEM_ERROR:
26918993e2f3af291eea7a175547862d057b7d83708Rob Landley      msg = "Memory allocation failed\n";
27018993e2f3af291eea7a175547862d057b7d83708Rob Landley      goto error;
27118993e2f3af291eea7a175547862d057b7d83708Rob Landley
27218993e2f3af291eea7a175547862d057b7d83708Rob Landley    case XZ_MEMLIMIT_ERROR:
27318993e2f3af291eea7a175547862d057b7d83708Rob Landley      msg = "Memory usage limit reached\n";
27418993e2f3af291eea7a175547862d057b7d83708Rob Landley      goto error;
27518993e2f3af291eea7a175547862d057b7d83708Rob Landley
27618993e2f3af291eea7a175547862d057b7d83708Rob Landley    case XZ_FORMAT_ERROR:
27718993e2f3af291eea7a175547862d057b7d83708Rob Landley      msg = "Not a .xz file\n";
27818993e2f3af291eea7a175547862d057b7d83708Rob Landley      goto error;
27918993e2f3af291eea7a175547862d057b7d83708Rob Landley
28018993e2f3af291eea7a175547862d057b7d83708Rob Landley    case XZ_OPTIONS_ERROR:
28118993e2f3af291eea7a175547862d057b7d83708Rob Landley      msg = "Unsupported options in the .xz headers\n";
28218993e2f3af291eea7a175547862d057b7d83708Rob Landley      goto error;
28318993e2f3af291eea7a175547862d057b7d83708Rob Landley
28418993e2f3af291eea7a175547862d057b7d83708Rob Landley    case XZ_DATA_ERROR:
28518993e2f3af291eea7a175547862d057b7d83708Rob Landley    case XZ_BUF_ERROR:
28618993e2f3af291eea7a175547862d057b7d83708Rob Landley      msg = "File is corrupt\n";
28718993e2f3af291eea7a175547862d057b7d83708Rob Landley      goto error;
28818993e2f3af291eea7a175547862d057b7d83708Rob Landley
28918993e2f3af291eea7a175547862d057b7d83708Rob Landley    default:
29018993e2f3af291eea7a175547862d057b7d83708Rob Landley      msg = "Bug!\n";
29118993e2f3af291eea7a175547862d057b7d83708Rob Landley      goto error;
29218993e2f3af291eea7a175547862d057b7d83708Rob Landley    }
29318993e2f3af291eea7a175547862d057b7d83708Rob Landley  }
294971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
295971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landleyerror:
29618993e2f3af291eea7a175547862d057b7d83708Rob Landley  xz_dec_end(s);
29718993e2f3af291eea7a175547862d057b7d83708Rob Landley  error_exit("%s", msg);
298971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley}
299971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
30039af4ae3e61d352c3faa7d1b87e6ac6fdb69add1Isaac Dunhamvoid xzcat_main(void)
30139af4ae3e61d352c3faa7d1b87e6ac6fdb69add1Isaac Dunham{
30239af4ae3e61d352c3faa7d1b87e6ac6fdb69add1Isaac Dunham  loopfiles(toys.optargs, do_xzcat);
30339af4ae3e61d352c3faa7d1b87e6ac6fdb69add1Isaac Dunham}
30439af4ae3e61d352c3faa7d1b87e6ac6fdb69add1Isaac Dunham
// BEGIN xz_private.h
306971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
307971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
/* BCJ filter decoders. All of them are enabled here; comment out a line to
 * drop that decoder.
 * These cost about 2.5 k when all are enabled; SPARC and IA64 make 0.7 k
 * */

#define XZ_DEC_X86
#define XZ_DEC_POWERPC
#define XZ_DEC_IA64
#define XZ_DEC_ARM
#define XZ_DEC_ARMTHUMB
#define XZ_DEC_SPARC
319971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
/* True when the first size bytes of a and b are identical. */
#define memeq(a, b, size) (memcmp(a, b, size) == 0)

/* Smaller of x and y. Each argument is evaluated more than once. */
#ifndef min
#  define min(x, y) ((x) < (y) ? (x) : (y))
#endif

/*
 * Smaller of x and y after converting both to type, so that operands of
 * mixed signedness or width are compared in one well-defined type.
 */
#define min_t(type, x, y) min((type)(x), (type)(y))
326971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
327971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
/* Inline functions to access unaligned unsigned 32-bit integers */
#ifndef get_unaligned_le32
/* Read a little-endian 32-bit value from buf[0..3]; buf needs no alignment. */
static inline uint32_t get_unaligned_le32(const uint8_t *buf)
{
  return (uint32_t)buf[0]
      | ((uint32_t)buf[1] << 8)
      | ((uint32_t)buf[2] << 16)
      | ((uint32_t)buf[3] << 24);
}
#endif
338971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
#ifndef get_unaligned_be32
/* Read a big-endian 32-bit value from buf[0..3]; buf needs no alignment. */
static inline uint32_t get_unaligned_be32(const uint8_t *buf)
{
  /*
   * Each byte is widened to uint32_t before it is shifted. Shifting the
   * int-promoted buf[0] left by 24 would overflow a signed int whenever
   * the byte is 0x80 or above, which is undefined behavior.
   */
  return ((uint32_t)buf[0] << 24)
      | ((uint32_t)buf[1] << 16)
      | ((uint32_t)buf[2] << 8)
      | (uint32_t)buf[3];
}
#endif
348971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
#ifndef put_unaligned_le32
/* Store val at buf[0..3], least significant byte first; buf needs no alignment. */
static inline void put_unaligned_le32(uint32_t val, uint8_t *buf)
{
  buf[0] = (uint8_t)val;
  buf[1] = (uint8_t)(val >> 8);
  buf[2] = (uint8_t)(val >> 16);
  buf[3] = (uint8_t)(val >> 24);
}
#endif
358971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
#ifndef put_unaligned_be32
/* Store val at buf[0..3], most significant byte first; buf needs no alignment. */
static inline void put_unaligned_be32(uint32_t val, uint8_t *buf)
{
  buf[0] = (uint8_t)(val >> 24);
  buf[1] = (uint8_t)(val >> 16);
  buf[2] = (uint8_t)(val >> 8);
  buf[3] = (uint8_t)val;
}
#endif
368971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
/*
 * Use get_unaligned_le32() also for aligned access for simplicity. On
 * little endian systems, #define get_le32(ptr) (*(const uint32_t *)(ptr))
 * could save a few bytes in code size.
 */
#ifndef get_le32
#  define get_le32 get_unaligned_le32
#endif
377971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
/*
 * If any of the BCJ filter decoders are wanted, define XZ_DEC_BCJ.
 * XZ_DEC_BCJ is used to enable generic support for BCJ decoders.
 */
#ifndef XZ_DEC_BCJ
#  if defined(XZ_DEC_X86) || defined(XZ_DEC_POWERPC) \
      || defined(XZ_DEC_IA64) || defined(XZ_DEC_ARM) \
      || defined(XZ_DEC_ARMTHUMB) || defined(XZ_DEC_SPARC)
#    define XZ_DEC_BCJ
#  endif
#endif
390971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
/*
 * Allocate the memory for an LZMA2 decoder. The decoder must be prepared
 * with xz_dec_lzma2_reset() before xz_dec_lzma2_run() is called on it.
 */
struct xz_dec_lzma2 *xz_dec_lzma2_create(uint32_t dict_max);

/*
 * Decode the one-byte LZMA2 properties field and reset the decoder.
 *
 * Returns XZ_OK on success, XZ_MEMLIMIT_ERROR when the preallocated
 * dictionary is not big enough, and XZ_OPTIONS_ERROR when props asks for
 * something this decoder doesn't support.
 */
enum xz_ret xz_dec_lzma2_reset(struct xz_dec_lzma2 *s, uint8_t props);

/* Decode a raw LZMA2 stream, reading from b->in and writing to b->out. */
enum xz_ret xz_dec_lzma2_run(struct xz_dec_lzma2 *s, struct xz_buf *b);
409971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
410971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley// END "xz_private.h"
411971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
412971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
413971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
414971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
415971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley/*
416971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * Branch/Call/Jump (BCJ) filter decoders
417052491266f02585c7c990100cb7457e56755c336Isaac Dunham * The rest of the code is inside this ifdef. It makes things a little more
418971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * convenient when building without support for any BCJ filters.
419971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley */
420971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley#ifdef XZ_DEC_BCJ
421971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
/*
 * State of a BCJ filter decoder. bcj_apply() selects the filter from
 * `type`, and every architecture specific filter reads `pos` to turn the
 * addresses it finds back into their original form.
 */
struct xz_dec_bcj {
  /* Type of the BCJ filter being used */
  enum {
    BCJ_X86 = 4,        /* x86 or x86-64 */
    BCJ_POWERPC = 5,    /* Big endian only */
    BCJ_IA64 = 6,       /* Big or little endian */
    BCJ_ARM = 7,        /* Little endian only */
    BCJ_ARMTHUMB = 8,   /* Little endian only */
    BCJ_SPARC = 9       /* Big or little endian */
  } type;

  /*
   * Return value of the next filter in the chain. We need to preserve
   * this information across calls, because we must not call the next
   * filter anymore once it has returned XZ_STREAM_END.
   */
  enum xz_ret ret;

  /*
   * Absolute position relative to the beginning of the uncompressed
   * data (in a single .xz Block). We care only about the lowest 32
   * bits so this doesn't need to be uint64_t even with big files.
   * bcj_apply() advances it by the number of bytes each filter call
   * reports as processed.
   */
  uint32_t pos;

  /*
   * x86 filter state: bcj_x86() loads its working mask from here on
   * entry and stores the updated mask back before returning.
   */
  uint32_t x86_prev_mask;

  /* Temporary space to hold the variables from struct xz_buf */
  uint8_t *out;
  size_t out_pos;
  size_t out_size;

  struct {
    /* Amount of already filtered data in the beginning of buf */
    size_t filtered;

    /* Total amount of data currently stored in buf  */
    size_t size;

    /*
     * Buffer to hold a mix of filtered and unfiltered data. This
     * needs to be big enough to hold Alignment + 2 * Look-ahead:
     *
     * Type         Alignment   Look-ahead
     * x86              1           4
     * PowerPC          4           0
     * IA-64           16           0
     * ARM              4           0
     * ARM-Thumb        2           2
     * SPARC            4           0
     *
     * The largest requirement in this table is 16 bytes (IA-64),
     * which is what sets the size of the array.
     */
    uint8_t buf[16];
  } temp;
};
477971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
/*
 * Decode the Filter ID of a BCJ filter. Custom start offsets are not
 * supported by this implementation, so the Filter Properties never need
 * to be decoded.
 *
 * Returns XZ_OK if the given Filter ID is supported and XZ_OPTIONS_ERROR
 * otherwise.
 */
enum xz_ret xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id);

/*
 * Decode a raw BCJ + LZMA2 stream. Use this only when the filter chain
 * really contains a BCJ filter; when the chain consists of LZMA2 alone,
 * xz_dec_lzma2_run() must be called directly instead.
 */
enum xz_ret xz_dec_bcj_run(struct xz_dec_bcj *s, struct xz_dec_lzma2 *lzma2,
                           struct xz_buf *b);
4949641a3c6c5447c76f56cefd1844a4e545c015822Isaac Dunham
495971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley#ifdef XZ_DEC_X86
/*
 * Test the most significant byte of a memory address in an x86
 * instruction. Returns 1 when the byte is 0x00 or 0xFF and 0 for every
 * other value.
 */
static inline int bcj_x86_test_msbyte(uint8_t b)
{
  switch (b) {
  case 0x00:
  case 0xFF:
    return 1;
  default:
    return 0;
  }
}
504971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
505971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landleystatic size_t bcj_x86(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
506971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley{
50718993e2f3af291eea7a175547862d057b7d83708Rob Landley  static const int mask_to_allowed_status[8]
50818993e2f3af291eea7a175547862d057b7d83708Rob Landley    = { 1,1,1,0,1,0,0,0 };
50918993e2f3af291eea7a175547862d057b7d83708Rob Landley
51018993e2f3af291eea7a175547862d057b7d83708Rob Landley  static const uint8_t mask_to_bit_num[8] = { 0, 1, 2, 2, 3, 3, 3, 3 };
51118993e2f3af291eea7a175547862d057b7d83708Rob Landley
51218993e2f3af291eea7a175547862d057b7d83708Rob Landley  size_t i;
51318993e2f3af291eea7a175547862d057b7d83708Rob Landley  size_t prev_pos = (size_t)-1;
51418993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint32_t prev_mask = s->x86_prev_mask;
51518993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint32_t src;
51618993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint32_t dest;
51718993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint32_t j;
51818993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint8_t b;
51918993e2f3af291eea7a175547862d057b7d83708Rob Landley
52018993e2f3af291eea7a175547862d057b7d83708Rob Landley  if (size <= 4)
52118993e2f3af291eea7a175547862d057b7d83708Rob Landley    return 0;
52218993e2f3af291eea7a175547862d057b7d83708Rob Landley
52318993e2f3af291eea7a175547862d057b7d83708Rob Landley  size -= 4;
52418993e2f3af291eea7a175547862d057b7d83708Rob Landley  for (i = 0; i < size; ++i) {
52518993e2f3af291eea7a175547862d057b7d83708Rob Landley    if ((buf[i] & 0xFE) != 0xE8)
52618993e2f3af291eea7a175547862d057b7d83708Rob Landley      continue;
52718993e2f3af291eea7a175547862d057b7d83708Rob Landley
52818993e2f3af291eea7a175547862d057b7d83708Rob Landley    prev_pos = i - prev_pos;
52918993e2f3af291eea7a175547862d057b7d83708Rob Landley    if (prev_pos > 3) {
53018993e2f3af291eea7a175547862d057b7d83708Rob Landley      prev_mask = 0;
53118993e2f3af291eea7a175547862d057b7d83708Rob Landley    } else {
53218993e2f3af291eea7a175547862d057b7d83708Rob Landley      prev_mask = (prev_mask << (prev_pos - 1)) & 7;
53318993e2f3af291eea7a175547862d057b7d83708Rob Landley      if (prev_mask != 0) {
53418993e2f3af291eea7a175547862d057b7d83708Rob Landley        b = buf[i + 4 - mask_to_bit_num[prev_mask]];
53518993e2f3af291eea7a175547862d057b7d83708Rob Landley        if (!mask_to_allowed_status[prev_mask]
53618993e2f3af291eea7a175547862d057b7d83708Rob Landley            || bcj_x86_test_msbyte(b)) {
53718993e2f3af291eea7a175547862d057b7d83708Rob Landley          prev_pos = i;
53818993e2f3af291eea7a175547862d057b7d83708Rob Landley          prev_mask = (prev_mask << 1) | 1;
53918993e2f3af291eea7a175547862d057b7d83708Rob Landley          continue;
54018993e2f3af291eea7a175547862d057b7d83708Rob Landley        }
54118993e2f3af291eea7a175547862d057b7d83708Rob Landley      }
54218993e2f3af291eea7a175547862d057b7d83708Rob Landley    }
54318993e2f3af291eea7a175547862d057b7d83708Rob Landley
54418993e2f3af291eea7a175547862d057b7d83708Rob Landley    prev_pos = i;
54518993e2f3af291eea7a175547862d057b7d83708Rob Landley
54618993e2f3af291eea7a175547862d057b7d83708Rob Landley    if (bcj_x86_test_msbyte(buf[i + 4])) {
54718993e2f3af291eea7a175547862d057b7d83708Rob Landley      src = get_unaligned_le32(buf + i + 1);
54818993e2f3af291eea7a175547862d057b7d83708Rob Landley      for (;;) {
54918993e2f3af291eea7a175547862d057b7d83708Rob Landley        dest = src - (s->pos + (uint32_t)i + 5);
55018993e2f3af291eea7a175547862d057b7d83708Rob Landley        if (prev_mask == 0)
55118993e2f3af291eea7a175547862d057b7d83708Rob Landley          break;
55218993e2f3af291eea7a175547862d057b7d83708Rob Landley
55318993e2f3af291eea7a175547862d057b7d83708Rob Landley        j = mask_to_bit_num[prev_mask] * 8;
55418993e2f3af291eea7a175547862d057b7d83708Rob Landley        b = (uint8_t)(dest >> (24 - j));
55518993e2f3af291eea7a175547862d057b7d83708Rob Landley        if (!bcj_x86_test_msbyte(b))
55618993e2f3af291eea7a175547862d057b7d83708Rob Landley          break;
55718993e2f3af291eea7a175547862d057b7d83708Rob Landley
55818993e2f3af291eea7a175547862d057b7d83708Rob Landley        src = dest ^ (((uint32_t)1 << (32 - j)) - 1);
55918993e2f3af291eea7a175547862d057b7d83708Rob Landley      }
56018993e2f3af291eea7a175547862d057b7d83708Rob Landley
56118993e2f3af291eea7a175547862d057b7d83708Rob Landley      dest &= 0x01FFFFFF;
56218993e2f3af291eea7a175547862d057b7d83708Rob Landley      dest |= (uint32_t)0 - (dest & 0x01000000);
56318993e2f3af291eea7a175547862d057b7d83708Rob Landley      put_unaligned_le32(dest, buf + i + 1);
56418993e2f3af291eea7a175547862d057b7d83708Rob Landley      i += 4;
56518993e2f3af291eea7a175547862d057b7d83708Rob Landley    } else {
56618993e2f3af291eea7a175547862d057b7d83708Rob Landley      prev_mask = (prev_mask << 1) | 1;
56718993e2f3af291eea7a175547862d057b7d83708Rob Landley    }
56818993e2f3af291eea7a175547862d057b7d83708Rob Landley  }
56918993e2f3af291eea7a175547862d057b7d83708Rob Landley
57018993e2f3af291eea7a175547862d057b7d83708Rob Landley  prev_pos = i - prev_pos;
57118993e2f3af291eea7a175547862d057b7d83708Rob Landley  s->x86_prev_mask = prev_pos > 3 ? 0 : prev_mask << (prev_pos - 1);
57218993e2f3af291eea7a175547862d057b7d83708Rob Landley  return i;
573971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley}
574971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley#endif
575971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
576971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley#ifdef XZ_DEC_POWERPC
577971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landleystatic size_t bcj_powerpc(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
578971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley{
57918993e2f3af291eea7a175547862d057b7d83708Rob Landley  size_t i;
58018993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint32_t instr;
581971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
58218993e2f3af291eea7a175547862d057b7d83708Rob Landley  for (i = 0; i + 4 <= size; i += 4) {
58318993e2f3af291eea7a175547862d057b7d83708Rob Landley    instr = get_unaligned_be32(buf + i);
58418993e2f3af291eea7a175547862d057b7d83708Rob Landley    if ((instr & 0xFC000003) == 0x48000001) {
58518993e2f3af291eea7a175547862d057b7d83708Rob Landley      instr &= 0x03FFFFFC;
58618993e2f3af291eea7a175547862d057b7d83708Rob Landley      instr -= s->pos + (uint32_t)i;
58718993e2f3af291eea7a175547862d057b7d83708Rob Landley      instr &= 0x03FFFFFC;
58818993e2f3af291eea7a175547862d057b7d83708Rob Landley      instr |= 0x48000001;
58918993e2f3af291eea7a175547862d057b7d83708Rob Landley      put_unaligned_be32(instr, buf + i);
59018993e2f3af291eea7a175547862d057b7d83708Rob Landley    }
59118993e2f3af291eea7a175547862d057b7d83708Rob Landley  }
592971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
59318993e2f3af291eea7a175547862d057b7d83708Rob Landley  return i;
594971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley}
595971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley#endif
596971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
597971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley#ifdef XZ_DEC_IA64
598971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landleystatic size_t bcj_ia64(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
599971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley{
60018993e2f3af291eea7a175547862d057b7d83708Rob Landley  static const uint8_t branch_table[32] = {
60118993e2f3af291eea7a175547862d057b7d83708Rob Landley    0, 0, 0, 0, 0, 0, 0, 0,
60218993e2f3af291eea7a175547862d057b7d83708Rob Landley    0, 0, 0, 0, 0, 0, 0, 0,
60318993e2f3af291eea7a175547862d057b7d83708Rob Landley    4, 4, 6, 6, 0, 0, 7, 7,
60418993e2f3af291eea7a175547862d057b7d83708Rob Landley    4, 4, 0, 0, 4, 4, 0, 0
60518993e2f3af291eea7a175547862d057b7d83708Rob Landley  };
60618993e2f3af291eea7a175547862d057b7d83708Rob Landley
60718993e2f3af291eea7a175547862d057b7d83708Rob Landley  /*
60818993e2f3af291eea7a175547862d057b7d83708Rob Landley   * The local variables take a little bit stack space, but it's less
60918993e2f3af291eea7a175547862d057b7d83708Rob Landley   * than what LZMA2 decoder takes, so it doesn't make sense to reduce
61018993e2f3af291eea7a175547862d057b7d83708Rob Landley   * stack usage here without doing that for the LZMA2 decoder too.
61118993e2f3af291eea7a175547862d057b7d83708Rob Landley   */
61218993e2f3af291eea7a175547862d057b7d83708Rob Landley
61318993e2f3af291eea7a175547862d057b7d83708Rob Landley  /* Loop counters */
61418993e2f3af291eea7a175547862d057b7d83708Rob Landley  size_t i;
61518993e2f3af291eea7a175547862d057b7d83708Rob Landley  size_t j;
61618993e2f3af291eea7a175547862d057b7d83708Rob Landley
61718993e2f3af291eea7a175547862d057b7d83708Rob Landley  /* Instruction slot (0, 1, or 2) in the 128-bit instruction word */
61818993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint32_t slot;
61918993e2f3af291eea7a175547862d057b7d83708Rob Landley
62018993e2f3af291eea7a175547862d057b7d83708Rob Landley  /* Bitwise offset of the instruction indicated by slot */
62118993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint32_t bit_pos;
62218993e2f3af291eea7a175547862d057b7d83708Rob Landley
62318993e2f3af291eea7a175547862d057b7d83708Rob Landley  /* bit_pos split into byte and bit parts */
62418993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint32_t byte_pos;
62518993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint32_t bit_res;
62618993e2f3af291eea7a175547862d057b7d83708Rob Landley
62718993e2f3af291eea7a175547862d057b7d83708Rob Landley  /* Address part of an instruction */
62818993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint32_t addr;
62918993e2f3af291eea7a175547862d057b7d83708Rob Landley
63018993e2f3af291eea7a175547862d057b7d83708Rob Landley  /* Mask used to detect which instructions to convert */
63118993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint32_t mask;
63218993e2f3af291eea7a175547862d057b7d83708Rob Landley
63318993e2f3af291eea7a175547862d057b7d83708Rob Landley  /* 41-bit instruction stored somewhere in the lowest 48 bits */
63418993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint64_t instr;
63518993e2f3af291eea7a175547862d057b7d83708Rob Landley
63618993e2f3af291eea7a175547862d057b7d83708Rob Landley  /* Instruction normalized with bit_res for easier manipulation */
63718993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint64_t norm;
63818993e2f3af291eea7a175547862d057b7d83708Rob Landley
63918993e2f3af291eea7a175547862d057b7d83708Rob Landley  for (i = 0; i + 16 <= size; i += 16) {
64018993e2f3af291eea7a175547862d057b7d83708Rob Landley    mask = branch_table[buf[i] & 0x1F];
64118993e2f3af291eea7a175547862d057b7d83708Rob Landley    for (slot = 0, bit_pos = 5; slot < 3; ++slot, bit_pos += 41) {
64218993e2f3af291eea7a175547862d057b7d83708Rob Landley      if (((mask >> slot) & 1) == 0)
64318993e2f3af291eea7a175547862d057b7d83708Rob Landley        continue;
64418993e2f3af291eea7a175547862d057b7d83708Rob Landley
64518993e2f3af291eea7a175547862d057b7d83708Rob Landley      byte_pos = bit_pos >> 3;
64618993e2f3af291eea7a175547862d057b7d83708Rob Landley      bit_res = bit_pos & 7;
64718993e2f3af291eea7a175547862d057b7d83708Rob Landley      instr = 0;
64818993e2f3af291eea7a175547862d057b7d83708Rob Landley      for (j = 0; j < 6; ++j)
64918993e2f3af291eea7a175547862d057b7d83708Rob Landley        instr |= (uint64_t)(buf[i + j + byte_pos])
65018993e2f3af291eea7a175547862d057b7d83708Rob Landley            << (8 * j);
65118993e2f3af291eea7a175547862d057b7d83708Rob Landley
65218993e2f3af291eea7a175547862d057b7d83708Rob Landley      norm = instr >> bit_res;
65318993e2f3af291eea7a175547862d057b7d83708Rob Landley
65418993e2f3af291eea7a175547862d057b7d83708Rob Landley      if (((norm >> 37) & 0x0F) == 0x05
65518993e2f3af291eea7a175547862d057b7d83708Rob Landley          && ((norm >> 9) & 0x07) == 0) {
65618993e2f3af291eea7a175547862d057b7d83708Rob Landley        addr = (norm >> 13) & 0x0FFFFF;
65718993e2f3af291eea7a175547862d057b7d83708Rob Landley        addr |= ((uint32_t)(norm >> 36) & 1) << 20;
65818993e2f3af291eea7a175547862d057b7d83708Rob Landley        addr <<= 4;
65918993e2f3af291eea7a175547862d057b7d83708Rob Landley        addr -= s->pos + (uint32_t)i;
66018993e2f3af291eea7a175547862d057b7d83708Rob Landley        addr >>= 4;
66118993e2f3af291eea7a175547862d057b7d83708Rob Landley
66218993e2f3af291eea7a175547862d057b7d83708Rob Landley        norm &= ~((uint64_t)0x8FFFFF << 13);
66318993e2f3af291eea7a175547862d057b7d83708Rob Landley        norm |= (uint64_t)(addr & 0x0FFFFF) << 13;
66418993e2f3af291eea7a175547862d057b7d83708Rob Landley        norm |= (uint64_t)(addr & 0x100000)
66518993e2f3af291eea7a175547862d057b7d83708Rob Landley            << (36 - 20);
66618993e2f3af291eea7a175547862d057b7d83708Rob Landley
66718993e2f3af291eea7a175547862d057b7d83708Rob Landley        instr &= (1 << bit_res) - 1;
66818993e2f3af291eea7a175547862d057b7d83708Rob Landley        instr |= norm << bit_res;
66918993e2f3af291eea7a175547862d057b7d83708Rob Landley
67018993e2f3af291eea7a175547862d057b7d83708Rob Landley        for (j = 0; j < 6; j++)
67118993e2f3af291eea7a175547862d057b7d83708Rob Landley          buf[i + j + byte_pos]
67218993e2f3af291eea7a175547862d057b7d83708Rob Landley            = (uint8_t)(instr >> (8 * j));
67318993e2f3af291eea7a175547862d057b7d83708Rob Landley      }
67418993e2f3af291eea7a175547862d057b7d83708Rob Landley    }
67518993e2f3af291eea7a175547862d057b7d83708Rob Landley  }
67618993e2f3af291eea7a175547862d057b7d83708Rob Landley
67718993e2f3af291eea7a175547862d057b7d83708Rob Landley  return i;
678971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley}
679971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley#endif
680971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
681971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley#ifdef XZ_DEC_ARM
682971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landleystatic size_t bcj_arm(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
683971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley{
68418993e2f3af291eea7a175547862d057b7d83708Rob Landley  size_t i;
68518993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint32_t addr;
686971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
68718993e2f3af291eea7a175547862d057b7d83708Rob Landley  for (i = 0; i + 4 <= size; i += 4) {
68818993e2f3af291eea7a175547862d057b7d83708Rob Landley    if (buf[i + 3] == 0xEB) {
68918993e2f3af291eea7a175547862d057b7d83708Rob Landley      addr = (uint32_t)buf[i] | ((uint32_t)buf[i + 1] << 8)
69018993e2f3af291eea7a175547862d057b7d83708Rob Landley          | ((uint32_t)buf[i + 2] << 16);
69118993e2f3af291eea7a175547862d057b7d83708Rob Landley      addr <<= 2;
69218993e2f3af291eea7a175547862d057b7d83708Rob Landley      addr -= s->pos + (uint32_t)i + 8;
69318993e2f3af291eea7a175547862d057b7d83708Rob Landley      addr >>= 2;
69418993e2f3af291eea7a175547862d057b7d83708Rob Landley      buf[i] = (uint8_t)addr;
69518993e2f3af291eea7a175547862d057b7d83708Rob Landley      buf[i + 1] = (uint8_t)(addr >> 8);
69618993e2f3af291eea7a175547862d057b7d83708Rob Landley      buf[i + 2] = (uint8_t)(addr >> 16);
69718993e2f3af291eea7a175547862d057b7d83708Rob Landley    }
69818993e2f3af291eea7a175547862d057b7d83708Rob Landley  }
699971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
70018993e2f3af291eea7a175547862d057b7d83708Rob Landley  return i;
701971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley}
702971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley#endif
703971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
704971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley#ifdef XZ_DEC_ARMTHUMB
705971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landleystatic size_t bcj_armthumb(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
706971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley{
70718993e2f3af291eea7a175547862d057b7d83708Rob Landley  size_t i;
70818993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint32_t addr;
70918993e2f3af291eea7a175547862d057b7d83708Rob Landley
71018993e2f3af291eea7a175547862d057b7d83708Rob Landley  for (i = 0; i + 4 <= size; i += 2) {
71118993e2f3af291eea7a175547862d057b7d83708Rob Landley    if ((buf[i + 1] & 0xF8) == 0xF0
71218993e2f3af291eea7a175547862d057b7d83708Rob Landley        && (buf[i + 3] & 0xF8) == 0xF8) {
71318993e2f3af291eea7a175547862d057b7d83708Rob Landley      addr = (((uint32_t)buf[i + 1] & 0x07) << 19)
71418993e2f3af291eea7a175547862d057b7d83708Rob Landley          | ((uint32_t)buf[i] << 11)
71518993e2f3af291eea7a175547862d057b7d83708Rob Landley          | (((uint32_t)buf[i + 3] & 0x07) << 8)
71618993e2f3af291eea7a175547862d057b7d83708Rob Landley          | (uint32_t)buf[i + 2];
71718993e2f3af291eea7a175547862d057b7d83708Rob Landley      addr <<= 1;
71818993e2f3af291eea7a175547862d057b7d83708Rob Landley      addr -= s->pos + (uint32_t)i + 4;
71918993e2f3af291eea7a175547862d057b7d83708Rob Landley      addr >>= 1;
72018993e2f3af291eea7a175547862d057b7d83708Rob Landley      buf[i + 1] = (uint8_t)(0xF0 | ((addr >> 19) & 0x07));
72118993e2f3af291eea7a175547862d057b7d83708Rob Landley      buf[i] = (uint8_t)(addr >> 11);
72218993e2f3af291eea7a175547862d057b7d83708Rob Landley      buf[i + 3] = (uint8_t)(0xF8 | ((addr >> 8) & 0x07));
72318993e2f3af291eea7a175547862d057b7d83708Rob Landley      buf[i + 2] = (uint8_t)addr;
72418993e2f3af291eea7a175547862d057b7d83708Rob Landley      i += 2;
72518993e2f3af291eea7a175547862d057b7d83708Rob Landley    }
72618993e2f3af291eea7a175547862d057b7d83708Rob Landley  }
72718993e2f3af291eea7a175547862d057b7d83708Rob Landley
72818993e2f3af291eea7a175547862d057b7d83708Rob Landley  return i;
729971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley}
730971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley#endif
731971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
732971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley#ifdef XZ_DEC_SPARC
733971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landleystatic size_t bcj_sparc(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
734971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley{
73518993e2f3af291eea7a175547862d057b7d83708Rob Landley  size_t i;
73618993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint32_t instr;
737971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
73818993e2f3af291eea7a175547862d057b7d83708Rob Landley  for (i = 0; i + 4 <= size; i += 4) {
73918993e2f3af291eea7a175547862d057b7d83708Rob Landley    instr = get_unaligned_be32(buf + i);
74018993e2f3af291eea7a175547862d057b7d83708Rob Landley    if ((instr >> 22) == 0x100 || (instr >> 22) == 0x1FF) {
74118993e2f3af291eea7a175547862d057b7d83708Rob Landley      instr <<= 2;
74218993e2f3af291eea7a175547862d057b7d83708Rob Landley      instr -= s->pos + (uint32_t)i;
74318993e2f3af291eea7a175547862d057b7d83708Rob Landley      instr >>= 2;
74418993e2f3af291eea7a175547862d057b7d83708Rob Landley      instr = ((uint32_t)0x40000000 - (instr & 0x400000))
74518993e2f3af291eea7a175547862d057b7d83708Rob Landley          | 0x40000000 | (instr & 0x3FFFFF);
74618993e2f3af291eea7a175547862d057b7d83708Rob Landley      put_unaligned_be32(instr, buf + i);
74718993e2f3af291eea7a175547862d057b7d83708Rob Landley    }
74818993e2f3af291eea7a175547862d057b7d83708Rob Landley  }
749971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
75018993e2f3af291eea7a175547862d057b7d83708Rob Landley  return i;
751971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley}
752971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley#endif
753971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
754971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley/*
755971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * Apply the selected BCJ filter. Update *pos and s->pos to match the amount
756971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * of data that got filtered.
757971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley *
758971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * NOTE: This is implemented as a switch statement to avoid using function
759971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * pointers, which could be problematic in the kernel boot code, which must
760971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * avoid pointers to static data (at least on x86).
761971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley */
762971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landleystatic void bcj_apply(struct xz_dec_bcj *s,
76318993e2f3af291eea7a175547862d057b7d83708Rob Landley          uint8_t *buf, size_t *pos, size_t size)
764971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley{
76518993e2f3af291eea7a175547862d057b7d83708Rob Landley  size_t filtered;
766971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
76718993e2f3af291eea7a175547862d057b7d83708Rob Landley  buf += *pos;
76818993e2f3af291eea7a175547862d057b7d83708Rob Landley  size -= *pos;
769971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
77018993e2f3af291eea7a175547862d057b7d83708Rob Landley  switch (s->type) {
771971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley#ifdef XZ_DEC_X86
77218993e2f3af291eea7a175547862d057b7d83708Rob Landley  case BCJ_X86:
77318993e2f3af291eea7a175547862d057b7d83708Rob Landley    filtered = bcj_x86(s, buf, size);
77418993e2f3af291eea7a175547862d057b7d83708Rob Landley    break;
775971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley#endif
776971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley#ifdef XZ_DEC_POWERPC
77718993e2f3af291eea7a175547862d057b7d83708Rob Landley  case BCJ_POWERPC:
77818993e2f3af291eea7a175547862d057b7d83708Rob Landley    filtered = bcj_powerpc(s, buf, size);
77918993e2f3af291eea7a175547862d057b7d83708Rob Landley    break;
780971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley#endif
781971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley#ifdef XZ_DEC_IA64
78218993e2f3af291eea7a175547862d057b7d83708Rob Landley  case BCJ_IA64:
78318993e2f3af291eea7a175547862d057b7d83708Rob Landley    filtered = bcj_ia64(s, buf, size);
78418993e2f3af291eea7a175547862d057b7d83708Rob Landley    break;
785971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley#endif
786971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley#ifdef XZ_DEC_ARM
78718993e2f3af291eea7a175547862d057b7d83708Rob Landley  case BCJ_ARM:
78818993e2f3af291eea7a175547862d057b7d83708Rob Landley    filtered = bcj_arm(s, buf, size);
78918993e2f3af291eea7a175547862d057b7d83708Rob Landley    break;
790971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley#endif
791971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley#ifdef XZ_DEC_ARMTHUMB
79218993e2f3af291eea7a175547862d057b7d83708Rob Landley  case BCJ_ARMTHUMB:
79318993e2f3af291eea7a175547862d057b7d83708Rob Landley    filtered = bcj_armthumb(s, buf, size);
79418993e2f3af291eea7a175547862d057b7d83708Rob Landley    break;
795971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley#endif
796971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley#ifdef XZ_DEC_SPARC
79718993e2f3af291eea7a175547862d057b7d83708Rob Landley  case BCJ_SPARC:
79818993e2f3af291eea7a175547862d057b7d83708Rob Landley    filtered = bcj_sparc(s, buf, size);
79918993e2f3af291eea7a175547862d057b7d83708Rob Landley    break;
800971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley#endif
80118993e2f3af291eea7a175547862d057b7d83708Rob Landley  default:
80218993e2f3af291eea7a175547862d057b7d83708Rob Landley    /* Never reached but silence compiler warnings. */
80318993e2f3af291eea7a175547862d057b7d83708Rob Landley    filtered = 0;
80418993e2f3af291eea7a175547862d057b7d83708Rob Landley    break;
80518993e2f3af291eea7a175547862d057b7d83708Rob Landley  }
806971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
80718993e2f3af291eea7a175547862d057b7d83708Rob Landley  *pos += filtered;
80818993e2f3af291eea7a175547862d057b7d83708Rob Landley  s->pos += filtered;
809971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley}
810971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
811971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley/*
812971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * Flush pending filtered data from temp to the output buffer.
813971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * Move the remaining mixture of possibly filtered and unfiltered
814971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * data to the beginning of temp.
815971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley */
816971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landleystatic void bcj_flush(struct xz_dec_bcj *s, struct xz_buf *b)
817971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley{
81818993e2f3af291eea7a175547862d057b7d83708Rob Landley  size_t copy_size;
819971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
82018993e2f3af291eea7a175547862d057b7d83708Rob Landley  copy_size = min_t(size_t, s->temp.filtered, b->out_size - b->out_pos);
82118993e2f3af291eea7a175547862d057b7d83708Rob Landley  memcpy(b->out + b->out_pos, s->temp.buf, copy_size);
82218993e2f3af291eea7a175547862d057b7d83708Rob Landley  b->out_pos += copy_size;
823971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
82418993e2f3af291eea7a175547862d057b7d83708Rob Landley  s->temp.filtered -= copy_size;
82518993e2f3af291eea7a175547862d057b7d83708Rob Landley  s->temp.size -= copy_size;
82618993e2f3af291eea7a175547862d057b7d83708Rob Landley  memmove(s->temp.buf, s->temp.buf + copy_size, s->temp.size);
827971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley}
828971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
829971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley/*
830971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * The BCJ filter functions are primitive in sense that they process the
831971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * data in chunks of 1-16 bytes. To hide this issue, this function does
832971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * some buffering.
833971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley */
8349bd7a1696e7060e51ec0d0aaef477eaf85aaf216Rob Landleyenum xz_ret xz_dec_bcj_run(struct xz_dec_bcj *s,
83518993e2f3af291eea7a175547862d057b7d83708Rob Landley             struct xz_dec_lzma2 *lzma2,
83618993e2f3af291eea7a175547862d057b7d83708Rob Landley             struct xz_buf *b)
83718993e2f3af291eea7a175547862d057b7d83708Rob Landley{
83818993e2f3af291eea7a175547862d057b7d83708Rob Landley  size_t out_start;
83918993e2f3af291eea7a175547862d057b7d83708Rob Landley
84018993e2f3af291eea7a175547862d057b7d83708Rob Landley  /*
84118993e2f3af291eea7a175547862d057b7d83708Rob Landley   * Flush pending already filtered data to the output buffer. Return
84218993e2f3af291eea7a175547862d057b7d83708Rob Landley   * immediatelly if we couldn't flush everything, or if the next
84318993e2f3af291eea7a175547862d057b7d83708Rob Landley   * filter in the chain had already returned XZ_STREAM_END.
84418993e2f3af291eea7a175547862d057b7d83708Rob Landley   */
84518993e2f3af291eea7a175547862d057b7d83708Rob Landley  if (s->temp.filtered > 0) {
84618993e2f3af291eea7a175547862d057b7d83708Rob Landley    bcj_flush(s, b);
84718993e2f3af291eea7a175547862d057b7d83708Rob Landley    if (s->temp.filtered > 0)
84818993e2f3af291eea7a175547862d057b7d83708Rob Landley      return XZ_OK;
84918993e2f3af291eea7a175547862d057b7d83708Rob Landley
85018993e2f3af291eea7a175547862d057b7d83708Rob Landley    if (s->ret == XZ_STREAM_END)
85118993e2f3af291eea7a175547862d057b7d83708Rob Landley      return XZ_STREAM_END;
85218993e2f3af291eea7a175547862d057b7d83708Rob Landley  }
85318993e2f3af291eea7a175547862d057b7d83708Rob Landley
85418993e2f3af291eea7a175547862d057b7d83708Rob Landley  /*
85518993e2f3af291eea7a175547862d057b7d83708Rob Landley   * If we have more output space than what is currently pending in
85618993e2f3af291eea7a175547862d057b7d83708Rob Landley   * temp, copy the unfiltered data from temp to the output buffer
85718993e2f3af291eea7a175547862d057b7d83708Rob Landley   * and try to fill the output buffer by decoding more data from the
85818993e2f3af291eea7a175547862d057b7d83708Rob Landley   * next filter in the chain. Apply the BCJ filter on the new data
85918993e2f3af291eea7a175547862d057b7d83708Rob Landley   * in the output buffer. If everything cannot be filtered, copy it
86018993e2f3af291eea7a175547862d057b7d83708Rob Landley   * to temp and rewind the output buffer position accordingly.
86118993e2f3af291eea7a175547862d057b7d83708Rob Landley   *
86218993e2f3af291eea7a175547862d057b7d83708Rob Landley   * This needs to be always run when temp.size == 0 to handle a special
86318993e2f3af291eea7a175547862d057b7d83708Rob Landley   * case where the output buffer is full and the next filter has no
86418993e2f3af291eea7a175547862d057b7d83708Rob Landley   * more output coming but hasn't returned XZ_STREAM_END yet.
86518993e2f3af291eea7a175547862d057b7d83708Rob Landley   */
86618993e2f3af291eea7a175547862d057b7d83708Rob Landley  if (s->temp.size < b->out_size - b->out_pos || s->temp.size == 0) {
86718993e2f3af291eea7a175547862d057b7d83708Rob Landley    out_start = b->out_pos;
86818993e2f3af291eea7a175547862d057b7d83708Rob Landley    memcpy(b->out + b->out_pos, s->temp.buf, s->temp.size);
86918993e2f3af291eea7a175547862d057b7d83708Rob Landley    b->out_pos += s->temp.size;
87018993e2f3af291eea7a175547862d057b7d83708Rob Landley
87118993e2f3af291eea7a175547862d057b7d83708Rob Landley    s->ret = xz_dec_lzma2_run(lzma2, b);
87218993e2f3af291eea7a175547862d057b7d83708Rob Landley    if (s->ret != XZ_STREAM_END
8739641a3c6c5447c76f56cefd1844a4e545c015822Isaac Dunham        && (s->ret != XZ_OK ))
87418993e2f3af291eea7a175547862d057b7d83708Rob Landley      return s->ret;
87518993e2f3af291eea7a175547862d057b7d83708Rob Landley
87618993e2f3af291eea7a175547862d057b7d83708Rob Landley    bcj_apply(s, b->out, &out_start, b->out_pos);
87718993e2f3af291eea7a175547862d057b7d83708Rob Landley
87818993e2f3af291eea7a175547862d057b7d83708Rob Landley    /*
87918993e2f3af291eea7a175547862d057b7d83708Rob Landley     * As an exception, if the next filter returned XZ_STREAM_END,
88018993e2f3af291eea7a175547862d057b7d83708Rob Landley     * we can do that too, since the last few bytes that remain
88118993e2f3af291eea7a175547862d057b7d83708Rob Landley     * unfiltered are meant to remain unfiltered.
88218993e2f3af291eea7a175547862d057b7d83708Rob Landley     */
88318993e2f3af291eea7a175547862d057b7d83708Rob Landley    if (s->ret == XZ_STREAM_END)
88418993e2f3af291eea7a175547862d057b7d83708Rob Landley      return XZ_STREAM_END;
88518993e2f3af291eea7a175547862d057b7d83708Rob Landley
88618993e2f3af291eea7a175547862d057b7d83708Rob Landley    s->temp.size = b->out_pos - out_start;
88718993e2f3af291eea7a175547862d057b7d83708Rob Landley    b->out_pos -= s->temp.size;
88818993e2f3af291eea7a175547862d057b7d83708Rob Landley    memcpy(s->temp.buf, b->out + b->out_pos, s->temp.size);
88918993e2f3af291eea7a175547862d057b7d83708Rob Landley
89018993e2f3af291eea7a175547862d057b7d83708Rob Landley    /*
89118993e2f3af291eea7a175547862d057b7d83708Rob Landley     * If there wasn't enough input to the next filter to fill
89218993e2f3af291eea7a175547862d057b7d83708Rob Landley     * the output buffer with unfiltered data, there's no point
89318993e2f3af291eea7a175547862d057b7d83708Rob Landley     * to try decoding more data to temp.
89418993e2f3af291eea7a175547862d057b7d83708Rob Landley     */
89518993e2f3af291eea7a175547862d057b7d83708Rob Landley    if (b->out_pos + s->temp.size < b->out_size)
89618993e2f3af291eea7a175547862d057b7d83708Rob Landley      return XZ_OK;
89718993e2f3af291eea7a175547862d057b7d83708Rob Landley  }
89818993e2f3af291eea7a175547862d057b7d83708Rob Landley
89918993e2f3af291eea7a175547862d057b7d83708Rob Landley  /*
90018993e2f3af291eea7a175547862d057b7d83708Rob Landley   * We have unfiltered data in temp. If the output buffer isn't full
90118993e2f3af291eea7a175547862d057b7d83708Rob Landley   * yet, try to fill the temp buffer by decoding more data from the
90218993e2f3af291eea7a175547862d057b7d83708Rob Landley   * next filter. Apply the BCJ filter on temp. Then we hopefully can
90318993e2f3af291eea7a175547862d057b7d83708Rob Landley   * fill the actual output buffer by copying filtered data from temp.
90418993e2f3af291eea7a175547862d057b7d83708Rob Landley   * A mix of filtered and unfiltered data may be left in temp; it will
90518993e2f3af291eea7a175547862d057b7d83708Rob Landley   * be taken care on the next call to this function.
90618993e2f3af291eea7a175547862d057b7d83708Rob Landley   */
90718993e2f3af291eea7a175547862d057b7d83708Rob Landley  if (b->out_pos < b->out_size) {
90818993e2f3af291eea7a175547862d057b7d83708Rob Landley    /* Make b->out{,_pos,_size} temporarily point to s->temp. */
90918993e2f3af291eea7a175547862d057b7d83708Rob Landley    s->out = b->out;
91018993e2f3af291eea7a175547862d057b7d83708Rob Landley    s->out_pos = b->out_pos;
91118993e2f3af291eea7a175547862d057b7d83708Rob Landley    s->out_size = b->out_size;
91218993e2f3af291eea7a175547862d057b7d83708Rob Landley    b->out = s->temp.buf;
91318993e2f3af291eea7a175547862d057b7d83708Rob Landley    b->out_pos = s->temp.size;
91418993e2f3af291eea7a175547862d057b7d83708Rob Landley    b->out_size = sizeof(s->temp.buf);
91518993e2f3af291eea7a175547862d057b7d83708Rob Landley
91618993e2f3af291eea7a175547862d057b7d83708Rob Landley    s->ret = xz_dec_lzma2_run(lzma2, b);
91718993e2f3af291eea7a175547862d057b7d83708Rob Landley
91818993e2f3af291eea7a175547862d057b7d83708Rob Landley    s->temp.size = b->out_pos;
91918993e2f3af291eea7a175547862d057b7d83708Rob Landley    b->out = s->out;
92018993e2f3af291eea7a175547862d057b7d83708Rob Landley    b->out_pos = s->out_pos;
92118993e2f3af291eea7a175547862d057b7d83708Rob Landley    b->out_size = s->out_size;
92218993e2f3af291eea7a175547862d057b7d83708Rob Landley
92318993e2f3af291eea7a175547862d057b7d83708Rob Landley    if (s->ret != XZ_OK && s->ret != XZ_STREAM_END)
92418993e2f3af291eea7a175547862d057b7d83708Rob Landley      return s->ret;
92518993e2f3af291eea7a175547862d057b7d83708Rob Landley
92618993e2f3af291eea7a175547862d057b7d83708Rob Landley    bcj_apply(s, s->temp.buf, &s->temp.filtered, s->temp.size);
92718993e2f3af291eea7a175547862d057b7d83708Rob Landley
92818993e2f3af291eea7a175547862d057b7d83708Rob Landley    /*
92918993e2f3af291eea7a175547862d057b7d83708Rob Landley     * If the next filter returned XZ_STREAM_END, we mark that
93018993e2f3af291eea7a175547862d057b7d83708Rob Landley     * everything is filtered, since the last unfiltered bytes
93118993e2f3af291eea7a175547862d057b7d83708Rob Landley     * of the stream are meant to be left as is.
93218993e2f3af291eea7a175547862d057b7d83708Rob Landley     */
93318993e2f3af291eea7a175547862d057b7d83708Rob Landley    if (s->ret == XZ_STREAM_END)
93418993e2f3af291eea7a175547862d057b7d83708Rob Landley      s->temp.filtered = s->temp.size;
93518993e2f3af291eea7a175547862d057b7d83708Rob Landley
93618993e2f3af291eea7a175547862d057b7d83708Rob Landley    bcj_flush(s, b);
93718993e2f3af291eea7a175547862d057b7d83708Rob Landley    if (s->temp.filtered > 0)
93818993e2f3af291eea7a175547862d057b7d83708Rob Landley      return XZ_OK;
93918993e2f3af291eea7a175547862d057b7d83708Rob Landley  }
94018993e2f3af291eea7a175547862d057b7d83708Rob Landley
94118993e2f3af291eea7a175547862d057b7d83708Rob Landley  return s->ret;
942971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley}
943971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
9449bd7a1696e7060e51ec0d0aaef477eaf85aaf216Rob Landleyenum xz_ret xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id)
945971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley{
94618993e2f3af291eea7a175547862d057b7d83708Rob Landley  switch (id) {
947971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley#ifdef XZ_DEC_X86
94818993e2f3af291eea7a175547862d057b7d83708Rob Landley  case BCJ_X86:
949971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley#endif
950971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley#ifdef XZ_DEC_POWERPC
95118993e2f3af291eea7a175547862d057b7d83708Rob Landley  case BCJ_POWERPC:
952971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley#endif
953971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley#ifdef XZ_DEC_IA64
95418993e2f3af291eea7a175547862d057b7d83708Rob Landley  case BCJ_IA64:
955971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley#endif
956971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley#ifdef XZ_DEC_ARM
95718993e2f3af291eea7a175547862d057b7d83708Rob Landley  case BCJ_ARM:
958971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley#endif
959971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley#ifdef XZ_DEC_ARMTHUMB
96018993e2f3af291eea7a175547862d057b7d83708Rob Landley  case BCJ_ARMTHUMB:
961971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley#endif
962971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley#ifdef XZ_DEC_SPARC
96318993e2f3af291eea7a175547862d057b7d83708Rob Landley  case BCJ_SPARC:
964971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley#endif
96518993e2f3af291eea7a175547862d057b7d83708Rob Landley    break;
966971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
96718993e2f3af291eea7a175547862d057b7d83708Rob Landley  default:
96818993e2f3af291eea7a175547862d057b7d83708Rob Landley    /* Unsupported Filter ID */
96918993e2f3af291eea7a175547862d057b7d83708Rob Landley    return XZ_OPTIONS_ERROR;
97018993e2f3af291eea7a175547862d057b7d83708Rob Landley  }
971971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
97218993e2f3af291eea7a175547862d057b7d83708Rob Landley  s->type = id;
97318993e2f3af291eea7a175547862d057b7d83708Rob Landley  s->ret = XZ_OK;
97418993e2f3af291eea7a175547862d057b7d83708Rob Landley  s->pos = 0;
97518993e2f3af291eea7a175547862d057b7d83708Rob Landley  s->x86_prev_mask = 0;
97618993e2f3af291eea7a175547862d057b7d83708Rob Landley  s->temp.filtered = 0;
97718993e2f3af291eea7a175547862d057b7d83708Rob Landley  s->temp.size = 0;
978971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
97918993e2f3af291eea7a175547862d057b7d83708Rob Landley  return XZ_OK;
980971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley}
981971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
982971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley#endif
983971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley/*
984971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * LZMA2 decoder
985971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley */
986971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
987971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
988971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley// BEGIN xz_lzma2.h
989971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley/*
990971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * LZMA2 definitions
991971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley *
992971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley */
993971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
994971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
/*
 * Range coder constants
 *
 * RC_TOP_VALUE evaluates to 1 << 24 and RC_BIT_MODEL_TOTAL to
 * 1 << 11 (2048).
 * NOTE(review): how each constant is used is defined by the range
 * decoder code, which is outside this section - confirm there.
 */
#define RC_SHIFT_BITS 8
#define RC_TOP_BITS 24
#define RC_TOP_VALUE (1 << RC_TOP_BITS)
#define RC_BIT_MODEL_TOTAL_BITS 11
#define RC_BIT_MODEL_TOTAL (1 << RC_BIT_MODEL_TOTAL_BITS)
#define RC_MOVE_BITS 5
1002971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
/*
 * Maximum number of position states. A position state is the lowest pb
 * number of bits of the current uncompressed offset. In some places there
 * are different sets of probabilities for different position states.
 *
 * The value is 1 << 4 = 16.
 */
#define POS_STATES_MAX (1 << 4)
1009971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
1010971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley/*
1011971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * This enum is used to track which LZMA symbols have occurred most recently
1012971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * and in which order. This information is used to predict the next symbol.
1013971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley *
1014971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * Symbols:
1015971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley *  - Literal: One 8-bit byte
1016971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley *  - Match: Repeat a chunk of data at some distance
1017971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley *  - Long repeat: Multi-byte match at a recently seen distance
1018971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley *  - Short repeat: One-byte repeat at a recently seen distance
1019971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley *
 * The symbol names are in the form STATE_oldest_older_previous. REP means
1021971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * either short or long repeated match, and NONLIT means any non-literal.
1022971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley */
enum lzma_state {
  STATE_LIT_LIT,            /*  0 */
  STATE_MATCH_LIT_LIT,      /*  1 */
  STATE_REP_LIT_LIT,        /*  2 */
  STATE_SHORTREP_LIT_LIT,   /*  3 */
  STATE_MATCH_LIT,          /*  4 */
  STATE_REP_LIT,            /*  5 */
  STATE_SHORTREP_LIT,       /*  6 */
  STATE_LIT_MATCH,          /*  7 */
  STATE_LIT_LONGREP,        /*  8 */
  STATE_LIT_SHORTREP,       /*  9 */
  STATE_NONLIT_MATCH,       /* 10 */
  STATE_NONLIT_REP          /* 11 */
};

/* Number of enumerators in enum lzma_state. */
#define STATES 12

/* States 0-6 are the ones whose most recent symbol was a literal. */
#define LIT_STATES 7

/*
 * Record a literal as the newest symbol.
 * States 0-3 collapse to STATE_LIT_LIT, states 4-9 move down by three,
 * and states 10-11 move down by six.
 */
static inline void lzma_state_literal(enum lzma_state *state)
{
  enum lzma_state prev = *state;

  if (prev > STATE_LIT_SHORTREP)
    *state = prev - 6;
  else if (prev > STATE_SHORTREP_LIT_LIT)
    *state = prev - 3;
  else
    *state = STATE_LIT_LIT;
}

/* Record a match as the newest symbol. */
static inline void lzma_state_match(enum lzma_state *state)
{
  if (*state < LIT_STATES)
    *state = STATE_LIT_MATCH;
  else
    *state = STATE_NONLIT_MATCH;
}

/* Record a long repeated match as the newest symbol. */
static inline void lzma_state_long_rep(enum lzma_state *state)
{
  if (*state < LIT_STATES)
    *state = STATE_LIT_LONGREP;
  else
    *state = STATE_NONLIT_REP;
}

/* Record a short repeated match as the newest symbol. */
static inline void lzma_state_short_rep(enum lzma_state *state)
{
  if (*state < LIT_STATES)
    *state = STATE_LIT_SHORTREP;
  else
    *state = STATE_NONLIT_REP;
}

/* Return nonzero when the most recent symbol was a literal. */
static inline int lzma_state_is_literal(enum lzma_state state)
{
  return state < LIT_STATES;
}
1078971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
/* Each literal coder is divided into three sections:
 *   - 0x001-0x0FF: Without match byte
 *   - 0x101-0x1FF: With match byte; match bit is 0
 *   - 0x201-0x2FF: With match byte; match bit is 1
 *
 * Match byte is used when the previous LZMA symbol was something else than
 * a literal (that is, it was some kind of match).
 *
 * The three sections together span 0x300 (768) entries per coder.
 */
#define LITERAL_CODER_SIZE 0x300

/* Maximum number of literal coders (1 << 4 = 16) */
#define LITERAL_CODERS_MAX (1 << 4)
1091971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
/* The shortest possible match is two bytes long. */
#define MATCH_LEN_MIN 2

/* A match length is coded with 4, 5, or 10 bits.
 *
 * Length   Bits
 *  2-9      4 = Choice=0 + 3 bits
 * 10-17     5 = Choice=1 + Choice2=0 + 3 bits
 * 18-273   10 = Choice=1 + Choice2=1 + 8 bits
 *
 * LEN_SYMBOLS therefore evaluates to 8 + 8 + 256 = 272.
 */
#define LEN_LOW_BITS 3
#define LEN_LOW_SYMBOLS (1 << LEN_LOW_BITS)
#define LEN_MID_BITS 3
#define LEN_MID_SYMBOLS (1 << LEN_MID_BITS)
#define LEN_HIGH_BITS 8
#define LEN_HIGH_SYMBOLS (1 << LEN_HIGH_BITS)
#define LEN_SYMBOLS (LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS + LEN_HIGH_SYMBOLS)

/*
 * The longest possible match is 273 bytes (2 + 272 - 1), which follows
 * from the length encoding above.
 */
#define MATCH_LEN_MAX (MATCH_LEN_MIN + LEN_SYMBOLS - 1)

/*
 * Match distances of very short matches get their own probability sets:
 * lengths 2, 3, and 4 each have a dedicated set, and all longer matches
 * share one more set.
 */
#define DIST_STATES 4

/*
 * Pick the probability array index used for decoding the distance slot.
 * Lengths below DIST_STATES + MATCH_LEN_MIN map to len - MATCH_LEN_MIN;
 * every longer length maps to the last index, DIST_STATES - 1.
 */
static inline uint32_t lzma_get_dist_state(uint32_t len)
{
  if (len >= DIST_STATES + MATCH_LEN_MIN)
    return DIST_STATES - 1;

  return len - MATCH_LEN_MIN;
}
1133971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
1134971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley/*
1135971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * The highest two bits of a 32-bit match distance are encoded using six bits.
1136971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * This six-bit value is called a distance slot. This way encoding a 32-bit
1137971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * value takes 6-36 bits, larger values taking more bits.
1138971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley */
1139971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley#define DIST_SLOT_BITS 6
1140971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley#define DIST_SLOTS (1 << DIST_SLOT_BITS)
1141971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
1142971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley/* Match distances up to 127 are fully encoded using probabilities. Since
1143971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * the highest two bits (distance slot) are always encoded using six bits,
1144971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * the distances 0-3 don't need any additional bits to encode, since the
1145971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * distance slot itself is the same as the actual distance. DIST_MODEL_START
1146971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * indicates the first distance slot where at least one additional bit is
1147971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * needed.
1148971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley */
1149971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley#define DIST_MODEL_START 4
1150971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
1151971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley/*
1152971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * Match distances greater than 127 are encoded in three pieces:
1153971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley *   - distance slot: the highest two bits
1154971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley *   - direct bits: 2-26 bits below the highest two bits
1155971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley *   - alignment bits: four lowest bits
1156971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley *
1157971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * Direct bits don't use any probabilities.
1158971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley *
1159971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * The distance slot value of 14 is for distances 128-191.
1160971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley */
1161971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley#define DIST_MODEL_END 14
1162971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
1163971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley/* Distance slots that indicate a distance <= 127. */
1164971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley#define FULL_DISTANCES_BITS (DIST_MODEL_END / 2)
1165971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley#define FULL_DISTANCES (1 << FULL_DISTANCES_BITS)
1166971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
1167971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley/*
1168971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * For match distances greater than 127, only the highest two bits and the
1169971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * lowest four bits (alignment) is encoded using probabilities.
1170971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley */
1171971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley#define ALIGN_BITS 4
1172971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley#define ALIGN_SIZE (1 << ALIGN_BITS)
1173971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley#define ALIGN_MASK (ALIGN_SIZE - 1)
1174971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
/*
 * Total number of all probability variables: a fixed part plus the
 * literal coder probabilities.
 * NOTE(review): 1846 presumably equals the number of non-literal uint16_t
 * probabilities in struct lzma_dec; confirm before changing either one.
 */
#define PROBS_TOTAL (1846 + LITERAL_CODERS_MAX * LITERAL_CODER_SIZE)

/*
 * Number of most recent match distances that LZMA remembers. Reusing one
 * of these distances tends to take less space than re-encoding the actual
 * distance value.
 */
#define REPS 4
1184971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
1185971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
1186971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley// END xz_lzma2.h
1187971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
/*
 * Number of bytes at the start of each LZMA chunk that are consumed by
 * range decoder initialization.
 */
#define RC_INIT_BYTES 5

/*
 * Minimum number of usable input bytes needed to safely decode one LZMA
 * symbol. In the worst case 22 bits are decoded using probabilities and
 * 26 bits are decoded as direct bits, which may consume at most 20 bytes
 * of input. lzma_main() additionally does one extra normalization before
 * returning, so the value has to be 21.
 */
#define LZMA_IN_REQUIRED 21
1201971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
/*
 * Dictionary (history buffer)
 *
 * Invariants that hold at all times:
 *    start <= pos <= full <= end
 *    pos <= limit <= end
 *    end == size
 *    size <= size_max
 *    allocated <= size
 *
 * The position members are size_t as a relic of single-call mode, in
 * which the dictionary variables address the actual output buffer
 * directly.
 */
struct dictionary {
  uint8_t *buf;       /* Beginning of the history buffer */
  size_t start;       /* Old position in buf (before decoding more data) */
  size_t pos;         /* Current position in buf */

  /*
   * Fill level of the dictionary. Used to detect corrupt input that
   * would read beyond the beginning of the uncompressed stream.
   */
  size_t full;

  size_t limit;       /* Write limit: buf[limit] and later are not written */
  size_t end;         /* End of the buffer; same as the dictionary size */

  /*
   * Dictionary size as specified in the Block Header. Used together
   * with "full" to detect corrupt input that would make us read beyond
   * the beginning of the uncompressed stream.
   */
  uint32_t size;

  uint32_t size_max;  /* Maximum allowed dictionary size */
  uint32_t allocated; /* Memory currently allocated for the dictionary */
};
1255971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
/* Range decoder state */
struct rc_dec {
  uint32_t range; /* Set to all ones by rc_reset() */
  uint32_t code;  /* Set to zero by rc_reset() */

  /* Initializing bytes that rc_read_init() still has to read */
  uint32_t init_bytes_left;

  /*
   * Input source: either temp.buf or the caller-provided input buffer,
   * together with the current position in it and its limit.
   */
  const uint8_t *in;
  size_t in_pos;
  size_t in_limit;
};
1275971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
1276971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley/* Probabilities for a length decoder. */
1277971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landleystruct lzma_len_dec {
127818993e2f3af291eea7a175547862d057b7d83708Rob Landley  /* Probability of match length being at least 10 */
127918993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint16_t choice;
1280971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
128118993e2f3af291eea7a175547862d057b7d83708Rob Landley  /* Probability of match length being at least 18 */
128218993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint16_t choice2;
1283971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
128418993e2f3af291eea7a175547862d057b7d83708Rob Landley  /* Probabilities for match lengths 2-9 */
128518993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint16_t low[POS_STATES_MAX][LEN_LOW_SYMBOLS];
1286971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
128718993e2f3af291eea7a175547862d057b7d83708Rob Landley  /* Probabilities for match lengths 10-17 */
128818993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint16_t mid[POS_STATES_MAX][LEN_MID_SYMBOLS];
1289971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
129018993e2f3af291eea7a175547862d057b7d83708Rob Landley  /* Probabilities for match lengths 18-273 */
129118993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint16_t high[LEN_HIGH_SYMBOLS];
1292971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley};
1293971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
1294971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landleystruct lzma_dec {
129518993e2f3af291eea7a175547862d057b7d83708Rob Landley  /* Distances of latest four matches */
129618993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint32_t rep0;
129718993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint32_t rep1;
129818993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint32_t rep2;
129918993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint32_t rep3;
130018993e2f3af291eea7a175547862d057b7d83708Rob Landley
130118993e2f3af291eea7a175547862d057b7d83708Rob Landley  /* Types of the most recently seen LZMA symbols */
130218993e2f3af291eea7a175547862d057b7d83708Rob Landley  enum lzma_state state;
130318993e2f3af291eea7a175547862d057b7d83708Rob Landley
130418993e2f3af291eea7a175547862d057b7d83708Rob Landley  /*
130518993e2f3af291eea7a175547862d057b7d83708Rob Landley   * Length of a match. This is updated so that dict_repeat can
130618993e2f3af291eea7a175547862d057b7d83708Rob Landley   * be called again to finish repeating the whole match.
130718993e2f3af291eea7a175547862d057b7d83708Rob Landley   */
130818993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint32_t len;
130918993e2f3af291eea7a175547862d057b7d83708Rob Landley
131018993e2f3af291eea7a175547862d057b7d83708Rob Landley  /*
131118993e2f3af291eea7a175547862d057b7d83708Rob Landley   * LZMA properties or related bit masks (number of literal
131218993e2f3af291eea7a175547862d057b7d83708Rob Landley   * context bits, a mask dervied from the number of literal
131318993e2f3af291eea7a175547862d057b7d83708Rob Landley   * position bits, and a mask dervied from the number
131418993e2f3af291eea7a175547862d057b7d83708Rob Landley   * position bits)
131518993e2f3af291eea7a175547862d057b7d83708Rob Landley   */
131618993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint32_t lc;
131718993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint32_t literal_pos_mask; /* (1 << lp) - 1 */
131818993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint32_t pos_mask;         /* (1 << pb) - 1 */
131918993e2f3af291eea7a175547862d057b7d83708Rob Landley
132018993e2f3af291eea7a175547862d057b7d83708Rob Landley  /* If 1, it's a match. Otherwise it's a single 8-bit literal. */
132118993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint16_t is_match[STATES][POS_STATES_MAX];
132218993e2f3af291eea7a175547862d057b7d83708Rob Landley
132318993e2f3af291eea7a175547862d057b7d83708Rob Landley  /* If 1, it's a repeated match. The distance is one of rep0 .. rep3. */
132418993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint16_t is_rep[STATES];
132518993e2f3af291eea7a175547862d057b7d83708Rob Landley
132618993e2f3af291eea7a175547862d057b7d83708Rob Landley  /*
132718993e2f3af291eea7a175547862d057b7d83708Rob Landley   * If 0, distance of a repeated match is rep0.
132818993e2f3af291eea7a175547862d057b7d83708Rob Landley   * Otherwise check is_rep1.
132918993e2f3af291eea7a175547862d057b7d83708Rob Landley   */
133018993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint16_t is_rep0[STATES];
133118993e2f3af291eea7a175547862d057b7d83708Rob Landley
133218993e2f3af291eea7a175547862d057b7d83708Rob Landley  /*
133318993e2f3af291eea7a175547862d057b7d83708Rob Landley   * If 0, distance of a repeated match is rep1.
133418993e2f3af291eea7a175547862d057b7d83708Rob Landley   * Otherwise check is_rep2.
133518993e2f3af291eea7a175547862d057b7d83708Rob Landley   */
133618993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint16_t is_rep1[STATES];
133718993e2f3af291eea7a175547862d057b7d83708Rob Landley
133818993e2f3af291eea7a175547862d057b7d83708Rob Landley  /* If 0, distance of a repeated match is rep2. Otherwise it is rep3. */
133918993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint16_t is_rep2[STATES];
134018993e2f3af291eea7a175547862d057b7d83708Rob Landley
134118993e2f3af291eea7a175547862d057b7d83708Rob Landley  /*
134218993e2f3af291eea7a175547862d057b7d83708Rob Landley   * If 1, the repeated match has length of one byte. Otherwise
134318993e2f3af291eea7a175547862d057b7d83708Rob Landley   * the length is decoded from rep_len_decoder.
134418993e2f3af291eea7a175547862d057b7d83708Rob Landley   */
134518993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint16_t is_rep0_long[STATES][POS_STATES_MAX];
134618993e2f3af291eea7a175547862d057b7d83708Rob Landley
134718993e2f3af291eea7a175547862d057b7d83708Rob Landley  /*
134818993e2f3af291eea7a175547862d057b7d83708Rob Landley   * Probability tree for the highest two bits of the match
134918993e2f3af291eea7a175547862d057b7d83708Rob Landley   * distance. There is a separate probability tree for match
135018993e2f3af291eea7a175547862d057b7d83708Rob Landley   * lengths of 2 (i.e. MATCH_LEN_MIN), 3, 4, and [5, 273].
135118993e2f3af291eea7a175547862d057b7d83708Rob Landley   */
135218993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint16_t dist_slot[DIST_STATES][DIST_SLOTS];
135318993e2f3af291eea7a175547862d057b7d83708Rob Landley
135418993e2f3af291eea7a175547862d057b7d83708Rob Landley  /*
135518993e2f3af291eea7a175547862d057b7d83708Rob Landley   * Probility trees for additional bits for match distance
135618993e2f3af291eea7a175547862d057b7d83708Rob Landley   * when the distance is in the range [4, 127].
135718993e2f3af291eea7a175547862d057b7d83708Rob Landley   */
135818993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint16_t dist_special[FULL_DISTANCES - DIST_MODEL_END];
135918993e2f3af291eea7a175547862d057b7d83708Rob Landley
136018993e2f3af291eea7a175547862d057b7d83708Rob Landley  /*
136118993e2f3af291eea7a175547862d057b7d83708Rob Landley   * Probability tree for the lowest four bits of a match
136218993e2f3af291eea7a175547862d057b7d83708Rob Landley   * distance that is equal to or greater than 128.
136318993e2f3af291eea7a175547862d057b7d83708Rob Landley   */
136418993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint16_t dist_align[ALIGN_SIZE];
136518993e2f3af291eea7a175547862d057b7d83708Rob Landley
136618993e2f3af291eea7a175547862d057b7d83708Rob Landley  /* Length of a normal match */
136718993e2f3af291eea7a175547862d057b7d83708Rob Landley  struct lzma_len_dec match_len_dec;
136818993e2f3af291eea7a175547862d057b7d83708Rob Landley
136918993e2f3af291eea7a175547862d057b7d83708Rob Landley  /* Length of a repeated match */
137018993e2f3af291eea7a175547862d057b7d83708Rob Landley  struct lzma_len_dec rep_len_dec;
137118993e2f3af291eea7a175547862d057b7d83708Rob Landley
137218993e2f3af291eea7a175547862d057b7d83708Rob Landley  /* Probabilities of literals */
137318993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint16_t literal[LITERAL_CODERS_MAX][LITERAL_CODER_SIZE];
1374971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley};
1375971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
/* Positions (steps) that xz_dec_lzma2_run() can be in. */
enum lzma2_seq {
  SEQ_CONTROL,
  SEQ_UNCOMPRESSED_1,
  SEQ_UNCOMPRESSED_2,
  SEQ_COMPRESSED_0,
  SEQ_COMPRESSED_1,
  SEQ_PROPERTIES,
  SEQ_LZMA_PREPARE,
  SEQ_LZMA_RUN,
  SEQ_COPY
};

/* LZMA2 chunk-level decoder state. */
struct lzma2_dec {
  /* Current position in xz_dec_lzma2_run() */
  enum lzma2_seq sequence;

  /* Position to go to after decoding the compressed size of the chunk */
  enum lzma2_seq next_sequence;

  /* Uncompressed size of LZMA chunk (2 MiB at maximum) */
  uint32_t uncompressed;

  /*
   * Compressed size of LZMA chunk, or the compressed/uncompressed size
   * of an uncompressed chunk (64 KiB at maximum)
   */
  uint32_t compressed;

  /*
   * True if dictionary reset is needed. This is false before the first
   * chunk (LZMA or uncompressed).
   */
  int need_dict_reset;

  /*
   * True if new LZMA properties are needed. This is false before the
   * first LZMA chunk.
   */
  int need_props;
};
1414971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
1415971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landleystruct xz_dec_lzma2 {
141618993e2f3af291eea7a175547862d057b7d83708Rob Landley  /*
141718993e2f3af291eea7a175547862d057b7d83708Rob Landley   * The order below is important on x86 to reduce code size and
141818993e2f3af291eea7a175547862d057b7d83708Rob Landley   * it shouldn't hurt on other platforms. Everything up to and
141918993e2f3af291eea7a175547862d057b7d83708Rob Landley   * including lzma.pos_mask are in the first 128 bytes on x86-32,
142018993e2f3af291eea7a175547862d057b7d83708Rob Landley   * which allows using smaller instructions to access those
142118993e2f3af291eea7a175547862d057b7d83708Rob Landley   * variables. On x86-64, fewer variables fit into the first 128
142218993e2f3af291eea7a175547862d057b7d83708Rob Landley   * bytes, but this is still the best order without sacrificing
142318993e2f3af291eea7a175547862d057b7d83708Rob Landley   * the readability by splitting the structures.
142418993e2f3af291eea7a175547862d057b7d83708Rob Landley   */
142518993e2f3af291eea7a175547862d057b7d83708Rob Landley  struct rc_dec rc;
142618993e2f3af291eea7a175547862d057b7d83708Rob Landley  struct dictionary dict;
142718993e2f3af291eea7a175547862d057b7d83708Rob Landley  struct lzma2_dec lzma2;
142818993e2f3af291eea7a175547862d057b7d83708Rob Landley  struct lzma_dec lzma;
142918993e2f3af291eea7a175547862d057b7d83708Rob Landley
143018993e2f3af291eea7a175547862d057b7d83708Rob Landley  /*
143118993e2f3af291eea7a175547862d057b7d83708Rob Landley   * Temporary buffer which holds small number of input bytes between
143218993e2f3af291eea7a175547862d057b7d83708Rob Landley   * decoder calls. See lzma2_lzma() for details.
143318993e2f3af291eea7a175547862d057b7d83708Rob Landley   */
143418993e2f3af291eea7a175547862d057b7d83708Rob Landley  struct {
143518993e2f3af291eea7a175547862d057b7d83708Rob Landley    uint32_t size;
143618993e2f3af291eea7a175547862d057b7d83708Rob Landley    uint8_t buf[3 * LZMA_IN_REQUIRED];
143718993e2f3af291eea7a175547862d057b7d83708Rob Landley  } temp;
1438971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley};
1439971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
1440971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley/**************
1441971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * Dictionary *
1442971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley **************/
1443971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
14440c2509957b6291fb136eca4a5c278466045ce31dIsaac Dunham/* Reset the dictionary state. */
14450c2509957b6291fb136eca4a5c278466045ce31dIsaac Dunhamstatic void dict_reset(struct dictionary *dict)
1446971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley{
144718993e2f3af291eea7a175547862d057b7d83708Rob Landley  dict->start = 0;
144818993e2f3af291eea7a175547862d057b7d83708Rob Landley  dict->pos = 0;
144918993e2f3af291eea7a175547862d057b7d83708Rob Landley  dict->limit = 0;
145018993e2f3af291eea7a175547862d057b7d83708Rob Landley  dict->full = 0;
1451971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley}
1452971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
1453971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley/* Set dictionary write limit */
1454971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landleystatic void dict_limit(struct dictionary *dict, size_t out_max)
1455971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley{
145618993e2f3af291eea7a175547862d057b7d83708Rob Landley  if (dict->end - dict->pos <= out_max)
145718993e2f3af291eea7a175547862d057b7d83708Rob Landley    dict->limit = dict->end;
145818993e2f3af291eea7a175547862d057b7d83708Rob Landley  else
145918993e2f3af291eea7a175547862d057b7d83708Rob Landley    dict->limit = dict->pos + out_max;
1460971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley}
1461971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
1462971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley/* Return true if at least one byte can be written into the dictionary. */
14631cabcc2e9a75fc8ba0c49e888907e3e310cfcd41Rob Landleystatic inline int dict_has_space(const struct dictionary *dict)
1464971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley{
146518993e2f3af291eea7a175547862d057b7d83708Rob Landley  return dict->pos < dict->limit;
1466971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley}
1467971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
1468971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley/*
1469971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * Get a byte from the dictionary at the given distance. The distance is
1470971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * assumed to valid, or as a special case, zero when the dictionary is
1471971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * still empty. This special case is needed for single-call decoding to
1472971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * avoid writing a '\0' to the end of the destination buffer.
1473971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley */
1474971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landleystatic inline uint32_t dict_get(const struct dictionary *dict, uint32_t dist)
1475971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley{
147618993e2f3af291eea7a175547862d057b7d83708Rob Landley  size_t offset = dict->pos - dist - 1;
1477971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
147818993e2f3af291eea7a175547862d057b7d83708Rob Landley  if (dist >= dict->pos)
147918993e2f3af291eea7a175547862d057b7d83708Rob Landley    offset += dict->end;
1480971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
148118993e2f3af291eea7a175547862d057b7d83708Rob Landley  return dict->full > 0 ? dict->buf[offset] : 0;
1482971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley}
1483971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
1484971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley/*
1485971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * Put one byte into the dictionary. It is assumed that there is space for it.
1486971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley */
1487971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landleystatic inline void dict_put(struct dictionary *dict, uint8_t byte)
1488971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley{
148918993e2f3af291eea7a175547862d057b7d83708Rob Landley  dict->buf[dict->pos++] = byte;
1490971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
149118993e2f3af291eea7a175547862d057b7d83708Rob Landley  if (dict->full < dict->pos)
149218993e2f3af291eea7a175547862d057b7d83708Rob Landley    dict->full = dict->pos;
1493971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley}
1494971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
1495971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley/*
1496971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * Repeat given number of bytes from the given distance. If the distance is
1497971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * invalid, false is returned. On success, true is returned and *len is
1498971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * updated to indicate how many bytes were left to be repeated.
1499971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley */
15001cabcc2e9a75fc8ba0c49e888907e3e310cfcd41Rob Landleystatic int dict_repeat(struct dictionary *dict, uint32_t *len, uint32_t dist)
1501971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley{
150218993e2f3af291eea7a175547862d057b7d83708Rob Landley  size_t back;
150318993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint32_t left;
1504971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
150518993e2f3af291eea7a175547862d057b7d83708Rob Landley  if (dist >= dict->full || dist >= dict->size) return 0;
1506971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
150718993e2f3af291eea7a175547862d057b7d83708Rob Landley  left = min_t(size_t, dict->limit - dict->pos, *len);
150818993e2f3af291eea7a175547862d057b7d83708Rob Landley  *len -= left;
1509971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
151018993e2f3af291eea7a175547862d057b7d83708Rob Landley  back = dict->pos - dist - 1;
151118993e2f3af291eea7a175547862d057b7d83708Rob Landley  if (dist >= dict->pos)
151218993e2f3af291eea7a175547862d057b7d83708Rob Landley    back += dict->end;
1513971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
151418993e2f3af291eea7a175547862d057b7d83708Rob Landley  do {
151518993e2f3af291eea7a175547862d057b7d83708Rob Landley    dict->buf[dict->pos++] = dict->buf[back++];
151618993e2f3af291eea7a175547862d057b7d83708Rob Landley    if (back == dict->end)
151718993e2f3af291eea7a175547862d057b7d83708Rob Landley      back = 0;
151818993e2f3af291eea7a175547862d057b7d83708Rob Landley  } while (--left > 0);
1519971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
152018993e2f3af291eea7a175547862d057b7d83708Rob Landley  if (dict->full < dict->pos)
152118993e2f3af291eea7a175547862d057b7d83708Rob Landley    dict->full = dict->pos;
1522971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
152318993e2f3af291eea7a175547862d057b7d83708Rob Landley  return 1;
1524971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley}
1525971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
1526971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley/* Copy uncompressed data as is from input to dictionary and output buffers. */
1527971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landleystatic void dict_uncompressed(struct dictionary *dict, struct xz_buf *b,
152818993e2f3af291eea7a175547862d057b7d83708Rob Landley            uint32_t *left)
1529971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley{
153018993e2f3af291eea7a175547862d057b7d83708Rob Landley  size_t copy_size;
1531971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
153218993e2f3af291eea7a175547862d057b7d83708Rob Landley  while (*left > 0 && b->in_pos < b->in_size
153318993e2f3af291eea7a175547862d057b7d83708Rob Landley      && b->out_pos < b->out_size) {
153418993e2f3af291eea7a175547862d057b7d83708Rob Landley    copy_size = min(b->in_size - b->in_pos,
153518993e2f3af291eea7a175547862d057b7d83708Rob Landley        b->out_size - b->out_pos);
153618993e2f3af291eea7a175547862d057b7d83708Rob Landley    if (copy_size > dict->end - dict->pos)
153718993e2f3af291eea7a175547862d057b7d83708Rob Landley      copy_size = dict->end - dict->pos;
153818993e2f3af291eea7a175547862d057b7d83708Rob Landley    if (copy_size > *left)
153918993e2f3af291eea7a175547862d057b7d83708Rob Landley      copy_size = *left;
1540971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
154118993e2f3af291eea7a175547862d057b7d83708Rob Landley    *left -= copy_size;
1542971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
154318993e2f3af291eea7a175547862d057b7d83708Rob Landley    memcpy(dict->buf + dict->pos, b->in + b->in_pos, copy_size);
154418993e2f3af291eea7a175547862d057b7d83708Rob Landley    dict->pos += copy_size;
1545971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
154618993e2f3af291eea7a175547862d057b7d83708Rob Landley    if (dict->full < dict->pos)
154718993e2f3af291eea7a175547862d057b7d83708Rob Landley      dict->full = dict->pos;
1548971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
15499641a3c6c5447c76f56cefd1844a4e545c015822Isaac Dunham    if (dict->pos == dict->end)
15509641a3c6c5447c76f56cefd1844a4e545c015822Isaac Dunham      dict->pos = 0;
1551971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
15529641a3c6c5447c76f56cefd1844a4e545c015822Isaac Dunham    memcpy(b->out + b->out_pos, b->in + b->in_pos,
15539641a3c6c5447c76f56cefd1844a4e545c015822Isaac Dunham        copy_size);
1554971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
155518993e2f3af291eea7a175547862d057b7d83708Rob Landley    dict->start = dict->pos;
1556971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
155718993e2f3af291eea7a175547862d057b7d83708Rob Landley    b->out_pos += copy_size;
155818993e2f3af291eea7a175547862d057b7d83708Rob Landley    b->in_pos += copy_size;
155918993e2f3af291eea7a175547862d057b7d83708Rob Landley  }
1560971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley}
1561971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
1562971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley/*
1563971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * Flush pending data from dictionary to b->out. It is assumed that there is
1564971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * enough space in b->out. This is guaranteed because caller uses dict_limit()
1565971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * before decoding data into the dictionary.
1566971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley */
1567971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landleystatic uint32_t dict_flush(struct dictionary *dict, struct xz_buf *b)
1568971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley{
156918993e2f3af291eea7a175547862d057b7d83708Rob Landley  size_t copy_size = dict->pos - dict->start;
1570971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
15719641a3c6c5447c76f56cefd1844a4e545c015822Isaac Dunham  if (dict->pos == dict->end)
15729641a3c6c5447c76f56cefd1844a4e545c015822Isaac Dunham    dict->pos = 0;
1573971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
15749641a3c6c5447c76f56cefd1844a4e545c015822Isaac Dunham  memcpy(b->out + b->out_pos, dict->buf + dict->start,
15759641a3c6c5447c76f56cefd1844a4e545c015822Isaac Dunham      copy_size);
1576971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
157718993e2f3af291eea7a175547862d057b7d83708Rob Landley  dict->start = dict->pos;
157818993e2f3af291eea7a175547862d057b7d83708Rob Landley  b->out_pos += copy_size;
157918993e2f3af291eea7a175547862d057b7d83708Rob Landley  return copy_size;
1580971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley}
1581971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
1582971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley/*****************
1583971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * Range decoder *
1584971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley *****************/
1585971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
1586971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley/* Reset the range decoder. */
1587971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landleystatic void rc_reset(struct rc_dec *rc)
1588971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley{
158918993e2f3af291eea7a175547862d057b7d83708Rob Landley  rc->range = (uint32_t)-1;
159018993e2f3af291eea7a175547862d057b7d83708Rob Landley  rc->code = 0;
159118993e2f3af291eea7a175547862d057b7d83708Rob Landley  rc->init_bytes_left = RC_INIT_BYTES;
1592971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley}
1593971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
1594971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley/*
1595971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * Read the first five initial bytes into rc->code if they haven't been
1596971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * read already. (Yes, the first byte gets completely ignored.)
1597971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley */
15981cabcc2e9a75fc8ba0c49e888907e3e310cfcd41Rob Landleystatic int rc_read_init(struct rc_dec *rc, struct xz_buf *b)
1599971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley{
160018993e2f3af291eea7a175547862d057b7d83708Rob Landley  while (rc->init_bytes_left > 0) {
160118993e2f3af291eea7a175547862d057b7d83708Rob Landley    if (b->in_pos == b->in_size) return 0;
1602971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
160318993e2f3af291eea7a175547862d057b7d83708Rob Landley    rc->code = (rc->code << 8) + b->in[b->in_pos++];
160418993e2f3af291eea7a175547862d057b7d83708Rob Landley    --rc->init_bytes_left;
160518993e2f3af291eea7a175547862d057b7d83708Rob Landley  }
1606971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
160718993e2f3af291eea7a175547862d057b7d83708Rob Landley  return 1;
1608971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley}
1609971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
1610971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley/* Return true if there may not be enough input for the next decoding loop. */
16111cabcc2e9a75fc8ba0c49e888907e3e310cfcd41Rob Landleystatic inline int rc_limit_exceeded(const struct rc_dec *rc)
1612971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley{
161318993e2f3af291eea7a175547862d057b7d83708Rob Landley  return rc->in_pos > rc->in_limit;
1614971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley}
1615971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
1616971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley/*
1617971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * Return true if it is possible (from point of view of range decoder) that
1618971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * we have reached the end of the LZMA chunk.
1619971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley */
16201cabcc2e9a75fc8ba0c49e888907e3e310cfcd41Rob Landleystatic inline int rc_is_finished(const struct rc_dec *rc)
1621971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley{
162218993e2f3af291eea7a175547862d057b7d83708Rob Landley  return rc->code == 0;
1623971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley}
1624971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
1625971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley/* Read the next input byte if needed. */
16264ffface11f7857683ddb1f935fb05809821458abIsaac Dunhamstatic inline void rc_normalize(struct rc_dec *rc)
1627971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley{
162818993e2f3af291eea7a175547862d057b7d83708Rob Landley  if (rc->range < RC_TOP_VALUE) {
162918993e2f3af291eea7a175547862d057b7d83708Rob Landley    rc->range <<= RC_SHIFT_BITS;
163018993e2f3af291eea7a175547862d057b7d83708Rob Landley    rc->code = (rc->code << RC_SHIFT_BITS) + rc->in[rc->in_pos++];
163118993e2f3af291eea7a175547862d057b7d83708Rob Landley  }
1632971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley}
1633971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
1634971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley/*
1635971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * Decode one bit. In some versions, this function has been splitted in three
1636971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * functions so that the compiler is supposed to be able to more easily avoid
1637971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * an extra branch. In this particular version of the LZMA decoder, this
1638971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * doesn't seem to be a good idea (tested with GCC 3.3.6, 3.4.6, and 4.3.3
1639971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * on x86). Using a non-splitted version results in nicer looking code too.
1640971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley *
1641971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * NOTE: This must return an int. Do not make it return a bool or the speed
1642971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * of the code generated by GCC 3.x decreases 10-15 %. (GCC 4.3 doesn't care,
1643971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * and it generates 10-20 % faster code than GCC 3.x from this file anyway.)
1644971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley */
16454ffface11f7857683ddb1f935fb05809821458abIsaac Dunhamstatic inline int rc_bit(struct rc_dec *rc, uint16_t *prob)
1646971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley{
164718993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint32_t bound;
164818993e2f3af291eea7a175547862d057b7d83708Rob Landley  int bit;
1649971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
165018993e2f3af291eea7a175547862d057b7d83708Rob Landley  rc_normalize(rc);
165118993e2f3af291eea7a175547862d057b7d83708Rob Landley  bound = (rc->range >> RC_BIT_MODEL_TOTAL_BITS) * *prob;
165218993e2f3af291eea7a175547862d057b7d83708Rob Landley  if (rc->code < bound) {
165318993e2f3af291eea7a175547862d057b7d83708Rob Landley    rc->range = bound;
165418993e2f3af291eea7a175547862d057b7d83708Rob Landley    *prob += (RC_BIT_MODEL_TOTAL - *prob) >> RC_MOVE_BITS;
165518993e2f3af291eea7a175547862d057b7d83708Rob Landley    bit = 0;
165618993e2f3af291eea7a175547862d057b7d83708Rob Landley  } else {
165718993e2f3af291eea7a175547862d057b7d83708Rob Landley    rc->range -= bound;
165818993e2f3af291eea7a175547862d057b7d83708Rob Landley    rc->code -= bound;
165918993e2f3af291eea7a175547862d057b7d83708Rob Landley    *prob -= *prob >> RC_MOVE_BITS;
166018993e2f3af291eea7a175547862d057b7d83708Rob Landley    bit = 1;
166118993e2f3af291eea7a175547862d057b7d83708Rob Landley  }
1662971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
166318993e2f3af291eea7a175547862d057b7d83708Rob Landley  return bit;
1664971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley}
1665971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
/*
 * Decode a bit tree, most significant bit first. Starting from node 1,
 * each decoded bit is appended to the node index until the index reaches
 * limit. The returned value still includes the leading 1 bit, so callers
 * subtract limit to get the plain symbol.
 */
static inline uint32_t rc_bittree(struct rc_dec *rc,
             uint16_t *probs, uint32_t limit)
{
  uint32_t node = 1;

  do {
    uint32_t bit = rc_bit(rc, &probs[node]);

    /* The shift leaves the low bit clear, so OR simply appends the bit. */
    node = (node << 1) | bit;
  } while (node < limit);

  return node;
}
1681971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
/*
 * Decode a bit tree, least significant bit first. limit is the number of
 * bits to decode (at least one bit is always decoded); every 1 bit is
 * added into *dest at its bit position, on top of whatever *dest already
 * holds.
 */
static inline void rc_bittree_reverse(struct rc_dec *rc,
                 uint16_t *probs,
                 uint32_t *dest, uint32_t limit)
{
  uint32_t node = 1;
  uint32_t bitpos = 0;

  do {
    int bit = rc_bit(rc, &probs[node]);

    node = (node << 1) + bit;
    if (bit) *dest += 1 << bitpos;
  } while (++bitpos < limit);
}
1699971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
1700971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley/* Decode direct bits (fixed fifty-fifty probability) */
1701971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landleystatic inline void rc_direct(struct rc_dec *rc, uint32_t *dest, uint32_t limit)
1702971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley{
170318993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint32_t mask;
1704971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
170518993e2f3af291eea7a175547862d057b7d83708Rob Landley  do {
170618993e2f3af291eea7a175547862d057b7d83708Rob Landley    rc_normalize(rc);
170718993e2f3af291eea7a175547862d057b7d83708Rob Landley    rc->range >>= 1;
170818993e2f3af291eea7a175547862d057b7d83708Rob Landley    rc->code -= rc->range;
170918993e2f3af291eea7a175547862d057b7d83708Rob Landley    mask = (uint32_t)0 - (rc->code >> 31);
171018993e2f3af291eea7a175547862d057b7d83708Rob Landley    rc->code += rc->range & mask;
171118993e2f3af291eea7a175547862d057b7d83708Rob Landley    *dest = (*dest << 1) + (mask + 1);
171218993e2f3af291eea7a175547862d057b7d83708Rob Landley  } while (--limit > 0);
1713971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley}
1714971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
1715971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley/********
1716971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * LZMA *
1717971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley ********/
1718971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
1719971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley/* Get pointer to literal coder probability array. */
1720971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landleystatic uint16_t *lzma_literal_probs(struct xz_dec_lzma2 *s)
1721971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley{
172218993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint32_t prev_byte = dict_get(&s->dict, 0);
172318993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint32_t low = prev_byte >> (8 - s->lzma.lc);
172418993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint32_t high = (s->dict.pos & s->lzma.literal_pos_mask) << s->lzma.lc;
172518993e2f3af291eea7a175547862d057b7d83708Rob Landley  return s->lzma.literal[low + high];
1726971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley}
1727971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
1728971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley/* Decode a literal (one 8-bit byte) */
1729971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landleystatic void lzma_literal(struct xz_dec_lzma2 *s)
1730971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley{
173118993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint16_t *probs;
173218993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint32_t symbol;
173318993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint32_t match_byte;
173418993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint32_t match_bit;
173518993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint32_t offset;
173618993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint32_t i;
173718993e2f3af291eea7a175547862d057b7d83708Rob Landley
173818993e2f3af291eea7a175547862d057b7d83708Rob Landley  probs = lzma_literal_probs(s);
173918993e2f3af291eea7a175547862d057b7d83708Rob Landley
174018993e2f3af291eea7a175547862d057b7d83708Rob Landley  if (lzma_state_is_literal(s->lzma.state)) {
174118993e2f3af291eea7a175547862d057b7d83708Rob Landley    symbol = rc_bittree(&s->rc, probs, 0x100);
174218993e2f3af291eea7a175547862d057b7d83708Rob Landley  } else {
174318993e2f3af291eea7a175547862d057b7d83708Rob Landley    symbol = 1;
174418993e2f3af291eea7a175547862d057b7d83708Rob Landley    match_byte = dict_get(&s->dict, s->lzma.rep0) << 1;
174518993e2f3af291eea7a175547862d057b7d83708Rob Landley    offset = 0x100;
174618993e2f3af291eea7a175547862d057b7d83708Rob Landley
174718993e2f3af291eea7a175547862d057b7d83708Rob Landley    do {
174818993e2f3af291eea7a175547862d057b7d83708Rob Landley      match_bit = match_byte & offset;
174918993e2f3af291eea7a175547862d057b7d83708Rob Landley      match_byte <<= 1;
175018993e2f3af291eea7a175547862d057b7d83708Rob Landley      i = offset + match_bit + symbol;
175118993e2f3af291eea7a175547862d057b7d83708Rob Landley
175218993e2f3af291eea7a175547862d057b7d83708Rob Landley      if (rc_bit(&s->rc, &probs[i])) {
175318993e2f3af291eea7a175547862d057b7d83708Rob Landley        symbol = (symbol << 1) + 1;
175418993e2f3af291eea7a175547862d057b7d83708Rob Landley        offset &= match_bit;
175518993e2f3af291eea7a175547862d057b7d83708Rob Landley      } else {
175618993e2f3af291eea7a175547862d057b7d83708Rob Landley        symbol <<= 1;
175718993e2f3af291eea7a175547862d057b7d83708Rob Landley        offset &= ~match_bit;
175818993e2f3af291eea7a175547862d057b7d83708Rob Landley      }
175918993e2f3af291eea7a175547862d057b7d83708Rob Landley    } while (symbol < 0x100);
176018993e2f3af291eea7a175547862d057b7d83708Rob Landley  }
176118993e2f3af291eea7a175547862d057b7d83708Rob Landley
176218993e2f3af291eea7a175547862d057b7d83708Rob Landley  dict_put(&s->dict, (uint8_t)symbol);
176318993e2f3af291eea7a175547862d057b7d83708Rob Landley  lzma_state_literal(&s->lzma.state);
1764971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley}
1765971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
1766971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley/* Decode the length of the match into s->lzma.len. */
1767971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landleystatic void lzma_len(struct xz_dec_lzma2 *s, struct lzma_len_dec *l,
176818993e2f3af291eea7a175547862d057b7d83708Rob Landley         uint32_t pos_state)
176918993e2f3af291eea7a175547862d057b7d83708Rob Landley{
177018993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint16_t *probs;
177118993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint32_t limit;
177218993e2f3af291eea7a175547862d057b7d83708Rob Landley
177318993e2f3af291eea7a175547862d057b7d83708Rob Landley  if (!rc_bit(&s->rc, &l->choice)) {
177418993e2f3af291eea7a175547862d057b7d83708Rob Landley    probs = l->low[pos_state];
177518993e2f3af291eea7a175547862d057b7d83708Rob Landley    limit = LEN_LOW_SYMBOLS;
177618993e2f3af291eea7a175547862d057b7d83708Rob Landley    s->lzma.len = MATCH_LEN_MIN;
177718993e2f3af291eea7a175547862d057b7d83708Rob Landley  } else {
177818993e2f3af291eea7a175547862d057b7d83708Rob Landley    if (!rc_bit(&s->rc, &l->choice2)) {
177918993e2f3af291eea7a175547862d057b7d83708Rob Landley      probs = l->mid[pos_state];
178018993e2f3af291eea7a175547862d057b7d83708Rob Landley      limit = LEN_MID_SYMBOLS;
178118993e2f3af291eea7a175547862d057b7d83708Rob Landley      s->lzma.len = MATCH_LEN_MIN + LEN_LOW_SYMBOLS;
178218993e2f3af291eea7a175547862d057b7d83708Rob Landley    } else {
178318993e2f3af291eea7a175547862d057b7d83708Rob Landley      probs = l->high;
178418993e2f3af291eea7a175547862d057b7d83708Rob Landley      limit = LEN_HIGH_SYMBOLS;
178518993e2f3af291eea7a175547862d057b7d83708Rob Landley      s->lzma.len = MATCH_LEN_MIN + LEN_LOW_SYMBOLS
178618993e2f3af291eea7a175547862d057b7d83708Rob Landley          + LEN_MID_SYMBOLS;
178718993e2f3af291eea7a175547862d057b7d83708Rob Landley    }
178818993e2f3af291eea7a175547862d057b7d83708Rob Landley  }
178918993e2f3af291eea7a175547862d057b7d83708Rob Landley
179018993e2f3af291eea7a175547862d057b7d83708Rob Landley  s->lzma.len += rc_bittree(&s->rc, probs, limit) - limit;
1791971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley}
1792971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
1793971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley/* Decode a match. The distance will be stored in s->lzma.rep0. */
1794971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landleystatic void lzma_match(struct xz_dec_lzma2 *s, uint32_t pos_state)
1795971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley{
179618993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint16_t *probs;
179718993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint32_t dist_slot;
179818993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint32_t limit;
1799971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
180018993e2f3af291eea7a175547862d057b7d83708Rob Landley  lzma_state_match(&s->lzma.state);
1801971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
180218993e2f3af291eea7a175547862d057b7d83708Rob Landley  s->lzma.rep3 = s->lzma.rep2;
180318993e2f3af291eea7a175547862d057b7d83708Rob Landley  s->lzma.rep2 = s->lzma.rep1;
180418993e2f3af291eea7a175547862d057b7d83708Rob Landley  s->lzma.rep1 = s->lzma.rep0;
1805971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
180618993e2f3af291eea7a175547862d057b7d83708Rob Landley  lzma_len(s, &s->lzma.match_len_dec, pos_state);
1807971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
180818993e2f3af291eea7a175547862d057b7d83708Rob Landley  probs = s->lzma.dist_slot[lzma_get_dist_state(s->lzma.len)];
180918993e2f3af291eea7a175547862d057b7d83708Rob Landley  dist_slot = rc_bittree(&s->rc, probs, DIST_SLOTS) - DIST_SLOTS;
1810971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
181118993e2f3af291eea7a175547862d057b7d83708Rob Landley  if (dist_slot < DIST_MODEL_START) {
181218993e2f3af291eea7a175547862d057b7d83708Rob Landley    s->lzma.rep0 = dist_slot;
181318993e2f3af291eea7a175547862d057b7d83708Rob Landley  } else {
181418993e2f3af291eea7a175547862d057b7d83708Rob Landley    limit = (dist_slot >> 1) - 1;
181518993e2f3af291eea7a175547862d057b7d83708Rob Landley    s->lzma.rep0 = 2 + (dist_slot & 1);
1816971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
181718993e2f3af291eea7a175547862d057b7d83708Rob Landley    if (dist_slot < DIST_MODEL_END) {
181818993e2f3af291eea7a175547862d057b7d83708Rob Landley      s->lzma.rep0 <<= limit;
181918993e2f3af291eea7a175547862d057b7d83708Rob Landley      probs = s->lzma.dist_special + s->lzma.rep0
182018993e2f3af291eea7a175547862d057b7d83708Rob Landley          - dist_slot - 1;
182118993e2f3af291eea7a175547862d057b7d83708Rob Landley      rc_bittree_reverse(&s->rc, probs,
182218993e2f3af291eea7a175547862d057b7d83708Rob Landley          &s->lzma.rep0, limit);
182318993e2f3af291eea7a175547862d057b7d83708Rob Landley    } else {
182418993e2f3af291eea7a175547862d057b7d83708Rob Landley      rc_direct(&s->rc, &s->lzma.rep0, limit - ALIGN_BITS);
182518993e2f3af291eea7a175547862d057b7d83708Rob Landley      s->lzma.rep0 <<= ALIGN_BITS;
182618993e2f3af291eea7a175547862d057b7d83708Rob Landley      rc_bittree_reverse(&s->rc, s->lzma.dist_align,
182718993e2f3af291eea7a175547862d057b7d83708Rob Landley          &s->lzma.rep0, ALIGN_BITS);
182818993e2f3af291eea7a175547862d057b7d83708Rob Landley    }
182918993e2f3af291eea7a175547862d057b7d83708Rob Landley  }
1830971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley}
1831971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
1832971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley/*
1833971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * Decode a repeated match. The distance is one of the four most recently
1834971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * seen matches. The distance will be stored in s->lzma.rep0.
1835971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley */
1836971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landleystatic void lzma_rep_match(struct xz_dec_lzma2 *s, uint32_t pos_state)
1837971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley{
183818993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint32_t tmp;
183918993e2f3af291eea7a175547862d057b7d83708Rob Landley
184018993e2f3af291eea7a175547862d057b7d83708Rob Landley  if (!rc_bit(&s->rc, &s->lzma.is_rep0[s->lzma.state])) {
184118993e2f3af291eea7a175547862d057b7d83708Rob Landley    if (!rc_bit(&s->rc, &s->lzma.is_rep0_long[
184218993e2f3af291eea7a175547862d057b7d83708Rob Landley        s->lzma.state][pos_state])) {
184318993e2f3af291eea7a175547862d057b7d83708Rob Landley      lzma_state_short_rep(&s->lzma.state);
184418993e2f3af291eea7a175547862d057b7d83708Rob Landley      s->lzma.len = 1;
184518993e2f3af291eea7a175547862d057b7d83708Rob Landley      return;
184618993e2f3af291eea7a175547862d057b7d83708Rob Landley    }
184718993e2f3af291eea7a175547862d057b7d83708Rob Landley  } else {
184818993e2f3af291eea7a175547862d057b7d83708Rob Landley    if (!rc_bit(&s->rc, &s->lzma.is_rep1[s->lzma.state])) {
184918993e2f3af291eea7a175547862d057b7d83708Rob Landley      tmp = s->lzma.rep1;
185018993e2f3af291eea7a175547862d057b7d83708Rob Landley    } else {
185118993e2f3af291eea7a175547862d057b7d83708Rob Landley      if (!rc_bit(&s->rc, &s->lzma.is_rep2[s->lzma.state])) {
185218993e2f3af291eea7a175547862d057b7d83708Rob Landley        tmp = s->lzma.rep2;
185318993e2f3af291eea7a175547862d057b7d83708Rob Landley      } else {
185418993e2f3af291eea7a175547862d057b7d83708Rob Landley        tmp = s->lzma.rep3;
185518993e2f3af291eea7a175547862d057b7d83708Rob Landley        s->lzma.rep3 = s->lzma.rep2;
185618993e2f3af291eea7a175547862d057b7d83708Rob Landley      }
185718993e2f3af291eea7a175547862d057b7d83708Rob Landley
185818993e2f3af291eea7a175547862d057b7d83708Rob Landley      s->lzma.rep2 = s->lzma.rep1;
185918993e2f3af291eea7a175547862d057b7d83708Rob Landley    }
186018993e2f3af291eea7a175547862d057b7d83708Rob Landley
186118993e2f3af291eea7a175547862d057b7d83708Rob Landley    s->lzma.rep1 = s->lzma.rep0;
186218993e2f3af291eea7a175547862d057b7d83708Rob Landley    s->lzma.rep0 = tmp;
186318993e2f3af291eea7a175547862d057b7d83708Rob Landley  }
186418993e2f3af291eea7a175547862d057b7d83708Rob Landley
186518993e2f3af291eea7a175547862d057b7d83708Rob Landley  lzma_state_long_rep(&s->lzma.state);
186618993e2f3af291eea7a175547862d057b7d83708Rob Landley  lzma_len(s, &s->lzma.rep_len_dec, pos_state);
1867971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley}
1868971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
1869971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley/* LZMA decoder core */
18701cabcc2e9a75fc8ba0c49e888907e3e310cfcd41Rob Landleystatic int lzma_main(struct xz_dec_lzma2 *s)
1871971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley{
187218993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint32_t pos_state;
187318993e2f3af291eea7a175547862d057b7d83708Rob Landley
187418993e2f3af291eea7a175547862d057b7d83708Rob Landley  /*
187518993e2f3af291eea7a175547862d057b7d83708Rob Landley   * If the dictionary was reached during the previous call, try to
187618993e2f3af291eea7a175547862d057b7d83708Rob Landley   * finish the possibly pending repeat in the dictionary.
187718993e2f3af291eea7a175547862d057b7d83708Rob Landley   */
187818993e2f3af291eea7a175547862d057b7d83708Rob Landley  if (dict_has_space(&s->dict) && s->lzma.len > 0)
187918993e2f3af291eea7a175547862d057b7d83708Rob Landley    dict_repeat(&s->dict, &s->lzma.len, s->lzma.rep0);
188018993e2f3af291eea7a175547862d057b7d83708Rob Landley
188118993e2f3af291eea7a175547862d057b7d83708Rob Landley  /*
188218993e2f3af291eea7a175547862d057b7d83708Rob Landley   * Decode more LZMA symbols. One iteration may consume up to
188318993e2f3af291eea7a175547862d057b7d83708Rob Landley   * LZMA_IN_REQUIRED - 1 bytes.
188418993e2f3af291eea7a175547862d057b7d83708Rob Landley   */
188518993e2f3af291eea7a175547862d057b7d83708Rob Landley  while (dict_has_space(&s->dict) && !rc_limit_exceeded(&s->rc)) {
188618993e2f3af291eea7a175547862d057b7d83708Rob Landley    pos_state = s->dict.pos & s->lzma.pos_mask;
188718993e2f3af291eea7a175547862d057b7d83708Rob Landley
188818993e2f3af291eea7a175547862d057b7d83708Rob Landley    if (!rc_bit(&s->rc, &s->lzma.is_match[
188918993e2f3af291eea7a175547862d057b7d83708Rob Landley        s->lzma.state][pos_state])) {
189018993e2f3af291eea7a175547862d057b7d83708Rob Landley      lzma_literal(s);
189118993e2f3af291eea7a175547862d057b7d83708Rob Landley    } else {
189218993e2f3af291eea7a175547862d057b7d83708Rob Landley      if (rc_bit(&s->rc, &s->lzma.is_rep[s->lzma.state]))
189318993e2f3af291eea7a175547862d057b7d83708Rob Landley        lzma_rep_match(s, pos_state);
189418993e2f3af291eea7a175547862d057b7d83708Rob Landley      else
189518993e2f3af291eea7a175547862d057b7d83708Rob Landley        lzma_match(s, pos_state);
189618993e2f3af291eea7a175547862d057b7d83708Rob Landley
189718993e2f3af291eea7a175547862d057b7d83708Rob Landley      if (!dict_repeat(&s->dict, &s->lzma.len, s->lzma.rep0))
189818993e2f3af291eea7a175547862d057b7d83708Rob Landley        return 0;
189918993e2f3af291eea7a175547862d057b7d83708Rob Landley    }
190018993e2f3af291eea7a175547862d057b7d83708Rob Landley  }
190118993e2f3af291eea7a175547862d057b7d83708Rob Landley
190218993e2f3af291eea7a175547862d057b7d83708Rob Landley  /*
190318993e2f3af291eea7a175547862d057b7d83708Rob Landley   * Having the range decoder always normalized when we are outside
190418993e2f3af291eea7a175547862d057b7d83708Rob Landley   * this function makes it easier to correctly handle end of the chunk.
190518993e2f3af291eea7a175547862d057b7d83708Rob Landley   */
190618993e2f3af291eea7a175547862d057b7d83708Rob Landley  rc_normalize(&s->rc);
190718993e2f3af291eea7a175547862d057b7d83708Rob Landley
190818993e2f3af291eea7a175547862d057b7d83708Rob Landley  return 1;
1909971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley}
1910971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
1911971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley/*
1912971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * Reset the LZMA decoder and range decoder state. Dictionary is nore reset
1913971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * here, because LZMA state may be reset without resetting the dictionary.
1914971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley */
1915971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landleystatic void lzma_reset(struct xz_dec_lzma2 *s)
1916971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley{
191718993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint16_t *probs;
191818993e2f3af291eea7a175547862d057b7d83708Rob Landley  size_t i;
1919971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
192018993e2f3af291eea7a175547862d057b7d83708Rob Landley  s->lzma.state = STATE_LIT_LIT;
192118993e2f3af291eea7a175547862d057b7d83708Rob Landley  s->lzma.rep0 = 0;
192218993e2f3af291eea7a175547862d057b7d83708Rob Landley  s->lzma.rep1 = 0;
192318993e2f3af291eea7a175547862d057b7d83708Rob Landley  s->lzma.rep2 = 0;
192418993e2f3af291eea7a175547862d057b7d83708Rob Landley  s->lzma.rep3 = 0;
1925971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
192618993e2f3af291eea7a175547862d057b7d83708Rob Landley  /*
192718993e2f3af291eea7a175547862d057b7d83708Rob Landley   * All probabilities are initialized to the same value. This hack
192818993e2f3af291eea7a175547862d057b7d83708Rob Landley   * makes the code smaller by avoiding a separate loop for each
192918993e2f3af291eea7a175547862d057b7d83708Rob Landley   * probability array.
193018993e2f3af291eea7a175547862d057b7d83708Rob Landley   *
193118993e2f3af291eea7a175547862d057b7d83708Rob Landley   * This could be optimized so that only that part of literal
193218993e2f3af291eea7a175547862d057b7d83708Rob Landley   * probabilities that are actually required. In the common case
193318993e2f3af291eea7a175547862d057b7d83708Rob Landley   * we would write 12 KiB less.
193418993e2f3af291eea7a175547862d057b7d83708Rob Landley   */
193518993e2f3af291eea7a175547862d057b7d83708Rob Landley  probs = s->lzma.is_match[0];
193618993e2f3af291eea7a175547862d057b7d83708Rob Landley  for (i = 0; i < PROBS_TOTAL; ++i)
193718993e2f3af291eea7a175547862d057b7d83708Rob Landley    probs[i] = RC_BIT_MODEL_TOTAL / 2;
1938971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
193918993e2f3af291eea7a175547862d057b7d83708Rob Landley  rc_reset(&s->rc);
1940971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley}
1941971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
1942971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley/*
1943971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * Decode and validate LZMA properties (lc/lp/pb) and calculate the bit masks
1944971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * from the decoded lp and pb values. On success, the LZMA decoder state is
1945971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * reset and true is returned.
1946971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley */
19471cabcc2e9a75fc8ba0c49e888907e3e310cfcd41Rob Landleystatic int lzma_props(struct xz_dec_lzma2 *s, uint8_t props)
1948971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley{
194918993e2f3af291eea7a175547862d057b7d83708Rob Landley  if (props > (4 * 5 + 4) * 9 + 8)
195018993e2f3af291eea7a175547862d057b7d83708Rob Landley    return 0;
1951971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
195218993e2f3af291eea7a175547862d057b7d83708Rob Landley  s->lzma.pos_mask = 0;
195318993e2f3af291eea7a175547862d057b7d83708Rob Landley  while (props >= 9 * 5) {
195418993e2f3af291eea7a175547862d057b7d83708Rob Landley    props -= 9 * 5;
195518993e2f3af291eea7a175547862d057b7d83708Rob Landley    ++s->lzma.pos_mask;
195618993e2f3af291eea7a175547862d057b7d83708Rob Landley  }
1957971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
195818993e2f3af291eea7a175547862d057b7d83708Rob Landley  s->lzma.pos_mask = (1 << s->lzma.pos_mask) - 1;
1959971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
196018993e2f3af291eea7a175547862d057b7d83708Rob Landley  s->lzma.literal_pos_mask = 0;
196118993e2f3af291eea7a175547862d057b7d83708Rob Landley  while (props >= 9) {
196218993e2f3af291eea7a175547862d057b7d83708Rob Landley    props -= 9;
196318993e2f3af291eea7a175547862d057b7d83708Rob Landley    ++s->lzma.literal_pos_mask;
196418993e2f3af291eea7a175547862d057b7d83708Rob Landley  }
1965971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
196618993e2f3af291eea7a175547862d057b7d83708Rob Landley  s->lzma.lc = props;
1967971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
196818993e2f3af291eea7a175547862d057b7d83708Rob Landley  if (s->lzma.lc + s->lzma.literal_pos_mask > 4)
196918993e2f3af291eea7a175547862d057b7d83708Rob Landley    return 0;
1970971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
197118993e2f3af291eea7a175547862d057b7d83708Rob Landley  s->lzma.literal_pos_mask = (1 << s->lzma.literal_pos_mask) - 1;
1972971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
197318993e2f3af291eea7a175547862d057b7d83708Rob Landley  lzma_reset(s);
1974971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
197518993e2f3af291eea7a175547862d057b7d83708Rob Landley  return 1;
1976971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley}
1977971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
1978971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley/*********
1979971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * LZMA2 *
1980971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley *********/
1981971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
1982971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley/*
1983971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * The LZMA decoder assumes that if the input limit (s->rc.in_limit) hasn't
1984971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * been exceeded, it is safe to read up to LZMA_IN_REQUIRED bytes. This
1985971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * wrapper function takes care of making the LZMA decoder's assumption safe.
1986971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley *
1987971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * As long as there is plenty of input left to be decoded in the current LZMA
1988971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * chunk, we decode directly from the caller-supplied input buffer until
1989971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * there's LZMA_IN_REQUIRED bytes left. Those remaining bytes are copied into
1990971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * s->temp.buf, which (hopefully) gets filled on the next call to this
1991971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * function. We decode a few bytes from the temporary buffer so that we can
1992971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * continue decoding from the caller-supplied input buffer again.
1993971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley */
19941cabcc2e9a75fc8ba0c49e888907e3e310cfcd41Rob Landleystatic int lzma2_lzma(struct xz_dec_lzma2 *s, struct xz_buf *b)
1995971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley{
199618993e2f3af291eea7a175547862d057b7d83708Rob Landley  size_t in_avail;
199718993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint32_t tmp;
1998971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
199918993e2f3af291eea7a175547862d057b7d83708Rob Landley  in_avail = b->in_size - b->in_pos;
200018993e2f3af291eea7a175547862d057b7d83708Rob Landley  if (s->temp.size > 0 || s->lzma2.compressed == 0) {
200118993e2f3af291eea7a175547862d057b7d83708Rob Landley    tmp = 2 * LZMA_IN_REQUIRED - s->temp.size;
200218993e2f3af291eea7a175547862d057b7d83708Rob Landley    if (tmp > s->lzma2.compressed - s->temp.size)
200318993e2f3af291eea7a175547862d057b7d83708Rob Landley      tmp = s->lzma2.compressed - s->temp.size;
200418993e2f3af291eea7a175547862d057b7d83708Rob Landley    if (tmp > in_avail)
200518993e2f3af291eea7a175547862d057b7d83708Rob Landley      tmp = in_avail;
2006971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
200718993e2f3af291eea7a175547862d057b7d83708Rob Landley    memcpy(s->temp.buf + s->temp.size, b->in + b->in_pos, tmp);
2008971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
200918993e2f3af291eea7a175547862d057b7d83708Rob Landley    if (s->temp.size + tmp == s->lzma2.compressed) {
201018993e2f3af291eea7a175547862d057b7d83708Rob Landley      memset(s->temp.buf + s->temp.size + tmp, 0,
201118993e2f3af291eea7a175547862d057b7d83708Rob Landley          sizeof(s->temp.buf)
201218993e2f3af291eea7a175547862d057b7d83708Rob Landley            - s->temp.size - tmp);
201318993e2f3af291eea7a175547862d057b7d83708Rob Landley      s->rc.in_limit = s->temp.size + tmp;
201418993e2f3af291eea7a175547862d057b7d83708Rob Landley    } else if (s->temp.size + tmp < LZMA_IN_REQUIRED) {
201518993e2f3af291eea7a175547862d057b7d83708Rob Landley      s->temp.size += tmp;
201618993e2f3af291eea7a175547862d057b7d83708Rob Landley      b->in_pos += tmp;
201718993e2f3af291eea7a175547862d057b7d83708Rob Landley      return 1;
201818993e2f3af291eea7a175547862d057b7d83708Rob Landley    } else {
201918993e2f3af291eea7a175547862d057b7d83708Rob Landley      s->rc.in_limit = s->temp.size + tmp - LZMA_IN_REQUIRED;
202018993e2f3af291eea7a175547862d057b7d83708Rob Landley    }
2021971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
202218993e2f3af291eea7a175547862d057b7d83708Rob Landley    s->rc.in = s->temp.buf;
202318993e2f3af291eea7a175547862d057b7d83708Rob Landley    s->rc.in_pos = 0;
2024971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
202518993e2f3af291eea7a175547862d057b7d83708Rob Landley    if (!lzma_main(s) || s->rc.in_pos > s->temp.size + tmp)
202618993e2f3af291eea7a175547862d057b7d83708Rob Landley      return 0;
2027971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
202818993e2f3af291eea7a175547862d057b7d83708Rob Landley    s->lzma2.compressed -= s->rc.in_pos;
2029971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
203018993e2f3af291eea7a175547862d057b7d83708Rob Landley    if (s->rc.in_pos < s->temp.size) {
203118993e2f3af291eea7a175547862d057b7d83708Rob Landley      s->temp.size -= s->rc.in_pos;
203218993e2f3af291eea7a175547862d057b7d83708Rob Landley      memmove(s->temp.buf, s->temp.buf + s->rc.in_pos,
203318993e2f3af291eea7a175547862d057b7d83708Rob Landley          s->temp.size);
203418993e2f3af291eea7a175547862d057b7d83708Rob Landley      return 1;
203518993e2f3af291eea7a175547862d057b7d83708Rob Landley    }
2036971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
203718993e2f3af291eea7a175547862d057b7d83708Rob Landley    b->in_pos += s->rc.in_pos - s->temp.size;
203818993e2f3af291eea7a175547862d057b7d83708Rob Landley    s->temp.size = 0;
203918993e2f3af291eea7a175547862d057b7d83708Rob Landley  }
2040971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
204118993e2f3af291eea7a175547862d057b7d83708Rob Landley  in_avail = b->in_size - b->in_pos;
204218993e2f3af291eea7a175547862d057b7d83708Rob Landley  if (in_avail >= LZMA_IN_REQUIRED) {
204318993e2f3af291eea7a175547862d057b7d83708Rob Landley    s->rc.in = b->in;
204418993e2f3af291eea7a175547862d057b7d83708Rob Landley    s->rc.in_pos = b->in_pos;
2045971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
204618993e2f3af291eea7a175547862d057b7d83708Rob Landley    if (in_avail >= s->lzma2.compressed + LZMA_IN_REQUIRED)
204718993e2f3af291eea7a175547862d057b7d83708Rob Landley      s->rc.in_limit = b->in_pos + s->lzma2.compressed;
204818993e2f3af291eea7a175547862d057b7d83708Rob Landley    else
204918993e2f3af291eea7a175547862d057b7d83708Rob Landley      s->rc.in_limit = b->in_size - LZMA_IN_REQUIRED;
2050971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
205118993e2f3af291eea7a175547862d057b7d83708Rob Landley    if (!lzma_main(s))
205218993e2f3af291eea7a175547862d057b7d83708Rob Landley      return 0;
2053971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
205418993e2f3af291eea7a175547862d057b7d83708Rob Landley    in_avail = s->rc.in_pos - b->in_pos;
205518993e2f3af291eea7a175547862d057b7d83708Rob Landley    if (in_avail > s->lzma2.compressed) return 0;
2056971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
205718993e2f3af291eea7a175547862d057b7d83708Rob Landley    s->lzma2.compressed -= in_avail;
205818993e2f3af291eea7a175547862d057b7d83708Rob Landley    b->in_pos = s->rc.in_pos;
205918993e2f3af291eea7a175547862d057b7d83708Rob Landley  }
2060971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
206118993e2f3af291eea7a175547862d057b7d83708Rob Landley  in_avail = b->in_size - b->in_pos;
206218993e2f3af291eea7a175547862d057b7d83708Rob Landley  if (in_avail < LZMA_IN_REQUIRED) {
206318993e2f3af291eea7a175547862d057b7d83708Rob Landley    if (in_avail > s->lzma2.compressed)
206418993e2f3af291eea7a175547862d057b7d83708Rob Landley      in_avail = s->lzma2.compressed;
2065971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
206618993e2f3af291eea7a175547862d057b7d83708Rob Landley    memcpy(s->temp.buf, b->in + b->in_pos, in_avail);
206718993e2f3af291eea7a175547862d057b7d83708Rob Landley    s->temp.size = in_avail;
206818993e2f3af291eea7a175547862d057b7d83708Rob Landley    b->in_pos += in_avail;
206918993e2f3af291eea7a175547862d057b7d83708Rob Landley  }
2070971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
207118993e2f3af291eea7a175547862d057b7d83708Rob Landley  return 1;
2072971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley}
2073971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
2074971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley/*
2075971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * Take care of the LZMA2 control layer, and forward the job of actual LZMA
2076971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * decoding or copying of uncompressed chunks to other functions.
2077971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley */
20789bd7a1696e7060e51ec0d0aaef477eaf85aaf216Rob Landleyenum xz_ret xz_dec_lzma2_run(struct xz_dec_lzma2 *s,
207918993e2f3af291eea7a175547862d057b7d83708Rob Landley               struct xz_buf *b)
208018993e2f3af291eea7a175547862d057b7d83708Rob Landley{
208118993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint32_t tmp;
208218993e2f3af291eea7a175547862d057b7d83708Rob Landley
208318993e2f3af291eea7a175547862d057b7d83708Rob Landley  while (b->in_pos < b->in_size || s->lzma2.sequence == SEQ_LZMA_RUN) {
208418993e2f3af291eea7a175547862d057b7d83708Rob Landley    switch (s->lzma2.sequence) {
208518993e2f3af291eea7a175547862d057b7d83708Rob Landley    case SEQ_CONTROL:
208618993e2f3af291eea7a175547862d057b7d83708Rob Landley      /*
208718993e2f3af291eea7a175547862d057b7d83708Rob Landley       * LZMA2 control byte
208818993e2f3af291eea7a175547862d057b7d83708Rob Landley       *
208918993e2f3af291eea7a175547862d057b7d83708Rob Landley       * Exact values:
209018993e2f3af291eea7a175547862d057b7d83708Rob Landley       *   0x00   End marker
209118993e2f3af291eea7a175547862d057b7d83708Rob Landley       *   0x01   Dictionary reset followed by
209218993e2f3af291eea7a175547862d057b7d83708Rob Landley       *          an uncompressed chunk
209318993e2f3af291eea7a175547862d057b7d83708Rob Landley       *   0x02   Uncompressed chunk (no dictionary reset)
209418993e2f3af291eea7a175547862d057b7d83708Rob Landley       *
209518993e2f3af291eea7a175547862d057b7d83708Rob Landley       * Highest three bits (s->control & 0xE0):
209618993e2f3af291eea7a175547862d057b7d83708Rob Landley       *   0xE0   Dictionary reset, new properties and state
209718993e2f3af291eea7a175547862d057b7d83708Rob Landley       *          reset, followed by LZMA compressed chunk
209818993e2f3af291eea7a175547862d057b7d83708Rob Landley       *   0xC0   New properties and state reset, followed
209918993e2f3af291eea7a175547862d057b7d83708Rob Landley       *          by LZMA compressed chunk (no dictionary
210018993e2f3af291eea7a175547862d057b7d83708Rob Landley       *          reset)
210118993e2f3af291eea7a175547862d057b7d83708Rob Landley       *   0xA0   State reset using old properties,
210218993e2f3af291eea7a175547862d057b7d83708Rob Landley       *          followed by LZMA compressed chunk (no
210318993e2f3af291eea7a175547862d057b7d83708Rob Landley       *          dictionary reset)
210418993e2f3af291eea7a175547862d057b7d83708Rob Landley       *   0x80   LZMA chunk (no dictionary or state reset)
210518993e2f3af291eea7a175547862d057b7d83708Rob Landley       *
210618993e2f3af291eea7a175547862d057b7d83708Rob Landley       * For LZMA compressed chunks, the lowest five bits
210718993e2f3af291eea7a175547862d057b7d83708Rob Landley       * (s->control & 1F) are the highest bits of the
210818993e2f3af291eea7a175547862d057b7d83708Rob Landley       * uncompressed size (bits 16-20).
210918993e2f3af291eea7a175547862d057b7d83708Rob Landley       *
211018993e2f3af291eea7a175547862d057b7d83708Rob Landley       * A new LZMA2 stream must begin with a dictionary
211118993e2f3af291eea7a175547862d057b7d83708Rob Landley       * reset. The first LZMA chunk must set new
211218993e2f3af291eea7a175547862d057b7d83708Rob Landley       * properties and reset the LZMA state.
211318993e2f3af291eea7a175547862d057b7d83708Rob Landley       *
211418993e2f3af291eea7a175547862d057b7d83708Rob Landley       * Values that don't match anything described above
211518993e2f3af291eea7a175547862d057b7d83708Rob Landley       * are invalid and we return XZ_DATA_ERROR.
211618993e2f3af291eea7a175547862d057b7d83708Rob Landley       */
211718993e2f3af291eea7a175547862d057b7d83708Rob Landley      tmp = b->in[b->in_pos++];
211818993e2f3af291eea7a175547862d057b7d83708Rob Landley
211918993e2f3af291eea7a175547862d057b7d83708Rob Landley      if (tmp == 0x00)
212018993e2f3af291eea7a175547862d057b7d83708Rob Landley        return XZ_STREAM_END;
212118993e2f3af291eea7a175547862d057b7d83708Rob Landley
212218993e2f3af291eea7a175547862d057b7d83708Rob Landley      if (tmp >= 0xE0 || tmp == 0x01) {
212318993e2f3af291eea7a175547862d057b7d83708Rob Landley        s->lzma2.need_props = 1;
212418993e2f3af291eea7a175547862d057b7d83708Rob Landley        s->lzma2.need_dict_reset = 0;
21250c2509957b6291fb136eca4a5c278466045ce31dIsaac Dunham        dict_reset(&s->dict);
212618993e2f3af291eea7a175547862d057b7d83708Rob Landley      } else if (s->lzma2.need_dict_reset) {
212718993e2f3af291eea7a175547862d057b7d83708Rob Landley        return XZ_DATA_ERROR;
212818993e2f3af291eea7a175547862d057b7d83708Rob Landley      }
212918993e2f3af291eea7a175547862d057b7d83708Rob Landley
213018993e2f3af291eea7a175547862d057b7d83708Rob Landley      if (tmp >= 0x80) {
213118993e2f3af291eea7a175547862d057b7d83708Rob Landley        s->lzma2.uncompressed = (tmp & 0x1F) << 16;
213218993e2f3af291eea7a175547862d057b7d83708Rob Landley        s->lzma2.sequence = SEQ_UNCOMPRESSED_1;
213318993e2f3af291eea7a175547862d057b7d83708Rob Landley
213418993e2f3af291eea7a175547862d057b7d83708Rob Landley        if (tmp >= 0xC0) {
213518993e2f3af291eea7a175547862d057b7d83708Rob Landley          /*
213618993e2f3af291eea7a175547862d057b7d83708Rob Landley           * When there are new properties,
213718993e2f3af291eea7a175547862d057b7d83708Rob Landley           * state reset is done at
213818993e2f3af291eea7a175547862d057b7d83708Rob Landley           * SEQ_PROPERTIES.
213918993e2f3af291eea7a175547862d057b7d83708Rob Landley           */
214018993e2f3af291eea7a175547862d057b7d83708Rob Landley          s->lzma2.need_props = 0;
214118993e2f3af291eea7a175547862d057b7d83708Rob Landley          s->lzma2.next_sequence
214218993e2f3af291eea7a175547862d057b7d83708Rob Landley              = SEQ_PROPERTIES;
214318993e2f3af291eea7a175547862d057b7d83708Rob Landley
214418993e2f3af291eea7a175547862d057b7d83708Rob Landley        } else if (s->lzma2.need_props) {
214518993e2f3af291eea7a175547862d057b7d83708Rob Landley          return XZ_DATA_ERROR;
214618993e2f3af291eea7a175547862d057b7d83708Rob Landley
214718993e2f3af291eea7a175547862d057b7d83708Rob Landley        } else {
214818993e2f3af291eea7a175547862d057b7d83708Rob Landley          s->lzma2.next_sequence
214918993e2f3af291eea7a175547862d057b7d83708Rob Landley              = SEQ_LZMA_PREPARE;
215018993e2f3af291eea7a175547862d057b7d83708Rob Landley          if (tmp >= 0xA0)
215118993e2f3af291eea7a175547862d057b7d83708Rob Landley            lzma_reset(s);
215218993e2f3af291eea7a175547862d057b7d83708Rob Landley        }
215318993e2f3af291eea7a175547862d057b7d83708Rob Landley      } else {
215418993e2f3af291eea7a175547862d057b7d83708Rob Landley        if (tmp > 0x02)
215518993e2f3af291eea7a175547862d057b7d83708Rob Landley          return XZ_DATA_ERROR;
215618993e2f3af291eea7a175547862d057b7d83708Rob Landley
215718993e2f3af291eea7a175547862d057b7d83708Rob Landley        s->lzma2.sequence = SEQ_COMPRESSED_0;
215818993e2f3af291eea7a175547862d057b7d83708Rob Landley        s->lzma2.next_sequence = SEQ_COPY;
215918993e2f3af291eea7a175547862d057b7d83708Rob Landley      }
216018993e2f3af291eea7a175547862d057b7d83708Rob Landley
216118993e2f3af291eea7a175547862d057b7d83708Rob Landley      break;
216218993e2f3af291eea7a175547862d057b7d83708Rob Landley
216318993e2f3af291eea7a175547862d057b7d83708Rob Landley    case SEQ_UNCOMPRESSED_1:
216418993e2f3af291eea7a175547862d057b7d83708Rob Landley      s->lzma2.uncompressed
216518993e2f3af291eea7a175547862d057b7d83708Rob Landley          += (uint32_t)b->in[b->in_pos++] << 8;
216618993e2f3af291eea7a175547862d057b7d83708Rob Landley      s->lzma2.sequence = SEQ_UNCOMPRESSED_2;
216718993e2f3af291eea7a175547862d057b7d83708Rob Landley      break;
216818993e2f3af291eea7a175547862d057b7d83708Rob Landley
216918993e2f3af291eea7a175547862d057b7d83708Rob Landley    case SEQ_UNCOMPRESSED_2:
217018993e2f3af291eea7a175547862d057b7d83708Rob Landley      s->lzma2.uncompressed
217118993e2f3af291eea7a175547862d057b7d83708Rob Landley          += (uint32_t)b->in[b->in_pos++] + 1;
217218993e2f3af291eea7a175547862d057b7d83708Rob Landley      s->lzma2.sequence = SEQ_COMPRESSED_0;
217318993e2f3af291eea7a175547862d057b7d83708Rob Landley      break;
217418993e2f3af291eea7a175547862d057b7d83708Rob Landley
217518993e2f3af291eea7a175547862d057b7d83708Rob Landley    case SEQ_COMPRESSED_0:
217618993e2f3af291eea7a175547862d057b7d83708Rob Landley      s->lzma2.compressed
217718993e2f3af291eea7a175547862d057b7d83708Rob Landley          = (uint32_t)b->in[b->in_pos++] << 8;
217818993e2f3af291eea7a175547862d057b7d83708Rob Landley      s->lzma2.sequence = SEQ_COMPRESSED_1;
217918993e2f3af291eea7a175547862d057b7d83708Rob Landley      break;
218018993e2f3af291eea7a175547862d057b7d83708Rob Landley
218118993e2f3af291eea7a175547862d057b7d83708Rob Landley    case SEQ_COMPRESSED_1:
218218993e2f3af291eea7a175547862d057b7d83708Rob Landley      s->lzma2.compressed
218318993e2f3af291eea7a175547862d057b7d83708Rob Landley          += (uint32_t)b->in[b->in_pos++] + 1;
218418993e2f3af291eea7a175547862d057b7d83708Rob Landley      s->lzma2.sequence = s->lzma2.next_sequence;
218518993e2f3af291eea7a175547862d057b7d83708Rob Landley      break;
218618993e2f3af291eea7a175547862d057b7d83708Rob Landley
218718993e2f3af291eea7a175547862d057b7d83708Rob Landley    case SEQ_PROPERTIES:
218818993e2f3af291eea7a175547862d057b7d83708Rob Landley      if (!lzma_props(s, b->in[b->in_pos++]))
218918993e2f3af291eea7a175547862d057b7d83708Rob Landley        return XZ_DATA_ERROR;
219018993e2f3af291eea7a175547862d057b7d83708Rob Landley
219118993e2f3af291eea7a175547862d057b7d83708Rob Landley      s->lzma2.sequence = SEQ_LZMA_PREPARE;
219218993e2f3af291eea7a175547862d057b7d83708Rob Landley
219318993e2f3af291eea7a175547862d057b7d83708Rob Landley    case SEQ_LZMA_PREPARE:
219418993e2f3af291eea7a175547862d057b7d83708Rob Landley      if (s->lzma2.compressed < RC_INIT_BYTES)
219518993e2f3af291eea7a175547862d057b7d83708Rob Landley        return XZ_DATA_ERROR;
219618993e2f3af291eea7a175547862d057b7d83708Rob Landley
219718993e2f3af291eea7a175547862d057b7d83708Rob Landley      if (!rc_read_init(&s->rc, b))
219818993e2f3af291eea7a175547862d057b7d83708Rob Landley        return XZ_OK;
219918993e2f3af291eea7a175547862d057b7d83708Rob Landley
220018993e2f3af291eea7a175547862d057b7d83708Rob Landley      s->lzma2.compressed -= RC_INIT_BYTES;
220118993e2f3af291eea7a175547862d057b7d83708Rob Landley      s->lzma2.sequence = SEQ_LZMA_RUN;
220218993e2f3af291eea7a175547862d057b7d83708Rob Landley
220318993e2f3af291eea7a175547862d057b7d83708Rob Landley    case SEQ_LZMA_RUN:
220418993e2f3af291eea7a175547862d057b7d83708Rob Landley      /*
220518993e2f3af291eea7a175547862d057b7d83708Rob Landley       * Set dictionary limit to indicate how much we want
220618993e2f3af291eea7a175547862d057b7d83708Rob Landley       * to be encoded at maximum. Decode new data into the
220718993e2f3af291eea7a175547862d057b7d83708Rob Landley       * dictionary. Flush the new data from dictionary to
220818993e2f3af291eea7a175547862d057b7d83708Rob Landley       * b->out. Check if we finished decoding this chunk.
220918993e2f3af291eea7a175547862d057b7d83708Rob Landley       * In case the dictionary got full but we didn't fill
221018993e2f3af291eea7a175547862d057b7d83708Rob Landley       * the output buffer yet, we may run this loop
221118993e2f3af291eea7a175547862d057b7d83708Rob Landley       * multiple times without changing s->lzma2.sequence.
221218993e2f3af291eea7a175547862d057b7d83708Rob Landley       */
221318993e2f3af291eea7a175547862d057b7d83708Rob Landley      dict_limit(&s->dict, min_t(size_t,
221418993e2f3af291eea7a175547862d057b7d83708Rob Landley          b->out_size - b->out_pos,
221518993e2f3af291eea7a175547862d057b7d83708Rob Landley          s->lzma2.uncompressed));
221618993e2f3af291eea7a175547862d057b7d83708Rob Landley      if (!lzma2_lzma(s, b))
221718993e2f3af291eea7a175547862d057b7d83708Rob Landley        return XZ_DATA_ERROR;
221818993e2f3af291eea7a175547862d057b7d83708Rob Landley
221918993e2f3af291eea7a175547862d057b7d83708Rob Landley      s->lzma2.uncompressed -= dict_flush(&s->dict, b);
222018993e2f3af291eea7a175547862d057b7d83708Rob Landley
222118993e2f3af291eea7a175547862d057b7d83708Rob Landley      if (s->lzma2.uncompressed == 0) {
222218993e2f3af291eea7a175547862d057b7d83708Rob Landley        if (s->lzma2.compressed > 0 || s->lzma.len > 0
222318993e2f3af291eea7a175547862d057b7d83708Rob Landley            || !rc_is_finished(&s->rc))
222418993e2f3af291eea7a175547862d057b7d83708Rob Landley          return XZ_DATA_ERROR;
222518993e2f3af291eea7a175547862d057b7d83708Rob Landley
222618993e2f3af291eea7a175547862d057b7d83708Rob Landley        rc_reset(&s->rc);
222718993e2f3af291eea7a175547862d057b7d83708Rob Landley        s->lzma2.sequence = SEQ_CONTROL;
222818993e2f3af291eea7a175547862d057b7d83708Rob Landley
222918993e2f3af291eea7a175547862d057b7d83708Rob Landley      } else if (b->out_pos == b->out_size
223018993e2f3af291eea7a175547862d057b7d83708Rob Landley          || (b->in_pos == b->in_size
223118993e2f3af291eea7a175547862d057b7d83708Rob Landley            && s->temp.size
223218993e2f3af291eea7a175547862d057b7d83708Rob Landley            < s->lzma2.compressed)) {
223318993e2f3af291eea7a175547862d057b7d83708Rob Landley        return XZ_OK;
223418993e2f3af291eea7a175547862d057b7d83708Rob Landley      }
223518993e2f3af291eea7a175547862d057b7d83708Rob Landley
223618993e2f3af291eea7a175547862d057b7d83708Rob Landley      break;
223718993e2f3af291eea7a175547862d057b7d83708Rob Landley
223818993e2f3af291eea7a175547862d057b7d83708Rob Landley    case SEQ_COPY:
223918993e2f3af291eea7a175547862d057b7d83708Rob Landley      dict_uncompressed(&s->dict, b, &s->lzma2.compressed);
224018993e2f3af291eea7a175547862d057b7d83708Rob Landley      if (s->lzma2.compressed > 0)
224118993e2f3af291eea7a175547862d057b7d83708Rob Landley        return XZ_OK;
224218993e2f3af291eea7a175547862d057b7d83708Rob Landley
224318993e2f3af291eea7a175547862d057b7d83708Rob Landley      s->lzma2.sequence = SEQ_CONTROL;
224418993e2f3af291eea7a175547862d057b7d83708Rob Landley      break;
224518993e2f3af291eea7a175547862d057b7d83708Rob Landley    }
224618993e2f3af291eea7a175547862d057b7d83708Rob Landley  }
224718993e2f3af291eea7a175547862d057b7d83708Rob Landley
224818993e2f3af291eea7a175547862d057b7d83708Rob Landley  return XZ_OK;
2249971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley}
2250971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
22510c2509957b6291fb136eca4a5c278466045ce31dIsaac Dunhamstruct xz_dec_lzma2 *xz_dec_lzma2_create(uint32_t dict_max)
2252971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley{
225318993e2f3af291eea7a175547862d057b7d83708Rob Landley  struct xz_dec_lzma2 *s = malloc(sizeof(*s));
225418993e2f3af291eea7a175547862d057b7d83708Rob Landley  if (s == NULL)
225518993e2f3af291eea7a175547862d057b7d83708Rob Landley    return NULL;
2256971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
225718993e2f3af291eea7a175547862d057b7d83708Rob Landley  s->dict.size_max = dict_max;
22589641a3c6c5447c76f56cefd1844a4e545c015822Isaac Dunham  s->dict.buf = NULL;
22599641a3c6c5447c76f56cefd1844a4e545c015822Isaac Dunham  s->dict.allocated = 0;
2260971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
226118993e2f3af291eea7a175547862d057b7d83708Rob Landley  return s;
2262971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley}
2263971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
22649bd7a1696e7060e51ec0d0aaef477eaf85aaf216Rob Landleyenum xz_ret xz_dec_lzma2_reset(struct xz_dec_lzma2 *s, uint8_t props)
2265971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley{
226618993e2f3af291eea7a175547862d057b7d83708Rob Landley  /* This limits dictionary size to 3 GiB to keep parsing simpler. */
226718993e2f3af291eea7a175547862d057b7d83708Rob Landley  if (props > 39)
226818993e2f3af291eea7a175547862d057b7d83708Rob Landley    return XZ_OPTIONS_ERROR;
2269971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
227018993e2f3af291eea7a175547862d057b7d83708Rob Landley  s->dict.size = 2 + (props & 1);
227118993e2f3af291eea7a175547862d057b7d83708Rob Landley  s->dict.size <<= (props >> 1) + 11;
2272971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
22739641a3c6c5447c76f56cefd1844a4e545c015822Isaac Dunham  if (s->dict.size > s->dict.size_max)
22749641a3c6c5447c76f56cefd1844a4e545c015822Isaac Dunham    return XZ_MEMLIMIT_ERROR;
2275971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
22769641a3c6c5447c76f56cefd1844a4e545c015822Isaac Dunham  s->dict.end = s->dict.size;
2277971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
22789641a3c6c5447c76f56cefd1844a4e545c015822Isaac Dunham  if (s->dict.allocated < s->dict.size) {
22799641a3c6c5447c76f56cefd1844a4e545c015822Isaac Dunham    free(s->dict.buf);
22809641a3c6c5447c76f56cefd1844a4e545c015822Isaac Dunham    s->dict.buf = malloc(s->dict.size);
22819641a3c6c5447c76f56cefd1844a4e545c015822Isaac Dunham    if (s->dict.buf == NULL) {
22829641a3c6c5447c76f56cefd1844a4e545c015822Isaac Dunham      s->dict.allocated = 0;
22839641a3c6c5447c76f56cefd1844a4e545c015822Isaac Dunham      return XZ_MEM_ERROR;
228418993e2f3af291eea7a175547862d057b7d83708Rob Landley    }
228518993e2f3af291eea7a175547862d057b7d83708Rob Landley  }
2286971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
228718993e2f3af291eea7a175547862d057b7d83708Rob Landley  s->lzma.len = 0;
2288971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
228918993e2f3af291eea7a175547862d057b7d83708Rob Landley  s->lzma2.sequence = SEQ_CONTROL;
229018993e2f3af291eea7a175547862d057b7d83708Rob Landley  s->lzma2.need_dict_reset = 1;
2291971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
229218993e2f3af291eea7a175547862d057b7d83708Rob Landley  s->temp.size = 0;
2293971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
229418993e2f3af291eea7a175547862d057b7d83708Rob Landley  return XZ_OK;
2295971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley}
2296971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
2297971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley/*
2298971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * .xz Stream decoder
2299971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley */
2300971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
2301971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
2302971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley// BEGIN xz_stream.h
2303971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley/*
2304971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * Definitions for handling the .xz file format
2305971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley */
2306971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
2307971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley/*
2308971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * See the .xz file format specification at
2309971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * http://tukaani.org/xz/xz-file-format.txt
2310971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * to understand the container format.
2311971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley */
2312971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
/* Size of the Stream Header in bytes. */
#define STREAM_HEADER_SIZE 12

/*
 * Magic bytes at the start of a stream. The sixth byte is the NUL that
 * terminates the string literal.
 */
#define HEADER_MAGIC_SIZE 6
#define HEADER_MAGIC "\3757zXZ"

/* Magic bytes for the Stream Footer. */
#define FOOTER_MAGIC_SIZE 2
#define FOOTER_MAGIC "YZ"

/*
 * A variable-length integer holds either a 63-bit unsigned value or a
 * special marker meaning that the value is unknown.
 *
 * Experimental: vli_type may be defined as uint32_t to save a few bytes of
 * code size (no effect on speed). That caps the uncompressed and compressed
 * size of the file below 256 MiB and may weaken error detection slightly.
 */
typedef uint64_t vli_type;

/* All bits set marks "unknown"; the largest real value has the top bit clear. */
#define VLI_UNKNOWN (~(vli_type)0)
#define VLI_MAX (~(vli_type)0 >> 1)

/* Maximum encoded size of a VLI */
#define VLI_BYTES_MAX (sizeof(vli_type) * 8 / 7)
2337971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
/* Integrity Check types, identified by their Check ID */
enum xz_check {
  XZ_CHECK_NONE   = 0x00,
  XZ_CHECK_CRC32  = 0x01,
  XZ_CHECK_CRC64  = 0x04,
  XZ_CHECK_SHA256 = 0x0A
};

/* Maximum possible Check ID */
#define XZ_CHECK_MAX 15
// END xz_stream.h

/* True when check_type selects the CRC64 integrity check. */
#define IS_CRC64(check_type) (XZ_CHECK_CRC64 == (check_type))
2351971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
2352971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley/* Hash used to validate the Index field */
2353971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landleystruct xz_dec_hash {
235418993e2f3af291eea7a175547862d057b7d83708Rob Landley  vli_type unpadded;
235518993e2f3af291eea7a175547862d057b7d83708Rob Landley  vli_type uncompressed;
235618993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint32_t crc32;
2357971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley};
2358971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
2359971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landleystruct xz_dec {
236018993e2f3af291eea7a175547862d057b7d83708Rob Landley  /* Position in dec_main() */
236118993e2f3af291eea7a175547862d057b7d83708Rob Landley  enum {
236218993e2f3af291eea7a175547862d057b7d83708Rob Landley    SEQ_STREAM_HEADER,
236318993e2f3af291eea7a175547862d057b7d83708Rob Landley    SEQ_BLOCK_START,
236418993e2f3af291eea7a175547862d057b7d83708Rob Landley    SEQ_BLOCK_HEADER,
236518993e2f3af291eea7a175547862d057b7d83708Rob Landley    SEQ_BLOCK_UNCOMPRESS,
236618993e2f3af291eea7a175547862d057b7d83708Rob Landley    SEQ_BLOCK_PADDING,
236718993e2f3af291eea7a175547862d057b7d83708Rob Landley    SEQ_BLOCK_CHECK,
236818993e2f3af291eea7a175547862d057b7d83708Rob Landley    SEQ_INDEX,
236918993e2f3af291eea7a175547862d057b7d83708Rob Landley    SEQ_INDEX_PADDING,
237018993e2f3af291eea7a175547862d057b7d83708Rob Landley    SEQ_INDEX_CRC32,
237118993e2f3af291eea7a175547862d057b7d83708Rob Landley    SEQ_STREAM_FOOTER
237218993e2f3af291eea7a175547862d057b7d83708Rob Landley  } sequence;
237318993e2f3af291eea7a175547862d057b7d83708Rob Landley
237418993e2f3af291eea7a175547862d057b7d83708Rob Landley  /* Position in variable-length integers and Check fields */
237518993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint32_t pos;
237618993e2f3af291eea7a175547862d057b7d83708Rob Landley
237718993e2f3af291eea7a175547862d057b7d83708Rob Landley  /* Variable-length integer decoded by dec_vli() */
237818993e2f3af291eea7a175547862d057b7d83708Rob Landley  vli_type vli;
237918993e2f3af291eea7a175547862d057b7d83708Rob Landley
238018993e2f3af291eea7a175547862d057b7d83708Rob Landley  /* Saved in_pos and out_pos */
238118993e2f3af291eea7a175547862d057b7d83708Rob Landley  size_t in_start;
238218993e2f3af291eea7a175547862d057b7d83708Rob Landley  size_t out_start;
238318993e2f3af291eea7a175547862d057b7d83708Rob Landley
238418993e2f3af291eea7a175547862d057b7d83708Rob Landley  /* CRC32 or CRC64 value in Block or CRC32 value in Index */
238518993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint64_t crc;
238618993e2f3af291eea7a175547862d057b7d83708Rob Landley
238718993e2f3af291eea7a175547862d057b7d83708Rob Landley  /* Type of the integrity check calculated from uncompressed data */
238818993e2f3af291eea7a175547862d057b7d83708Rob Landley  enum xz_check check_type;
238918993e2f3af291eea7a175547862d057b7d83708Rob Landley
239018993e2f3af291eea7a175547862d057b7d83708Rob Landley  /*
239118993e2f3af291eea7a175547862d057b7d83708Rob Landley   * True if the next call to xz_dec_run() is allowed to return
239218993e2f3af291eea7a175547862d057b7d83708Rob Landley   * XZ_BUF_ERROR.
239318993e2f3af291eea7a175547862d057b7d83708Rob Landley   */
239418993e2f3af291eea7a175547862d057b7d83708Rob Landley  int allow_buf_error;
239518993e2f3af291eea7a175547862d057b7d83708Rob Landley
239618993e2f3af291eea7a175547862d057b7d83708Rob Landley  /* Information stored in Block Header */
239718993e2f3af291eea7a175547862d057b7d83708Rob Landley  struct {
239818993e2f3af291eea7a175547862d057b7d83708Rob Landley    /*
239918993e2f3af291eea7a175547862d057b7d83708Rob Landley     * Value stored in the Compressed Size field, or
240018993e2f3af291eea7a175547862d057b7d83708Rob Landley     * VLI_UNKNOWN if Compressed Size is not present.
240118993e2f3af291eea7a175547862d057b7d83708Rob Landley     */
240218993e2f3af291eea7a175547862d057b7d83708Rob Landley    vli_type compressed;
240318993e2f3af291eea7a175547862d057b7d83708Rob Landley
240418993e2f3af291eea7a175547862d057b7d83708Rob Landley    /*
240518993e2f3af291eea7a175547862d057b7d83708Rob Landley     * Value stored in the Uncompressed Size field, or
240618993e2f3af291eea7a175547862d057b7d83708Rob Landley     * VLI_UNKNOWN if Uncompressed Size is not present.
240718993e2f3af291eea7a175547862d057b7d83708Rob Landley     */
240818993e2f3af291eea7a175547862d057b7d83708Rob Landley    vli_type uncompressed;
240918993e2f3af291eea7a175547862d057b7d83708Rob Landley
241018993e2f3af291eea7a175547862d057b7d83708Rob Landley    /* Size of the Block Header field */
241118993e2f3af291eea7a175547862d057b7d83708Rob Landley    uint32_t size;
241218993e2f3af291eea7a175547862d057b7d83708Rob Landley  } block_header;
241318993e2f3af291eea7a175547862d057b7d83708Rob Landley
241418993e2f3af291eea7a175547862d057b7d83708Rob Landley  /* Information collected when decoding Blocks */
241518993e2f3af291eea7a175547862d057b7d83708Rob Landley  struct {
241618993e2f3af291eea7a175547862d057b7d83708Rob Landley    /* Observed compressed size of the current Block */
241718993e2f3af291eea7a175547862d057b7d83708Rob Landley    vli_type compressed;
241818993e2f3af291eea7a175547862d057b7d83708Rob Landley
241918993e2f3af291eea7a175547862d057b7d83708Rob Landley    /* Observed uncompressed size of the current Block */
242018993e2f3af291eea7a175547862d057b7d83708Rob Landley    vli_type uncompressed;
242118993e2f3af291eea7a175547862d057b7d83708Rob Landley
242218993e2f3af291eea7a175547862d057b7d83708Rob Landley    /* Number of Blocks decoded so far */
242318993e2f3af291eea7a175547862d057b7d83708Rob Landley    vli_type count;
242418993e2f3af291eea7a175547862d057b7d83708Rob Landley
242518993e2f3af291eea7a175547862d057b7d83708Rob Landley    /*
242618993e2f3af291eea7a175547862d057b7d83708Rob Landley     * Hash calculated from the Block sizes. This is used to
242718993e2f3af291eea7a175547862d057b7d83708Rob Landley     * validate the Index field.
242818993e2f3af291eea7a175547862d057b7d83708Rob Landley     */
242918993e2f3af291eea7a175547862d057b7d83708Rob Landley    struct xz_dec_hash hash;
243018993e2f3af291eea7a175547862d057b7d83708Rob Landley  } block;
243118993e2f3af291eea7a175547862d057b7d83708Rob Landley
243218993e2f3af291eea7a175547862d057b7d83708Rob Landley  /* Variables needed when verifying the Index field */
243318993e2f3af291eea7a175547862d057b7d83708Rob Landley  struct {
243418993e2f3af291eea7a175547862d057b7d83708Rob Landley    /* Position in dec_index() */
243518993e2f3af291eea7a175547862d057b7d83708Rob Landley    enum {
243618993e2f3af291eea7a175547862d057b7d83708Rob Landley      SEQ_INDEX_COUNT,
243718993e2f3af291eea7a175547862d057b7d83708Rob Landley      SEQ_INDEX_UNPADDED,
243818993e2f3af291eea7a175547862d057b7d83708Rob Landley      SEQ_INDEX_UNCOMPRESSED
243918993e2f3af291eea7a175547862d057b7d83708Rob Landley    } sequence;
244018993e2f3af291eea7a175547862d057b7d83708Rob Landley
244118993e2f3af291eea7a175547862d057b7d83708Rob Landley    /* Size of the Index in bytes */
244218993e2f3af291eea7a175547862d057b7d83708Rob Landley    vli_type size;
244318993e2f3af291eea7a175547862d057b7d83708Rob Landley
244418993e2f3af291eea7a175547862d057b7d83708Rob Landley    /* Number of Records (matches block.count in valid files) */
244518993e2f3af291eea7a175547862d057b7d83708Rob Landley    vli_type count;
244618993e2f3af291eea7a175547862d057b7d83708Rob Landley
244718993e2f3af291eea7a175547862d057b7d83708Rob Landley    /*
244818993e2f3af291eea7a175547862d057b7d83708Rob Landley     * Hash calculated from the Records (matches block.hash in
244918993e2f3af291eea7a175547862d057b7d83708Rob Landley     * valid files).
245018993e2f3af291eea7a175547862d057b7d83708Rob Landley     */
245118993e2f3af291eea7a175547862d057b7d83708Rob Landley    struct xz_dec_hash hash;
245218993e2f3af291eea7a175547862d057b7d83708Rob Landley  } index;
245318993e2f3af291eea7a175547862d057b7d83708Rob Landley
245418993e2f3af291eea7a175547862d057b7d83708Rob Landley  /*
245518993e2f3af291eea7a175547862d057b7d83708Rob Landley   * Temporary buffer needed to hold Stream Header, Block Header,
245618993e2f3af291eea7a175547862d057b7d83708Rob Landley   * and Stream Footer. The Block Header is the biggest (1 KiB)
245718993e2f3af291eea7a175547862d057b7d83708Rob Landley   * so we reserve space according to that. buf[] has to be aligned
245818993e2f3af291eea7a175547862d057b7d83708Rob Landley   * to a multiple of four bytes; the size_t variables before it
245918993e2f3af291eea7a175547862d057b7d83708Rob Landley   * should guarantee this.
246018993e2f3af291eea7a175547862d057b7d83708Rob Landley   */
246118993e2f3af291eea7a175547862d057b7d83708Rob Landley  struct {
246218993e2f3af291eea7a175547862d057b7d83708Rob Landley    size_t pos;
246318993e2f3af291eea7a175547862d057b7d83708Rob Landley    size_t size;
246418993e2f3af291eea7a175547862d057b7d83708Rob Landley    uint8_t buf[1024];
246518993e2f3af291eea7a175547862d057b7d83708Rob Landley  } temp;
246618993e2f3af291eea7a175547862d057b7d83708Rob Landley
246718993e2f3af291eea7a175547862d057b7d83708Rob Landley  struct xz_dec_lzma2 *lzma2;
2468971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
2469971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley#ifdef XZ_DEC_BCJ
247018993e2f3af291eea7a175547862d057b7d83708Rob Landley  struct xz_dec_bcj *bcj;
247118993e2f3af291eea7a175547862d057b7d83708Rob Landley  int bcj_active;
2472971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley#endif
2473971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley};
2474971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
/* Size in bytes of the Check field, indexed by Check ID (0..15) */
static const uint8_t check_sizes[16] = {
  [0]  = 0,
  [1]  = 4,  [2]  = 4,  [3]  = 4,
  [4]  = 8,  [5]  = 8,  [6]  = 8,
  [7]  = 16, [8]  = 16, [9]  = 16,
  [10] = 32, [11] = 32, [12] = 32,
  [13] = 64, [14] = 64, [15] = 64
};
2484971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
2485971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley/*
2486971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * Fill s->temp by copying data starting from b->in[b->in_pos]. Caller
2487971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * must have set s->temp.pos to indicate how much data we are supposed
2488971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * to copy into s->temp.buf. Return true once s->temp.pos has reached
2489971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * s->temp.size.
2490971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley */
24911cabcc2e9a75fc8ba0c49e888907e3e310cfcd41Rob Landleystatic int fill_temp(struct xz_dec *s, struct xz_buf *b)
2492971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley{
249318993e2f3af291eea7a175547862d057b7d83708Rob Landley  size_t copy_size = min_t(size_t,
249418993e2f3af291eea7a175547862d057b7d83708Rob Landley      b->in_size - b->in_pos, s->temp.size - s->temp.pos);
2495971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
249618993e2f3af291eea7a175547862d057b7d83708Rob Landley  memcpy(s->temp.buf + s->temp.pos, b->in + b->in_pos, copy_size);
249718993e2f3af291eea7a175547862d057b7d83708Rob Landley  b->in_pos += copy_size;
249818993e2f3af291eea7a175547862d057b7d83708Rob Landley  s->temp.pos += copy_size;
2499971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
250018993e2f3af291eea7a175547862d057b7d83708Rob Landley  if (s->temp.pos == s->temp.size) {
250118993e2f3af291eea7a175547862d057b7d83708Rob Landley    s->temp.pos = 0;
250218993e2f3af291eea7a175547862d057b7d83708Rob Landley    return 1;
250318993e2f3af291eea7a175547862d057b7d83708Rob Landley  }
2504971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
250518993e2f3af291eea7a175547862d057b7d83708Rob Landley  return 0;
2506971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley}
2507971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
2508971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley/* Decode a variable-length integer (little-endian base-128 encoding) */
2509971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landleystatic enum xz_ret dec_vli(struct xz_dec *s, const uint8_t *in,
251018993e2f3af291eea7a175547862d057b7d83708Rob Landley         size_t *in_pos, size_t in_size)
2511971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley{
251218993e2f3af291eea7a175547862d057b7d83708Rob Landley  uint8_t byte;
2513971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
251418993e2f3af291eea7a175547862d057b7d83708Rob Landley  if (s->pos == 0)
251518993e2f3af291eea7a175547862d057b7d83708Rob Landley    s->vli = 0;
2516971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
251718993e2f3af291eea7a175547862d057b7d83708Rob Landley  while (*in_pos < in_size) {
251818993e2f3af291eea7a175547862d057b7d83708Rob Landley    byte = in[*in_pos];
251918993e2f3af291eea7a175547862d057b7d83708Rob Landley    ++*in_pos;
2520971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
252118993e2f3af291eea7a175547862d057b7d83708Rob Landley    s->vli |= (vli_type)(byte & 0x7F) << s->pos;
2522971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
252318993e2f3af291eea7a175547862d057b7d83708Rob Landley    if ((byte & 0x80) == 0) {
252418993e2f3af291eea7a175547862d057b7d83708Rob Landley      /* Don't allow non-minimal encodings. */
252518993e2f3af291eea7a175547862d057b7d83708Rob Landley      if (byte == 0 && s->pos != 0)
252618993e2f3af291eea7a175547862d057b7d83708Rob Landley        return XZ_DATA_ERROR;
2527971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
252818993e2f3af291eea7a175547862d057b7d83708Rob Landley      s->pos = 0;
252918993e2f3af291eea7a175547862d057b7d83708Rob Landley      return XZ_STREAM_END;
253018993e2f3af291eea7a175547862d057b7d83708Rob Landley    }
2531971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
253218993e2f3af291eea7a175547862d057b7d83708Rob Landley    s->pos += 7;
253318993e2f3af291eea7a175547862d057b7d83708Rob Landley    if (s->pos == 7 * VLI_BYTES_MAX)
253418993e2f3af291eea7a175547862d057b7d83708Rob Landley      return XZ_DATA_ERROR;
253518993e2f3af291eea7a175547862d057b7d83708Rob Landley  }
2536971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
253718993e2f3af291eea7a175547862d057b7d83708Rob Landley  return XZ_OK;
2538971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley}
2539971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
2540971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley/*
2541971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * Decode the Compressed Data field from a Block. Update and validate
2542971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * the observed compressed and uncompressed sizes of the Block so that
2543971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * they don't exceed the values possibly stored in the Block Header
2544971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * (validation assumes that no integer overflow occurs, since vli_type
2545971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * is normally uint64_t). Update the CRC32 or CRC64 value if presence of
2546971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * the CRC32 or CRC64 field was indicated in Stream Header.
2547971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley *
2548971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * Once the decoding is finished, validate that the observed sizes match
2549971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * the sizes possibly stored in the Block Header. Update the hash and
2550971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * Block count, which are later used to validate the Index field.
2551971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley */
2552971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landleystatic enum xz_ret dec_block(struct xz_dec *s, struct xz_buf *b)
2553971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley{
255418993e2f3af291eea7a175547862d057b7d83708Rob Landley  enum xz_ret ret;
2555971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
255618993e2f3af291eea7a175547862d057b7d83708Rob Landley  s->in_start = b->in_pos;
255718993e2f3af291eea7a175547862d057b7d83708Rob Landley  s->out_start = b->out_pos;
2558971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
2559971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley#ifdef XZ_DEC_BCJ
256018993e2f3af291eea7a175547862d057b7d83708Rob Landley  if (s->bcj_active)
256118993e2f3af291eea7a175547862d057b7d83708Rob Landley    ret = xz_dec_bcj_run(s->bcj, s->lzma2, b);
256218993e2f3af291eea7a175547862d057b7d83708Rob Landley  else
2563971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley#endif
256418993e2f3af291eea7a175547862d057b7d83708Rob Landley    ret = xz_dec_lzma2_run(s->lzma2, b);
2565971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
256618993e2f3af291eea7a175547862d057b7d83708Rob Landley  s->block.compressed += b->in_pos - s->in_start;
256718993e2f3af291eea7a175547862d057b7d83708Rob Landley  s->block.uncompressed += b->out_pos - s->out_start;
2568971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
256918993e2f3af291eea7a175547862d057b7d83708Rob Landley  /*
257018993e2f3af291eea7a175547862d057b7d83708Rob Landley   * There is no need to separately check for VLI_UNKNOWN, since
257118993e2f3af291eea7a175547862d057b7d83708Rob Landley   * the observed sizes are always smaller than VLI_UNKNOWN.
257218993e2f3af291eea7a175547862d057b7d83708Rob Landley   */
257318993e2f3af291eea7a175547862d057b7d83708Rob Landley  if (s->block.compressed > s->block_header.compressed
257418993e2f3af291eea7a175547862d057b7d83708Rob Landley      || s->block.uncompressed
257518993e2f3af291eea7a175547862d057b7d83708Rob Landley        > s->block_header.uncompressed)
257618993e2f3af291eea7a175547862d057b7d83708Rob Landley    return XZ_DATA_ERROR;
2577971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
257818993e2f3af291eea7a175547862d057b7d83708Rob Landley  if (s->check_type == XZ_CHECK_CRC32)
257918993e2f3af291eea7a175547862d057b7d83708Rob Landley    s->crc = xz_crc32(b->out + s->out_start,
258018993e2f3af291eea7a175547862d057b7d83708Rob Landley        b->out_pos - s->out_start, s->crc);
258118993e2f3af291eea7a175547862d057b7d83708Rob Landley  else if (s->check_type == XZ_CHECK_CRC64)
2582c810f9f80b9db62de09b6cf4c6ca770eed72ce53Isaac Dunham    s->crc = ~(s->crc);
2583c810f9f80b9db62de09b6cf4c6ca770eed72ce53Isaac Dunham    size_t size = b->out_pos - s->out_start;
2584c810f9f80b9db62de09b6cf4c6ca770eed72ce53Isaac Dunham    uint8_t *buf = b->out + s->out_start;
2585c810f9f80b9db62de09b6cf4c6ca770eed72ce53Isaac Dunham    while (size) {
2586c810f9f80b9db62de09b6cf4c6ca770eed72ce53Isaac Dunham      s->crc = xz_crc64_table[*buf++ ^ (s->crc & 0xFF)] ^ (s->crc >> 8);
2587c810f9f80b9db62de09b6cf4c6ca770eed72ce53Isaac Dunham      --size;
2588c810f9f80b9db62de09b6cf4c6ca770eed72ce53Isaac Dunham    }
2589c810f9f80b9db62de09b6cf4c6ca770eed72ce53Isaac Dunham    s->crc=~(s->crc);
2590971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
259118993e2f3af291eea7a175547862d057b7d83708Rob Landley  if (ret == XZ_STREAM_END) {
259218993e2f3af291eea7a175547862d057b7d83708Rob Landley    if (s->block_header.compressed != VLI_UNKNOWN
259318993e2f3af291eea7a175547862d057b7d83708Rob Landley        && s->block_header.compressed
259418993e2f3af291eea7a175547862d057b7d83708Rob Landley          != s->block.compressed)
259518993e2f3af291eea7a175547862d057b7d83708Rob Landley      return XZ_DATA_ERROR;
2596971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
259718993e2f3af291eea7a175547862d057b7d83708Rob Landley    if (s->block_header.uncompressed != VLI_UNKNOWN
259818993e2f3af291eea7a175547862d057b7d83708Rob Landley        && s->block_header.uncompressed
259918993e2f3af291eea7a175547862d057b7d83708Rob Landley          != s->block.uncompressed)
260018993e2f3af291eea7a175547862d057b7d83708Rob Landley      return XZ_DATA_ERROR;
2601971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
260218993e2f3af291eea7a175547862d057b7d83708Rob Landley    s->block.hash.unpadded += s->block_header.size
260318993e2f3af291eea7a175547862d057b7d83708Rob Landley        + s->block.compressed;
2604971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
260518993e2f3af291eea7a175547862d057b7d83708Rob Landley    s->block.hash.unpadded += check_sizes[s->check_type];
2606971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
260718993e2f3af291eea7a175547862d057b7d83708Rob Landley    s->block.hash.uncompressed += s->block.uncompressed;
260818993e2f3af291eea7a175547862d057b7d83708Rob Landley    s->block.hash.crc32 = xz_crc32(
260918993e2f3af291eea7a175547862d057b7d83708Rob Landley        (const uint8_t *)&s->block.hash,
261018993e2f3af291eea7a175547862d057b7d83708Rob Landley        sizeof(s->block.hash), s->block.hash.crc32);
2611971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
261218993e2f3af291eea7a175547862d057b7d83708Rob Landley    ++s->block.count;
261318993e2f3af291eea7a175547862d057b7d83708Rob Landley  }
2614971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
261518993e2f3af291eea7a175547862d057b7d83708Rob Landley  return ret;
2616971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley}
2617971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
2618971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley/* Update the Index size and the CRC32 value. */
2619971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landleystatic void index_update(struct xz_dec *s, const struct xz_buf *b)
2620971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley{
262118993e2f3af291eea7a175547862d057b7d83708Rob Landley  size_t in_used = b->in_pos - s->in_start;
262218993e2f3af291eea7a175547862d057b7d83708Rob Landley  s->index.size += in_used;
262318993e2f3af291eea7a175547862d057b7d83708Rob Landley  s->crc = xz_crc32(b->in + s->in_start, in_used, s->crc);
2624971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley}
2625971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
2626971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley/*
2627971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * Decode the Number of Records, Unpadded Size, and Uncompressed Size
2628971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * fields from the Index field. That is, Index Padding and CRC32 are not
2629971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * decoded by this function.
2630971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley *
2631971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * This can return XZ_OK (more input needed), XZ_STREAM_END (everything
2632971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * successfully decoded), or XZ_DATA_ERROR (input is corrupt).
2633971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley */
2634971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landleystatic enum xz_ret dec_index(struct xz_dec *s, struct xz_buf *b)
2635971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley{
263618993e2f3af291eea7a175547862d057b7d83708Rob Landley  enum xz_ret ret;
263718993e2f3af291eea7a175547862d057b7d83708Rob Landley
263818993e2f3af291eea7a175547862d057b7d83708Rob Landley  do {
263918993e2f3af291eea7a175547862d057b7d83708Rob Landley    ret = dec_vli(s, b->in, &b->in_pos, b->in_size);
264018993e2f3af291eea7a175547862d057b7d83708Rob Landley    if (ret != XZ_STREAM_END) {
264118993e2f3af291eea7a175547862d057b7d83708Rob Landley      index_update(s, b);
264218993e2f3af291eea7a175547862d057b7d83708Rob Landley      return ret;
264318993e2f3af291eea7a175547862d057b7d83708Rob Landley    }
264418993e2f3af291eea7a175547862d057b7d83708Rob Landley
264518993e2f3af291eea7a175547862d057b7d83708Rob Landley    switch (s->index.sequence) {
264618993e2f3af291eea7a175547862d057b7d83708Rob Landley    case SEQ_INDEX_COUNT:
264718993e2f3af291eea7a175547862d057b7d83708Rob Landley      s->index.count = s->vli;
264818993e2f3af291eea7a175547862d057b7d83708Rob Landley
264918993e2f3af291eea7a175547862d057b7d83708Rob Landley      /*
265018993e2f3af291eea7a175547862d057b7d83708Rob Landley       * Validate that the Number of Records field
265118993e2f3af291eea7a175547862d057b7d83708Rob Landley       * indicates the same number of Records as
265218993e2f3af291eea7a175547862d057b7d83708Rob Landley       * there were Blocks in the Stream.
265318993e2f3af291eea7a175547862d057b7d83708Rob Landley       */
265418993e2f3af291eea7a175547862d057b7d83708Rob Landley      if (s->index.count != s->block.count)
265518993e2f3af291eea7a175547862d057b7d83708Rob Landley        return XZ_DATA_ERROR;
265618993e2f3af291eea7a175547862d057b7d83708Rob Landley
265718993e2f3af291eea7a175547862d057b7d83708Rob Landley      s->index.sequence = SEQ_INDEX_UNPADDED;
265818993e2f3af291eea7a175547862d057b7d83708Rob Landley      break;
265918993e2f3af291eea7a175547862d057b7d83708Rob Landley
266018993e2f3af291eea7a175547862d057b7d83708Rob Landley    case SEQ_INDEX_UNPADDED:
266118993e2f3af291eea7a175547862d057b7d83708Rob Landley      s->index.hash.unpadded += s->vli;
266218993e2f3af291eea7a175547862d057b7d83708Rob Landley      s->index.sequence = SEQ_INDEX_UNCOMPRESSED;
266318993e2f3af291eea7a175547862d057b7d83708Rob Landley      break;
266418993e2f3af291eea7a175547862d057b7d83708Rob Landley
266518993e2f3af291eea7a175547862d057b7d83708Rob Landley    case SEQ_INDEX_UNCOMPRESSED:
266618993e2f3af291eea7a175547862d057b7d83708Rob Landley      s->index.hash.uncompressed += s->vli;
266718993e2f3af291eea7a175547862d057b7d83708Rob Landley      s->index.hash.crc32 = xz_crc32(
266818993e2f3af291eea7a175547862d057b7d83708Rob Landley          (const uint8_t *)&s->index.hash,
266918993e2f3af291eea7a175547862d057b7d83708Rob Landley          sizeof(s->index.hash),
267018993e2f3af291eea7a175547862d057b7d83708Rob Landley          s->index.hash.crc32);
267118993e2f3af291eea7a175547862d057b7d83708Rob Landley      --s->index.count;
267218993e2f3af291eea7a175547862d057b7d83708Rob Landley      s->index.sequence = SEQ_INDEX_UNPADDED;
267318993e2f3af291eea7a175547862d057b7d83708Rob Landley      break;
267418993e2f3af291eea7a175547862d057b7d83708Rob Landley    }
267518993e2f3af291eea7a175547862d057b7d83708Rob Landley  } while (s->index.count > 0);
267618993e2f3af291eea7a175547862d057b7d83708Rob Landley
267718993e2f3af291eea7a175547862d057b7d83708Rob Landley  return XZ_STREAM_END;
2678971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley}
2679971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
2680971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley/*
2681971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * Validate that the next four or eight input bytes match the value
2682971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * of s->crc. s->pos must be zero when starting to validate the first byte.
2683971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * The "bits" argument allows using the same code for both CRC32 and CRC64.
2684971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley */
2685971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landleystatic enum xz_ret crc_validate(struct xz_dec *s, struct xz_buf *b,
268618993e2f3af291eea7a175547862d057b7d83708Rob Landley        uint32_t bits)
2687971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley{
268818993e2f3af291eea7a175547862d057b7d83708Rob Landley  do {
268918993e2f3af291eea7a175547862d057b7d83708Rob Landley    if (b->in_pos == b->in_size)
269018993e2f3af291eea7a175547862d057b7d83708Rob Landley      return XZ_OK;
2691971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
269218993e2f3af291eea7a175547862d057b7d83708Rob Landley    if (((s->crc >> s->pos) & 0xFF) != b->in[b->in_pos++])
269318993e2f3af291eea7a175547862d057b7d83708Rob Landley      return XZ_DATA_ERROR;
2694971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
269518993e2f3af291eea7a175547862d057b7d83708Rob Landley    s->pos += 8;
2696971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
269718993e2f3af291eea7a175547862d057b7d83708Rob Landley  } while (s->pos < bits);
2698971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
269918993e2f3af291eea7a175547862d057b7d83708Rob Landley  s->crc = 0;
270018993e2f3af291eea7a175547862d057b7d83708Rob Landley  s->pos = 0;
2701971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
270218993e2f3af291eea7a175547862d057b7d83708Rob Landley  return XZ_STREAM_END;
2703971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley}
2704971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
2705971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley/*
2706971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * Skip over the Check field when the Check ID is not supported.
2707971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * Returns true once the whole Check field has been skipped over.
2708971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley */
27091cabcc2e9a75fc8ba0c49e888907e3e310cfcd41Rob Landleystatic int check_skip(struct xz_dec *s, struct xz_buf *b)
2710971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley{
271118993e2f3af291eea7a175547862d057b7d83708Rob Landley  while (s->pos < check_sizes[s->check_type]) {
271218993e2f3af291eea7a175547862d057b7d83708Rob Landley    if (b->in_pos == b->in_size) return 0;
2713971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
271418993e2f3af291eea7a175547862d057b7d83708Rob Landley    ++b->in_pos;
271518993e2f3af291eea7a175547862d057b7d83708Rob Landley    ++s->pos;
271618993e2f3af291eea7a175547862d057b7d83708Rob Landley  }
2717971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
271818993e2f3af291eea7a175547862d057b7d83708Rob Landley  s->pos = 0;
2719971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
272018993e2f3af291eea7a175547862d057b7d83708Rob Landley  return 1;
2721971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley}
2722971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
2723971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley/* Decode the Stream Header field (the first 12 bytes of the .xz Stream). */
2724971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landleystatic enum xz_ret dec_stream_header(struct xz_dec *s)
2725971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley{
272618993e2f3af291eea7a175547862d057b7d83708Rob Landley  if (!memeq(s->temp.buf, HEADER_MAGIC, HEADER_MAGIC_SIZE))
272718993e2f3af291eea7a175547862d057b7d83708Rob Landley    return XZ_FORMAT_ERROR;
2728971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
272918993e2f3af291eea7a175547862d057b7d83708Rob Landley  if (xz_crc32(s->temp.buf + HEADER_MAGIC_SIZE, 2, 0)
273018993e2f3af291eea7a175547862d057b7d83708Rob Landley      != get_le32(s->temp.buf + HEADER_MAGIC_SIZE + 2))
273118993e2f3af291eea7a175547862d057b7d83708Rob Landley    return XZ_DATA_ERROR;
2732971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
273318993e2f3af291eea7a175547862d057b7d83708Rob Landley  if (s->temp.buf[HEADER_MAGIC_SIZE] != 0)
273418993e2f3af291eea7a175547862d057b7d83708Rob Landley    return XZ_OPTIONS_ERROR;
2735971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
273618993e2f3af291eea7a175547862d057b7d83708Rob Landley  /*
273718993e2f3af291eea7a175547862d057b7d83708Rob Landley   * Of integrity checks, we support none (Check ID = 0),
273818993e2f3af291eea7a175547862d057b7d83708Rob Landley   * CRC32 (Check ID = 1), and optionally CRC64 (Check ID = 4).
273918993e2f3af291eea7a175547862d057b7d83708Rob Landley   * However, if XZ_DEC_ANY_CHECK is defined, we will accept other
274018993e2f3af291eea7a175547862d057b7d83708Rob Landley   * check types too, but then the check won't be verified and
274118993e2f3af291eea7a175547862d057b7d83708Rob Landley   * a warning (XZ_UNSUPPORTED_CHECK) will be given.
274218993e2f3af291eea7a175547862d057b7d83708Rob Landley   */
274318993e2f3af291eea7a175547862d057b7d83708Rob Landley  s->check_type = s->temp.buf[HEADER_MAGIC_SIZE + 1];
2744971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
274518993e2f3af291eea7a175547862d057b7d83708Rob Landley  if (s->check_type > XZ_CHECK_MAX)
274618993e2f3af291eea7a175547862d057b7d83708Rob Landley    return XZ_OPTIONS_ERROR;
2747971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
274818993e2f3af291eea7a175547862d057b7d83708Rob Landley  if (s->check_type > XZ_CHECK_CRC32 && !IS_CRC64(s->check_type))
274918993e2f3af291eea7a175547862d057b7d83708Rob Landley    return XZ_UNSUPPORTED_CHECK;
2750971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
275118993e2f3af291eea7a175547862d057b7d83708Rob Landley  return XZ_OK;
2752971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley}
2753971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
2754971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley/* Decode the Stream Footer field (the last 12 bytes of the .xz Stream) */
2755971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landleystatic enum xz_ret dec_stream_footer(struct xz_dec *s)
2756971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley{
275718993e2f3af291eea7a175547862d057b7d83708Rob Landley  if (!memeq(s->temp.buf + 10, FOOTER_MAGIC, FOOTER_MAGIC_SIZE))
275818993e2f3af291eea7a175547862d057b7d83708Rob Landley    return XZ_DATA_ERROR;
2759971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
276018993e2f3af291eea7a175547862d057b7d83708Rob Landley  if (xz_crc32(s->temp.buf + 4, 6, 0) != get_le32(s->temp.buf))
276118993e2f3af291eea7a175547862d057b7d83708Rob Landley    return XZ_DATA_ERROR;
2762971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
276318993e2f3af291eea7a175547862d057b7d83708Rob Landley  /*
276418993e2f3af291eea7a175547862d057b7d83708Rob Landley   * Validate Backward Size. Note that we never added the size of the
276518993e2f3af291eea7a175547862d057b7d83708Rob Landley   * Index CRC32 field to s->index.size, thus we use s->index.size / 4
276618993e2f3af291eea7a175547862d057b7d83708Rob Landley   * instead of s->index.size / 4 - 1.
276718993e2f3af291eea7a175547862d057b7d83708Rob Landley   */
276818993e2f3af291eea7a175547862d057b7d83708Rob Landley  if ((s->index.size >> 2) != get_le32(s->temp.buf + 4))
276918993e2f3af291eea7a175547862d057b7d83708Rob Landley    return XZ_DATA_ERROR;
2770971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
277118993e2f3af291eea7a175547862d057b7d83708Rob Landley  if (s->temp.buf[8] != 0 || s->temp.buf[9] != s->check_type)
277218993e2f3af291eea7a175547862d057b7d83708Rob Landley    return XZ_DATA_ERROR;
2773971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
277418993e2f3af291eea7a175547862d057b7d83708Rob Landley  /*
277518993e2f3af291eea7a175547862d057b7d83708Rob Landley   * Use XZ_STREAM_END instead of XZ_OK to be more convenient
277618993e2f3af291eea7a175547862d057b7d83708Rob Landley   * for the caller.
277718993e2f3af291eea7a175547862d057b7d83708Rob Landley   */
277818993e2f3af291eea7a175547862d057b7d83708Rob Landley  return XZ_STREAM_END;
2779971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley}
2780971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
2781971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley/* Decode the Block Header and initialize the filter chain. */
2782971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landleystatic enum xz_ret dec_block_header(struct xz_dec *s)
2783971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley{
278418993e2f3af291eea7a175547862d057b7d83708Rob Landley  enum xz_ret ret;
2785971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
278618993e2f3af291eea7a175547862d057b7d83708Rob Landley  /*
278718993e2f3af291eea7a175547862d057b7d83708Rob Landley   * Validate the CRC32. We know that the temp buffer is at least
278818993e2f3af291eea7a175547862d057b7d83708Rob Landley   * eight bytes so this is safe.
278918993e2f3af291eea7a175547862d057b7d83708Rob Landley   */
279018993e2f3af291eea7a175547862d057b7d83708Rob Landley  s->temp.size -= 4;
279118993e2f3af291eea7a175547862d057b7d83708Rob Landley  if (xz_crc32(s->temp.buf, s->temp.size, 0)
279218993e2f3af291eea7a175547862d057b7d83708Rob Landley      != get_le32(s->temp.buf + s->temp.size))
279318993e2f3af291eea7a175547862d057b7d83708Rob Landley    return XZ_DATA_ERROR;
2794971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
279518993e2f3af291eea7a175547862d057b7d83708Rob Landley  s->temp.pos = 2;
2796971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
279718993e2f3af291eea7a175547862d057b7d83708Rob Landley  /*
279818993e2f3af291eea7a175547862d057b7d83708Rob Landley   * Catch unsupported Block Flags. We support only one or two filters
279918993e2f3af291eea7a175547862d057b7d83708Rob Landley   * in the chain, so we catch that with the same test.
280018993e2f3af291eea7a175547862d057b7d83708Rob Landley   */
2801971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley#ifdef XZ_DEC_BCJ
280218993e2f3af291eea7a175547862d057b7d83708Rob Landley  if (s->temp.buf[1] & 0x3E)
2803971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley#else
280418993e2f3af291eea7a175547862d057b7d83708Rob Landley  if (s->temp.buf[1] & 0x3F)
2805971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley#endif
280618993e2f3af291eea7a175547862d057b7d83708Rob Landley    return XZ_OPTIONS_ERROR;
280718993e2f3af291eea7a175547862d057b7d83708Rob Landley
280818993e2f3af291eea7a175547862d057b7d83708Rob Landley  /* Compressed Size */
280918993e2f3af291eea7a175547862d057b7d83708Rob Landley  if (s->temp.buf[1] & 0x40) {
281018993e2f3af291eea7a175547862d057b7d83708Rob Landley    if (dec_vli(s, s->temp.buf, &s->temp.pos, s->temp.size)
281118993e2f3af291eea7a175547862d057b7d83708Rob Landley          != XZ_STREAM_END)
281218993e2f3af291eea7a175547862d057b7d83708Rob Landley      return XZ_DATA_ERROR;
281318993e2f3af291eea7a175547862d057b7d83708Rob Landley
281418993e2f3af291eea7a175547862d057b7d83708Rob Landley    s->block_header.compressed = s->vli;
281518993e2f3af291eea7a175547862d057b7d83708Rob Landley  } else {
281618993e2f3af291eea7a175547862d057b7d83708Rob Landley    s->block_header.compressed = VLI_UNKNOWN;
281718993e2f3af291eea7a175547862d057b7d83708Rob Landley  }
281818993e2f3af291eea7a175547862d057b7d83708Rob Landley
281918993e2f3af291eea7a175547862d057b7d83708Rob Landley  /* Uncompressed Size */
282018993e2f3af291eea7a175547862d057b7d83708Rob Landley  if (s->temp.buf[1] & 0x80) {
282118993e2f3af291eea7a175547862d057b7d83708Rob Landley    if (dec_vli(s, s->temp.buf, &s->temp.pos, s->temp.size)
282218993e2f3af291eea7a175547862d057b7d83708Rob Landley        != XZ_STREAM_END)
282318993e2f3af291eea7a175547862d057b7d83708Rob Landley      return XZ_DATA_ERROR;
282418993e2f3af291eea7a175547862d057b7d83708Rob Landley
282518993e2f3af291eea7a175547862d057b7d83708Rob Landley    s->block_header.uncompressed = s->vli;
282618993e2f3af291eea7a175547862d057b7d83708Rob Landley  } else {
282718993e2f3af291eea7a175547862d057b7d83708Rob Landley    s->block_header.uncompressed = VLI_UNKNOWN;
282818993e2f3af291eea7a175547862d057b7d83708Rob Landley  }
2829971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
2830971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley#ifdef XZ_DEC_BCJ
283118993e2f3af291eea7a175547862d057b7d83708Rob Landley  /* If there are two filters, the first one must be a BCJ filter. */
283218993e2f3af291eea7a175547862d057b7d83708Rob Landley  s->bcj_active = s->temp.buf[1] & 0x01;
283318993e2f3af291eea7a175547862d057b7d83708Rob Landley  if (s->bcj_active) {
283418993e2f3af291eea7a175547862d057b7d83708Rob Landley    if (s->temp.size - s->temp.pos < 2)
283518993e2f3af291eea7a175547862d057b7d83708Rob Landley      return XZ_OPTIONS_ERROR;
283618993e2f3af291eea7a175547862d057b7d83708Rob Landley
283718993e2f3af291eea7a175547862d057b7d83708Rob Landley    ret = xz_dec_bcj_reset(s->bcj, s->temp.buf[s->temp.pos++]);
283818993e2f3af291eea7a175547862d057b7d83708Rob Landley    if (ret != XZ_OK)
283918993e2f3af291eea7a175547862d057b7d83708Rob Landley      return ret;
284018993e2f3af291eea7a175547862d057b7d83708Rob Landley
284118993e2f3af291eea7a175547862d057b7d83708Rob Landley    /*
284218993e2f3af291eea7a175547862d057b7d83708Rob Landley     * We don't support custom start offset,
284318993e2f3af291eea7a175547862d057b7d83708Rob Landley     * so Size of Properties must be zero.
284418993e2f3af291eea7a175547862d057b7d83708Rob Landley     */
284518993e2f3af291eea7a175547862d057b7d83708Rob Landley    if (s->temp.buf[s->temp.pos++] != 0x00)
284618993e2f3af291eea7a175547862d057b7d83708Rob Landley      return XZ_OPTIONS_ERROR;
284718993e2f3af291eea7a175547862d057b7d83708Rob Landley  }
2848971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley#endif
2849971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
285018993e2f3af291eea7a175547862d057b7d83708Rob Landley  /* Valid Filter Flags always take at least two bytes. */
285118993e2f3af291eea7a175547862d057b7d83708Rob Landley  if (s->temp.size - s->temp.pos < 2)
285218993e2f3af291eea7a175547862d057b7d83708Rob Landley    return XZ_DATA_ERROR;
2853971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
285418993e2f3af291eea7a175547862d057b7d83708Rob Landley  /* Filter ID = LZMA2 */
285518993e2f3af291eea7a175547862d057b7d83708Rob Landley  if (s->temp.buf[s->temp.pos++] != 0x21)
285618993e2f3af291eea7a175547862d057b7d83708Rob Landley    return XZ_OPTIONS_ERROR;
2857971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
285818993e2f3af291eea7a175547862d057b7d83708Rob Landley  /* Size of Properties = 1-byte Filter Properties */
285918993e2f3af291eea7a175547862d057b7d83708Rob Landley  if (s->temp.buf[s->temp.pos++] != 0x01)
286018993e2f3af291eea7a175547862d057b7d83708Rob Landley    return XZ_OPTIONS_ERROR;
2861971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
286218993e2f3af291eea7a175547862d057b7d83708Rob Landley  /* Filter Properties contains LZMA2 dictionary size. */
286318993e2f3af291eea7a175547862d057b7d83708Rob Landley  if (s->temp.size - s->temp.pos < 1)
286418993e2f3af291eea7a175547862d057b7d83708Rob Landley    return XZ_DATA_ERROR;
2865971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
286618993e2f3af291eea7a175547862d057b7d83708Rob Landley  ret = xz_dec_lzma2_reset(s->lzma2, s->temp.buf[s->temp.pos++]);
286718993e2f3af291eea7a175547862d057b7d83708Rob Landley  if (ret != XZ_OK)
286818993e2f3af291eea7a175547862d057b7d83708Rob Landley    return ret;
2869971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
287018993e2f3af291eea7a175547862d057b7d83708Rob Landley  /* The rest must be Header Padding. */
287118993e2f3af291eea7a175547862d057b7d83708Rob Landley  while (s->temp.pos < s->temp.size)
287218993e2f3af291eea7a175547862d057b7d83708Rob Landley    if (s->temp.buf[s->temp.pos++] != 0x00)
287318993e2f3af291eea7a175547862d057b7d83708Rob Landley      return XZ_OPTIONS_ERROR;
2874971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
287518993e2f3af291eea7a175547862d057b7d83708Rob Landley  s->temp.pos = 0;
287618993e2f3af291eea7a175547862d057b7d83708Rob Landley  s->block.compressed = 0;
287718993e2f3af291eea7a175547862d057b7d83708Rob Landley  s->block.uncompressed = 0;
2878971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
287918993e2f3af291eea7a175547862d057b7d83708Rob Landley  return XZ_OK;
2880971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley}
2881971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
2882971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landleystatic enum xz_ret dec_main(struct xz_dec *s, struct xz_buf *b)
2883971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley{
288418993e2f3af291eea7a175547862d057b7d83708Rob Landley  enum xz_ret ret;
288518993e2f3af291eea7a175547862d057b7d83708Rob Landley
288618993e2f3af291eea7a175547862d057b7d83708Rob Landley  /*
288718993e2f3af291eea7a175547862d057b7d83708Rob Landley   * Store the start position for the case when we are in the middle
288818993e2f3af291eea7a175547862d057b7d83708Rob Landley   * of the Index field.
288918993e2f3af291eea7a175547862d057b7d83708Rob Landley   */
289018993e2f3af291eea7a175547862d057b7d83708Rob Landley  s->in_start = b->in_pos;
289118993e2f3af291eea7a175547862d057b7d83708Rob Landley
289218993e2f3af291eea7a175547862d057b7d83708Rob Landley  for (;;) {
289318993e2f3af291eea7a175547862d057b7d83708Rob Landley    switch (s->sequence) {
289418993e2f3af291eea7a175547862d057b7d83708Rob Landley    case SEQ_STREAM_HEADER:
289518993e2f3af291eea7a175547862d057b7d83708Rob Landley      /*
289618993e2f3af291eea7a175547862d057b7d83708Rob Landley       * Stream Header is copied to s->temp, and then
289718993e2f3af291eea7a175547862d057b7d83708Rob Landley       * decoded from there. This way if the caller
289818993e2f3af291eea7a175547862d057b7d83708Rob Landley       * gives us only little input at a time, we can
289918993e2f3af291eea7a175547862d057b7d83708Rob Landley       * still keep the Stream Header decoding code
290018993e2f3af291eea7a175547862d057b7d83708Rob Landley       * simple. Similar approach is used in many places
290118993e2f3af291eea7a175547862d057b7d83708Rob Landley       * in this file.
290218993e2f3af291eea7a175547862d057b7d83708Rob Landley       */
290318993e2f3af291eea7a175547862d057b7d83708Rob Landley      if (!fill_temp(s, b))
290418993e2f3af291eea7a175547862d057b7d83708Rob Landley        return XZ_OK;
290518993e2f3af291eea7a175547862d057b7d83708Rob Landley
290618993e2f3af291eea7a175547862d057b7d83708Rob Landley      /*
290718993e2f3af291eea7a175547862d057b7d83708Rob Landley       * If dec_stream_header() returns
290818993e2f3af291eea7a175547862d057b7d83708Rob Landley       * XZ_UNSUPPORTED_CHECK, it is still possible
290918993e2f3af291eea7a175547862d057b7d83708Rob Landley       * to continue decoding if working in multi-call
291018993e2f3af291eea7a175547862d057b7d83708Rob Landley       * mode. Thus, update s->sequence before calling
291118993e2f3af291eea7a175547862d057b7d83708Rob Landley       * dec_stream_header().
291218993e2f3af291eea7a175547862d057b7d83708Rob Landley       */
291318993e2f3af291eea7a175547862d057b7d83708Rob Landley      s->sequence = SEQ_BLOCK_START;
291418993e2f3af291eea7a175547862d057b7d83708Rob Landley
291518993e2f3af291eea7a175547862d057b7d83708Rob Landley      ret = dec_stream_header(s);
291618993e2f3af291eea7a175547862d057b7d83708Rob Landley      if (ret != XZ_OK)
291718993e2f3af291eea7a175547862d057b7d83708Rob Landley        return ret;
291818993e2f3af291eea7a175547862d057b7d83708Rob Landley
291918993e2f3af291eea7a175547862d057b7d83708Rob Landley    case SEQ_BLOCK_START:
292018993e2f3af291eea7a175547862d057b7d83708Rob Landley      /* We need one byte of input to continue. */
292118993e2f3af291eea7a175547862d057b7d83708Rob Landley      if (b->in_pos == b->in_size)
292218993e2f3af291eea7a175547862d057b7d83708Rob Landley        return XZ_OK;
292318993e2f3af291eea7a175547862d057b7d83708Rob Landley
292418993e2f3af291eea7a175547862d057b7d83708Rob Landley      /* See if this is the beginning of the Index field. */
292518993e2f3af291eea7a175547862d057b7d83708Rob Landley      if (b->in[b->in_pos] == 0) {
292618993e2f3af291eea7a175547862d057b7d83708Rob Landley        s->in_start = b->in_pos++;
292718993e2f3af291eea7a175547862d057b7d83708Rob Landley        s->sequence = SEQ_INDEX;
292818993e2f3af291eea7a175547862d057b7d83708Rob Landley        break;
292918993e2f3af291eea7a175547862d057b7d83708Rob Landley      }
293018993e2f3af291eea7a175547862d057b7d83708Rob Landley
293118993e2f3af291eea7a175547862d057b7d83708Rob Landley      /*
293218993e2f3af291eea7a175547862d057b7d83708Rob Landley       * Calculate the size of the Block Header and
293318993e2f3af291eea7a175547862d057b7d83708Rob Landley       * prepare to decode it.
293418993e2f3af291eea7a175547862d057b7d83708Rob Landley       */
293518993e2f3af291eea7a175547862d057b7d83708Rob Landley      s->block_header.size
293618993e2f3af291eea7a175547862d057b7d83708Rob Landley        = ((uint32_t)b->in[b->in_pos] + 1) * 4;
293718993e2f3af291eea7a175547862d057b7d83708Rob Landley
293818993e2f3af291eea7a175547862d057b7d83708Rob Landley      s->temp.size = s->block_header.size;
293918993e2f3af291eea7a175547862d057b7d83708Rob Landley      s->temp.pos = 0;
294018993e2f3af291eea7a175547862d057b7d83708Rob Landley      s->sequence = SEQ_BLOCK_HEADER;
294118993e2f3af291eea7a175547862d057b7d83708Rob Landley
294218993e2f3af291eea7a175547862d057b7d83708Rob Landley    case SEQ_BLOCK_HEADER:
294318993e2f3af291eea7a175547862d057b7d83708Rob Landley      if (!fill_temp(s, b))
294418993e2f3af291eea7a175547862d057b7d83708Rob Landley        return XZ_OK;
294518993e2f3af291eea7a175547862d057b7d83708Rob Landley
294618993e2f3af291eea7a175547862d057b7d83708Rob Landley      ret = dec_block_header(s);
294718993e2f3af291eea7a175547862d057b7d83708Rob Landley      if (ret != XZ_OK)
294818993e2f3af291eea7a175547862d057b7d83708Rob Landley        return ret;
294918993e2f3af291eea7a175547862d057b7d83708Rob Landley
295018993e2f3af291eea7a175547862d057b7d83708Rob Landley      s->sequence = SEQ_BLOCK_UNCOMPRESS;
295118993e2f3af291eea7a175547862d057b7d83708Rob Landley
295218993e2f3af291eea7a175547862d057b7d83708Rob Landley    case SEQ_BLOCK_UNCOMPRESS:
295318993e2f3af291eea7a175547862d057b7d83708Rob Landley      ret = dec_block(s, b);
295418993e2f3af291eea7a175547862d057b7d83708Rob Landley      if (ret != XZ_STREAM_END)
295518993e2f3af291eea7a175547862d057b7d83708Rob Landley        return ret;
295618993e2f3af291eea7a175547862d057b7d83708Rob Landley
295718993e2f3af291eea7a175547862d057b7d83708Rob Landley      s->sequence = SEQ_BLOCK_PADDING;
295818993e2f3af291eea7a175547862d057b7d83708Rob Landley
295918993e2f3af291eea7a175547862d057b7d83708Rob Landley    case SEQ_BLOCK_PADDING:
296018993e2f3af291eea7a175547862d057b7d83708Rob Landley      /*
296118993e2f3af291eea7a175547862d057b7d83708Rob Landley       * Size of Compressed Data + Block Padding
296218993e2f3af291eea7a175547862d057b7d83708Rob Landley       * must be a multiple of four. We don't need
296318993e2f3af291eea7a175547862d057b7d83708Rob Landley       * s->block.compressed for anything else
296418993e2f3af291eea7a175547862d057b7d83708Rob Landley       * anymore, so we use it here to test the size
296518993e2f3af291eea7a175547862d057b7d83708Rob Landley       * of the Block Padding field.
296618993e2f3af291eea7a175547862d057b7d83708Rob Landley       */
296718993e2f3af291eea7a175547862d057b7d83708Rob Landley      while (s->block.compressed & 3) {
296818993e2f3af291eea7a175547862d057b7d83708Rob Landley        if (b->in_pos == b->in_size)
296918993e2f3af291eea7a175547862d057b7d83708Rob Landley          return XZ_OK;
297018993e2f3af291eea7a175547862d057b7d83708Rob Landley
297118993e2f3af291eea7a175547862d057b7d83708Rob Landley        if (b->in[b->in_pos++] != 0)
297218993e2f3af291eea7a175547862d057b7d83708Rob Landley          return XZ_DATA_ERROR;
297318993e2f3af291eea7a175547862d057b7d83708Rob Landley
297418993e2f3af291eea7a175547862d057b7d83708Rob Landley        ++s->block.compressed;
297518993e2f3af291eea7a175547862d057b7d83708Rob Landley      }
297618993e2f3af291eea7a175547862d057b7d83708Rob Landley
297718993e2f3af291eea7a175547862d057b7d83708Rob Landley      s->sequence = SEQ_BLOCK_CHECK;
297818993e2f3af291eea7a175547862d057b7d83708Rob Landley
297918993e2f3af291eea7a175547862d057b7d83708Rob Landley    case SEQ_BLOCK_CHECK:
298018993e2f3af291eea7a175547862d057b7d83708Rob Landley      if (s->check_type == XZ_CHECK_CRC32) {
298118993e2f3af291eea7a175547862d057b7d83708Rob Landley        ret = crc_validate(s, b, 32);
298218993e2f3af291eea7a175547862d057b7d83708Rob Landley        if (ret != XZ_STREAM_END)
298318993e2f3af291eea7a175547862d057b7d83708Rob Landley          return ret;
298418993e2f3af291eea7a175547862d057b7d83708Rob Landley      }
298518993e2f3af291eea7a175547862d057b7d83708Rob Landley      else if (IS_CRC64(s->check_type)) {
298618993e2f3af291eea7a175547862d057b7d83708Rob Landley        ret = crc_validate(s, b, 64);
298718993e2f3af291eea7a175547862d057b7d83708Rob Landley        if (ret != XZ_STREAM_END)
298818993e2f3af291eea7a175547862d057b7d83708Rob Landley          return ret;
298918993e2f3af291eea7a175547862d057b7d83708Rob Landley      }
299018993e2f3af291eea7a175547862d057b7d83708Rob Landley      else if (!check_skip(s, b)) {
299118993e2f3af291eea7a175547862d057b7d83708Rob Landley        return XZ_OK;
299218993e2f3af291eea7a175547862d057b7d83708Rob Landley      }
299318993e2f3af291eea7a175547862d057b7d83708Rob Landley
299418993e2f3af291eea7a175547862d057b7d83708Rob Landley      s->sequence = SEQ_BLOCK_START;
299518993e2f3af291eea7a175547862d057b7d83708Rob Landley      break;
299618993e2f3af291eea7a175547862d057b7d83708Rob Landley
299718993e2f3af291eea7a175547862d057b7d83708Rob Landley    case SEQ_INDEX:
299818993e2f3af291eea7a175547862d057b7d83708Rob Landley      ret = dec_index(s, b);
299918993e2f3af291eea7a175547862d057b7d83708Rob Landley      if (ret != XZ_STREAM_END)
300018993e2f3af291eea7a175547862d057b7d83708Rob Landley        return ret;
300118993e2f3af291eea7a175547862d057b7d83708Rob Landley
300218993e2f3af291eea7a175547862d057b7d83708Rob Landley      s->sequence = SEQ_INDEX_PADDING;
300318993e2f3af291eea7a175547862d057b7d83708Rob Landley
300418993e2f3af291eea7a175547862d057b7d83708Rob Landley    case SEQ_INDEX_PADDING:
300518993e2f3af291eea7a175547862d057b7d83708Rob Landley      while ((s->index.size + (b->in_pos - s->in_start))
300618993e2f3af291eea7a175547862d057b7d83708Rob Landley          & 3) {
300718993e2f3af291eea7a175547862d057b7d83708Rob Landley        if (b->in_pos == b->in_size) {
300818993e2f3af291eea7a175547862d057b7d83708Rob Landley          index_update(s, b);
300918993e2f3af291eea7a175547862d057b7d83708Rob Landley          return XZ_OK;
301018993e2f3af291eea7a175547862d057b7d83708Rob Landley        }
301118993e2f3af291eea7a175547862d057b7d83708Rob Landley
301218993e2f3af291eea7a175547862d057b7d83708Rob Landley        if (b->in[b->in_pos++] != 0)
301318993e2f3af291eea7a175547862d057b7d83708Rob Landley          return XZ_DATA_ERROR;
301418993e2f3af291eea7a175547862d057b7d83708Rob Landley      }
301518993e2f3af291eea7a175547862d057b7d83708Rob Landley
301618993e2f3af291eea7a175547862d057b7d83708Rob Landley      /* Finish the CRC32 value and Index size. */
301718993e2f3af291eea7a175547862d057b7d83708Rob Landley      index_update(s, b);
301818993e2f3af291eea7a175547862d057b7d83708Rob Landley
301918993e2f3af291eea7a175547862d057b7d83708Rob Landley      /* Compare the hashes to validate the Index field. */
302018993e2f3af291eea7a175547862d057b7d83708Rob Landley      if (!memeq(&s->block.hash, &s->index.hash,
302118993e2f3af291eea7a175547862d057b7d83708Rob Landley          sizeof(s->block.hash)))
302218993e2f3af291eea7a175547862d057b7d83708Rob Landley        return XZ_DATA_ERROR;
302318993e2f3af291eea7a175547862d057b7d83708Rob Landley
302418993e2f3af291eea7a175547862d057b7d83708Rob Landley      s->sequence = SEQ_INDEX_CRC32;
302518993e2f3af291eea7a175547862d057b7d83708Rob Landley
302618993e2f3af291eea7a175547862d057b7d83708Rob Landley    case SEQ_INDEX_CRC32:
302718993e2f3af291eea7a175547862d057b7d83708Rob Landley      ret = crc_validate(s, b, 32);
302818993e2f3af291eea7a175547862d057b7d83708Rob Landley      if (ret != XZ_STREAM_END)
302918993e2f3af291eea7a175547862d057b7d83708Rob Landley        return ret;
303018993e2f3af291eea7a175547862d057b7d83708Rob Landley
303118993e2f3af291eea7a175547862d057b7d83708Rob Landley      s->temp.size = STREAM_HEADER_SIZE;
303218993e2f3af291eea7a175547862d057b7d83708Rob Landley      s->sequence = SEQ_STREAM_FOOTER;
303318993e2f3af291eea7a175547862d057b7d83708Rob Landley
303418993e2f3af291eea7a175547862d057b7d83708Rob Landley    case SEQ_STREAM_FOOTER:
303518993e2f3af291eea7a175547862d057b7d83708Rob Landley      if (!fill_temp(s, b))
303618993e2f3af291eea7a175547862d057b7d83708Rob Landley        return XZ_OK;
303718993e2f3af291eea7a175547862d057b7d83708Rob Landley
303818993e2f3af291eea7a175547862d057b7d83708Rob Landley      return dec_stream_footer(s);
303918993e2f3af291eea7a175547862d057b7d83708Rob Landley    }
304018993e2f3af291eea7a175547862d057b7d83708Rob Landley  }
304118993e2f3af291eea7a175547862d057b7d83708Rob Landley
304218993e2f3af291eea7a175547862d057b7d83708Rob Landley  /* Never reached */
3043971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley}
3044971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
3045971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley/*
3046971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * xz_dec_run() is a wrapper for dec_main() to handle some special cases in
3047971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * multi-call and single-call decoding.
3048971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley *
3049971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * In multi-call mode, we must return XZ_BUF_ERROR when it seems clear that we
3050971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * are not going to make any progress anymore. This is to prevent the caller
3051971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * from calling us infinitely when the input file is truncated or otherwise
3052971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * corrupt. Since zlib-style API allows that the caller fills the input buffer
3053971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * only when the decoder doesn't produce any new output, we have to be careful
3054971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * to avoid returning XZ_BUF_ERROR too easily: XZ_BUF_ERROR is returned only
3055971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * after the second consecutive call to xz_dec_run() that makes no progress.
3056971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley *
3057971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * In single-call mode, if we couldn't decode everything and no error
3058971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * occurred, either the input is truncated or the output buffer is too small.
3059971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * Since we know that the last input byte never produces any output, we know
3060971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * that if all the input was consumed and decoding wasn't finished, the file
3061971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * must be corrupt. Otherwise the output buffer has to be too small or the
3062971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * file is corrupt in a way that decoding it produces too big output.
3063971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley *
3064971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * If single-call decoding fails, we reset b->in_pos and b->out_pos back to
3065971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * their original values. This is because with some filter chains there won't
3066971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * be any valid uncompressed data in the output buffer unless the decoding
3067971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * actually succeeds (that's the price to pay of using the output buffer as
3068971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley * the workspace).
3069971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley */
30709bd7a1696e7060e51ec0d0aaef477eaf85aaf216Rob Landleyenum xz_ret xz_dec_run(struct xz_dec *s, struct xz_buf *b)
3071971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley{
307218993e2f3af291eea7a175547862d057b7d83708Rob Landley  size_t in_start;
307318993e2f3af291eea7a175547862d057b7d83708Rob Landley  size_t out_start;
307418993e2f3af291eea7a175547862d057b7d83708Rob Landley  enum xz_ret ret;
3075971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
307618993e2f3af291eea7a175547862d057b7d83708Rob Landley  in_start = b->in_pos;
307718993e2f3af291eea7a175547862d057b7d83708Rob Landley  out_start = b->out_pos;
307818993e2f3af291eea7a175547862d057b7d83708Rob Landley  ret = dec_main(s, b);
3079971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
30809641a3c6c5447c76f56cefd1844a4e545c015822Isaac Dunham  if (ret == XZ_OK && in_start == b->in_pos && out_start == b->out_pos) {
308118993e2f3af291eea7a175547862d057b7d83708Rob Landley    if (s->allow_buf_error)
308218993e2f3af291eea7a175547862d057b7d83708Rob Landley      ret = XZ_BUF_ERROR;
3083971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
308418993e2f3af291eea7a175547862d057b7d83708Rob Landley    s->allow_buf_error = 1;
308518993e2f3af291eea7a175547862d057b7d83708Rob Landley  } else {
308618993e2f3af291eea7a175547862d057b7d83708Rob Landley    s->allow_buf_error = 0;
308718993e2f3af291eea7a175547862d057b7d83708Rob Landley  }
3088971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
308918993e2f3af291eea7a175547862d057b7d83708Rob Landley  return ret;
3090971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley}
3091971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
30920c2509957b6291fb136eca4a5c278466045ce31dIsaac Dunhamstruct xz_dec *xz_dec_init(uint32_t dict_max)
3093971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley{
309418993e2f3af291eea7a175547862d057b7d83708Rob Landley  struct xz_dec *s = malloc(sizeof(*s));
30959641a3c6c5447c76f56cefd1844a4e545c015822Isaac Dunham  if (!s)
309618993e2f3af291eea7a175547862d057b7d83708Rob Landley    return NULL;
3097971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
3098971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley#ifdef XZ_DEC_BCJ
30999641a3c6c5447c76f56cefd1844a4e545c015822Isaac Dunham  s->bcj = malloc(sizeof(*s->bcj));
31009641a3c6c5447c76f56cefd1844a4e545c015822Isaac Dunham  if (!s->bcj)
310118993e2f3af291eea7a175547862d057b7d83708Rob Landley    goto error_bcj;
3102971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley#endif
3103971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
31040c2509957b6291fb136eca4a5c278466045ce31dIsaac Dunham  s->lzma2 = xz_dec_lzma2_create(dict_max);
310518993e2f3af291eea7a175547862d057b7d83708Rob Landley  if (s->lzma2 == NULL)
310618993e2f3af291eea7a175547862d057b7d83708Rob Landley    goto error_lzma2;
3107971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
310818993e2f3af291eea7a175547862d057b7d83708Rob Landley  xz_dec_reset(s);
310918993e2f3af291eea7a175547862d057b7d83708Rob Landley  return s;
3110971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
3111971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landleyerror_lzma2:
3112971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley#ifdef XZ_DEC_BCJ
311318993e2f3af291eea7a175547862d057b7d83708Rob Landley  free(s->bcj);
3114971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landleyerror_bcj:
3115971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley#endif
311618993e2f3af291eea7a175547862d057b7d83708Rob Landley  free(s);
311718993e2f3af291eea7a175547862d057b7d83708Rob Landley  return NULL;
3118971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley}
3119971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
31209bd7a1696e7060e51ec0d0aaef477eaf85aaf216Rob Landleyvoid xz_dec_reset(struct xz_dec *s)
3121971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley{
312218993e2f3af291eea7a175547862d057b7d83708Rob Landley  s->sequence = SEQ_STREAM_HEADER;
312318993e2f3af291eea7a175547862d057b7d83708Rob Landley  s->allow_buf_error = 0;
312418993e2f3af291eea7a175547862d057b7d83708Rob Landley  s->pos = 0;
312518993e2f3af291eea7a175547862d057b7d83708Rob Landley  s->crc = 0;
312618993e2f3af291eea7a175547862d057b7d83708Rob Landley  memset(&s->block, 0, sizeof(s->block));
312718993e2f3af291eea7a175547862d057b7d83708Rob Landley  memset(&s->index, 0, sizeof(s->index));
312818993e2f3af291eea7a175547862d057b7d83708Rob Landley  s->temp.pos = 0;
312918993e2f3af291eea7a175547862d057b7d83708Rob Landley  s->temp.size = STREAM_HEADER_SIZE;
3130971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley}
3131971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley
31329bd7a1696e7060e51ec0d0aaef477eaf85aaf216Rob Landleyvoid xz_dec_end(struct xz_dec *s)
3133971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley{
313418993e2f3af291eea7a175547862d057b7d83708Rob Landley  if (s != NULL) {
31359641a3c6c5447c76f56cefd1844a4e545c015822Isaac Dunham    free((s->lzma2)->dict.buf);
313618993e2f3af291eea7a175547862d057b7d83708Rob Landley    free(s->lzma2);
31374ffface11f7857683ddb1f935fb05809821458abIsaac Dunham
3138971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley#ifdef XZ_DEC_BCJ
313918993e2f3af291eea7a175547862d057b7d83708Rob Landley    free(s->bcj);
3140971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley#endif
314118993e2f3af291eea7a175547862d057b7d83708Rob Landley    free(s);
314218993e2f3af291eea7a175547862d057b7d83708Rob Landley  }
3143971d57ec4a9e14527e7582a5723d9634182d3fa7Rob Landley}
3144