1/*
2 * Simple XZ decoder command line tool
3 *
4 * Author: Lasse Collin <lasse.collin@tukaani.org>
5 *
6 * This file has been put into the public domain.
7 * You can do whatever you want with this file.
8 * Modified for toybox by Isaac Dunham
9USE_XZCAT(NEWTOY(xzcat, NULL, TOYFLAG_USR|TOYFLAG_BIN))
10
11config XZCAT
12  bool "xzcat"
13  default n
14  help
15    usage: xzcat [filename...]
16
17    Decompress listed files to stdout. Use stdin if no files listed.
18
19*/
20#define FOR_xzcat
21#include "toys.h"
22
23// BEGIN xz.h
24
25/**
26 * enum xz_ret - Return codes
27 * @XZ_OK:                  Everything is OK so far. More input or more
28 *                          output space is required to continue.
29 * @XZ_STREAM_END:          Operation finished successfully.
30 * @XZ_UNSUPPORTED_CHECK:   Integrity check type is not supported. Decoding
31 *                          is still possible in multi-call mode by simply
32 *                          calling xz_dec_run() again.
33 *                          Note that this return value is used only if
34 *                          XZ_DEC_ANY_CHECK was defined at build time,
35 *                          which is not used in the kernel. Unsupported
36 *                          check types return XZ_OPTIONS_ERROR if
37 *                          XZ_DEC_ANY_CHECK was not defined at build time.
38 * @XZ_MEM_ERROR:           Allocating memory failed. The amount of memory
39 *                          that was tried to be allocated was no more than the
40 *                          dict_max argument given to xz_dec_init().
41 * @XZ_MEMLIMIT_ERROR:      A bigger LZMA2 dictionary would be needed than
42 *                          allowed by the dict_max argument given to
43 *                          xz_dec_init().
44 * @XZ_FORMAT_ERROR:        File format was not recognized (wrong magic
45 *                          bytes).
46 * @XZ_OPTIONS_ERROR:       This implementation doesn't support the requested
47 *                          compression options. In the decoder this means
48 *                          that the header CRC32 matches, but the header
49 *                          itself specifies something that we don't support.
50 * @XZ_DATA_ERROR:          Compressed data is corrupt.
51 * @XZ_BUF_ERROR:           Cannot make any progress. Details are slightly
52 *                          different between multi-call and single-call
53 *                          mode; more information below.
54 *
55 * XZ_BUF_ERROR is returned when two consecutive calls to XZ code cannot
56 * consume any input and cannot produce any new output. This happens when
57 * there is no new input available, or the output buffer is full while at
58 * least one output byte is still pending. Assuming your code is not buggy,
59 * you can get this error only when decoding a compressed stream that is
60 * truncated or otherwise corrupt.
61 */
62enum xz_ret {
63  XZ_OK,
64  XZ_STREAM_END,
65  XZ_UNSUPPORTED_CHECK,
66  XZ_MEM_ERROR,
67  XZ_MEMLIMIT_ERROR,
68  XZ_FORMAT_ERROR,
69  XZ_OPTIONS_ERROR,
70  XZ_DATA_ERROR,
71  XZ_BUF_ERROR
72};
73
74/**
75 * struct xz_buf - Passing input and output buffers to XZ code
76 * @in:         Beginning of the input buffer. This may be NULL if and only
77 *              if in_pos is equal to in_size.
78 * @in_pos:     Current position in the input buffer. This must not exceed
79 *              in_size.
80 * @in_size:    Size of the input buffer
81 * @out:        Beginning of the output buffer. This may be NULL if and only
82 *              if out_pos is equal to out_size.
83 * @out_pos:    Current position in the output buffer. This must not exceed
84 *              out_size.
85 * @out_size:   Size of the output buffer
86 *
87 * Only the contents of the output buffer from out[out_pos] onward, and
88 * the variables in_pos and out_pos are modified by the XZ code.
89 */
90struct xz_buf {
91  const uint8_t *in;
92  size_t in_pos;
93  size_t in_size;
94
95  uint8_t *out;
96  size_t out_pos;
97  size_t out_size;
98};
99
100/**
101 * struct xz_dec - Opaque type to hold the XZ decoder state
102 */
103struct xz_dec;
104
105/**
106 * xz_dec_init() - Allocate and initialize a XZ decoder state
107 * @mode:       Operation mode
108 * @dict_max:   Maximum size of the LZMA2 dictionary (history buffer) for
109 *              multi-call decoding. LZMA2 dictionary is always 2^n bytes
110 *              or 2^n + 2^(n-1) bytes (the latter sizes are less common
111 *              in practice), so other values for dict_max don't make sense.
112 *              In the kernel, dictionary sizes of 64 KiB, 128 KiB, 256 KiB,
113 *              512 KiB, and 1 MiB are probably the only reasonable values,
114 *              except for kernel and initramfs images where a bigger
115 *              dictionary can be fine and useful.
116 *
117 * dict_max specifies the maximum allowed dictionary size that xz_dec_run()
118 * may allocate once it has parsed the dictionary size from the stream
119 * headers. This way excessive allocations can be avoided while still
120 * limiting the maximum memory usage to a sane value to prevent running the
121 * system out of memory when decompressing streams from untrusted sources.
122 *
123 * On success, xz_dec_init() returns a pointer to struct xz_dec, which is
124 * ready to be used with xz_dec_run(). If memory allocation fails,
125 * xz_dec_init() returns NULL.
126 */
127struct xz_dec *xz_dec_init(uint32_t dict_max);
128
129/**
130 * xz_dec_run() - Run the XZ decoder
131 * @s:          Decoder state allocated using xz_dec_init()
132 * @b:          Input and output buffers
133 *
134 * The possible return values depend on build options and operation mode.
135 * See enum xz_ret for details.
136 *
137 * Note that if an error occurs in single-call mode (return value is not
138 * XZ_STREAM_END), b->in_pos and b->out_pos are not modified and the
139 * contents of the output buffer from b->out[b->out_pos] onward are
140 * undefined. This is true even after XZ_BUF_ERROR, because with some filter
141 * chains, there may be a second pass over the output buffer, and this pass
142 * cannot be properly done if the output buffer is truncated. Thus, you
143 * cannot give the single-call decoder a too small buffer and then expect to
144 * get that amount valid data from the beginning of the stream. You must use
145 * the multi-call decoder if you don't want to uncompress the whole stream.
146 */
147enum xz_ret xz_dec_run(struct xz_dec *s, struct xz_buf *b);
148
149/**
150 * xz_dec_reset() - Reset an already allocated decoder state
151 * @s:          Decoder state allocated using xz_dec_init()
152 *
153 * This function can be used to reset the multi-call decoder state without
154 * freeing and reallocating memory with xz_dec_end() and xz_dec_init().
155 *
156 * In single-call mode, xz_dec_reset() is always called in the beginning of
157 * xz_dec_run(). Thus, explicit call to xz_dec_reset() is useful only in
158 * multi-call mode.
159 */
160void xz_dec_reset(struct xz_dec *s);
161
162/**
163 * xz_dec_end() - Free the memory allocated for the decoder state
164 * @s:          Decoder state allocated using xz_dec_init(). If s is NULL,
165 *              this function does nothing.
166 */
167void xz_dec_end(struct xz_dec *s);
168
169/*
170 * Update CRC32 value using the polynomial from IEEE-802.3. To start a new
171 * calculation, the third argument must be zero. To continue the calculation,
172 * the previously returned value is passed as the third argument.
173 */
174static uint32_t xz_crc32_table[256];
175
176uint32_t xz_crc32(const uint8_t *buf, size_t size, uint32_t crc)
177{
178  crc = ~crc;
179
180  while (size != 0) {
181    crc = xz_crc32_table[*buf++ ^ (crc & 0xFF)] ^ (crc >> 8);
182    --size;
183  }
184
185  return ~crc;
186}
187
188static uint64_t xz_crc64_table[256];
189
190
191// END xz.h
192
193static uint8_t in[BUFSIZ];
194static uint8_t out[BUFSIZ];
195
196void do_xzcat(int fd, char *name)
197{
198  struct xz_buf b;
199  struct xz_dec *s;
200  enum xz_ret ret;
201  const char *msg;
202
203  crc_init(xz_crc32_table, 1);
204  const uint64_t poly = 0xC96C5795D7870F42ULL;
205  uint32_t i;
206  uint32_t j;
207  uint64_t r;
208
209  /* initialize CRC64 table*/
210  for (i = 0; i < 256; ++i) {
211    r = i;
212    for (j = 0; j < 8; ++j)
213      r = (r >> 1) ^ (poly & ~((r & 1) - 1));
214
215    xz_crc64_table[i] = r;
216  }
217
218  /*
219   * Support up to 64 MiB dictionary. The actually needed memory
220   * is allocated once the headers have been parsed.
221   */
222  s = xz_dec_init(1 << 26);
223  if (s == NULL) {
224    msg = "Memory allocation failed\n";
225    goto error;
226  }
227
228  b.in = in;
229  b.in_pos = 0;
230  b.in_size = 0;
231  b.out = out;
232  b.out_pos = 0;
233  b.out_size = BUFSIZ;
234
235  for (;;) {
236    if (b.in_pos == b.in_size) {
237      b.in_size = read(fd, in, sizeof(in));
238      b.in_pos = 0;
239    }
240
241    ret = xz_dec_run(s, &b);
242
243    if (b.out_pos == sizeof(out)) {
244      if (fwrite(out, 1, b.out_pos, stdout) != b.out_pos) {
245        msg = "Write error\n";
246        goto error;
247      }
248
249      b.out_pos = 0;
250    }
251
252    if (ret == XZ_OK)
253      continue;
254
255    if (ret == XZ_UNSUPPORTED_CHECK)
256      continue;
257
258    if (fwrite(out, 1, b.out_pos, stdout) != b.out_pos) {
259      msg = "Write error\n";
260      goto error;
261    }
262
263    switch (ret) {
264    case XZ_STREAM_END:
265      xz_dec_end(s);
266      return;
267
268    case XZ_MEM_ERROR:
269      msg = "Memory allocation failed\n";
270      goto error;
271
272    case XZ_MEMLIMIT_ERROR:
273      msg = "Memory usage limit reached\n";
274      goto error;
275
276    case XZ_FORMAT_ERROR:
277      msg = "Not a .xz file\n";
278      goto error;
279
280    case XZ_OPTIONS_ERROR:
281      msg = "Unsupported options in the .xz headers\n";
282      goto error;
283
284    case XZ_DATA_ERROR:
285    case XZ_BUF_ERROR:
286      msg = "File is corrupt\n";
287      goto error;
288
289    default:
290      msg = "Bug!\n";
291      goto error;
292    }
293  }
294
295error:
296  xz_dec_end(s);
297  error_exit("%s", msg);
298}
299
300void xzcat_main(void)
301{
302  loopfiles(toys.optargs, do_xzcat);
303}
304
305// BEGIN xz_private.h
306
307
308/* Uncomment as needed to enable BCJ filter decoders.
309 * These cost about 2.5 k when all are enabled; SPARC and IA64 make 0.7 k
310 * */
311
312#define XZ_DEC_X86
313#define XZ_DEC_POWERPC
314#define XZ_DEC_IA64
315#define XZ_DEC_ARM
316#define XZ_DEC_ARMTHUMB
317#define XZ_DEC_SPARC
318
319
320#define memeq(a, b, size) (memcmp(a, b, size) == 0)
321
322#ifndef min
323#	define min(x, y) ((x) < (y) ? (x) : (y))
324#endif
325#define min_t(type, x, y) min(x, y)
326
327
328/* Inline functions to access unaligned unsigned 32-bit integers */
329#ifndef get_unaligned_le32
330static inline uint32_t get_unaligned_le32(const uint8_t *buf)
331{
332  return (uint32_t)buf[0]
333      | ((uint32_t)buf[1] << 8)
334      | ((uint32_t)buf[2] << 16)
335      | ((uint32_t)buf[3] << 24);
336}
337#endif
338
339#ifndef get_unaligned_be32
340static inline uint32_t get_unaligned_be32(const uint8_t *buf)
341{
342  return (uint32_t)(buf[0] << 24)
343      | ((uint32_t)buf[1] << 16)
344      | ((uint32_t)buf[2] << 8)
345      | (uint32_t)buf[3];
346}
347#endif
348
349#ifndef put_unaligned_le32
350static inline void put_unaligned_le32(uint32_t val, uint8_t *buf)
351{
352  buf[0] = (uint8_t)val;
353  buf[1] = (uint8_t)(val >> 8);
354  buf[2] = (uint8_t)(val >> 16);
355  buf[3] = (uint8_t)(val >> 24);
356}
357#endif
358
359#ifndef put_unaligned_be32
360static inline void put_unaligned_be32(uint32_t val, uint8_t *buf)
361{
362  buf[0] = (uint8_t)(val >> 24);
363  buf[1] = (uint8_t)(val >> 16);
364  buf[2] = (uint8_t)(val >> 8);
365  buf[3] = (uint8_t)val;
366}
367#endif
368
369/*
370 * Use get_unaligned_le32() also for aligned access for simplicity. On
371 * little endian systems, #define get_le32(ptr) (*(const uint32_t *)(ptr))
372 * could save a few bytes in code size.
373 */
374#ifndef get_le32
375#	define get_le32 get_unaligned_le32
376#endif
377
378/*
379 * If any of the BCJ filter decoders are wanted, define XZ_DEC_BCJ.
380 * XZ_DEC_BCJ is used to enable generic support for BCJ decoders.
381 */
382#ifndef XZ_DEC_BCJ
383#	if defined(XZ_DEC_X86) || defined(XZ_DEC_POWERPC) \
384      || defined(XZ_DEC_IA64) || defined(XZ_DEC_ARM) \
385      || defined(XZ_DEC_ARM) || defined(XZ_DEC_ARMTHUMB) \
386      || defined(XZ_DEC_SPARC)
387#		define XZ_DEC_BCJ
388#	endif
389#endif
390
391/*
392 * Allocate memory for LZMA2 decoder. xz_dec_lzma2_reset() must be used
393 * before calling xz_dec_lzma2_run().
394 */
395struct xz_dec_lzma2 *xz_dec_lzma2_create(uint32_t dict_max);
396
397/*
398 * Decode the LZMA2 properties (one byte) and reset the decoder. Return
399 * XZ_OK on success, XZ_MEMLIMIT_ERROR if the preallocated dictionary is not
400 * big enough, and XZ_OPTIONS_ERROR if props indicates something that this
401 * decoder doesn't support.
402 */
403enum xz_ret xz_dec_lzma2_reset(struct xz_dec_lzma2 *s,
404           uint8_t props);
405
406/* Decode raw LZMA2 stream from b->in to b->out. */
407enum xz_ret xz_dec_lzma2_run(struct xz_dec_lzma2 *s,
408               struct xz_buf *b);
409
410// END "xz_private.h"
411
412
413
414
415/*
416 * Branch/Call/Jump (BCJ) filter decoders
417 * The rest of the code is inside this ifdef. It makes things a little more
418 * convenient when building without support for any BCJ filters.
419 */
420#ifdef XZ_DEC_BCJ
421
422struct xz_dec_bcj {
423  /* Type of the BCJ filter being used */
424  enum {
425    BCJ_X86 = 4,        /* x86 or x86-64 */
426    BCJ_POWERPC = 5,    /* Big endian only */
427    BCJ_IA64 = 6,       /* Big or little endian */
428    BCJ_ARM = 7,        /* Little endian only */
429    BCJ_ARMTHUMB = 8,   /* Little endian only */
430    BCJ_SPARC = 9       /* Big or little endian */
431  } type;
432
433  /*
434   * Return value of the next filter in the chain. We need to preserve
435   * this information across calls, because we must not call the next
436   * filter anymore once it has returned XZ_STREAM_END.
437   */
438  enum xz_ret ret;
439
440  /*
441   * Absolute position relative to the beginning of the uncompressed
442   * data (in a single .xz Block). We care only about the lowest 32
443   * bits so this doesn't need to be uint64_t even with big files.
444   */
445  uint32_t pos;
446
447  /* x86 filter state */
448  uint32_t x86_prev_mask;
449
450  /* Temporary space to hold the variables from struct xz_buf */
451  uint8_t *out;
452  size_t out_pos;
453  size_t out_size;
454
455  struct {
456    /* Amount of already filtered data in the beginning of buf */
457    size_t filtered;
458
459    /* Total amount of data currently stored in buf  */
460    size_t size;
461
462    /*
463     * Buffer to hold a mix of filtered and unfiltered data. This
464     * needs to be big enough to hold Alignment + 2 * Look-ahead:
465     *
466     * Type         Alignment   Look-ahead
467     * x86              1           4
468     * PowerPC          4           0
469     * IA-64           16           0
470     * ARM              4           0
471     * ARM-Thumb        2           2
472     * SPARC            4           0
473     */
474    uint8_t buf[16];
475  } temp;
476};
477
478/*
479 * Decode the Filter ID of a BCJ filter. This implementation doesn't
480 * support custom start offsets, so no decoding of Filter Properties
481 * is needed. Returns XZ_OK if the given Filter ID is supported.
482 * Otherwise XZ_OPTIONS_ERROR is returned.
483 */
484enum xz_ret xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id);
485
486/*
487 * Decode raw BCJ + LZMA2 stream. This must be used only if there actually is
488 * a BCJ filter in the chain. If the chain has only LZMA2, xz_dec_lzma2_run()
489 * must be called directly.
490 */
491enum xz_ret xz_dec_bcj_run(struct xz_dec_bcj *s,
492             struct xz_dec_lzma2 *lzma2,
493             struct xz_buf *b);
494
495#ifdef XZ_DEC_X86
496/*
497 * This is used to test the most significant byte of a memory address
498 * in an x86 instruction.
499 */
500static inline int bcj_x86_test_msbyte(uint8_t b)
501{
502  return b == 0x00 || b == 0xFF;
503}
504
505static size_t bcj_x86(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
506{
507  static const int mask_to_allowed_status[8]
508    = { 1,1,1,0,1,0,0,0 };
509
510  static const uint8_t mask_to_bit_num[8] = { 0, 1, 2, 2, 3, 3, 3, 3 };
511
512  size_t i;
513  size_t prev_pos = (size_t)-1;
514  uint32_t prev_mask = s->x86_prev_mask;
515  uint32_t src;
516  uint32_t dest;
517  uint32_t j;
518  uint8_t b;
519
520  if (size <= 4)
521    return 0;
522
523  size -= 4;
524  for (i = 0; i < size; ++i) {
525    if ((buf[i] & 0xFE) != 0xE8)
526      continue;
527
528    prev_pos = i - prev_pos;
529    if (prev_pos > 3) {
530      prev_mask = 0;
531    } else {
532      prev_mask = (prev_mask << (prev_pos - 1)) & 7;
533      if (prev_mask != 0) {
534        b = buf[i + 4 - mask_to_bit_num[prev_mask]];
535        if (!mask_to_allowed_status[prev_mask]
536            || bcj_x86_test_msbyte(b)) {
537          prev_pos = i;
538          prev_mask = (prev_mask << 1) | 1;
539          continue;
540        }
541      }
542    }
543
544    prev_pos = i;
545
546    if (bcj_x86_test_msbyte(buf[i + 4])) {
547      src = get_unaligned_le32(buf + i + 1);
548      for (;;) {
549        dest = src - (s->pos + (uint32_t)i + 5);
550        if (prev_mask == 0)
551          break;
552
553        j = mask_to_bit_num[prev_mask] * 8;
554        b = (uint8_t)(dest >> (24 - j));
555        if (!bcj_x86_test_msbyte(b))
556          break;
557
558        src = dest ^ (((uint32_t)1 << (32 - j)) - 1);
559      }
560
561      dest &= 0x01FFFFFF;
562      dest |= (uint32_t)0 - (dest & 0x01000000);
563      put_unaligned_le32(dest, buf + i + 1);
564      i += 4;
565    } else {
566      prev_mask = (prev_mask << 1) | 1;
567    }
568  }
569
570  prev_pos = i - prev_pos;
571  s->x86_prev_mask = prev_pos > 3 ? 0 : prev_mask << (prev_pos - 1);
572  return i;
573}
574#endif
575
576#ifdef XZ_DEC_POWERPC
577static size_t bcj_powerpc(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
578{
579  size_t i;
580  uint32_t instr;
581
582  for (i = 0; i + 4 <= size; i += 4) {
583    instr = get_unaligned_be32(buf + i);
584    if ((instr & 0xFC000003) == 0x48000001) {
585      instr &= 0x03FFFFFC;
586      instr -= s->pos + (uint32_t)i;
587      instr &= 0x03FFFFFC;
588      instr |= 0x48000001;
589      put_unaligned_be32(instr, buf + i);
590    }
591  }
592
593  return i;
594}
595#endif
596
597#ifdef XZ_DEC_IA64
598static size_t bcj_ia64(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
599{
600  static const uint8_t branch_table[32] = {
601    0, 0, 0, 0, 0, 0, 0, 0,
602    0, 0, 0, 0, 0, 0, 0, 0,
603    4, 4, 6, 6, 0, 0, 7, 7,
604    4, 4, 0, 0, 4, 4, 0, 0
605  };
606
607  /*
608   * The local variables take a little bit stack space, but it's less
609   * than what LZMA2 decoder takes, so it doesn't make sense to reduce
610   * stack usage here without doing that for the LZMA2 decoder too.
611   */
612
613  /* Loop counters */
614  size_t i;
615  size_t j;
616
617  /* Instruction slot (0, 1, or 2) in the 128-bit instruction word */
618  uint32_t slot;
619
620  /* Bitwise offset of the instruction indicated by slot */
621  uint32_t bit_pos;
622
623  /* bit_pos split into byte and bit parts */
624  uint32_t byte_pos;
625  uint32_t bit_res;
626
627  /* Address part of an instruction */
628  uint32_t addr;
629
630  /* Mask used to detect which instructions to convert */
631  uint32_t mask;
632
633  /* 41-bit instruction stored somewhere in the lowest 48 bits */
634  uint64_t instr;
635
636  /* Instruction normalized with bit_res for easier manipulation */
637  uint64_t norm;
638
639  for (i = 0; i + 16 <= size; i += 16) {
640    mask = branch_table[buf[i] & 0x1F];
641    for (slot = 0, bit_pos = 5; slot < 3; ++slot, bit_pos += 41) {
642      if (((mask >> slot) & 1) == 0)
643        continue;
644
645      byte_pos = bit_pos >> 3;
646      bit_res = bit_pos & 7;
647      instr = 0;
648      for (j = 0; j < 6; ++j)
649        instr |= (uint64_t)(buf[i + j + byte_pos])
650            << (8 * j);
651
652      norm = instr >> bit_res;
653
654      if (((norm >> 37) & 0x0F) == 0x05
655          && ((norm >> 9) & 0x07) == 0) {
656        addr = (norm >> 13) & 0x0FFFFF;
657        addr |= ((uint32_t)(norm >> 36) & 1) << 20;
658        addr <<= 4;
659        addr -= s->pos + (uint32_t)i;
660        addr >>= 4;
661
662        norm &= ~((uint64_t)0x8FFFFF << 13);
663        norm |= (uint64_t)(addr & 0x0FFFFF) << 13;
664        norm |= (uint64_t)(addr & 0x100000)
665            << (36 - 20);
666
667        instr &= (1 << bit_res) - 1;
668        instr |= norm << bit_res;
669
670        for (j = 0; j < 6; j++)
671          buf[i + j + byte_pos]
672            = (uint8_t)(instr >> (8 * j));
673      }
674    }
675  }
676
677  return i;
678}
679#endif
680
681#ifdef XZ_DEC_ARM
682static size_t bcj_arm(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
683{
684  size_t i;
685  uint32_t addr;
686
687  for (i = 0; i + 4 <= size; i += 4) {
688    if (buf[i + 3] == 0xEB) {
689      addr = (uint32_t)buf[i] | ((uint32_t)buf[i + 1] << 8)
690          | ((uint32_t)buf[i + 2] << 16);
691      addr <<= 2;
692      addr -= s->pos + (uint32_t)i + 8;
693      addr >>= 2;
694      buf[i] = (uint8_t)addr;
695      buf[i + 1] = (uint8_t)(addr >> 8);
696      buf[i + 2] = (uint8_t)(addr >> 16);
697    }
698  }
699
700  return i;
701}
702#endif
703
704#ifdef XZ_DEC_ARMTHUMB
705static size_t bcj_armthumb(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
706{
707  size_t i;
708  uint32_t addr;
709
710  for (i = 0; i + 4 <= size; i += 2) {
711    if ((buf[i + 1] & 0xF8) == 0xF0
712        && (buf[i + 3] & 0xF8) == 0xF8) {
713      addr = (((uint32_t)buf[i + 1] & 0x07) << 19)
714          | ((uint32_t)buf[i] << 11)
715          | (((uint32_t)buf[i + 3] & 0x07) << 8)
716          | (uint32_t)buf[i + 2];
717      addr <<= 1;
718      addr -= s->pos + (uint32_t)i + 4;
719      addr >>= 1;
720      buf[i + 1] = (uint8_t)(0xF0 | ((addr >> 19) & 0x07));
721      buf[i] = (uint8_t)(addr >> 11);
722      buf[i + 3] = (uint8_t)(0xF8 | ((addr >> 8) & 0x07));
723      buf[i + 2] = (uint8_t)addr;
724      i += 2;
725    }
726  }
727
728  return i;
729}
730#endif
731
732#ifdef XZ_DEC_SPARC
733static size_t bcj_sparc(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
734{
735  size_t i;
736  uint32_t instr;
737
738  for (i = 0; i + 4 <= size; i += 4) {
739    instr = get_unaligned_be32(buf + i);
740    if ((instr >> 22) == 0x100 || (instr >> 22) == 0x1FF) {
741      instr <<= 2;
742      instr -= s->pos + (uint32_t)i;
743      instr >>= 2;
744      instr = ((uint32_t)0x40000000 - (instr & 0x400000))
745          | 0x40000000 | (instr & 0x3FFFFF);
746      put_unaligned_be32(instr, buf + i);
747    }
748  }
749
750  return i;
751}
752#endif
753
754/*
755 * Apply the selected BCJ filter. Update *pos and s->pos to match the amount
756 * of data that got filtered.
757 *
758 * NOTE: This is implemented as a switch statement to avoid using function
759 * pointers, which could be problematic in the kernel boot code, which must
760 * avoid pointers to static data (at least on x86).
761 */
762static void bcj_apply(struct xz_dec_bcj *s,
763          uint8_t *buf, size_t *pos, size_t size)
764{
765  size_t filtered;
766
767  buf += *pos;
768  size -= *pos;
769
770  switch (s->type) {
771#ifdef XZ_DEC_X86
772  case BCJ_X86:
773    filtered = bcj_x86(s, buf, size);
774    break;
775#endif
776#ifdef XZ_DEC_POWERPC
777  case BCJ_POWERPC:
778    filtered = bcj_powerpc(s, buf, size);
779    break;
780#endif
781#ifdef XZ_DEC_IA64
782  case BCJ_IA64:
783    filtered = bcj_ia64(s, buf, size);
784    break;
785#endif
786#ifdef XZ_DEC_ARM
787  case BCJ_ARM:
788    filtered = bcj_arm(s, buf, size);
789    break;
790#endif
791#ifdef XZ_DEC_ARMTHUMB
792  case BCJ_ARMTHUMB:
793    filtered = bcj_armthumb(s, buf, size);
794    break;
795#endif
796#ifdef XZ_DEC_SPARC
797  case BCJ_SPARC:
798    filtered = bcj_sparc(s, buf, size);
799    break;
800#endif
801  default:
802    /* Never reached but silence compiler warnings. */
803    filtered = 0;
804    break;
805  }
806
807  *pos += filtered;
808  s->pos += filtered;
809}
810
811/*
812 * Flush pending filtered data from temp to the output buffer.
813 * Move the remaining mixture of possibly filtered and unfiltered
814 * data to the beginning of temp.
815 */
816static void bcj_flush(struct xz_dec_bcj *s, struct xz_buf *b)
817{
818  size_t copy_size;
819
820  copy_size = min_t(size_t, s->temp.filtered, b->out_size - b->out_pos);
821  memcpy(b->out + b->out_pos, s->temp.buf, copy_size);
822  b->out_pos += copy_size;
823
824  s->temp.filtered -= copy_size;
825  s->temp.size -= copy_size;
826  memmove(s->temp.buf, s->temp.buf + copy_size, s->temp.size);
827}
828
829/*
830 * The BCJ filter functions are primitive in sense that they process the
831 * data in chunks of 1-16 bytes. To hide this issue, this function does
832 * some buffering.
833 */
834enum xz_ret xz_dec_bcj_run(struct xz_dec_bcj *s,
835             struct xz_dec_lzma2 *lzma2,
836             struct xz_buf *b)
837{
838  size_t out_start;
839
840  /*
841   * Flush pending already filtered data to the output buffer. Return
842   * immediatelly if we couldn't flush everything, or if the next
843   * filter in the chain had already returned XZ_STREAM_END.
844   */
845  if (s->temp.filtered > 0) {
846    bcj_flush(s, b);
847    if (s->temp.filtered > 0)
848      return XZ_OK;
849
850    if (s->ret == XZ_STREAM_END)
851      return XZ_STREAM_END;
852  }
853
854  /*
855   * If we have more output space than what is currently pending in
856   * temp, copy the unfiltered data from temp to the output buffer
857   * and try to fill the output buffer by decoding more data from the
858   * next filter in the chain. Apply the BCJ filter on the new data
859   * in the output buffer. If everything cannot be filtered, copy it
860   * to temp and rewind the output buffer position accordingly.
861   *
862   * This needs to be always run when temp.size == 0 to handle a special
863   * case where the output buffer is full and the next filter has no
864   * more output coming but hasn't returned XZ_STREAM_END yet.
865   */
866  if (s->temp.size < b->out_size - b->out_pos || s->temp.size == 0) {
867    out_start = b->out_pos;
868    memcpy(b->out + b->out_pos, s->temp.buf, s->temp.size);
869    b->out_pos += s->temp.size;
870
871    s->ret = xz_dec_lzma2_run(lzma2, b);
872    if (s->ret != XZ_STREAM_END
873        && (s->ret != XZ_OK ))
874      return s->ret;
875
876    bcj_apply(s, b->out, &out_start, b->out_pos);
877
878    /*
879     * As an exception, if the next filter returned XZ_STREAM_END,
880     * we can do that too, since the last few bytes that remain
881     * unfiltered are meant to remain unfiltered.
882     */
883    if (s->ret == XZ_STREAM_END)
884      return XZ_STREAM_END;
885
886    s->temp.size = b->out_pos - out_start;
887    b->out_pos -= s->temp.size;
888    memcpy(s->temp.buf, b->out + b->out_pos, s->temp.size);
889
890    /*
891     * If there wasn't enough input to the next filter to fill
892     * the output buffer with unfiltered data, there's no point
893     * to try decoding more data to temp.
894     */
895    if (b->out_pos + s->temp.size < b->out_size)
896      return XZ_OK;
897  }
898
899  /*
900   * We have unfiltered data in temp. If the output buffer isn't full
901   * yet, try to fill the temp buffer by decoding more data from the
902   * next filter. Apply the BCJ filter on temp. Then we hopefully can
903   * fill the actual output buffer by copying filtered data from temp.
904   * A mix of filtered and unfiltered data may be left in temp; it will
905   * be taken care on the next call to this function.
906   */
907  if (b->out_pos < b->out_size) {
908    /* Make b->out{,_pos,_size} temporarily point to s->temp. */
909    s->out = b->out;
910    s->out_pos = b->out_pos;
911    s->out_size = b->out_size;
912    b->out = s->temp.buf;
913    b->out_pos = s->temp.size;
914    b->out_size = sizeof(s->temp.buf);
915
916    s->ret = xz_dec_lzma2_run(lzma2, b);
917
918    s->temp.size = b->out_pos;
919    b->out = s->out;
920    b->out_pos = s->out_pos;
921    b->out_size = s->out_size;
922
923    if (s->ret != XZ_OK && s->ret != XZ_STREAM_END)
924      return s->ret;
925
926    bcj_apply(s, s->temp.buf, &s->temp.filtered, s->temp.size);
927
928    /*
929     * If the next filter returned XZ_STREAM_END, we mark that
930     * everything is filtered, since the last unfiltered bytes
931     * of the stream are meant to be left as is.
932     */
933    if (s->ret == XZ_STREAM_END)
934      s->temp.filtered = s->temp.size;
935
936    bcj_flush(s, b);
937    if (s->temp.filtered > 0)
938      return XZ_OK;
939  }
940
941  return s->ret;
942}
943
944enum xz_ret xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id)
945{
946  switch (id) {
947#ifdef XZ_DEC_X86
948  case BCJ_X86:
949#endif
950#ifdef XZ_DEC_POWERPC
951  case BCJ_POWERPC:
952#endif
953#ifdef XZ_DEC_IA64
954  case BCJ_IA64:
955#endif
956#ifdef XZ_DEC_ARM
957  case BCJ_ARM:
958#endif
959#ifdef XZ_DEC_ARMTHUMB
960  case BCJ_ARMTHUMB:
961#endif
962#ifdef XZ_DEC_SPARC
963  case BCJ_SPARC:
964#endif
965    break;
966
967  default:
968    /* Unsupported Filter ID */
969    return XZ_OPTIONS_ERROR;
970  }
971
972  s->type = id;
973  s->ret = XZ_OK;
974  s->pos = 0;
975  s->x86_prev_mask = 0;
976  s->temp.filtered = 0;
977  s->temp.size = 0;
978
979  return XZ_OK;
980}
981
982#endif
983/*
984 * LZMA2 decoder
985 */
986
987
988// BEGIN xz_lzma2.h
989/*
990 * LZMA2 definitions
991 *
992 */
993
994
995/* Range coder constants */
996#define RC_SHIFT_BITS 8
997#define RC_TOP_BITS 24
998#define RC_TOP_VALUE (1 << RC_TOP_BITS)
999#define RC_BIT_MODEL_TOTAL_BITS 11
1000#define RC_BIT_MODEL_TOTAL (1 << RC_BIT_MODEL_TOTAL_BITS)
1001#define RC_MOVE_BITS 5
1002
1003/*
1004 * Maximum number of position states. A position state is the lowest pb
1005 * number of bits of the current uncompressed offset. In some places there
1006 * are different sets of probabilities for different position states.
1007 */
1008#define POS_STATES_MAX (1 << 4)
1009
1010/*
1011 * This enum is used to track which LZMA symbols have occurred most recently
1012 * and in which order. This information is used to predict the next symbol.
1013 *
1014 * Symbols:
1015 *  - Literal: One 8-bit byte
1016 *  - Match: Repeat a chunk of data at some distance
1017 *  - Long repeat: Multi-byte match at a recently seen distance
1018 *  - Short repeat: One-byte repeat at a recently seen distance
1019 *
1020 * The symbol names are in from STATE_oldest_older_previous. REP means
1021 * either short or long repeated match, and NONLIT means any non-literal.
1022 */
1023enum lzma_state {
1024  STATE_LIT_LIT,
1025  STATE_MATCH_LIT_LIT,
1026  STATE_REP_LIT_LIT,
1027  STATE_SHORTREP_LIT_LIT,
1028  STATE_MATCH_LIT,
1029  STATE_REP_LIT,
1030  STATE_SHORTREP_LIT,
1031  STATE_LIT_MATCH,
1032  STATE_LIT_LONGREP,
1033  STATE_LIT_SHORTREP,
1034  STATE_NONLIT_MATCH,
1035  STATE_NONLIT_REP
1036};
1037
1038/* Total number of states */
1039#define STATES 12
1040
1041/* The lowest 7 states indicate that the previous state was a literal. */
1042#define LIT_STATES 7
1043
1044/* Indicate that the latest symbol was a literal. */
1045static inline void lzma_state_literal(enum lzma_state *state)
1046{
1047  if (*state <= STATE_SHORTREP_LIT_LIT)
1048    *state = STATE_LIT_LIT;
1049  else if (*state <= STATE_LIT_SHORTREP)
1050    *state -= 3;
1051  else
1052    *state -= 6;
1053}
1054
1055/* Indicate that the latest symbol was a match. */
1056static inline void lzma_state_match(enum lzma_state *state)
1057{
1058  *state = *state < LIT_STATES ? STATE_LIT_MATCH : STATE_NONLIT_MATCH;
1059}
1060
1061/* Indicate that the latest state was a long repeated match. */
1062static inline void lzma_state_long_rep(enum lzma_state *state)
1063{
1064  *state = *state < LIT_STATES ? STATE_LIT_LONGREP : STATE_NONLIT_REP;
1065}
1066
1067/* Indicate that the latest symbol was a short match. */
1068static inline void lzma_state_short_rep(enum lzma_state *state)
1069{
1070  *state = *state < LIT_STATES ? STATE_LIT_SHORTREP : STATE_NONLIT_REP;
1071}
1072
1073/* Test if the previous symbol was a literal. */
1074static inline int lzma_state_is_literal(enum lzma_state state)
1075{
1076  return state < LIT_STATES;
1077}
1078
1079/* Each literal coder is divided in three sections:
1080 *   - 0x001-0x0FF: Without match byte
1081 *   - 0x101-0x1FF: With match byte; match bit is 0
1082 *   - 0x201-0x2FF: With match byte; match bit is 1
1083 *
1084 * Match byte is used when the previous LZMA symbol was something else than
1085 * a literal (that is, it was some kind of match).
1086 */
1087#define LITERAL_CODER_SIZE 0x300
1088
1089/* Maximum number of literal coders */
1090#define LITERAL_CODERS_MAX (1 << 4)
1091
1092/* Minimum length of a match is two bytes. */
1093#define MATCH_LEN_MIN 2
1094
1095/* Match length is encoded with 4, 5, or 10 bits.
1096 *
1097 * Length   Bits
1098 *  2-9      4 = Choice=0 + 3 bits
1099 * 10-17     5 = Choice=1 + Choice2=0 + 3 bits
1100 * 18-273   10 = Choice=1 + Choice2=1 + 8 bits
1101 */
1102#define LEN_LOW_BITS 3
1103#define LEN_LOW_SYMBOLS (1 << LEN_LOW_BITS)
1104#define LEN_MID_BITS 3
1105#define LEN_MID_SYMBOLS (1 << LEN_MID_BITS)
1106#define LEN_HIGH_BITS 8
1107#define LEN_HIGH_SYMBOLS (1 << LEN_HIGH_BITS)
1108#define LEN_SYMBOLS (LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS + LEN_HIGH_SYMBOLS)
1109
1110/*
1111 * Maximum length of a match is 273 which is a result of the encoding
1112 * described above.
1113 */
1114#define MATCH_LEN_MAX (MATCH_LEN_MIN + LEN_SYMBOLS - 1)
1115
1116/*
1117 * Different sets of probabilities are used for match distances that have
1118 * very short match length: Lengths of 2, 3, and 4 bytes have a separate
1119 * set of probabilities for each length. The matches with longer length
1120 * use a shared set of probabilities.
1121 */
1122#define DIST_STATES 4
1123
1124/*
1125 * Get the index of the appropriate probability array for decoding
1126 * the distance slot.
1127 */
1128static inline uint32_t lzma_get_dist_state(uint32_t len)
1129{
1130  return len < DIST_STATES + MATCH_LEN_MIN
1131      ? len - MATCH_LEN_MIN : DIST_STATES - 1;
1132}
1133
1134/*
1135 * The highest two bits of a 32-bit match distance are encoded using six bits.
1136 * This six-bit value is called a distance slot. This way encoding a 32-bit
1137 * value takes 6-36 bits, larger values taking more bits.
1138 */
1139#define DIST_SLOT_BITS 6
1140#define DIST_SLOTS (1 << DIST_SLOT_BITS)
1141
1142/* Match distances up to 127 are fully encoded using probabilities. Since
1143 * the highest two bits (distance slot) are always encoded using six bits,
1144 * the distances 0-3 don't need any additional bits to encode, since the
1145 * distance slot itself is the same as the actual distance. DIST_MODEL_START
1146 * indicates the first distance slot where at least one additional bit is
1147 * needed.
1148 */
1149#define DIST_MODEL_START 4
1150
1151/*
1152 * Match distances greater than 127 are encoded in three pieces:
1153 *   - distance slot: the highest two bits
1154 *   - direct bits: 2-26 bits below the highest two bits
1155 *   - alignment bits: four lowest bits
1156 *
1157 * Direct bits don't use any probabilities.
1158 *
1159 * The distance slot value of 14 is for distances 128-191.
1160 */
1161#define DIST_MODEL_END 14
1162
1163/* Distance slots that indicate a distance <= 127. */
1164#define FULL_DISTANCES_BITS (DIST_MODEL_END / 2)
1165#define FULL_DISTANCES (1 << FULL_DISTANCES_BITS)
1166
1167/*
1168 * For match distances greater than 127, only the highest two bits and the
1169 * lowest four bits (alignment) is encoded using probabilities.
1170 */
1171#define ALIGN_BITS 4
1172#define ALIGN_SIZE (1 << ALIGN_BITS)
1173#define ALIGN_MASK (ALIGN_SIZE - 1)
1174
1175/* Total number of all probability variables */
1176#define PROBS_TOTAL (1846 + LITERAL_CODERS_MAX * LITERAL_CODER_SIZE)
1177
1178/*
1179 * LZMA remembers the four most recent match distances. Reusing these
1180 * distances tends to take less space than re-encoding the actual
1181 * distance value.
1182 */
1183#define REPS 4
1184
1185
1186// END xz_lzma2.h
1187
1188/*
1189 * Range decoder initialization eats the first five bytes of each LZMA chunk.
1190 */
1191#define RC_INIT_BYTES 5
1192
1193/*
1194 * Minimum number of usable input buffer to safely decode one LZMA symbol.
1195 * The worst case is that we decode 22 bits using probabilities and 26
1196 * direct bits. This may decode at maximum of 20 bytes of input. However,
1197 * lzma_main() does an extra normalization before returning, thus we
1198 * need to put 21 here.
1199 */
1200#define LZMA_IN_REQUIRED 21
1201
1202/*
1203 * Dictionary (history buffer)
1204 *
1205 * These are always true:
1206 *    start <= pos <= full <= end
1207 *    pos <= limit <= end
1208 *    end == size
1209 *    size <= size_max
1210 *    allocated <= size
1211 *
1212 * Most of these variables are size_t as a relic of single-call mode,
1213 * in which the dictionary variables address the actual output
1214 * buffer directly.
1215 */
1216struct dictionary {
1217  /* Beginning of the history buffer */
1218  uint8_t *buf;
1219
1220  /* Old position in buf (before decoding more data) */
1221  size_t start;
1222
1223  /* Position in buf */
1224  size_t pos;
1225
1226  /*
1227   * How full dictionary is. This is used to detect corrupt input that
1228   * would read beyond the beginning of the uncompressed stream.
1229   */
1230  size_t full;
1231
1232  /* Write limit; we don't write to buf[limit] or later bytes. */
1233  size_t limit;
1234
1235  /* End of the dictionary buffer. This is the same as the dictionary size. */
1236  size_t end;
1237
1238  /*
1239   * Size of the dictionary as specified in Block Header. This is used
1240   * together with "full" to detect corrupt input that would make us
1241   * read beyond the beginning of the uncompressed stream.
1242   */
1243  uint32_t size;
1244
1245  /*
1246   * Maximum allowed dictionary size.
1247   */
1248  uint32_t size_max;
1249
1250  /*
1251   * Amount of memory currently allocated for the dictionary.
1252   */
1253  uint32_t allocated;
1254};
1255
1256/* Range decoder */
1257struct rc_dec {
1258  uint32_t range;
1259  uint32_t code;
1260
1261  /*
1262   * Number of initializing bytes remaining to be read
1263   * by rc_read_init().
1264   */
1265  uint32_t init_bytes_left;
1266
1267  /*
1268   * Buffer from which we read our input. It can be either
1269   * temp.buf or the caller-provided input buffer.
1270   */
1271  const uint8_t *in;
1272  size_t in_pos;
1273  size_t in_limit;
1274};
1275
1276/* Probabilities for a length decoder. */
1277struct lzma_len_dec {
1278  /* Probability of match length being at least 10 */
1279  uint16_t choice;
1280
1281  /* Probability of match length being at least 18 */
1282  uint16_t choice2;
1283
1284  /* Probabilities for match lengths 2-9 */
1285  uint16_t low[POS_STATES_MAX][LEN_LOW_SYMBOLS];
1286
1287  /* Probabilities for match lengths 10-17 */
1288  uint16_t mid[POS_STATES_MAX][LEN_MID_SYMBOLS];
1289
1290  /* Probabilities for match lengths 18-273 */
1291  uint16_t high[LEN_HIGH_SYMBOLS];
1292};
1293
1294struct lzma_dec {
1295  /* Distances of latest four matches */
1296  uint32_t rep0;
1297  uint32_t rep1;
1298  uint32_t rep2;
1299  uint32_t rep3;
1300
1301  /* Types of the most recently seen LZMA symbols */
1302  enum lzma_state state;
1303
1304  /*
1305   * Length of a match. This is updated so that dict_repeat can
1306   * be called again to finish repeating the whole match.
1307   */
1308  uint32_t len;
1309
1310  /*
1311   * LZMA properties or related bit masks (number of literal
1312   * context bits, a mask dervied from the number of literal
1313   * position bits, and a mask dervied from the number
1314   * position bits)
1315   */
1316  uint32_t lc;
1317  uint32_t literal_pos_mask; /* (1 << lp) - 1 */
1318  uint32_t pos_mask;         /* (1 << pb) - 1 */
1319
1320  /* If 1, it's a match. Otherwise it's a single 8-bit literal. */
1321  uint16_t is_match[STATES][POS_STATES_MAX];
1322
1323  /* If 1, it's a repeated match. The distance is one of rep0 .. rep3. */
1324  uint16_t is_rep[STATES];
1325
1326  /*
1327   * If 0, distance of a repeated match is rep0.
1328   * Otherwise check is_rep1.
1329   */
1330  uint16_t is_rep0[STATES];
1331
1332  /*
1333   * If 0, distance of a repeated match is rep1.
1334   * Otherwise check is_rep2.
1335   */
1336  uint16_t is_rep1[STATES];
1337
1338  /* If 0, distance of a repeated match is rep2. Otherwise it is rep3. */
1339  uint16_t is_rep2[STATES];
1340
1341  /*
1342   * If 1, the repeated match has length of one byte. Otherwise
1343   * the length is decoded from rep_len_decoder.
1344   */
1345  uint16_t is_rep0_long[STATES][POS_STATES_MAX];
1346
1347  /*
1348   * Probability tree for the highest two bits of the match
1349   * distance. There is a separate probability tree for match
1350   * lengths of 2 (i.e. MATCH_LEN_MIN), 3, 4, and [5, 273].
1351   */
1352  uint16_t dist_slot[DIST_STATES][DIST_SLOTS];
1353
1354  /*
1355   * Probility trees for additional bits for match distance
1356   * when the distance is in the range [4, 127].
1357   */
1358  uint16_t dist_special[FULL_DISTANCES - DIST_MODEL_END];
1359
1360  /*
1361   * Probability tree for the lowest four bits of a match
1362   * distance that is equal to or greater than 128.
1363   */
1364  uint16_t dist_align[ALIGN_SIZE];
1365
1366  /* Length of a normal match */
1367  struct lzma_len_dec match_len_dec;
1368
1369  /* Length of a repeated match */
1370  struct lzma_len_dec rep_len_dec;
1371
1372  /* Probabilities of literals */
1373  uint16_t literal[LITERAL_CODERS_MAX][LITERAL_CODER_SIZE];
1374};
1375
1376struct lzma2_dec {
1377  /* Position in xz_dec_lzma2_run(). */
1378  enum lzma2_seq {
1379    SEQ_CONTROL,
1380    SEQ_UNCOMPRESSED_1,
1381    SEQ_UNCOMPRESSED_2,
1382    SEQ_COMPRESSED_0,
1383    SEQ_COMPRESSED_1,
1384    SEQ_PROPERTIES,
1385    SEQ_LZMA_PREPARE,
1386    SEQ_LZMA_RUN,
1387    SEQ_COPY
1388  } sequence;
1389
1390  /* Next position after decoding the compressed size of the chunk. */
1391  enum lzma2_seq next_sequence;
1392
1393  /* Uncompressed size of LZMA chunk (2 MiB at maximum) */
1394  uint32_t uncompressed;
1395
1396  /*
1397   * Compressed size of LZMA chunk or compressed/uncompressed
1398   * size of uncompressed chunk (64 KiB at maximum)
1399   */
1400  uint32_t compressed;
1401
1402  /*
1403   * True if dictionary reset is needed. This is false before
1404   * the first chunk (LZMA or uncompressed).
1405   */
1406  int need_dict_reset;
1407
1408  /*
1409   * True if new LZMA properties are needed. This is false
1410   * before the first LZMA chunk.
1411   */
1412  int need_props;
1413};
1414
1415struct xz_dec_lzma2 {
1416  /*
1417   * The order below is important on x86 to reduce code size and
1418   * it shouldn't hurt on other platforms. Everything up to and
1419   * including lzma.pos_mask are in the first 128 bytes on x86-32,
1420   * which allows using smaller instructions to access those
1421   * variables. On x86-64, fewer variables fit into the first 128
1422   * bytes, but this is still the best order without sacrificing
1423   * the readability by splitting the structures.
1424   */
1425  struct rc_dec rc;
1426  struct dictionary dict;
1427  struct lzma2_dec lzma2;
1428  struct lzma_dec lzma;
1429
1430  /*
1431   * Temporary buffer which holds small number of input bytes between
1432   * decoder calls. See lzma2_lzma() for details.
1433   */
1434  struct {
1435    uint32_t size;
1436    uint8_t buf[3 * LZMA_IN_REQUIRED];
1437  } temp;
1438};
1439
1440/**************
1441 * Dictionary *
1442 **************/
1443
1444/* Reset the dictionary state. */
1445static void dict_reset(struct dictionary *dict)
1446{
1447  dict->start = 0;
1448  dict->pos = 0;
1449  dict->limit = 0;
1450  dict->full = 0;
1451}
1452
1453/* Set dictionary write limit */
1454static void dict_limit(struct dictionary *dict, size_t out_max)
1455{
1456  if (dict->end - dict->pos <= out_max)
1457    dict->limit = dict->end;
1458  else
1459    dict->limit = dict->pos + out_max;
1460}
1461
1462/* Return true if at least one byte can be written into the dictionary. */
1463static inline int dict_has_space(const struct dictionary *dict)
1464{
1465  return dict->pos < dict->limit;
1466}
1467
1468/*
1469 * Get a byte from the dictionary at the given distance. The distance is
1470 * assumed to valid, or as a special case, zero when the dictionary is
1471 * still empty. This special case is needed for single-call decoding to
1472 * avoid writing a '\0' to the end of the destination buffer.
1473 */
1474static inline uint32_t dict_get(const struct dictionary *dict, uint32_t dist)
1475{
1476  size_t offset = dict->pos - dist - 1;
1477
1478  if (dist >= dict->pos)
1479    offset += dict->end;
1480
1481  return dict->full > 0 ? dict->buf[offset] : 0;
1482}
1483
1484/*
1485 * Put one byte into the dictionary. It is assumed that there is space for it.
1486 */
1487static inline void dict_put(struct dictionary *dict, uint8_t byte)
1488{
1489  dict->buf[dict->pos++] = byte;
1490
1491  if (dict->full < dict->pos)
1492    dict->full = dict->pos;
1493}
1494
1495/*
1496 * Repeat given number of bytes from the given distance. If the distance is
1497 * invalid, false is returned. On success, true is returned and *len is
1498 * updated to indicate how many bytes were left to be repeated.
1499 */
1500static int dict_repeat(struct dictionary *dict, uint32_t *len, uint32_t dist)
1501{
1502  size_t back;
1503  uint32_t left;
1504
1505  if (dist >= dict->full || dist >= dict->size) return 0;
1506
1507  left = min_t(size_t, dict->limit - dict->pos, *len);
1508  *len -= left;
1509
1510  back = dict->pos - dist - 1;
1511  if (dist >= dict->pos)
1512    back += dict->end;
1513
1514  do {
1515    dict->buf[dict->pos++] = dict->buf[back++];
1516    if (back == dict->end)
1517      back = 0;
1518  } while (--left > 0);
1519
1520  if (dict->full < dict->pos)
1521    dict->full = dict->pos;
1522
1523  return 1;
1524}
1525
1526/* Copy uncompressed data as is from input to dictionary and output buffers. */
1527static void dict_uncompressed(struct dictionary *dict, struct xz_buf *b,
1528            uint32_t *left)
1529{
1530  size_t copy_size;
1531
1532  while (*left > 0 && b->in_pos < b->in_size
1533      && b->out_pos < b->out_size) {
1534    copy_size = min(b->in_size - b->in_pos,
1535        b->out_size - b->out_pos);
1536    if (copy_size > dict->end - dict->pos)
1537      copy_size = dict->end - dict->pos;
1538    if (copy_size > *left)
1539      copy_size = *left;
1540
1541    *left -= copy_size;
1542
1543    memcpy(dict->buf + dict->pos, b->in + b->in_pos, copy_size);
1544    dict->pos += copy_size;
1545
1546    if (dict->full < dict->pos)
1547      dict->full = dict->pos;
1548
1549    if (dict->pos == dict->end)
1550      dict->pos = 0;
1551
1552    memcpy(b->out + b->out_pos, b->in + b->in_pos,
1553        copy_size);
1554
1555    dict->start = dict->pos;
1556
1557    b->out_pos += copy_size;
1558    b->in_pos += copy_size;
1559  }
1560}
1561
1562/*
1563 * Flush pending data from dictionary to b->out. It is assumed that there is
1564 * enough space in b->out. This is guaranteed because caller uses dict_limit()
1565 * before decoding data into the dictionary.
1566 */
1567static uint32_t dict_flush(struct dictionary *dict, struct xz_buf *b)
1568{
1569  size_t copy_size = dict->pos - dict->start;
1570
1571  if (dict->pos == dict->end)
1572    dict->pos = 0;
1573
1574  memcpy(b->out + b->out_pos, dict->buf + dict->start,
1575      copy_size);
1576
1577  dict->start = dict->pos;
1578  b->out_pos += copy_size;
1579  return copy_size;
1580}
1581
1582/*****************
1583 * Range decoder *
1584 *****************/
1585
1586/* Reset the range decoder. */
1587static void rc_reset(struct rc_dec *rc)
1588{
1589  rc->range = (uint32_t)-1;
1590  rc->code = 0;
1591  rc->init_bytes_left = RC_INIT_BYTES;
1592}
1593
1594/*
1595 * Read the first five initial bytes into rc->code if they haven't been
1596 * read already. (Yes, the first byte gets completely ignored.)
1597 */
1598static int rc_read_init(struct rc_dec *rc, struct xz_buf *b)
1599{
1600  while (rc->init_bytes_left > 0) {
1601    if (b->in_pos == b->in_size) return 0;
1602
1603    rc->code = (rc->code << 8) + b->in[b->in_pos++];
1604    --rc->init_bytes_left;
1605  }
1606
1607  return 1;
1608}
1609
1610/* Return true if there may not be enough input for the next decoding loop. */
1611static inline int rc_limit_exceeded(const struct rc_dec *rc)
1612{
1613  return rc->in_pos > rc->in_limit;
1614}
1615
1616/*
1617 * Return true if it is possible (from point of view of range decoder) that
1618 * we have reached the end of the LZMA chunk.
1619 */
1620static inline int rc_is_finished(const struct rc_dec *rc)
1621{
1622  return rc->code == 0;
1623}
1624
1625/* Read the next input byte if needed. */
1626static inline void rc_normalize(struct rc_dec *rc)
1627{
1628  if (rc->range < RC_TOP_VALUE) {
1629    rc->range <<= RC_SHIFT_BITS;
1630    rc->code = (rc->code << RC_SHIFT_BITS) + rc->in[rc->in_pos++];
1631  }
1632}
1633
1634/*
1635 * Decode one bit. In some versions, this function has been splitted in three
1636 * functions so that the compiler is supposed to be able to more easily avoid
1637 * an extra branch. In this particular version of the LZMA decoder, this
1638 * doesn't seem to be a good idea (tested with GCC 3.3.6, 3.4.6, and 4.3.3
1639 * on x86). Using a non-splitted version results in nicer looking code too.
1640 *
1641 * NOTE: This must return an int. Do not make it return a bool or the speed
1642 * of the code generated by GCC 3.x decreases 10-15 %. (GCC 4.3 doesn't care,
1643 * and it generates 10-20 % faster code than GCC 3.x from this file anyway.)
1644 */
1645static inline int rc_bit(struct rc_dec *rc, uint16_t *prob)
1646{
1647  uint32_t bound;
1648  int bit;
1649
1650  rc_normalize(rc);
1651  bound = (rc->range >> RC_BIT_MODEL_TOTAL_BITS) * *prob;
1652  if (rc->code < bound) {
1653    rc->range = bound;
1654    *prob += (RC_BIT_MODEL_TOTAL - *prob) >> RC_MOVE_BITS;
1655    bit = 0;
1656  } else {
1657    rc->range -= bound;
1658    rc->code -= bound;
1659    *prob -= *prob >> RC_MOVE_BITS;
1660    bit = 1;
1661  }
1662
1663  return bit;
1664}
1665
1666/* Decode a bittree starting from the most significant bit. */
1667static inline uint32_t rc_bittree(struct rc_dec *rc,
1668             uint16_t *probs, uint32_t limit)
1669{
1670  uint32_t symbol = 1;
1671
1672  do {
1673    if (rc_bit(rc, &probs[symbol]))
1674      symbol = (symbol << 1) + 1;
1675    else
1676      symbol <<= 1;
1677  } while (symbol < limit);
1678
1679  return symbol;
1680}
1681
1682/* Decode a bittree starting from the least significant bit. */
1683static inline void rc_bittree_reverse(struct rc_dec *rc,
1684                 uint16_t *probs,
1685                 uint32_t *dest, uint32_t limit)
1686{
1687  uint32_t symbol = 1;
1688  uint32_t i = 0;
1689
1690  do {
1691    if (rc_bit(rc, &probs[symbol])) {
1692      symbol = (symbol << 1) + 1;
1693      *dest += 1 << i;
1694    } else {
1695      symbol <<= 1;
1696    }
1697  } while (++i < limit);
1698}
1699
1700/* Decode direct bits (fixed fifty-fifty probability) */
1701static inline void rc_direct(struct rc_dec *rc, uint32_t *dest, uint32_t limit)
1702{
1703  uint32_t mask;
1704
1705  do {
1706    rc_normalize(rc);
1707    rc->range >>= 1;
1708    rc->code -= rc->range;
1709    mask = (uint32_t)0 - (rc->code >> 31);
1710    rc->code += rc->range & mask;
1711    *dest = (*dest << 1) + (mask + 1);
1712  } while (--limit > 0);
1713}
1714
1715/********
1716 * LZMA *
1717 ********/
1718
1719/* Get pointer to literal coder probability array. */
1720static uint16_t *lzma_literal_probs(struct xz_dec_lzma2 *s)
1721{
1722  uint32_t prev_byte = dict_get(&s->dict, 0);
1723  uint32_t low = prev_byte >> (8 - s->lzma.lc);
1724  uint32_t high = (s->dict.pos & s->lzma.literal_pos_mask) << s->lzma.lc;
1725  return s->lzma.literal[low + high];
1726}
1727
1728/* Decode a literal (one 8-bit byte) */
1729static void lzma_literal(struct xz_dec_lzma2 *s)
1730{
1731  uint16_t *probs;
1732  uint32_t symbol;
1733  uint32_t match_byte;
1734  uint32_t match_bit;
1735  uint32_t offset;
1736  uint32_t i;
1737
1738  probs = lzma_literal_probs(s);
1739
1740  if (lzma_state_is_literal(s->lzma.state)) {
1741    symbol = rc_bittree(&s->rc, probs, 0x100);
1742  } else {
1743    symbol = 1;
1744    match_byte = dict_get(&s->dict, s->lzma.rep0) << 1;
1745    offset = 0x100;
1746
1747    do {
1748      match_bit = match_byte & offset;
1749      match_byte <<= 1;
1750      i = offset + match_bit + symbol;
1751
1752      if (rc_bit(&s->rc, &probs[i])) {
1753        symbol = (symbol << 1) + 1;
1754        offset &= match_bit;
1755      } else {
1756        symbol <<= 1;
1757        offset &= ~match_bit;
1758      }
1759    } while (symbol < 0x100);
1760  }
1761
1762  dict_put(&s->dict, (uint8_t)symbol);
1763  lzma_state_literal(&s->lzma.state);
1764}
1765
1766/* Decode the length of the match into s->lzma.len. */
1767static void lzma_len(struct xz_dec_lzma2 *s, struct lzma_len_dec *l,
1768         uint32_t pos_state)
1769{
1770  uint16_t *probs;
1771  uint32_t limit;
1772
1773  if (!rc_bit(&s->rc, &l->choice)) {
1774    probs = l->low[pos_state];
1775    limit = LEN_LOW_SYMBOLS;
1776    s->lzma.len = MATCH_LEN_MIN;
1777  } else {
1778    if (!rc_bit(&s->rc, &l->choice2)) {
1779      probs = l->mid[pos_state];
1780      limit = LEN_MID_SYMBOLS;
1781      s->lzma.len = MATCH_LEN_MIN + LEN_LOW_SYMBOLS;
1782    } else {
1783      probs = l->high;
1784      limit = LEN_HIGH_SYMBOLS;
1785      s->lzma.len = MATCH_LEN_MIN + LEN_LOW_SYMBOLS
1786          + LEN_MID_SYMBOLS;
1787    }
1788  }
1789
1790  s->lzma.len += rc_bittree(&s->rc, probs, limit) - limit;
1791}
1792
1793/* Decode a match. The distance will be stored in s->lzma.rep0. */
1794static void lzma_match(struct xz_dec_lzma2 *s, uint32_t pos_state)
1795{
1796  uint16_t *probs;
1797  uint32_t dist_slot;
1798  uint32_t limit;
1799
1800  lzma_state_match(&s->lzma.state);
1801
1802  s->lzma.rep3 = s->lzma.rep2;
1803  s->lzma.rep2 = s->lzma.rep1;
1804  s->lzma.rep1 = s->lzma.rep0;
1805
1806  lzma_len(s, &s->lzma.match_len_dec, pos_state);
1807
1808  probs = s->lzma.dist_slot[lzma_get_dist_state(s->lzma.len)];
1809  dist_slot = rc_bittree(&s->rc, probs, DIST_SLOTS) - DIST_SLOTS;
1810
1811  if (dist_slot < DIST_MODEL_START) {
1812    s->lzma.rep0 = dist_slot;
1813  } else {
1814    limit = (dist_slot >> 1) - 1;
1815    s->lzma.rep0 = 2 + (dist_slot & 1);
1816
1817    if (dist_slot < DIST_MODEL_END) {
1818      s->lzma.rep0 <<= limit;
1819      probs = s->lzma.dist_special + s->lzma.rep0
1820          - dist_slot - 1;
1821      rc_bittree_reverse(&s->rc, probs,
1822          &s->lzma.rep0, limit);
1823    } else {
1824      rc_direct(&s->rc, &s->lzma.rep0, limit - ALIGN_BITS);
1825      s->lzma.rep0 <<= ALIGN_BITS;
1826      rc_bittree_reverse(&s->rc, s->lzma.dist_align,
1827          &s->lzma.rep0, ALIGN_BITS);
1828    }
1829  }
1830}
1831
1832/*
1833 * Decode a repeated match. The distance is one of the four most recently
1834 * seen matches. The distance will be stored in s->lzma.rep0.
1835 */
1836static void lzma_rep_match(struct xz_dec_lzma2 *s, uint32_t pos_state)
1837{
1838  uint32_t tmp;
1839
1840  if (!rc_bit(&s->rc, &s->lzma.is_rep0[s->lzma.state])) {
1841    if (!rc_bit(&s->rc, &s->lzma.is_rep0_long[
1842        s->lzma.state][pos_state])) {
1843      lzma_state_short_rep(&s->lzma.state);
1844      s->lzma.len = 1;
1845      return;
1846    }
1847  } else {
1848    if (!rc_bit(&s->rc, &s->lzma.is_rep1[s->lzma.state])) {
1849      tmp = s->lzma.rep1;
1850    } else {
1851      if (!rc_bit(&s->rc, &s->lzma.is_rep2[s->lzma.state])) {
1852        tmp = s->lzma.rep2;
1853      } else {
1854        tmp = s->lzma.rep3;
1855        s->lzma.rep3 = s->lzma.rep2;
1856      }
1857
1858      s->lzma.rep2 = s->lzma.rep1;
1859    }
1860
1861    s->lzma.rep1 = s->lzma.rep0;
1862    s->lzma.rep0 = tmp;
1863  }
1864
1865  lzma_state_long_rep(&s->lzma.state);
1866  lzma_len(s, &s->lzma.rep_len_dec, pos_state);
1867}
1868
1869/* LZMA decoder core */
1870static int lzma_main(struct xz_dec_lzma2 *s)
1871{
1872  uint32_t pos_state;
1873
1874  /*
1875   * If the dictionary was reached during the previous call, try to
1876   * finish the possibly pending repeat in the dictionary.
1877   */
1878  if (dict_has_space(&s->dict) && s->lzma.len > 0)
1879    dict_repeat(&s->dict, &s->lzma.len, s->lzma.rep0);
1880
1881  /*
1882   * Decode more LZMA symbols. One iteration may consume up to
1883   * LZMA_IN_REQUIRED - 1 bytes.
1884   */
1885  while (dict_has_space(&s->dict) && !rc_limit_exceeded(&s->rc)) {
1886    pos_state = s->dict.pos & s->lzma.pos_mask;
1887
1888    if (!rc_bit(&s->rc, &s->lzma.is_match[
1889        s->lzma.state][pos_state])) {
1890      lzma_literal(s);
1891    } else {
1892      if (rc_bit(&s->rc, &s->lzma.is_rep[s->lzma.state]))
1893        lzma_rep_match(s, pos_state);
1894      else
1895        lzma_match(s, pos_state);
1896
1897      if (!dict_repeat(&s->dict, &s->lzma.len, s->lzma.rep0))
1898        return 0;
1899    }
1900  }
1901
1902  /*
1903   * Having the range decoder always normalized when we are outside
1904   * this function makes it easier to correctly handle end of the chunk.
1905   */
1906  rc_normalize(&s->rc);
1907
1908  return 1;
1909}
1910
1911/*
1912 * Reset the LZMA decoder and range decoder state. Dictionary is nore reset
1913 * here, because LZMA state may be reset without resetting the dictionary.
1914 */
1915static void lzma_reset(struct xz_dec_lzma2 *s)
1916{
1917  uint16_t *probs;
1918  size_t i;
1919
1920  s->lzma.state = STATE_LIT_LIT;
1921  s->lzma.rep0 = 0;
1922  s->lzma.rep1 = 0;
1923  s->lzma.rep2 = 0;
1924  s->lzma.rep3 = 0;
1925
1926  /*
1927   * All probabilities are initialized to the same value. This hack
1928   * makes the code smaller by avoiding a separate loop for each
1929   * probability array.
1930   *
1931   * This could be optimized so that only that part of literal
1932   * probabilities that are actually required. In the common case
1933   * we would write 12 KiB less.
1934   */
1935  probs = s->lzma.is_match[0];
1936  for (i = 0; i < PROBS_TOTAL; ++i)
1937    probs[i] = RC_BIT_MODEL_TOTAL / 2;
1938
1939  rc_reset(&s->rc);
1940}
1941
1942/*
1943 * Decode and validate LZMA properties (lc/lp/pb) and calculate the bit masks
1944 * from the decoded lp and pb values. On success, the LZMA decoder state is
1945 * reset and true is returned.
1946 */
1947static int lzma_props(struct xz_dec_lzma2 *s, uint8_t props)
1948{
1949  if (props > (4 * 5 + 4) * 9 + 8)
1950    return 0;
1951
1952  s->lzma.pos_mask = 0;
1953  while (props >= 9 * 5) {
1954    props -= 9 * 5;
1955    ++s->lzma.pos_mask;
1956  }
1957
1958  s->lzma.pos_mask = (1 << s->lzma.pos_mask) - 1;
1959
1960  s->lzma.literal_pos_mask = 0;
1961  while (props >= 9) {
1962    props -= 9;
1963    ++s->lzma.literal_pos_mask;
1964  }
1965
1966  s->lzma.lc = props;
1967
1968  if (s->lzma.lc + s->lzma.literal_pos_mask > 4)
1969    return 0;
1970
1971  s->lzma.literal_pos_mask = (1 << s->lzma.literal_pos_mask) - 1;
1972
1973  lzma_reset(s);
1974
1975  return 1;
1976}
1977
1978/*********
1979 * LZMA2 *
1980 *********/
1981
1982/*
1983 * The LZMA decoder assumes that if the input limit (s->rc.in_limit) hasn't
1984 * been exceeded, it is safe to read up to LZMA_IN_REQUIRED bytes. This
1985 * wrapper function takes care of making the LZMA decoder's assumption safe.
1986 *
1987 * As long as there is plenty of input left to be decoded in the current LZMA
1988 * chunk, we decode directly from the caller-supplied input buffer until
1989 * there's LZMA_IN_REQUIRED bytes left. Those remaining bytes are copied into
1990 * s->temp.buf, which (hopefully) gets filled on the next call to this
1991 * function. We decode a few bytes from the temporary buffer so that we can
1992 * continue decoding from the caller-supplied input buffer again.
1993 */
1994static int lzma2_lzma(struct xz_dec_lzma2 *s, struct xz_buf *b)
1995{
1996  size_t in_avail;
1997  uint32_t tmp;
1998
1999  in_avail = b->in_size - b->in_pos;
2000  if (s->temp.size > 0 || s->lzma2.compressed == 0) {
2001    tmp = 2 * LZMA_IN_REQUIRED - s->temp.size;
2002    if (tmp > s->lzma2.compressed - s->temp.size)
2003      tmp = s->lzma2.compressed - s->temp.size;
2004    if (tmp > in_avail)
2005      tmp = in_avail;
2006
2007    memcpy(s->temp.buf + s->temp.size, b->in + b->in_pos, tmp);
2008
2009    if (s->temp.size + tmp == s->lzma2.compressed) {
2010      memset(s->temp.buf + s->temp.size + tmp, 0,
2011          sizeof(s->temp.buf)
2012            - s->temp.size - tmp);
2013      s->rc.in_limit = s->temp.size + tmp;
2014    } else if (s->temp.size + tmp < LZMA_IN_REQUIRED) {
2015      s->temp.size += tmp;
2016      b->in_pos += tmp;
2017      return 1;
2018    } else {
2019      s->rc.in_limit = s->temp.size + tmp - LZMA_IN_REQUIRED;
2020    }
2021
2022    s->rc.in = s->temp.buf;
2023    s->rc.in_pos = 0;
2024
2025    if (!lzma_main(s) || s->rc.in_pos > s->temp.size + tmp)
2026      return 0;
2027
2028    s->lzma2.compressed -= s->rc.in_pos;
2029
2030    if (s->rc.in_pos < s->temp.size) {
2031      s->temp.size -= s->rc.in_pos;
2032      memmove(s->temp.buf, s->temp.buf + s->rc.in_pos,
2033          s->temp.size);
2034      return 1;
2035    }
2036
2037    b->in_pos += s->rc.in_pos - s->temp.size;
2038    s->temp.size = 0;
2039  }
2040
2041  in_avail = b->in_size - b->in_pos;
2042  if (in_avail >= LZMA_IN_REQUIRED) {
2043    s->rc.in = b->in;
2044    s->rc.in_pos = b->in_pos;
2045
2046    if (in_avail >= s->lzma2.compressed + LZMA_IN_REQUIRED)
2047      s->rc.in_limit = b->in_pos + s->lzma2.compressed;
2048    else
2049      s->rc.in_limit = b->in_size - LZMA_IN_REQUIRED;
2050
2051    if (!lzma_main(s))
2052      return 0;
2053
2054    in_avail = s->rc.in_pos - b->in_pos;
2055    if (in_avail > s->lzma2.compressed) return 0;
2056
2057    s->lzma2.compressed -= in_avail;
2058    b->in_pos = s->rc.in_pos;
2059  }
2060
2061  in_avail = b->in_size - b->in_pos;
2062  if (in_avail < LZMA_IN_REQUIRED) {
2063    if (in_avail > s->lzma2.compressed)
2064      in_avail = s->lzma2.compressed;
2065
2066    memcpy(s->temp.buf, b->in + b->in_pos, in_avail);
2067    s->temp.size = in_avail;
2068    b->in_pos += in_avail;
2069  }
2070
2071  return 1;
2072}
2073
2074/*
2075 * Take care of the LZMA2 control layer, and forward the job of actual LZMA
2076 * decoding or copying of uncompressed chunks to other functions.
2077 */
2078enum xz_ret xz_dec_lzma2_run(struct xz_dec_lzma2 *s,
2079               struct xz_buf *b)
2080{
2081  uint32_t tmp;
2082
2083  while (b->in_pos < b->in_size || s->lzma2.sequence == SEQ_LZMA_RUN) {
2084    switch (s->lzma2.sequence) {
2085    case SEQ_CONTROL:
2086      /*
2087       * LZMA2 control byte
2088       *
2089       * Exact values:
2090       *   0x00   End marker
2091       *   0x01   Dictionary reset followed by
2092       *          an uncompressed chunk
2093       *   0x02   Uncompressed chunk (no dictionary reset)
2094       *
2095       * Highest three bits (s->control & 0xE0):
2096       *   0xE0   Dictionary reset, new properties and state
2097       *          reset, followed by LZMA compressed chunk
2098       *   0xC0   New properties and state reset, followed
2099       *          by LZMA compressed chunk (no dictionary
2100       *          reset)
2101       *   0xA0   State reset using old properties,
2102       *          followed by LZMA compressed chunk (no
2103       *          dictionary reset)
2104       *   0x80   LZMA chunk (no dictionary or state reset)
2105       *
2106       * For LZMA compressed chunks, the lowest five bits
2107       * (s->control & 1F) are the highest bits of the
2108       * uncompressed size (bits 16-20).
2109       *
2110       * A new LZMA2 stream must begin with a dictionary
2111       * reset. The first LZMA chunk must set new
2112       * properties and reset the LZMA state.
2113       *
2114       * Values that don't match anything described above
2115       * are invalid and we return XZ_DATA_ERROR.
2116       */
2117      tmp = b->in[b->in_pos++];
2118
2119      if (tmp == 0x00)
2120        return XZ_STREAM_END;
2121
2122      if (tmp >= 0xE0 || tmp == 0x01) {
2123        s->lzma2.need_props = 1;
2124        s->lzma2.need_dict_reset = 0;
2125        dict_reset(&s->dict);
2126      } else if (s->lzma2.need_dict_reset) {
2127        return XZ_DATA_ERROR;
2128      }
2129
2130      if (tmp >= 0x80) {
2131        s->lzma2.uncompressed = (tmp & 0x1F) << 16;
2132        s->lzma2.sequence = SEQ_UNCOMPRESSED_1;
2133
2134        if (tmp >= 0xC0) {
2135          /*
2136           * When there are new properties,
2137           * state reset is done at
2138           * SEQ_PROPERTIES.
2139           */
2140          s->lzma2.need_props = 0;
2141          s->lzma2.next_sequence
2142              = SEQ_PROPERTIES;
2143
2144        } else if (s->lzma2.need_props) {
2145          return XZ_DATA_ERROR;
2146
2147        } else {
2148          s->lzma2.next_sequence
2149              = SEQ_LZMA_PREPARE;
2150          if (tmp >= 0xA0)
2151            lzma_reset(s);
2152        }
2153      } else {
2154        if (tmp > 0x02)
2155          return XZ_DATA_ERROR;
2156
2157        s->lzma2.sequence = SEQ_COMPRESSED_0;
2158        s->lzma2.next_sequence = SEQ_COPY;
2159      }
2160
2161      break;
2162
2163    case SEQ_UNCOMPRESSED_1:
2164      s->lzma2.uncompressed
2165          += (uint32_t)b->in[b->in_pos++] << 8;
2166      s->lzma2.sequence = SEQ_UNCOMPRESSED_2;
2167      break;
2168
2169    case SEQ_UNCOMPRESSED_2:
2170      s->lzma2.uncompressed
2171          += (uint32_t)b->in[b->in_pos++] + 1;
2172      s->lzma2.sequence = SEQ_COMPRESSED_0;
2173      break;
2174
2175    case SEQ_COMPRESSED_0:
2176      s->lzma2.compressed
2177          = (uint32_t)b->in[b->in_pos++] << 8;
2178      s->lzma2.sequence = SEQ_COMPRESSED_1;
2179      break;
2180
2181    case SEQ_COMPRESSED_1:
2182      s->lzma2.compressed
2183          += (uint32_t)b->in[b->in_pos++] + 1;
2184      s->lzma2.sequence = s->lzma2.next_sequence;
2185      break;
2186
2187    case SEQ_PROPERTIES:
2188      if (!lzma_props(s, b->in[b->in_pos++]))
2189        return XZ_DATA_ERROR;
2190
2191      s->lzma2.sequence = SEQ_LZMA_PREPARE;
2192
2193    case SEQ_LZMA_PREPARE:
2194      if (s->lzma2.compressed < RC_INIT_BYTES)
2195        return XZ_DATA_ERROR;
2196
2197      if (!rc_read_init(&s->rc, b))
2198        return XZ_OK;
2199
2200      s->lzma2.compressed -= RC_INIT_BYTES;
2201      s->lzma2.sequence = SEQ_LZMA_RUN;
2202
2203    case SEQ_LZMA_RUN:
2204      /*
2205       * Set dictionary limit to indicate how much we want
2206       * to be encoded at maximum. Decode new data into the
2207       * dictionary. Flush the new data from dictionary to
2208       * b->out. Check if we finished decoding this chunk.
2209       * In case the dictionary got full but we didn't fill
2210       * the output buffer yet, we may run this loop
2211       * multiple times without changing s->lzma2.sequence.
2212       */
2213      dict_limit(&s->dict, min_t(size_t,
2214          b->out_size - b->out_pos,
2215          s->lzma2.uncompressed));
2216      if (!lzma2_lzma(s, b))
2217        return XZ_DATA_ERROR;
2218
2219      s->lzma2.uncompressed -= dict_flush(&s->dict, b);
2220
2221      if (s->lzma2.uncompressed == 0) {
2222        if (s->lzma2.compressed > 0 || s->lzma.len > 0
2223            || !rc_is_finished(&s->rc))
2224          return XZ_DATA_ERROR;
2225
2226        rc_reset(&s->rc);
2227        s->lzma2.sequence = SEQ_CONTROL;
2228
2229      } else if (b->out_pos == b->out_size
2230          || (b->in_pos == b->in_size
2231            && s->temp.size
2232            < s->lzma2.compressed)) {
2233        return XZ_OK;
2234      }
2235
2236      break;
2237
2238    case SEQ_COPY:
2239      dict_uncompressed(&s->dict, b, &s->lzma2.compressed);
2240      if (s->lzma2.compressed > 0)
2241        return XZ_OK;
2242
2243      s->lzma2.sequence = SEQ_CONTROL;
2244      break;
2245    }
2246  }
2247
2248  return XZ_OK;
2249}
2250
2251struct xz_dec_lzma2 *xz_dec_lzma2_create(uint32_t dict_max)
2252{
2253  struct xz_dec_lzma2 *s = malloc(sizeof(*s));
2254  if (s == NULL)
2255    return NULL;
2256
2257  s->dict.size_max = dict_max;
2258  s->dict.buf = NULL;
2259  s->dict.allocated = 0;
2260
2261  return s;
2262}
2263
2264enum xz_ret xz_dec_lzma2_reset(struct xz_dec_lzma2 *s, uint8_t props)
2265{
2266  /* This limits dictionary size to 3 GiB to keep parsing simpler. */
2267  if (props > 39)
2268    return XZ_OPTIONS_ERROR;
2269
2270  s->dict.size = 2 + (props & 1);
2271  s->dict.size <<= (props >> 1) + 11;
2272
2273  if (s->dict.size > s->dict.size_max)
2274    return XZ_MEMLIMIT_ERROR;
2275
2276  s->dict.end = s->dict.size;
2277
2278  if (s->dict.allocated < s->dict.size) {
2279    free(s->dict.buf);
2280    s->dict.buf = malloc(s->dict.size);
2281    if (s->dict.buf == NULL) {
2282      s->dict.allocated = 0;
2283      return XZ_MEM_ERROR;
2284    }
2285  }
2286
2287  s->lzma.len = 0;
2288
2289  s->lzma2.sequence = SEQ_CONTROL;
2290  s->lzma2.need_dict_reset = 1;
2291
2292  s->temp.size = 0;
2293
2294  return XZ_OK;
2295}
2296
2297/*
2298 * .xz Stream decoder
2299 */
2300
2301
2302// BEGIN xz_stream.h
2303/*
2304 * Definitions for handling the .xz file format
2305 */
2306
2307/*
2308 * See the .xz file format specification at
2309 * http://tukaani.org/xz/xz-file-format.txt
2310 * to understand the container format.
2311 */
2312
2313#define STREAM_HEADER_SIZE 12
2314
2315#define HEADER_MAGIC "\3757zXZ"
2316#define HEADER_MAGIC_SIZE 6
2317
2318#define FOOTER_MAGIC "YZ"
2319#define FOOTER_MAGIC_SIZE 2
2320
2321/*
2322 * Variable-length integer can hold a 63-bit unsigned integer or a special
2323 * value indicating that the value is unknown.
2324 *
2325 * Experimental: vli_type can be defined to uint32_t to save a few bytes
2326 * in code size (no effect on speed). Doing so limits the uncompressed and
2327 * compressed size of the file to less than 256 MiB and may also weaken
2328 * error detection slightly.
2329 */
2330typedef uint64_t vli_type;
2331
2332#define VLI_MAX ((vli_type)-1 / 2)
2333#define VLI_UNKNOWN ((vli_type)-1)
2334
2335/* Maximum encoded size of a VLI */
2336#define VLI_BYTES_MAX (sizeof(vli_type) * 8 / 7)
2337
2338/* Integrity Check types */
2339enum xz_check {
2340  XZ_CHECK_NONE = 0,
2341  XZ_CHECK_CRC32 = 1,
2342  XZ_CHECK_CRC64 = 4,
2343  XZ_CHECK_SHA256 = 10
2344};
2345
2346/* Maximum possible Check ID */
2347#define XZ_CHECK_MAX 15
2348// END xz_stream.h
2349
2350#define IS_CRC64(check_type) ((check_type) == XZ_CHECK_CRC64)
2351
2352/* Hash used to validate the Index field */
2353struct xz_dec_hash {
2354  vli_type unpadded;
2355  vli_type uncompressed;
2356  uint32_t crc32;
2357};
2358
2359struct xz_dec {
2360  /* Position in dec_main() */
2361  enum {
2362    SEQ_STREAM_HEADER,
2363    SEQ_BLOCK_START,
2364    SEQ_BLOCK_HEADER,
2365    SEQ_BLOCK_UNCOMPRESS,
2366    SEQ_BLOCK_PADDING,
2367    SEQ_BLOCK_CHECK,
2368    SEQ_INDEX,
2369    SEQ_INDEX_PADDING,
2370    SEQ_INDEX_CRC32,
2371    SEQ_STREAM_FOOTER
2372  } sequence;
2373
2374  /* Position in variable-length integers and Check fields */
2375  uint32_t pos;
2376
2377  /* Variable-length integer decoded by dec_vli() */
2378  vli_type vli;
2379
2380  /* Saved in_pos and out_pos */
2381  size_t in_start;
2382  size_t out_start;
2383
2384  /* CRC32 or CRC64 value in Block or CRC32 value in Index */
2385  uint64_t crc;
2386
2387  /* Type of the integrity check calculated from uncompressed data */
2388  enum xz_check check_type;
2389
2390  /*
2391   * True if the next call to xz_dec_run() is allowed to return
2392   * XZ_BUF_ERROR.
2393   */
2394  int allow_buf_error;
2395
2396  /* Information stored in Block Header */
2397  struct {
2398    /*
2399     * Value stored in the Compressed Size field, or
2400     * VLI_UNKNOWN if Compressed Size is not present.
2401     */
2402    vli_type compressed;
2403
2404    /*
2405     * Value stored in the Uncompressed Size field, or
2406     * VLI_UNKNOWN if Uncompressed Size is not present.
2407     */
2408    vli_type uncompressed;
2409
2410    /* Size of the Block Header field */
2411    uint32_t size;
2412  } block_header;
2413
2414  /* Information collected when decoding Blocks */
2415  struct {
2416    /* Observed compressed size of the current Block */
2417    vli_type compressed;
2418
2419    /* Observed uncompressed size of the current Block */
2420    vli_type uncompressed;
2421
2422    /* Number of Blocks decoded so far */
2423    vli_type count;
2424
2425    /*
2426     * Hash calculated from the Block sizes. This is used to
2427     * validate the Index field.
2428     */
2429    struct xz_dec_hash hash;
2430  } block;
2431
2432  /* Variables needed when verifying the Index field */
2433  struct {
2434    /* Position in dec_index() */
2435    enum {
2436      SEQ_INDEX_COUNT,
2437      SEQ_INDEX_UNPADDED,
2438      SEQ_INDEX_UNCOMPRESSED
2439    } sequence;
2440
2441    /* Size of the Index in bytes */
2442    vli_type size;
2443
2444    /* Number of Records (matches block.count in valid files) */
2445    vli_type count;
2446
2447    /*
2448     * Hash calculated from the Records (matches block.hash in
2449     * valid files).
2450     */
2451    struct xz_dec_hash hash;
2452  } index;
2453
2454  /*
2455   * Temporary buffer needed to hold Stream Header, Block Header,
2456   * and Stream Footer. The Block Header is the biggest (1 KiB)
2457   * so we reserve space according to that. buf[] has to be aligned
2458   * to a multiple of four bytes; the size_t variables before it
2459   * should guarantee this.
2460   */
2461  struct {
2462    size_t pos;
2463    size_t size;
2464    uint8_t buf[1024];
2465  } temp;
2466
2467  struct xz_dec_lzma2 *lzma2;
2468
2469#ifdef XZ_DEC_BCJ
2470  struct xz_dec_bcj *bcj;
2471  int bcj_active;
2472#endif
2473};
2474
2475/* Sizes of the Check field with different Check IDs */
2476static const uint8_t check_sizes[16] = {
2477  0,
2478  4, 4, 4,
2479  8, 8, 8,
2480  16, 16, 16,
2481  32, 32, 32,
2482  64, 64, 64
2483};
2484
2485/*
2486 * Fill s->temp by copying data starting from b->in[b->in_pos]. Caller
2487 * must have set s->temp.pos to indicate how much data we are supposed
2488 * to copy into s->temp.buf. Return true once s->temp.pos has reached
2489 * s->temp.size.
2490 */
2491static int fill_temp(struct xz_dec *s, struct xz_buf *b)
2492{
2493  size_t copy_size = min_t(size_t,
2494      b->in_size - b->in_pos, s->temp.size - s->temp.pos);
2495
2496  memcpy(s->temp.buf + s->temp.pos, b->in + b->in_pos, copy_size);
2497  b->in_pos += copy_size;
2498  s->temp.pos += copy_size;
2499
2500  if (s->temp.pos == s->temp.size) {
2501    s->temp.pos = 0;
2502    return 1;
2503  }
2504
2505  return 0;
2506}
2507
2508/* Decode a variable-length integer (little-endian base-128 encoding) */
2509static enum xz_ret dec_vli(struct xz_dec *s, const uint8_t *in,
2510         size_t *in_pos, size_t in_size)
2511{
2512  uint8_t byte;
2513
2514  if (s->pos == 0)
2515    s->vli = 0;
2516
2517  while (*in_pos < in_size) {
2518    byte = in[*in_pos];
2519    ++*in_pos;
2520
2521    s->vli |= (vli_type)(byte & 0x7F) << s->pos;
2522
2523    if ((byte & 0x80) == 0) {
2524      /* Don't allow non-minimal encodings. */
2525      if (byte == 0 && s->pos != 0)
2526        return XZ_DATA_ERROR;
2527
2528      s->pos = 0;
2529      return XZ_STREAM_END;
2530    }
2531
2532    s->pos += 7;
2533    if (s->pos == 7 * VLI_BYTES_MAX)
2534      return XZ_DATA_ERROR;
2535  }
2536
2537  return XZ_OK;
2538}
2539
2540/*
2541 * Decode the Compressed Data field from a Block. Update and validate
2542 * the observed compressed and uncompressed sizes of the Block so that
2543 * they don't exceed the values possibly stored in the Block Header
2544 * (validation assumes that no integer overflow occurs, since vli_type
2545 * is normally uint64_t). Update the CRC32 or CRC64 value if presence of
2546 * the CRC32 or CRC64 field was indicated in Stream Header.
2547 *
2548 * Once the decoding is finished, validate that the observed sizes match
2549 * the sizes possibly stored in the Block Header. Update the hash and
2550 * Block count, which are later used to validate the Index field.
2551 */
2552static enum xz_ret dec_block(struct xz_dec *s, struct xz_buf *b)
2553{
2554  enum xz_ret ret;
2555
2556  s->in_start = b->in_pos;
2557  s->out_start = b->out_pos;
2558
2559#ifdef XZ_DEC_BCJ
2560  if (s->bcj_active)
2561    ret = xz_dec_bcj_run(s->bcj, s->lzma2, b);
2562  else
2563#endif
2564    ret = xz_dec_lzma2_run(s->lzma2, b);
2565
2566  s->block.compressed += b->in_pos - s->in_start;
2567  s->block.uncompressed += b->out_pos - s->out_start;
2568
2569  /*
2570   * There is no need to separately check for VLI_UNKNOWN, since
2571   * the observed sizes are always smaller than VLI_UNKNOWN.
2572   */
2573  if (s->block.compressed > s->block_header.compressed
2574      || s->block.uncompressed
2575        > s->block_header.uncompressed)
2576    return XZ_DATA_ERROR;
2577
2578  if (s->check_type == XZ_CHECK_CRC32)
2579    s->crc = xz_crc32(b->out + s->out_start,
2580        b->out_pos - s->out_start, s->crc);
2581  else if (s->check_type == XZ_CHECK_CRC64)
2582    s->crc = ~(s->crc);
2583    size_t size = b->out_pos - s->out_start;
2584    uint8_t *buf = b->out + s->out_start;
2585    while (size) {
2586      s->crc = xz_crc64_table[*buf++ ^ (s->crc & 0xFF)] ^ (s->crc >> 8);
2587      --size;
2588    }
2589    s->crc=~(s->crc);
2590
2591  if (ret == XZ_STREAM_END) {
2592    if (s->block_header.compressed != VLI_UNKNOWN
2593        && s->block_header.compressed
2594          != s->block.compressed)
2595      return XZ_DATA_ERROR;
2596
2597    if (s->block_header.uncompressed != VLI_UNKNOWN
2598        && s->block_header.uncompressed
2599          != s->block.uncompressed)
2600      return XZ_DATA_ERROR;
2601
2602    s->block.hash.unpadded += s->block_header.size
2603        + s->block.compressed;
2604
2605    s->block.hash.unpadded += check_sizes[s->check_type];
2606
2607    s->block.hash.uncompressed += s->block.uncompressed;
2608    s->block.hash.crc32 = xz_crc32(
2609        (const uint8_t *)&s->block.hash,
2610        sizeof(s->block.hash), s->block.hash.crc32);
2611
2612    ++s->block.count;
2613  }
2614
2615  return ret;
2616}
2617
2618/* Update the Index size and the CRC32 value. */
2619static void index_update(struct xz_dec *s, const struct xz_buf *b)
2620{
2621  size_t in_used = b->in_pos - s->in_start;
2622  s->index.size += in_used;
2623  s->crc = xz_crc32(b->in + s->in_start, in_used, s->crc);
2624}
2625
2626/*
2627 * Decode the Number of Records, Unpadded Size, and Uncompressed Size
2628 * fields from the Index field. That is, Index Padding and CRC32 are not
2629 * decoded by this function.
2630 *
2631 * This can return XZ_OK (more input needed), XZ_STREAM_END (everything
2632 * successfully decoded), or XZ_DATA_ERROR (input is corrupt).
2633 */
2634static enum xz_ret dec_index(struct xz_dec *s, struct xz_buf *b)
2635{
2636  enum xz_ret ret;
2637
2638  do {
2639    ret = dec_vli(s, b->in, &b->in_pos, b->in_size);
2640    if (ret != XZ_STREAM_END) {
2641      index_update(s, b);
2642      return ret;
2643    }
2644
2645    switch (s->index.sequence) {
2646    case SEQ_INDEX_COUNT:
2647      s->index.count = s->vli;
2648
2649      /*
2650       * Validate that the Number of Records field
2651       * indicates the same number of Records as
2652       * there were Blocks in the Stream.
2653       */
2654      if (s->index.count != s->block.count)
2655        return XZ_DATA_ERROR;
2656
2657      s->index.sequence = SEQ_INDEX_UNPADDED;
2658      break;
2659
2660    case SEQ_INDEX_UNPADDED:
2661      s->index.hash.unpadded += s->vli;
2662      s->index.sequence = SEQ_INDEX_UNCOMPRESSED;
2663      break;
2664
2665    case SEQ_INDEX_UNCOMPRESSED:
2666      s->index.hash.uncompressed += s->vli;
2667      s->index.hash.crc32 = xz_crc32(
2668          (const uint8_t *)&s->index.hash,
2669          sizeof(s->index.hash),
2670          s->index.hash.crc32);
2671      --s->index.count;
2672      s->index.sequence = SEQ_INDEX_UNPADDED;
2673      break;
2674    }
2675  } while (s->index.count > 0);
2676
2677  return XZ_STREAM_END;
2678}
2679
2680/*
2681 * Validate that the next four or eight input bytes match the value
2682 * of s->crc. s->pos must be zero when starting to validate the first byte.
2683 * The "bits" argument allows using the same code for both CRC32 and CRC64.
2684 */
2685static enum xz_ret crc_validate(struct xz_dec *s, struct xz_buf *b,
2686        uint32_t bits)
2687{
2688  do {
2689    if (b->in_pos == b->in_size)
2690      return XZ_OK;
2691
2692    if (((s->crc >> s->pos) & 0xFF) != b->in[b->in_pos++])
2693      return XZ_DATA_ERROR;
2694
2695    s->pos += 8;
2696
2697  } while (s->pos < bits);
2698
2699  s->crc = 0;
2700  s->pos = 0;
2701
2702  return XZ_STREAM_END;
2703}
2704
2705/*
2706 * Skip over the Check field when the Check ID is not supported.
2707 * Returns true once the whole Check field has been skipped over.
2708 */
2709static int check_skip(struct xz_dec *s, struct xz_buf *b)
2710{
2711  while (s->pos < check_sizes[s->check_type]) {
2712    if (b->in_pos == b->in_size) return 0;
2713
2714    ++b->in_pos;
2715    ++s->pos;
2716  }
2717
2718  s->pos = 0;
2719
2720  return 1;
2721}
2722
2723/* Decode the Stream Header field (the first 12 bytes of the .xz Stream). */
2724static enum xz_ret dec_stream_header(struct xz_dec *s)
2725{
2726  if (!memeq(s->temp.buf, HEADER_MAGIC, HEADER_MAGIC_SIZE))
2727    return XZ_FORMAT_ERROR;
2728
2729  if (xz_crc32(s->temp.buf + HEADER_MAGIC_SIZE, 2, 0)
2730      != get_le32(s->temp.buf + HEADER_MAGIC_SIZE + 2))
2731    return XZ_DATA_ERROR;
2732
2733  if (s->temp.buf[HEADER_MAGIC_SIZE] != 0)
2734    return XZ_OPTIONS_ERROR;
2735
2736  /*
2737   * Of integrity checks, we support none (Check ID = 0),
2738   * CRC32 (Check ID = 1), and optionally CRC64 (Check ID = 4).
2739   * However, if XZ_DEC_ANY_CHECK is defined, we will accept other
2740   * check types too, but then the check won't be verified and
2741   * a warning (XZ_UNSUPPORTED_CHECK) will be given.
2742   */
2743  s->check_type = s->temp.buf[HEADER_MAGIC_SIZE + 1];
2744
2745  if (s->check_type > XZ_CHECK_MAX)
2746    return XZ_OPTIONS_ERROR;
2747
2748  if (s->check_type > XZ_CHECK_CRC32 && !IS_CRC64(s->check_type))
2749    return XZ_UNSUPPORTED_CHECK;
2750
2751  return XZ_OK;
2752}
2753
2754/* Decode the Stream Footer field (the last 12 bytes of the .xz Stream) */
2755static enum xz_ret dec_stream_footer(struct xz_dec *s)
2756{
2757  if (!memeq(s->temp.buf + 10, FOOTER_MAGIC, FOOTER_MAGIC_SIZE))
2758    return XZ_DATA_ERROR;
2759
2760  if (xz_crc32(s->temp.buf + 4, 6, 0) != get_le32(s->temp.buf))
2761    return XZ_DATA_ERROR;
2762
2763  /*
2764   * Validate Backward Size. Note that we never added the size of the
2765   * Index CRC32 field to s->index.size, thus we use s->index.size / 4
2766   * instead of s->index.size / 4 - 1.
2767   */
2768  if ((s->index.size >> 2) != get_le32(s->temp.buf + 4))
2769    return XZ_DATA_ERROR;
2770
2771  if (s->temp.buf[8] != 0 || s->temp.buf[9] != s->check_type)
2772    return XZ_DATA_ERROR;
2773
2774  /*
2775   * Use XZ_STREAM_END instead of XZ_OK to be more convenient
2776   * for the caller.
2777   */
2778  return XZ_STREAM_END;
2779}
2780
2781/* Decode the Block Header and initialize the filter chain. */
2782static enum xz_ret dec_block_header(struct xz_dec *s)
2783{
2784  enum xz_ret ret;
2785
2786  /*
2787   * Validate the CRC32. We know that the temp buffer is at least
2788   * eight bytes so this is safe.
2789   */
2790  s->temp.size -= 4;
2791  if (xz_crc32(s->temp.buf, s->temp.size, 0)
2792      != get_le32(s->temp.buf + s->temp.size))
2793    return XZ_DATA_ERROR;
2794
2795  s->temp.pos = 2;
2796
2797  /*
2798   * Catch unsupported Block Flags. We support only one or two filters
2799   * in the chain, so we catch that with the same test.
2800   */
2801#ifdef XZ_DEC_BCJ
2802  if (s->temp.buf[1] & 0x3E)
2803#else
2804  if (s->temp.buf[1] & 0x3F)
2805#endif
2806    return XZ_OPTIONS_ERROR;
2807
2808  /* Compressed Size */
2809  if (s->temp.buf[1] & 0x40) {
2810    if (dec_vli(s, s->temp.buf, &s->temp.pos, s->temp.size)
2811          != XZ_STREAM_END)
2812      return XZ_DATA_ERROR;
2813
2814    s->block_header.compressed = s->vli;
2815  } else {
2816    s->block_header.compressed = VLI_UNKNOWN;
2817  }
2818
2819  /* Uncompressed Size */
2820  if (s->temp.buf[1] & 0x80) {
2821    if (dec_vli(s, s->temp.buf, &s->temp.pos, s->temp.size)
2822        != XZ_STREAM_END)
2823      return XZ_DATA_ERROR;
2824
2825    s->block_header.uncompressed = s->vli;
2826  } else {
2827    s->block_header.uncompressed = VLI_UNKNOWN;
2828  }
2829
2830#ifdef XZ_DEC_BCJ
2831  /* If there are two filters, the first one must be a BCJ filter. */
2832  s->bcj_active = s->temp.buf[1] & 0x01;
2833  if (s->bcj_active) {
2834    if (s->temp.size - s->temp.pos < 2)
2835      return XZ_OPTIONS_ERROR;
2836
2837    ret = xz_dec_bcj_reset(s->bcj, s->temp.buf[s->temp.pos++]);
2838    if (ret != XZ_OK)
2839      return ret;
2840
2841    /*
2842     * We don't support custom start offset,
2843     * so Size of Properties must be zero.
2844     */
2845    if (s->temp.buf[s->temp.pos++] != 0x00)
2846      return XZ_OPTIONS_ERROR;
2847  }
2848#endif
2849
2850  /* Valid Filter Flags always take at least two bytes. */
2851  if (s->temp.size - s->temp.pos < 2)
2852    return XZ_DATA_ERROR;
2853
2854  /* Filter ID = LZMA2 */
2855  if (s->temp.buf[s->temp.pos++] != 0x21)
2856    return XZ_OPTIONS_ERROR;
2857
2858  /* Size of Properties = 1-byte Filter Properties */
2859  if (s->temp.buf[s->temp.pos++] != 0x01)
2860    return XZ_OPTIONS_ERROR;
2861
2862  /* Filter Properties contains LZMA2 dictionary size. */
2863  if (s->temp.size - s->temp.pos < 1)
2864    return XZ_DATA_ERROR;
2865
2866  ret = xz_dec_lzma2_reset(s->lzma2, s->temp.buf[s->temp.pos++]);
2867  if (ret != XZ_OK)
2868    return ret;
2869
2870  /* The rest must be Header Padding. */
2871  while (s->temp.pos < s->temp.size)
2872    if (s->temp.buf[s->temp.pos++] != 0x00)
2873      return XZ_OPTIONS_ERROR;
2874
2875  s->temp.pos = 0;
2876  s->block.compressed = 0;
2877  s->block.uncompressed = 0;
2878
2879  return XZ_OK;
2880}
2881
2882static enum xz_ret dec_main(struct xz_dec *s, struct xz_buf *b)
2883{
2884  enum xz_ret ret;
2885
2886  /*
2887   * Store the start position for the case when we are in the middle
2888   * of the Index field.
2889   */
2890  s->in_start = b->in_pos;
2891
2892  for (;;) {
2893    switch (s->sequence) {
2894    case SEQ_STREAM_HEADER:
2895      /*
2896       * Stream Header is copied to s->temp, and then
2897       * decoded from there. This way if the caller
2898       * gives us only little input at a time, we can
2899       * still keep the Stream Header decoding code
2900       * simple. Similar approach is used in many places
2901       * in this file.
2902       */
2903      if (!fill_temp(s, b))
2904        return XZ_OK;
2905
2906      /*
2907       * If dec_stream_header() returns
2908       * XZ_UNSUPPORTED_CHECK, it is still possible
2909       * to continue decoding if working in multi-call
2910       * mode. Thus, update s->sequence before calling
2911       * dec_stream_header().
2912       */
2913      s->sequence = SEQ_BLOCK_START;
2914
2915      ret = dec_stream_header(s);
2916      if (ret != XZ_OK)
2917        return ret;
2918
2919    case SEQ_BLOCK_START:
2920      /* We need one byte of input to continue. */
2921      if (b->in_pos == b->in_size)
2922        return XZ_OK;
2923
2924      /* See if this is the beginning of the Index field. */
2925      if (b->in[b->in_pos] == 0) {
2926        s->in_start = b->in_pos++;
2927        s->sequence = SEQ_INDEX;
2928        break;
2929      }
2930
2931      /*
2932       * Calculate the size of the Block Header and
2933       * prepare to decode it.
2934       */
2935      s->block_header.size
2936        = ((uint32_t)b->in[b->in_pos] + 1) * 4;
2937
2938      s->temp.size = s->block_header.size;
2939      s->temp.pos = 0;
2940      s->sequence = SEQ_BLOCK_HEADER;
2941
2942    case SEQ_BLOCK_HEADER:
2943      if (!fill_temp(s, b))
2944        return XZ_OK;
2945
2946      ret = dec_block_header(s);
2947      if (ret != XZ_OK)
2948        return ret;
2949
2950      s->sequence = SEQ_BLOCK_UNCOMPRESS;
2951
2952    case SEQ_BLOCK_UNCOMPRESS:
2953      ret = dec_block(s, b);
2954      if (ret != XZ_STREAM_END)
2955        return ret;
2956
2957      s->sequence = SEQ_BLOCK_PADDING;
2958
2959    case SEQ_BLOCK_PADDING:
2960      /*
2961       * Size of Compressed Data + Block Padding
2962       * must be a multiple of four. We don't need
2963       * s->block.compressed for anything else
2964       * anymore, so we use it here to test the size
2965       * of the Block Padding field.
2966       */
2967      while (s->block.compressed & 3) {
2968        if (b->in_pos == b->in_size)
2969          return XZ_OK;
2970
2971        if (b->in[b->in_pos++] != 0)
2972          return XZ_DATA_ERROR;
2973
2974        ++s->block.compressed;
2975      }
2976
2977      s->sequence = SEQ_BLOCK_CHECK;
2978
2979    case SEQ_BLOCK_CHECK:
2980      if (s->check_type == XZ_CHECK_CRC32) {
2981        ret = crc_validate(s, b, 32);
2982        if (ret != XZ_STREAM_END)
2983          return ret;
2984      }
2985      else if (IS_CRC64(s->check_type)) {
2986        ret = crc_validate(s, b, 64);
2987        if (ret != XZ_STREAM_END)
2988          return ret;
2989      }
2990      else if (!check_skip(s, b)) {
2991        return XZ_OK;
2992      }
2993
2994      s->sequence = SEQ_BLOCK_START;
2995      break;
2996
2997    case SEQ_INDEX:
2998      ret = dec_index(s, b);
2999      if (ret != XZ_STREAM_END)
3000        return ret;
3001
3002      s->sequence = SEQ_INDEX_PADDING;
3003
3004    case SEQ_INDEX_PADDING:
3005      while ((s->index.size + (b->in_pos - s->in_start))
3006          & 3) {
3007        if (b->in_pos == b->in_size) {
3008          index_update(s, b);
3009          return XZ_OK;
3010        }
3011
3012        if (b->in[b->in_pos++] != 0)
3013          return XZ_DATA_ERROR;
3014      }
3015
3016      /* Finish the CRC32 value and Index size. */
3017      index_update(s, b);
3018
3019      /* Compare the hashes to validate the Index field. */
3020      if (!memeq(&s->block.hash, &s->index.hash,
3021          sizeof(s->block.hash)))
3022        return XZ_DATA_ERROR;
3023
3024      s->sequence = SEQ_INDEX_CRC32;
3025
3026    case SEQ_INDEX_CRC32:
3027      ret = crc_validate(s, b, 32);
3028      if (ret != XZ_STREAM_END)
3029        return ret;
3030
3031      s->temp.size = STREAM_HEADER_SIZE;
3032      s->sequence = SEQ_STREAM_FOOTER;
3033
3034    case SEQ_STREAM_FOOTER:
3035      if (!fill_temp(s, b))
3036        return XZ_OK;
3037
3038      return dec_stream_footer(s);
3039    }
3040  }
3041
3042  /* Never reached */
3043}
3044
3045/*
3046 * xz_dec_run() is a wrapper for dec_main() to handle some special cases in
3047 * multi-call and single-call decoding.
3048 *
3049 * In multi-call mode, we must return XZ_BUF_ERROR when it seems clear that we
3050 * are not going to make any progress anymore. This is to prevent the caller
3051 * from calling us infinitely when the input file is truncated or otherwise
3052 * corrupt. Since zlib-style API allows that the caller fills the input buffer
3053 * only when the decoder doesn't produce any new output, we have to be careful
3054 * to avoid returning XZ_BUF_ERROR too easily: XZ_BUF_ERROR is returned only
3055 * after the second consecutive call to xz_dec_run() that makes no progress.
3056 *
3057 * In single-call mode, if we couldn't decode everything and no error
3058 * occurred, either the input is truncated or the output buffer is too small.
3059 * Since we know that the last input byte never produces any output, we know
3060 * that if all the input was consumed and decoding wasn't finished, the file
3061 * must be corrupt. Otherwise the output buffer has to be too small or the
3062 * file is corrupt in a way that decoding it produces too big output.
3063 *
3064 * If single-call decoding fails, we reset b->in_pos and b->out_pos back to
3065 * their original values. This is because with some filter chains there won't
3066 * be any valid uncompressed data in the output buffer unless the decoding
3067 * actually succeeds (that's the price to pay of using the output buffer as
3068 * the workspace).
3069 */
3070enum xz_ret xz_dec_run(struct xz_dec *s, struct xz_buf *b)
3071{
3072  size_t in_start;
3073  size_t out_start;
3074  enum xz_ret ret;
3075
3076  in_start = b->in_pos;
3077  out_start = b->out_pos;
3078  ret = dec_main(s, b);
3079
3080  if (ret == XZ_OK && in_start == b->in_pos && out_start == b->out_pos) {
3081    if (s->allow_buf_error)
3082      ret = XZ_BUF_ERROR;
3083
3084    s->allow_buf_error = 1;
3085  } else {
3086    s->allow_buf_error = 0;
3087  }
3088
3089  return ret;
3090}
3091
3092struct xz_dec *xz_dec_init(uint32_t dict_max)
3093{
3094  struct xz_dec *s = malloc(sizeof(*s));
3095  if (!s)
3096    return NULL;
3097
3098#ifdef XZ_DEC_BCJ
3099  s->bcj = malloc(sizeof(*s->bcj));
3100  if (!s->bcj)
3101    goto error_bcj;
3102#endif
3103
3104  s->lzma2 = xz_dec_lzma2_create(dict_max);
3105  if (s->lzma2 == NULL)
3106    goto error_lzma2;
3107
3108  xz_dec_reset(s);
3109  return s;
3110
3111error_lzma2:
3112#ifdef XZ_DEC_BCJ
3113  free(s->bcj);
3114error_bcj:
3115#endif
3116  free(s);
3117  return NULL;
3118}
3119
3120void xz_dec_reset(struct xz_dec *s)
3121{
3122  s->sequence = SEQ_STREAM_HEADER;
3123  s->allow_buf_error = 0;
3124  s->pos = 0;
3125  s->crc = 0;
3126  memset(&s->block, 0, sizeof(s->block));
3127  memset(&s->index, 0, sizeof(s->index));
3128  s->temp.pos = 0;
3129  s->temp.size = STREAM_HEADER_SIZE;
3130}
3131
3132void xz_dec_end(struct xz_dec *s)
3133{
3134  if (s != NULL) {
3135    free((s->lzma2)->dict.buf);
3136    free(s->lzma2);
3137
3138#ifdef XZ_DEC_BCJ
3139    free(s->bcj);
3140#endif
3141    free(s);
3142  }
3143}
3144