111d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley/* bzcat.c - bzip2 decompression
22896480c4918f2accccb8301bec457a7bff7377eRob Landley *
3b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley * Copyright 2003, 2007 Rob Landley <rob@landley.net>
4b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley *
5b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley * Based on a close reading (but not the actual code) of the original bzip2
6b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley * decompression code by Julian R Seward (jseward@acm.org), which also
7b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley * acknowledges contributions by Mike Burrows, David Wheeler, Peter Fenwick,
8b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley * Alistair Moffat, Radford Neal, Ian H. Witten, Robert Sedgewick, and
9b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley * Jon L. Bentley.
10b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley *
11b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley * No standard.
12b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
132896480c4918f2accccb8301bec457a7bff7377eRob Landley
1455928b1e0a08d84a5cbc50020f0a8c1024f5b6ceRob LandleyUSE_BZCAT(NEWTOY(bzcat, NULL, TOYFLAG_USR|TOYFLAG_BIN))
1511d2ff5ffac9382be0d6971b8ac84df21eca85dfRob LandleyUSE_BUNZIP2(NEWTOY(bunzip2, "cftkv", TOYFLAG_USR|TOYFLAG_BIN))
1611d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley
1711d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landleyconfig BUNZIP2
1811d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley  bool "bunzip2"
1911d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley  default y
2011d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley  help
2111d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley    usage: bunzip2 [-cftkv] [FILE...]
2211d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley
2311d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley    Decompress listed files (file.bz becomes file) deleting archive file(s).
2411d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley    Read from stdin if no files listed.
2511d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley
2611d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley    -c	force output to stdout
27eb7e847adcecf11cd8fd99077ba3a582abe60146Elliott Hughes    -f	force decompression (if FILE doesn't end in .bz, replace original)
2811d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley    -k	keep input files (-c and -t imply this)
29cf2e8d08b3e06a7bdf95c30282a976bbf072a168Rob Landley    -t	test integrity
3011d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley    -v	verbose
3155928b1e0a08d84a5cbc50020f0a8c1024f5b6ceRob Landley
322896480c4918f2accccb8301bec457a7bff7377eRob Landleyconfig BZCAT
337aa651a6a4496d848f86de9b1e6b3a003256a01fRob Landley  bool "bzcat"
347aa651a6a4496d848f86de9b1e6b3a003256a01fRob Landley  default y
357aa651a6a4496d848f86de9b1e6b3a003256a01fRob Landley  help
3611d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley    usage: bzcat [FILE...]
372896480c4918f2accccb8301bec457a7bff7377eRob Landley
387aa651a6a4496d848f86de9b1e6b3a003256a01fRob Landley    Decompress listed files to stdout. Use stdin if no files listed.
392896480c4918f2accccb8301bec457a7bff7377eRob Landley*/
40636b5cf46650d3aa3bbc9b29feb0e07589085e3fRob Landley
4111d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley#define FOR_bunzip2
42636b5cf46650d3aa3bbc9b29feb0e07589085e3fRob Landley#include "toys.h"
43636b5cf46650d3aa3bbc9b29feb0e07589085e3fRob Landley
44b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley#define THREADS 1
45b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
46b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley// Constants for huffman coding
47b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley#define MAX_GROUPS               6
48b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley#define GROUP_SIZE               50     /* 64 would have been more efficient */
49b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley#define MAX_HUFCODE_BITS         20     /* Longest huffman code allowed */
50b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley#define MAX_SYMBOLS              258    /* 256 literals + RUNA + RUNB */
51b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley#define SYMBOL_RUNA              0
52b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley#define SYMBOL_RUNB              1
53b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
54b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley// Other housekeeping constants
55b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley#define IOBUF_SIZE               4096
56b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
57b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley// Status return values
58b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley#define RETVAL_LAST_BLOCK        (-100)
59b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley#define RETVAL_NOT_BZIP_DATA     (-1)
60b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley#define RETVAL_DATA_ERROR        (-2)
61b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley#define RETVAL_OBSOLETE_INPUT    (-3)
62b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
63b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley// This is what we know about each huffman coding group
64b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landleystruct group_data {
65b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  int limit[MAX_HUFCODE_BITS+1], base[MAX_HUFCODE_BITS], permute[MAX_SYMBOLS];
66b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  char minLen, maxLen;
67b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley};
68b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
69b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley// Data for burrows wheeler transform
70b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
71b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landleystruct bwdata {
72b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  unsigned int origPtr;
73b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  int byteCount[256];
74b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  // State saved when interrupting output
75b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  int writePos, writeRun, writeCount, writeCurrent;
76b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  unsigned int dataCRC, headerCRC;
77b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  unsigned int *dbuf;
78b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley};
79b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
80b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley// Structure holding all the housekeeping data, including IO buffers and
81b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley// memory that persists between calls to bunzip
82b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landleystruct bunzip_data {
83b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  // Input stream, input buffer, input bit buffer
84b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  int in_fd, inbufCount, inbufPos;
85b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  char *inbuf;
86b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  unsigned int inbufBitCount, inbufBits;
87b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
88b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  // Output buffer
89b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  char outbuf[IOBUF_SIZE];
90b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  int outbufPos;
91b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
92b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  unsigned int totalCRC;
93b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
94b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  // First pass decompression data (Huffman and MTF decoding)
95b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  char selectors[32768];                  // nSelectors=15 bits
96b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  struct group_data groups[MAX_GROUPS];   // huffman coding tables
97b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  int symTotal, groupCount, nSelectors;
98b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  unsigned char symToByte[256], mtfSymbol[256];
99b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
100b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  // The CRC values stored in the block header and calculated from the data
101b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  unsigned int crc32Table[256];
102b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
103b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  // Second pass decompression data (burrows-wheeler transform)
104b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  unsigned int dbufSize;
105b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  struct bwdata bwdata[THREADS];
106b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley};
107b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
108b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley// Return the next nnn bits of input.  All reads from the compressed input
109b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley// are done through this function.  All reads are big endian.
110b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landleystatic unsigned int get_bits(struct bunzip_data *bd, char bits_wanted)
111b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley{
112b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  unsigned int bits = 0;
113b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
114b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  // If we need to get more data from the byte buffer, do so.  (Loop getting
115b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  // one byte at a time to enforce endianness and avoid unaligned access.)
116b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  while (bd->inbufBitCount < bits_wanted) {
117b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
118b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    // If we need to read more data from file into byte buffer, do so
119b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    if (bd->inbufPos == bd->inbufCount) {
120b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      if (0 >= (bd->inbufCount = read(bd->in_fd, bd->inbuf, IOBUF_SIZE)))
121b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley        error_exit("input EOF");
122b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      bd->inbufPos = 0;
123b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    }
124b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
125b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    // Avoid 32-bit overflow (dump bit buffer to top of output)
126b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    if (bd->inbufBitCount>=24) {
127b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      bits = bd->inbufBits&((1<<bd->inbufBitCount)-1);
128b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      bits_wanted -= bd->inbufBitCount;
129b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      bits <<= bits_wanted;
130b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      bd->inbufBitCount = 0;
131b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    }
132b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
133b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    // Grab next 8 bits of input from buffer.
134b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    bd->inbufBits = (bd->inbufBits<<8) | bd->inbuf[bd->inbufPos++];
135b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    bd->inbufBitCount += 8;
136b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  }
137b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
138b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  // Calculate result
139b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  bd->inbufBitCount -= bits_wanted;
140b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  bits |= (bd->inbufBits>>bd->inbufBitCount) & ((1<<bits_wanted)-1);
141b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
142b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  return bits;
143b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley}
144b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
145b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley/* Read block header at start of a new compressed data block.  Consists of:
146b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley *
147b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley * 48 bits : Block signature, either pi (data block) or e (EOF block).
148b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley * 32 bits : bw->headerCRC
149b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley * 1  bit  : obsolete feature flag.
150b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley * 24 bits : origPtr (Burrows-wheeler unwind index, only 20 bits ever used)
151b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley * 16 bits : Mapping table index.
152b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley *[16 bits]: symToByte[symTotal] (Mapping table.  For each bit set in mapping
153b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley *           table index above, read another 16 bits of mapping table data.
154b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley *           If correspondig bit is unset, all bits in that mapping table
155b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley *           section are 0.)
156b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley *  3 bits : groupCount (how many huffman tables used to encode, anywhere
157b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley *           from 2 to MAX_GROUPS)
158b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley * variable: hufGroup[groupCount] (MTF encoded huffman table data.)
159b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley */
160b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
161b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landleystatic int read_block_header(struct bunzip_data *bd, struct bwdata *bw)
162b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley{
163b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  struct group_data *hufGroup;
164b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  int hh, ii, jj, kk, symCount, *base, *limit;
165b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  unsigned char uc;
166b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
167b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  // Read in header signature and CRC (which is stored big endian)
168b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  ii = get_bits(bd, 24);
169b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  jj = get_bits(bd, 24);
170b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  bw->headerCRC = get_bits(bd,32);
171b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
172b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  // Is this the EOF block with CRC for whole file?  (Constant is "e")
173b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  if (ii==0x177245 && jj==0x385090) return RETVAL_LAST_BLOCK;
174b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
175b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  // Is this a valid data block?  (Constant is "pi".)
176b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  if (ii!=0x314159 || jj!=0x265359) return RETVAL_NOT_BZIP_DATA;
177b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
178b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  // We can add support for blockRandomised if anybody complains.
179b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  if (get_bits(bd,1)) return RETVAL_OBSOLETE_INPUT;
180b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  if ((bw->origPtr = get_bits(bd,24)) > bd->dbufSize) return RETVAL_DATA_ERROR;
181b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
182b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  // mapping table: if some byte values are never used (encoding things
183b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  // like ascii text), the compression code removes the gaps to have fewer
184b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  // symbols to deal with, and writes a sparse bitfield indicating which
185b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  // values were present.  We make a translation table to convert the symbols
186b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  // back to the corresponding bytes.
187b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  hh = get_bits(bd, 16);
188b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  bd->symTotal = 0;
189b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  for (ii=0; ii<16; ii++) {
190b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    if (hh & (1 << (15 - ii))) {
191b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      kk = get_bits(bd, 16);
192b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      for (jj=0; jj<16; jj++)
193b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley        if (kk & (1 << (15 - jj)))
194b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley          bd->symToByte[bd->symTotal++] = (16 * ii) + jj;
195b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    }
196b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  }
197b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
198b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  // How many different huffman coding groups does this block use?
199b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  bd->groupCount = get_bits(bd,3);
200b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  if (bd->groupCount<2 || bd->groupCount>MAX_GROUPS) return RETVAL_DATA_ERROR;
201b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
202b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  // nSelectors: Every GROUP_SIZE many symbols we switch huffman coding
203b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  // tables.  Each group has a selector, which is an index into the huffman
204b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  // coding table arrays.
205b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  //
206b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  // Read in the group selector array, which is stored as MTF encoded
207b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  // bit runs.  (MTF = Move To Front.  Every time a symbol occurs it's moved
208b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  // to the front of the table, so it has a shorter encoding next time.)
209b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  if (!(bd->nSelectors = get_bits(bd, 15))) return RETVAL_DATA_ERROR;
210b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  for (ii=0; ii<bd->groupCount; ii++) bd->mtfSymbol[ii] = ii;
211b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  for (ii=0; ii<bd->nSelectors; ii++) {
212b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
213b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    // Get next value
214b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    for(jj=0;get_bits(bd,1);jj++)
215b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      if (jj>=bd->groupCount) return RETVAL_DATA_ERROR;
216b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
217b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    // Decode MTF to get the next selector, and move it to the front.
218b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    uc = bd->mtfSymbol[jj];
219b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    memmove(bd->mtfSymbol+1, bd->mtfSymbol, jj);
220b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    bd->mtfSymbol[0] = bd->selectors[ii] = uc;
221b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  }
222b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
223b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  // Read the huffman coding tables for each group, which code for symTotal
224b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  // literal symbols, plus two run symbols (RUNA, RUNB)
225b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  symCount = bd->symTotal+2;
226b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  for (jj=0; jj<bd->groupCount; jj++) {
227b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    unsigned char length[MAX_SYMBOLS];
228b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    unsigned temp[MAX_HUFCODE_BITS+1];
229b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    int minLen, maxLen, pp;
230b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
231b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    // Read lengths
232b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    hh = get_bits(bd, 5);
233b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    for (ii = 0; ii < symCount; ii++) {
234b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      for(;;) {
235b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley        // !hh || hh > MAX_HUFCODE_BITS in one test.
236b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley        if (MAX_HUFCODE_BITS-1 < (unsigned)hh-1) return RETVAL_DATA_ERROR;
237b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley        // Grab 2 bits instead of 1 (slightly smaller/faster).  Stop if
238b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley        // first bit is 0, otherwise second bit says whether to
239b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley        // increment or decrement.
240b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley        kk = get_bits(bd, 2);
241b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley        if (kk & 2) hh += 1 - ((kk&1)<<1);
242b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley        else {
243b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley          bd->inbufBitCount++;
244b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley          break;
245b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley        }
246b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      }
247b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      length[ii] = hh;
248b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    }
249b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
250b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    // Find largest and smallest lengths in this group
251b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    minLen = maxLen = length[0];
252b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    for (ii = 1; ii < symCount; ii++) {
253b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      if(length[ii] > maxLen) maxLen = length[ii];
254b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      else if(length[ii] < minLen) minLen = length[ii];
255b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    }
256b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
257b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    /* Calculate permute[], base[], and limit[] tables from length[].
258b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley     *
259b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley     * permute[] is the lookup table for converting huffman coded symbols
260b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley     * into decoded symbols.  It contains symbol values sorted by length.
261b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley     *
262b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley     * base[] is the amount to subtract from the value of a huffman symbol
263b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley     * of a given length when using permute[].
264b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley     *
265b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley     * limit[] indicates the largest numerical value a symbol with a given
266b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley     * number of bits can have.  It lets us know when to stop reading.
267b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley     *
268b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley     * To use these, keep reading bits until value <= limit[bitcount] or
269b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley     * you've read over 20 bits (error).  Then the decoded symbol
270b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley     * equals permute[hufcode_value - base[hufcode_bitcount]].
271b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley     */
272b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    hufGroup = bd->groups+jj;
273b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    hufGroup->minLen = minLen;
274b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    hufGroup->maxLen = maxLen;
275b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
276b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    // Note that minLen can't be smaller than 1, so we adjust the base
277b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    // and limit array pointers so we're not always wasting the first
278b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    // entry.  We do this again when using them (during symbol decoding).
279b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    base = hufGroup->base-1;
280b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    limit = hufGroup->limit-1;
281b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
282b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    // zero temp[] and limit[], and calculate permute[]
283b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    pp = 0;
284b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    for (ii = minLen; ii <= maxLen; ii++) {
285b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      temp[ii] = limit[ii] = 0;
286b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      for (hh = 0; hh < symCount; hh++)
287b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley        if (length[hh] == ii) hufGroup->permute[pp++] = hh;
288b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    }
289b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
290b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    // Count symbols coded for at each bit length
291b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    for (ii = 0; ii < symCount; ii++) temp[length[ii]]++;
292b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
293b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    /* Calculate limit[] (the largest symbol-coding value at each bit
294b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley     * length, which is (previous limit<<1)+symbols at this level), and
295b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley     * base[] (number of symbols to ignore at each bit length, which is
296b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley     * limit minus the cumulative count of symbols coded for already). */
297b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    pp = hh = 0;
298b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    for (ii = minLen; ii < maxLen; ii++) {
299b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      pp += temp[ii];
300b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      limit[ii] = pp-1;
301b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      pp <<= 1;
302b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      base[ii+1] = pp-(hh+=temp[ii]);
303b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    }
304b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    limit[maxLen] = pp+temp[maxLen]-1;
305b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    limit[maxLen+1] = INT_MAX;
306b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    base[minLen] = 0;
307b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  }
308b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
309b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  return 0;
310b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley}
311b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
312b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley/* First pass, read block's symbols into dbuf[dbufCount].
313b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley *
314b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley * This undoes three types of compression: huffman coding, run length encoding,
315b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley * and move to front encoding.  We have to undo all those to know when we've
316b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley * read enough input.
317b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley */
318b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
319b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landleystatic int read_huffman_data(struct bunzip_data *bd, struct bwdata *bw)
320b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley{
321b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  struct group_data *hufGroup;
3225ad93f32da3e2ac70b1fa929889d3034c79f7ed6Rob Landley  int ii, jj, kk, runPos, dbufCount, symCount, selector, nextSym,
323b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    *byteCount, *base, *limit;
3245ad93f32da3e2ac70b1fa929889d3034c79f7ed6Rob Landley  unsigned hh, *dbuf = bw->dbuf;
325b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  unsigned char uc;
326b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
327b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  // We've finished reading and digesting the block header.  Now read this
328b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  // block's huffman coded symbols from the file and undo the huffman coding
329b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  // and run length encoding, saving the result into dbuf[dbufCount++] = uc
330b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
331b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  // Initialize symbol occurrence counters and symbol mtf table
332b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  byteCount = bw->byteCount;
333b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  for(ii=0; ii<256; ii++) {
334b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    byteCount[ii] = 0;
335b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    bd->mtfSymbol[ii] = ii;
336b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  }
337b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
338b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  // Loop through compressed symbols.  This is the first "tight inner loop"
339b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  // that needs to be micro-optimized for speed.  (This one fills out dbuf[]
340b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  // linearly, staying in cache more, so isn't as limited by DRAM access.)
341b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  runPos = dbufCount = symCount = selector = 0;
342b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  // Some unnecessary initializations to shut gcc up.
343b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  base = limit = 0;
344b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  hufGroup = 0;
345b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  hh = 0;
346b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
347b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  for (;;) {
348b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    // Have we reached the end of this huffman group?
349b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    if (!(symCount--)) {
350b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      // Determine which huffman coding group to use.
351b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      symCount = GROUP_SIZE-1;
352b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      if (selector >= bd->nSelectors) return RETVAL_DATA_ERROR;
353b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      hufGroup = bd->groups + bd->selectors[selector++];
354b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      base = hufGroup->base-1;
355b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      limit = hufGroup->limit-1;
356b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    }
357b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
358b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    // Read next huffman-coded symbol (into jj).
359b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    ii = hufGroup->minLen;
360b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    jj = get_bits(bd, ii);
361b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    while (jj > limit[ii]) {
362b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      // if (ii > hufGroup->maxLen) return RETVAL_DATA_ERROR;
363b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      ii++;
364b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
365b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      // Unroll get_bits() to avoid a function call when the data's in
366b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      // the buffer already.
367b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      kk = bd->inbufBitCount
368b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley        ? (bd->inbufBits >> --(bd->inbufBitCount)) & 1 : get_bits(bd, 1);
369b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      jj = (jj << 1) | kk;
370b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    }
371b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    // Huffman decode jj into nextSym (with bounds checking)
372b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    jj-=base[ii];
373b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
374b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    if (ii > hufGroup->maxLen || (unsigned)jj >= MAX_SYMBOLS)
375b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      return RETVAL_DATA_ERROR;
376b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    nextSym = hufGroup->permute[jj];
377b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
378b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    // If this is a repeated run, loop collecting data
379b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    if ((unsigned)nextSym <= SYMBOL_RUNB) {
380b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      // If this is the start of a new run, zero out counter
381b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      if(!runPos) {
382b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley        runPos = 1;
383b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley        hh = 0;
384b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      }
385b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
386b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      /* Neat trick that saves 1 symbol: instead of or-ing 0 or 1 at
387b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley         each bit position, add 1 or 2 instead. For example,
388b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley         1011 is 1<<0 + 1<<1 + 2<<2. 1010 is 2<<0 + 2<<1 + 1<<2.
389b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley         You can make any bit pattern that way using 1 less symbol than
390b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley         the basic or 0/1 method (except all bits 0, which would use no
391b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley         symbols, but a run of length 0 doesn't mean anything in this
392b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley         context). Thus space is saved. */
393b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      hh += (runPos << nextSym); // +runPos if RUNA; +2*runPos if RUNB
394b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      runPos <<= 1;
395b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      continue;
396b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    }
397b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
398b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    /* When we hit the first non-run symbol after a run, we now know
399b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley       how many times to repeat the last literal, so append that many
400b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley       copies to our buffer of decoded symbols (dbuf) now. (The last
401b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley       literal used is the one at the head of the mtfSymbol array.) */
402b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    if (runPos) {
403b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      runPos = 0;
4045ad93f32da3e2ac70b1fa929889d3034c79f7ed6Rob Landley      // Check for integer overflow
4055ad93f32da3e2ac70b1fa929889d3034c79f7ed6Rob Landley      if (hh>bd->dbufSize || dbufCount+hh>bd->dbufSize)
4065ad93f32da3e2ac70b1fa929889d3034c79f7ed6Rob Landley        return RETVAL_DATA_ERROR;
407b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
408b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      uc = bd->symToByte[bd->mtfSymbol[0]];
409b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      byteCount[uc] += hh;
410b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      while (hh--) dbuf[dbufCount++] = uc;
411b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    }
412b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
413b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    // Is this the terminating symbol?
414b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    if (nextSym>bd->symTotal) break;
415b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
416b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    /* At this point, the symbol we just decoded indicates a new literal
417b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley       character. Subtract one to get the position in the MTF array
418b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley       at which this literal is currently to be found. (Note that the
419b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley       result can't be -1 or 0, because 0 and 1 are RUNA and RUNB.
420b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley       Another instance of the first symbol in the mtf array, position 0,
421b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley       would have been handled as part of a run.) */
422b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    if (dbufCount>=bd->dbufSize) return RETVAL_DATA_ERROR;
423b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    ii = nextSym - 1;
424b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    uc = bd->mtfSymbol[ii];
425b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    // On my laptop, unrolling this memmove() into a loop shaves 3.5% off
426b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    // the total running time.
427b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    while(ii--) bd->mtfSymbol[ii+1] = bd->mtfSymbol[ii];
428b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    bd->mtfSymbol[0] = uc;
429b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    uc = bd->symToByte[uc];
430b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
431b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    // We have our literal byte.  Save it into dbuf.
432b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    byteCount[uc]++;
433b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    dbuf[dbufCount++] = (unsigned int)uc;
434b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  }
435b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
436b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  // Now we know what dbufCount is, do a better sanity check on origPtr.
437b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  if (bw->origPtr >= (bw->writeCount = dbufCount)) return RETVAL_DATA_ERROR;
438b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
439b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  return 0;
440b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley}
441b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
442b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley// Flush output buffer to disk
44311d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landleystatic void flush_bunzip_outbuf(struct bunzip_data *bd, int out_fd)
444b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley{
445b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  if (bd->outbufPos) {
446b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    if (write(out_fd, bd->outbuf, bd->outbufPos) != bd->outbufPos)
447b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      error_exit("output EOF");
448b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    bd->outbufPos = 0;
449b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  }
450b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley}
451b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
45211d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landleystatic void burrows_wheeler_prep(struct bunzip_data *bd, struct bwdata *bw)
453b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley{
454b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  int ii, jj;
455b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  unsigned int *dbuf = bw->dbuf;
456b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  int *byteCount = bw->byteCount;
457b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
458b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  // Turn byteCount into cumulative occurrence counts of 0 to n-1.
459b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  jj = 0;
460b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  for (ii=0; ii<256; ii++) {
461b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    int kk = jj + byteCount[ii];
462b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    byteCount[ii] = jj;
463b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    jj = kk;
464b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  }
465b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
466b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  // Use occurrence counts to quickly figure out what order dbuf would be in
467b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  // if we sorted it.
468b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  for (ii=0; ii < bw->writeCount; ii++) {
469b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    unsigned char uc = dbuf[ii];
470b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    dbuf[byteCount[uc]] |= (ii << 8);
471b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    byteCount[uc]++;
472b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  }
473b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
474b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  // blockRandomised support would go here.
475b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
476b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  // Using ii as position, jj as previous character, hh as current character,
477b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  // and uc as run count.
478b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  bw->dataCRC = 0xffffffffL;
479b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
480b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  /* Decode first byte by hand to initialize "previous" byte. Note that it
481b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley     doesn't get output, and if the first three characters are identical
482b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley     it doesn't qualify as a run (hence uc=255, which will either wrap
483b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley     to 1 or get reset). */
484b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  if (bw->writeCount) {
485b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    bw->writePos = dbuf[bw->origPtr];
486b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    bw->writeCurrent = (unsigned char)bw->writePos;
487b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    bw->writePos >>= 8;
488b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    bw->writeRun = -1;
489b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  }
490b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley}
491b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
492b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley// Decompress a block of text to intermediate buffer
49311d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landleystatic int read_bunzip_data(struct bunzip_data *bd)
494b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley{
495b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  int rc = read_block_header(bd, bd->bwdata);
496b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  if (!rc) rc=read_huffman_data(bd, bd->bwdata);
497b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
498b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  // First thing that can be done by a background thread.
499b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  burrows_wheeler_prep(bd, bd->bwdata);
500b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
501b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  return rc;
502b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley}
503b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
504b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley// Undo burrows-wheeler transform on intermediate buffer to produce output.
505b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley// If !len, write up to len bytes of data to buf.  Otherwise write to out_fd.
506b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley// Returns len ? bytes written : 0.  Notice all errors are negative #'s.
507b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley//
508b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley// Burrows-wheeler transform is described at:
509b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley// http://dogma.net/markn/articles/bwt/bwt.htm
510b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley// http://marknelson.us/1996/09/01/bwt/
511b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
51211d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landleystatic int write_bunzip_data(struct bunzip_data *bd, struct bwdata *bw,
51311d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley  int out_fd, char *outbuf, int len)
514b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley{
515b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  unsigned int *dbuf = bw->dbuf;
516b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  int count, pos, current, run, copies, outbyte, previous, gotcount = 0;
517b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
518b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  for (;;) {
519b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    // If last read was short due to end of file, return last block now
520b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    if (bw->writeCount < 0) return bw->writeCount;
521b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
522b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    // If we need to refill dbuf, do it.
523b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    if (!bw->writeCount) {
524b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      int i = read_bunzip_data(bd);
525b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      if (i) {
526b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley        if (i == RETVAL_LAST_BLOCK) {
527b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley          bw->writeCount = i;
528b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley          return gotcount;
529b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley        } else return i;
530b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      }
531b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    }
532b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
533b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    // loop generating output
534b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    count = bw->writeCount;
535b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    pos = bw->writePos;
536b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    current = bw->writeCurrent;
537b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    run = bw->writeRun;
538b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    while (count) {
539b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
540b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      // If somebody (like tar) wants a certain number of bytes of
541b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      // data from memory instead of written to a file, humor them.
542b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      if (len && bd->outbufPos >= len) goto dataus_interruptus;
543b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      count--;
544b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
545b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      // Follow sequence vector to undo Burrows-Wheeler transform.
546b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      previous = current;
547b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      pos = dbuf[pos];
548b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      current = pos&0xff;
549b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      pos >>= 8;
550b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
551b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      // Whenever we see 3 consecutive copies of the same byte,
552b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      // the 4th is a repeat count
553b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      if (run++ == 3) {
554b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley        copies = current;
555b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley        outbyte = previous;
556b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley        current = -1;
557b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      } else {
558b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley        copies = 1;
559b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley        outbyte = current;
560b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      }
561b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
562b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      // Output bytes to buffer, flushing to file if necessary
563b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      while (copies--) {
564b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley        if (bd->outbufPos == IOBUF_SIZE) flush_bunzip_outbuf(bd, out_fd);
565b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley        bd->outbuf[bd->outbufPos++] = outbyte;
566b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley        bw->dataCRC = (bw->dataCRC << 8)
567b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley                ^ bd->crc32Table[(bw->dataCRC >> 24) ^ outbyte];
568b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      }
569b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      if (current != previous) run=0;
570b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    }
571b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
572b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    // decompression of this block completed successfully
573b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    bw->dataCRC = ~(bw->dataCRC);
574b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    bd->totalCRC = ((bd->totalCRC << 1) | (bd->totalCRC >> 31)) ^ bw->dataCRC;
575b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
576b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    // if this block had a crc error, force file level crc error.
577b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    if (bw->dataCRC != bw->headerCRC) {
578b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      bd->totalCRC = bw->headerCRC+1;
579b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
580b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      return RETVAL_LAST_BLOCK;
581b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    }
582b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landleydataus_interruptus:
583b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    bw->writeCount = count;
584b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    if (len) {
585b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      gotcount += bd->outbufPos;
586b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      memcpy(outbuf, bd->outbuf, len);
587b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
588b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      // If we got enough data, checkpoint loop state and return
589b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      if ((len -= bd->outbufPos)<1) {
590b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley        bd->outbufPos -= len;
591b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley        if (bd->outbufPos) memmove(bd->outbuf, bd->outbuf+len, bd->outbufPos);
592b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley        bw->writePos = pos;
593b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley        bw->writeCurrent = current;
594b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley        bw->writeRun = run;
595b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
596b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley        return gotcount;
597b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley      }
598b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    }
599b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  }
600b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley}
601b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
602b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley// Allocate the structure, read file header. If !len, src_fd contains
603b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley// filehandle to read from. Else inbuf contains data.
60411d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landleystatic int start_bunzip(struct bunzip_data **bdp, int src_fd, char *inbuf,
60511d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley  int len)
606b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley{
607b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  struct bunzip_data *bd;
608b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  unsigned int i;
609b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
610b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  // Figure out how much data to allocate.
611b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  i = sizeof(struct bunzip_data);
612b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  if (!len) i += IOBUF_SIZE;
613b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
614b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  // Allocate bunzip_data. Most fields initialize to zero.
615b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  bd = *bdp = xzalloc(i);
616b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  if (len) {
617b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    bd->inbuf = inbuf;
618b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    bd->inbufCount = len;
619b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    bd->in_fd = -1;
620b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  } else {
621b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    bd->inbuf = (char *)(bd+1);
622b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    bd->in_fd = src_fd;
623b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  }
624b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
625b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  crc_init(bd->crc32Table, 0);
626b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
627b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  // Ensure that file starts with "BZh".
628b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  for (i=0;i<3;i++) if (get_bits(bd,8)!="BZh"[i]) return RETVAL_NOT_BZIP_DATA;
629b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
630b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  // Next byte ascii '1'-'9', indicates block size in units of 100k of
631b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  // uncompressed data. Allocate intermediate buffer for block.
632b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  i = get_bits(bd, 8);
633b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  if (i<'1' || i>'9') return RETVAL_NOT_BZIP_DATA;
634b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  bd->dbufSize = 100000*(i-'0')*THREADS;
635b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  for (i=0; i<THREADS; i++)
636b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    bd->bwdata[i].dbuf = xmalloc(bd->dbufSize * sizeof(int));
637b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
638b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  return 0;
639b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley}
640b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
641b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley// Example usage: decompress src_fd to dst_fd. (Stops at end of bzip data,
642b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley// not end of file.)
64311d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landleystatic char *bunzipStream(int src_fd, int dst_fd)
644b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley{
645b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  struct bunzip_data *bd;
64611d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley  char *bunzip_errors[] = {0, "not bzip", "bad data", "old format"};
647b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  int i, j;
648b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
649b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  if (!(i = start_bunzip(&bd,src_fd, 0, 0))) {
650b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley    i = write_bunzip_data(bd,bd->bwdata, dst_fd, 0, 0);
651badd9f799eed7d7efb9357df1e931df1f422f275Rob Landley    if (i==RETVAL_LAST_BLOCK) {
652badd9f799eed7d7efb9357df1e931df1f422f275Rob Landley      if (bd->bwdata[0].headerCRC==bd->totalCRC) i = 0;
653badd9f799eed7d7efb9357df1e931df1f422f275Rob Landley      else i = RETVAL_DATA_ERROR;
654badd9f799eed7d7efb9357df1e931df1f422f275Rob Landley    }
655b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  }
656b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  flush_bunzip_outbuf(bd, dst_fd);
657b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
658b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  for (j=0; j<THREADS; j++) free(bd->bwdata[j].dbuf);
659b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley  free(bd);
66011d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley
66111d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley  return bunzip_errors[-i];
662b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley}
663b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley
664df92ef5e8e6cf62358b37f441952b7b5f310b20cRob Landleystatic void do_bzcat(int fd, char *name)
665df92ef5e8e6cf62358b37f441952b7b5f310b20cRob Landley{
66611d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley  char *err = bunzipStream(fd, 1);
66711d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley
668d3a435e53c94ec25b4ae5fa2614f49ef8884e08aRob Landley  if (err) error_exit_raw(err);
669df92ef5e8e6cf62358b37f441952b7b5f310b20cRob Landley}
670df92ef5e8e6cf62358b37f441952b7b5f310b20cRob Landley
671efda21ca931766eed6cfc49d1b2122c53827d9fcRob Landleyvoid bzcat_main(void)
672636b5cf46650d3aa3bbc9b29feb0e07589085e3fRob Landley{
6737aa651a6a4496d848f86de9b1e6b3a003256a01fRob Landley  loopfiles(toys.optargs, do_bzcat);
674636b5cf46650d3aa3bbc9b29feb0e07589085e3fRob Landley}
67511d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley
67611d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landleystatic void do_bunzip2(int fd, char *name)
67711d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley{
67811d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley  int outfd = 1, rename = 0, len = strlen(name);
67911d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley  char *tmp, *err, *dotbz = 0;
68011d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley
68111d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley  // Trim off .bz or .bz2 extension
68211d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley  dotbz = name+len-3;
68311d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley  if ((len>3 && !strcmp(dotbz, ".bz")) || (len>4 && !strcmp(--dotbz, ".bz2")))
68411d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley    dotbz = 0;
68511d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley
68611d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley  // For - no replace
68711d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley  if (toys.optflags&FLAG_t) outfd = xopen("/dev/null", O_WRONLY);
68811d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley  else if ((fd || strcmp(name, "-")) && !(toys.optflags&FLAG_c)) {
68911d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley    if (toys.optflags&FLAG_k) {
69011d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley      if (!dotbz || !access(name, X_OK)) {
69111d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley        error_msg("%s exists", name);
69211d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley
69311d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley        return;
69411d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley      }
69511d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley    }
69611d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley    outfd = copy_tempfile(fd, name, &tmp);
69711d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley    rename++;
69811d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley  }
69911d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley
70011d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley  if (toys.optflags&FLAG_v) printf("%s:", name);
70111d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley  err = bunzipStream(fd, outfd);
70211d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley  if (toys.optflags&FLAG_v) {
70311d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley    printf("%s\n", err ? err : "ok");
70411d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley    toys.exitval |= !!err;
705d3a435e53c94ec25b4ae5fa2614f49ef8884e08aRob Landley  } else if (err) error_msg_raw(err);
70611d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley
70711d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley  // can't test outfd==1 because may have been called with stdin+stdout closed
70811d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley  if (rename) {
70911d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley    if (toys.optflags&FLAG_k) {
71011d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley      free(tmp);
71111d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley      tmp = 0;
71211d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley    } else {
71311d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley      if (dotbz) *dotbz = '.';
714d3a435e53c94ec25b4ae5fa2614f49ef8884e08aRob Landley      if (!unlink(name)) perror_msg_raw(name);
71511d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley    }
71611d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley    (err ? delete_tempfile : replace_tempfile)(-1, outfd, &tmp);
71711d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley  }
71811d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley}
71911d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley
72011d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landleyvoid bunzip2_main(void)
72111d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley{
72211d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley  loopfiles(toys.optargs, do_bunzip2);
72311d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley}
724