111d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley/* bzcat.c - bzip2 decompression 22896480c4918f2accccb8301bec457a7bff7377eRob Landley * 3b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley * Copyright 2003, 2007 Rob Landley <rob@landley.net> 4b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley * 5b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley * Based on a close reading (but not the actual code) of the original bzip2 6b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley * decompression code by Julian R Seward (jseward@acm.org), which also 7b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley * acknowledges contributions by Mike Burrows, David Wheeler, Peter Fenwick, 8b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley * Alistair Moffat, Radford Neal, Ian H. Witten, Robert Sedgewick, and 9b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley * Jon L. Bentley. 10b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley * 11b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley * No standard. 12b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 132896480c4918f2accccb8301bec457a7bff7377eRob Landley 1455928b1e0a08d84a5cbc50020f0a8c1024f5b6ceRob LandleyUSE_BZCAT(NEWTOY(bzcat, NULL, TOYFLAG_USR|TOYFLAG_BIN)) 1511d2ff5ffac9382be0d6971b8ac84df21eca85dfRob LandleyUSE_BUNZIP2(NEWTOY(bunzip2, "cftkv", TOYFLAG_USR|TOYFLAG_BIN)) 1611d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley 1711d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landleyconfig BUNZIP2 1811d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley bool "bunzip2" 1911d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley default y 2011d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley help 2111d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley usage: bunzip2 [-cftkv] [FILE...] 2211d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley 2311d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley Decompress listed files (file.bz becomes file) deleting archive file(s). 2411d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley Read from stdin if no files listed. 2511d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley 2611d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley -c force output to stdout 27eb7e847adcecf11cd8fd99077ba3a582abe60146Elliott Hughes -f force decompression (if FILE doesn't end in .bz, replace original) 2811d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley -k keep input files (-c and -t imply this) 29cf2e8d08b3e06a7bdf95c30282a976bbf072a168Rob Landley -t test integrity 3011d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley -v verbose 3155928b1e0a08d84a5cbc50020f0a8c1024f5b6ceRob Landley 322896480c4918f2accccb8301bec457a7bff7377eRob Landleyconfig BZCAT 337aa651a6a4496d848f86de9b1e6b3a003256a01fRob Landley bool "bzcat" 347aa651a6a4496d848f86de9b1e6b3a003256a01fRob Landley default y 357aa651a6a4496d848f86de9b1e6b3a003256a01fRob Landley help 3611d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley usage: bzcat [FILE...] 372896480c4918f2accccb8301bec457a7bff7377eRob Landley 387aa651a6a4496d848f86de9b1e6b3a003256a01fRob Landley Decompress listed files to stdout. Use stdin if no files listed. 392896480c4918f2accccb8301bec457a7bff7377eRob Landley*/ 40636b5cf46650d3aa3bbc9b29feb0e07589085e3fRob Landley 4111d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley#define FOR_bunzip2 42636b5cf46650d3aa3bbc9b29feb0e07589085e3fRob Landley#include "toys.h" 43636b5cf46650d3aa3bbc9b29feb0e07589085e3fRob Landley 44b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley#define THREADS 1 45b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 46b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley// Constants for huffman coding 47b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley#define MAX_GROUPS 6 48b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley#define GROUP_SIZE 50 /* 64 would have been more efficient */ 49b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley#define MAX_HUFCODE_BITS 20 /* Longest huffman code allowed */ 50b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley#define MAX_SYMBOLS 258 /* 256 literals + RUNA + RUNB */ 51b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley#define SYMBOL_RUNA 0 52b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley#define SYMBOL_RUNB 1 53b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 54b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley// Other housekeeping constants 55b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley#define IOBUF_SIZE 4096 56b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 57b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley// Status return values 58b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley#define RETVAL_LAST_BLOCK (-100) 59b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley#define RETVAL_NOT_BZIP_DATA (-1) 60b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley#define RETVAL_DATA_ERROR (-2) 61b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley#define RETVAL_OBSOLETE_INPUT (-3) 62b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 63b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley// This is what we know about each huffman coding group 64b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landleystruct group_data { 65b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley int limit[MAX_HUFCODE_BITS+1], base[MAX_HUFCODE_BITS], permute[MAX_SYMBOLS]; 66b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley char minLen, maxLen; 67b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley}; 68b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 69b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley// Data for burrows wheeler transform 70b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 71b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landleystruct bwdata { 72b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley unsigned int origPtr; 73b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley int byteCount[256]; 74b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // State saved when interrupting output 75b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley int writePos, writeRun, writeCount, writeCurrent; 76b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley unsigned int dataCRC, headerCRC; 77b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley unsigned int *dbuf; 78b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley}; 79b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 80b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley// Structure holding all the housekeeping data, including IO buffers and 81b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley// memory that persists between calls to bunzip 82b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landleystruct bunzip_data { 83b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // Input stream, input buffer, input bit buffer 84b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley int in_fd, inbufCount, inbufPos; 85b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley char *inbuf; 86b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley unsigned int inbufBitCount, inbufBits; 87b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 88b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // Output buffer 89b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley char outbuf[IOBUF_SIZE]; 90b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley int outbufPos; 91b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 92b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley unsigned int totalCRC; 93b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 94b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // First pass decompression data (Huffman and MTF decoding) 95b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley char selectors[32768]; // nSelectors=15 bits 96b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley struct group_data groups[MAX_GROUPS]; // huffman coding tables 97b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley int symTotal, groupCount, nSelectors; 98b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley unsigned char symToByte[256], mtfSymbol[256]; 99b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 100b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // The CRC values stored in the block header and calculated from the data 101b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley unsigned int crc32Table[256]; 102b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 103b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // Second pass decompression data (burrows-wheeler transform) 104b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley unsigned int dbufSize; 105b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley struct bwdata bwdata[THREADS]; 106b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley}; 107b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 108b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley// Return the next nnn bits of input. All reads from the compressed input 109b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley// are done through this function. All reads are big endian. 110b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landleystatic unsigned int get_bits(struct bunzip_data *bd, char bits_wanted) 111b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley{ 112b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley unsigned int bits = 0; 113b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 114b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // If we need to get more data from the byte buffer, do so. (Loop getting 115b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // one byte at a time to enforce endianness and avoid unaligned access.) 116b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley while (bd->inbufBitCount < bits_wanted) { 117b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 118b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // If we need to read more data from file into byte buffer, do so 119b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley if (bd->inbufPos == bd->inbufCount) { 120b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley if (0 >= (bd->inbufCount = read(bd->in_fd, bd->inbuf, IOBUF_SIZE))) 121b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley error_exit("input EOF"); 122b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley bd->inbufPos = 0; 123b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley } 124b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 125b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // Avoid 32-bit overflow (dump bit buffer to top of output) 126b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley if (bd->inbufBitCount>=24) { 127b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley bits = bd->inbufBits&((1<<bd->inbufBitCount)-1); 128b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley bits_wanted -= bd->inbufBitCount; 129b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley bits <<= bits_wanted; 130b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley bd->inbufBitCount = 0; 131b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley } 132b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 133b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // Grab next 8 bits of input from buffer. 134b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley bd->inbufBits = (bd->inbufBits<<8) | bd->inbuf[bd->inbufPos++]; 135b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley bd->inbufBitCount += 8; 136b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley } 137b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 138b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // Calculate result 139b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley bd->inbufBitCount -= bits_wanted; 140b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley bits |= (bd->inbufBits>>bd->inbufBitCount) & ((1<<bits_wanted)-1); 141b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 142b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley return bits; 143b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley} 144b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 145b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley/* Read block header at start of a new compressed data block. Consists of: 146b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley * 147b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley * 48 bits : Block signature, either pi (data block) or e (EOF block). 148b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley * 32 bits : bw->headerCRC 149b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley * 1 bit : obsolete feature flag. 150b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley * 24 bits : origPtr (Burrows-wheeler unwind index, only 20 bits ever used) 151b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley * 16 bits : Mapping table index. 152b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley *[16 bits]: symToByte[symTotal] (Mapping table. For each bit set in mapping 153b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley * table index above, read another 16 bits of mapping table data. 154b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley * If correspondig bit is unset, all bits in that mapping table 155b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley * section are 0.) 156b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley * 3 bits : groupCount (how many huffman tables used to encode, anywhere 157b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley * from 2 to MAX_GROUPS) 158b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley * variable: hufGroup[groupCount] (MTF encoded huffman table data.) 159b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley */ 160b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 161b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landleystatic int read_block_header(struct bunzip_data *bd, struct bwdata *bw) 162b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley{ 163b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley struct group_data *hufGroup; 164b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley int hh, ii, jj, kk, symCount, *base, *limit; 165b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley unsigned char uc; 166b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 167b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // Read in header signature and CRC (which is stored big endian) 168b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley ii = get_bits(bd, 24); 169b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley jj = get_bits(bd, 24); 170b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley bw->headerCRC = get_bits(bd,32); 171b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 172b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // Is this the EOF block with CRC for whole file? (Constant is "e") 173b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley if (ii==0x177245 && jj==0x385090) return RETVAL_LAST_BLOCK; 174b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 175b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // Is this a valid data block? (Constant is "pi".) 176b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley if (ii!=0x314159 || jj!=0x265359) return RETVAL_NOT_BZIP_DATA; 177b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 178b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // We can add support for blockRandomised if anybody complains. 179b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley if (get_bits(bd,1)) return RETVAL_OBSOLETE_INPUT; 180b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley if ((bw->origPtr = get_bits(bd,24)) > bd->dbufSize) return RETVAL_DATA_ERROR; 181b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 182b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // mapping table: if some byte values are never used (encoding things 183b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // like ascii text), the compression code removes the gaps to have fewer 184b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // symbols to deal with, and writes a sparse bitfield indicating which 185b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // values were present. We make a translation table to convert the symbols 186b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // back to the corresponding bytes. 187b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley hh = get_bits(bd, 16); 188b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley bd->symTotal = 0; 189b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley for (ii=0; ii<16; ii++) { 190b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley if (hh & (1 << (15 - ii))) { 191b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley kk = get_bits(bd, 16); 192b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley for (jj=0; jj<16; jj++) 193b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley if (kk & (1 << (15 - jj))) 194b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley bd->symToByte[bd->symTotal++] = (16 * ii) + jj; 195b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley } 196b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley } 197b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 198b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // How many different huffman coding groups does this block use? 199b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley bd->groupCount = get_bits(bd,3); 200b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley if (bd->groupCount<2 || bd->groupCount>MAX_GROUPS) return RETVAL_DATA_ERROR; 201b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 202b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // nSelectors: Every GROUP_SIZE many symbols we switch huffman coding 203b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // tables. Each group has a selector, which is an index into the huffman 204b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // coding table arrays. 205b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // 206b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // Read in the group selector array, which is stored as MTF encoded 207b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // bit runs. (MTF = Move To Front. Every time a symbol occurs it's moved 208b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // to the front of the table, so it has a shorter encoding next time.) 209b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley if (!(bd->nSelectors = get_bits(bd, 15))) return RETVAL_DATA_ERROR; 210b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley for (ii=0; ii<bd->groupCount; ii++) bd->mtfSymbol[ii] = ii; 211b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley for (ii=0; ii<bd->nSelectors; ii++) { 212b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 213b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // Get next value 214b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley for(jj=0;get_bits(bd,1);jj++) 215b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley if (jj>=bd->groupCount) return RETVAL_DATA_ERROR; 216b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 217b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // Decode MTF to get the next selector, and move it to the front. 218b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley uc = bd->mtfSymbol[jj]; 219b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley memmove(bd->mtfSymbol+1, bd->mtfSymbol, jj); 220b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley bd->mtfSymbol[0] = bd->selectors[ii] = uc; 221b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley } 222b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 223b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // Read the huffman coding tables for each group, which code for symTotal 224b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // literal symbols, plus two run symbols (RUNA, RUNB) 225b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley symCount = bd->symTotal+2; 226b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley for (jj=0; jj<bd->groupCount; jj++) { 227b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley unsigned char length[MAX_SYMBOLS]; 228b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley unsigned temp[MAX_HUFCODE_BITS+1]; 229b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley int minLen, maxLen, pp; 230b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 231b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // Read lengths 232b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley hh = get_bits(bd, 5); 233b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley for (ii = 0; ii < symCount; ii++) { 234b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley for(;;) { 235b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // !hh || hh > MAX_HUFCODE_BITS in one test. 236b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley if (MAX_HUFCODE_BITS-1 < (unsigned)hh-1) return RETVAL_DATA_ERROR; 237b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // Grab 2 bits instead of 1 (slightly smaller/faster). Stop if 238b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // first bit is 0, otherwise second bit says whether to 239b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // increment or decrement. 240b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley kk = get_bits(bd, 2); 241b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley if (kk & 2) hh += 1 - ((kk&1)<<1); 242b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley else { 243b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley bd->inbufBitCount++; 244b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley break; 245b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley } 246b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley } 247b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley length[ii] = hh; 248b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley } 249b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 250b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // Find largest and smallest lengths in this group 251b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley minLen = maxLen = length[0]; 252b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley for (ii = 1; ii < symCount; ii++) { 253b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley if(length[ii] > maxLen) maxLen = length[ii]; 254b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley else if(length[ii] < minLen) minLen = length[ii]; 255b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley } 256b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 257b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley /* Calculate permute[], base[], and limit[] tables from length[]. 258b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley * 259b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley * permute[] is the lookup table for converting huffman coded symbols 260b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley * into decoded symbols. It contains symbol values sorted by length. 261b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley * 262b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley * base[] is the amount to subtract from the value of a huffman symbol 263b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley * of a given length when using permute[]. 264b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley * 265b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley * limit[] indicates the largest numerical value a symbol with a given 266b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley * number of bits can have. It lets us know when to stop reading. 267b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley * 268b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley * To use these, keep reading bits until value <= limit[bitcount] or 269b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley * you've read over 20 bits (error). Then the decoded symbol 270b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley * equals permute[hufcode_value - base[hufcode_bitcount]]. 271b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley */ 272b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley hufGroup = bd->groups+jj; 273b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley hufGroup->minLen = minLen; 274b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley hufGroup->maxLen = maxLen; 275b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 276b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // Note that minLen can't be smaller than 1, so we adjust the base 277b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // and limit array pointers so we're not always wasting the first 278b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // entry. We do this again when using them (during symbol decoding). 279b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley base = hufGroup->base-1; 280b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley limit = hufGroup->limit-1; 281b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 282b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // zero temp[] and limit[], and calculate permute[] 283b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley pp = 0; 284b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley for (ii = minLen; ii <= maxLen; ii++) { 285b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley temp[ii] = limit[ii] = 0; 286b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley for (hh = 0; hh < symCount; hh++) 287b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley if (length[hh] == ii) hufGroup->permute[pp++] = hh; 288b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley } 289b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 290b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // Count symbols coded for at each bit length 291b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley for (ii = 0; ii < symCount; ii++) temp[length[ii]]++; 292b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 293b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley /* Calculate limit[] (the largest symbol-coding value at each bit 294b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley * length, which is (previous limit<<1)+symbols at this level), and 295b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley * base[] (number of symbols to ignore at each bit length, which is 296b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley * limit minus the cumulative count of symbols coded for already). */ 297b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley pp = hh = 0; 298b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley for (ii = minLen; ii < maxLen; ii++) { 299b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley pp += temp[ii]; 300b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley limit[ii] = pp-1; 301b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley pp <<= 1; 302b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley base[ii+1] = pp-(hh+=temp[ii]); 303b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley } 304b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley limit[maxLen] = pp+temp[maxLen]-1; 305b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley limit[maxLen+1] = INT_MAX; 306b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley base[minLen] = 0; 307b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley } 308b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 309b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley return 0; 310b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley} 311b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 312b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley/* First pass, read block's symbols into dbuf[dbufCount]. 313b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley * 314b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley * This undoes three types of compression: huffman coding, run length encoding, 315b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley * and move to front encoding. We have to undo all those to know when we've 316b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley * read enough input. 317b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley */ 318b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 319b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landleystatic int read_huffman_data(struct bunzip_data *bd, struct bwdata *bw) 320b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley{ 321b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley struct group_data *hufGroup; 3225ad93f32da3e2ac70b1fa929889d3034c79f7ed6Rob Landley int ii, jj, kk, runPos, dbufCount, symCount, selector, nextSym, 323b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley *byteCount, *base, *limit; 3245ad93f32da3e2ac70b1fa929889d3034c79f7ed6Rob Landley unsigned hh, *dbuf = bw->dbuf; 325b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley unsigned char uc; 326b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 327b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // We've finished reading and digesting the block header. Now read this 328b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // block's huffman coded symbols from the file and undo the huffman coding 329b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // and run length encoding, saving the result into dbuf[dbufCount++] = uc 330b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 331b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // Initialize symbol occurrence counters and symbol mtf table 332b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley byteCount = bw->byteCount; 333b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley for(ii=0; ii<256; ii++) { 334b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley byteCount[ii] = 0; 335b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley bd->mtfSymbol[ii] = ii; 336b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley } 337b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 338b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // Loop through compressed symbols. This is the first "tight inner loop" 339b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // that needs to be micro-optimized for speed. (This one fills out dbuf[] 340b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // linearly, staying in cache more, so isn't as limited by DRAM access.) 341b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley runPos = dbufCount = symCount = selector = 0; 342b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // Some unnecessary initializations to shut gcc up. 343b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley base = limit = 0; 344b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley hufGroup = 0; 345b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley hh = 0; 346b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 347b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley for (;;) { 348b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // Have we reached the end of this huffman group? 349b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley if (!(symCount--)) { 350b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // Determine which huffman coding group to use. 351b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley symCount = GROUP_SIZE-1; 352b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley if (selector >= bd->nSelectors) return RETVAL_DATA_ERROR; 353b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley hufGroup = bd->groups + bd->selectors[selector++]; 354b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley base = hufGroup->base-1; 355b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley limit = hufGroup->limit-1; 356b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley } 357b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 358b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // Read next huffman-coded symbol (into jj). 359b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley ii = hufGroup->minLen; 360b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley jj = get_bits(bd, ii); 361b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley while (jj > limit[ii]) { 362b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // if (ii > hufGroup->maxLen) return RETVAL_DATA_ERROR; 363b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley ii++; 364b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 365b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // Unroll get_bits() to avoid a function call when the data's in 366b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // the buffer already. 367b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley kk = bd->inbufBitCount 368b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley ? (bd->inbufBits >> --(bd->inbufBitCount)) & 1 : get_bits(bd, 1); 369b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley jj = (jj << 1) | kk; 370b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley } 371b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // Huffman decode jj into nextSym (with bounds checking) 372b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley jj-=base[ii]; 373b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 374b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley if (ii > hufGroup->maxLen || (unsigned)jj >= MAX_SYMBOLS) 375b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley return RETVAL_DATA_ERROR; 376b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley nextSym = hufGroup->permute[jj]; 377b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 378b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // If this is a repeated run, loop collecting data 379b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley if ((unsigned)nextSym <= SYMBOL_RUNB) { 380b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // If this is the start of a new run, zero out counter 381b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley if(!runPos) { 382b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley runPos = 1; 383b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley hh = 0; 384b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley } 385b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 386b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley /* Neat trick that saves 1 symbol: instead of or-ing 0 or 1 at 387b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley each bit position, add 1 or 2 instead. For example, 388b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 1011 is 1<<0 + 1<<1 + 2<<2. 1010 is 2<<0 + 2<<1 + 1<<2. 389b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley You can make any bit pattern that way using 1 less symbol than 390b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley the basic or 0/1 method (except all bits 0, which would use no 391b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley symbols, but a run of length 0 doesn't mean anything in this 392b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley context). Thus space is saved. */ 393b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley hh += (runPos << nextSym); // +runPos if RUNA; +2*runPos if RUNB 394b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley runPos <<= 1; 395b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley continue; 396b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley } 397b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 398b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley /* When we hit the first non-run symbol after a run, we now know 399b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley how many times to repeat the last literal, so append that many 400b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley copies to our buffer of decoded symbols (dbuf) now. (The last 401b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley literal used is the one at the head of the mtfSymbol array.) */ 402b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley if (runPos) { 403b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley runPos = 0; 4045ad93f32da3e2ac70b1fa929889d3034c79f7ed6Rob Landley // Check for integer overflow 4055ad93f32da3e2ac70b1fa929889d3034c79f7ed6Rob Landley if (hh>bd->dbufSize || dbufCount+hh>bd->dbufSize) 4065ad93f32da3e2ac70b1fa929889d3034c79f7ed6Rob Landley return RETVAL_DATA_ERROR; 407b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 408b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley uc = bd->symToByte[bd->mtfSymbol[0]]; 409b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley byteCount[uc] += hh; 410b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley while (hh--) dbuf[dbufCount++] = uc; 411b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley } 412b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 413b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // Is this the terminating symbol? 414b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley if (nextSym>bd->symTotal) break; 415b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 416b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley /* At this point, the symbol we just decoded indicates a new literal 417b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley character. Subtract one to get the position in the MTF array 418b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley at which this literal is currently to be found. (Note that the 419b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley result can't be -1 or 0, because 0 and 1 are RUNA and RUNB. 420b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley Another instance of the first symbol in the mtf array, position 0, 421b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley would have been handled as part of a run.) */ 422b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley if (dbufCount>=bd->dbufSize) return RETVAL_DATA_ERROR; 423b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley ii = nextSym - 1; 424b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley uc = bd->mtfSymbol[ii]; 425b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // On my laptop, unrolling this memmove() into a loop shaves 3.5% off 426b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // the total running time. 427b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley while(ii--) bd->mtfSymbol[ii+1] = bd->mtfSymbol[ii]; 428b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley bd->mtfSymbol[0] = uc; 429b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley uc = bd->symToByte[uc]; 430b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 431b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // We have our literal byte. Save it into dbuf. 432b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley byteCount[uc]++; 433b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley dbuf[dbufCount++] = (unsigned int)uc; 434b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley } 435b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 436b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // Now we know what dbufCount is, do a better sanity check on origPtr. 437b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley if (bw->origPtr >= (bw->writeCount = dbufCount)) return RETVAL_DATA_ERROR; 438b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 439b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley return 0; 440b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley} 441b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 442b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley// Flush output buffer to disk 44311d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landleystatic void flush_bunzip_outbuf(struct bunzip_data *bd, int out_fd) 444b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley{ 445b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley if (bd->outbufPos) { 446b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley if (write(out_fd, bd->outbuf, bd->outbufPos) != bd->outbufPos) 447b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley error_exit("output EOF"); 448b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley bd->outbufPos = 0; 449b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley } 450b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley} 451b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 45211d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landleystatic void burrows_wheeler_prep(struct bunzip_data *bd, struct bwdata *bw) 453b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley{ 454b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley int ii, jj; 455b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley unsigned int *dbuf = bw->dbuf; 456b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley int *byteCount = bw->byteCount; 457b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 458b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // Turn byteCount into cumulative occurrence counts of 0 to n-1. 459b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley jj = 0; 460b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley for (ii=0; ii<256; ii++) { 461b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley int kk = jj + byteCount[ii]; 462b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley byteCount[ii] = jj; 463b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley jj = kk; 464b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley } 465b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 466b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // Use occurrence counts to quickly figure out what order dbuf would be in 467b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // if we sorted it. 468b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley for (ii=0; ii < bw->writeCount; ii++) { 469b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley unsigned char uc = dbuf[ii]; 470b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley dbuf[byteCount[uc]] |= (ii << 8); 471b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley byteCount[uc]++; 472b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley } 473b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 474b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // blockRandomised support would go here. 475b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 476b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // Using ii as position, jj as previous character, hh as current character, 477b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // and uc as run count. 478b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley bw->dataCRC = 0xffffffffL; 479b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 480b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley /* Decode first byte by hand to initialize "previous" byte. Note that it 481b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley doesn't get output, and if the first three characters are identical 482b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley it doesn't qualify as a run (hence uc=255, which will either wrap 483b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley to 1 or get reset). */ 484b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley if (bw->writeCount) { 485b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley bw->writePos = dbuf[bw->origPtr]; 486b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley bw->writeCurrent = (unsigned char)bw->writePos; 487b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley bw->writePos >>= 8; 488b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley bw->writeRun = -1; 489b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley } 490b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley} 491b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 492b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley// Decompress a block of text to intermediate buffer 49311d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landleystatic int read_bunzip_data(struct bunzip_data *bd) 494b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley{ 495b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley int rc = read_block_header(bd, bd->bwdata); 496b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley if (!rc) rc=read_huffman_data(bd, bd->bwdata); 497b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 498b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // First thing that can be done by a background thread. 499b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley burrows_wheeler_prep(bd, bd->bwdata); 500b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 501b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley return rc; 502b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley} 503b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 504b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley// Undo burrows-wheeler transform on intermediate buffer to produce output. 505b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley// If !len, write up to len bytes of data to buf. Otherwise write to out_fd. 506b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley// Returns len ? bytes written : 0. Notice all errors are negative #'s. 507b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley// 508b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley// Burrows-wheeler transform is described at: 509b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley// http://dogma.net/markn/articles/bwt/bwt.htm 510b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley// http://marknelson.us/1996/09/01/bwt/ 511b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 51211d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landleystatic int write_bunzip_data(struct bunzip_data *bd, struct bwdata *bw, 51311d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley int out_fd, char *outbuf, int len) 514b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley{ 515b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley unsigned int *dbuf = bw->dbuf; 516b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley int count, pos, current, run, copies, outbyte, previous, gotcount = 0; 517b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 518b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley for (;;) { 519b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // If last read was short due to end of file, return last block now 520b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley if (bw->writeCount < 0) return bw->writeCount; 521b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 522b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // If we need to refill dbuf, do it. 523b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley if (!bw->writeCount) { 524b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley int i = read_bunzip_data(bd); 525b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley if (i) { 526b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley if (i == RETVAL_LAST_BLOCK) { 527b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley bw->writeCount = i; 528b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley return gotcount; 529b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley } else return i; 530b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley } 531b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley } 532b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 533b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // loop generating output 534b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley count = bw->writeCount; 535b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley pos = bw->writePos; 536b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley current = bw->writeCurrent; 537b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley run = bw->writeRun; 538b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley while (count) { 539b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 540b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // If somebody (like tar) wants a certain number of bytes of 541b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // data from memory instead of written to a file, humor them. 542b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley if (len && bd->outbufPos >= len) goto dataus_interruptus; 543b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley count--; 544b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 545b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // Follow sequence vector to undo Burrows-Wheeler transform. 546b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley previous = current; 547b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley pos = dbuf[pos]; 548b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley current = pos&0xff; 549b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley pos >>= 8; 550b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 551b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // Whenever we see 3 consecutive copies of the same byte, 552b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // the 4th is a repeat count 553b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley if (run++ == 3) { 554b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley copies = current; 555b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley outbyte = previous; 556b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley current = -1; 557b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley } else { 558b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley copies = 1; 559b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley outbyte = current; 560b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley } 561b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 562b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // Output bytes to buffer, flushing to file if necessary 563b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley while (copies--) { 564b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley if (bd->outbufPos == IOBUF_SIZE) flush_bunzip_outbuf(bd, out_fd); 565b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley bd->outbuf[bd->outbufPos++] = outbyte; 566b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley bw->dataCRC = (bw->dataCRC << 8) 567b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley ^ bd->crc32Table[(bw->dataCRC >> 24) ^ outbyte]; 568b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley } 569b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley if (current != previous) run=0; 570b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley } 571b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 572b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // decompression of this block completed successfully 573b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley bw->dataCRC = ~(bw->dataCRC); 574b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley bd->totalCRC = ((bd->totalCRC << 1) | (bd->totalCRC >> 31)) ^ bw->dataCRC; 575b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 576b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // if this block had a crc error, force file level crc error. 577b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley if (bw->dataCRC != bw->headerCRC) { 578b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley bd->totalCRC = bw->headerCRC+1; 579b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 580b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley return RETVAL_LAST_BLOCK; 581b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley } 582b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landleydataus_interruptus: 583b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley bw->writeCount = count; 584b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley if (len) { 585b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley gotcount += bd->outbufPos; 586b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley memcpy(outbuf, bd->outbuf, len); 587b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 588b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // If we got enough data, checkpoint loop state and return 589b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley if ((len -= bd->outbufPos)<1) { 590b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley bd->outbufPos -= len; 591b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley if (bd->outbufPos) memmove(bd->outbuf, bd->outbuf+len, bd->outbufPos); 592b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley bw->writePos = pos; 593b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley bw->writeCurrent = current; 594b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley bw->writeRun = run; 595b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 596b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley return gotcount; 597b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley } 598b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley } 599b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley } 600b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley} 601b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 602b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley// Allocate the structure, read file header. If !len, src_fd contains 603b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley// filehandle to read from. Else inbuf contains data. 60411d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landleystatic int start_bunzip(struct bunzip_data **bdp, int src_fd, char *inbuf, 60511d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley int len) 606b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley{ 607b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley struct bunzip_data *bd; 608b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley unsigned int i; 609b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 610b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // Figure out how much data to allocate. 611b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley i = sizeof(struct bunzip_data); 612b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley if (!len) i += IOBUF_SIZE; 613b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 614b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // Allocate bunzip_data. Most fields initialize to zero. 615b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley bd = *bdp = xzalloc(i); 616b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley if (len) { 617b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley bd->inbuf = inbuf; 618b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley bd->inbufCount = len; 619b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley bd->in_fd = -1; 620b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley } else { 621b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley bd->inbuf = (char *)(bd+1); 622b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley bd->in_fd = src_fd; 623b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley } 624b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 625b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley crc_init(bd->crc32Table, 0); 626b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 627b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // Ensure that file starts with "BZh". 628b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley for (i=0;i<3;i++) if (get_bits(bd,8)!="BZh"[i]) return RETVAL_NOT_BZIP_DATA; 629b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 630b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // Next byte ascii '1'-'9', indicates block size in units of 100k of 631b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley // uncompressed data. Allocate intermediate buffer for block. 632b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley i = get_bits(bd, 8); 633b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley if (i<'1' || i>'9') return RETVAL_NOT_BZIP_DATA; 634b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley bd->dbufSize = 100000*(i-'0')*THREADS; 635b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley for (i=0; i<THREADS; i++) 636b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley bd->bwdata[i].dbuf = xmalloc(bd->dbufSize * sizeof(int)); 637b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 638b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley return 0; 639b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley} 640b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 641b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley// Example usage: decompress src_fd to dst_fd. (Stops at end of bzip data, 642b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley// not end of file.) 64311d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landleystatic char *bunzipStream(int src_fd, int dst_fd) 644b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley{ 645b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley struct bunzip_data *bd; 64611d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley char *bunzip_errors[] = {0, "not bzip", "bad data", "old format"}; 647b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley int i, j; 648b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 649b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley if (!(i = start_bunzip(&bd,src_fd, 0, 0))) { 650b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley i = write_bunzip_data(bd,bd->bwdata, dst_fd, 0, 0); 651badd9f799eed7d7efb9357df1e931df1f422f275Rob Landley if (i==RETVAL_LAST_BLOCK) { 652badd9f799eed7d7efb9357df1e931df1f422f275Rob Landley if (bd->bwdata[0].headerCRC==bd->totalCRC) i = 0; 653badd9f799eed7d7efb9357df1e931df1f422f275Rob Landley else i = RETVAL_DATA_ERROR; 654badd9f799eed7d7efb9357df1e931df1f422f275Rob Landley } 655b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley } 656b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley flush_bunzip_outbuf(bd, dst_fd); 657b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 658b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley for (j=0; j<THREADS; j++) free(bd->bwdata[j].dbuf); 659b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley free(bd); 66011d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley 66111d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley return bunzip_errors[-i]; 662b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley} 663b6c8a8609fbfcbaf054e254f74974394c8932712Rob Landley 664df92ef5e8e6cf62358b37f441952b7b5f310b20cRob Landleystatic void do_bzcat(int fd, char *name) 665df92ef5e8e6cf62358b37f441952b7b5f310b20cRob Landley{ 66611d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley char *err = bunzipStream(fd, 1); 66711d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley 668d3a435e53c94ec25b4ae5fa2614f49ef8884e08aRob Landley if (err) error_exit_raw(err); 669df92ef5e8e6cf62358b37f441952b7b5f310b20cRob Landley} 670df92ef5e8e6cf62358b37f441952b7b5f310b20cRob Landley 671efda21ca931766eed6cfc49d1b2122c53827d9fcRob Landleyvoid bzcat_main(void) 672636b5cf46650d3aa3bbc9b29feb0e07589085e3fRob Landley{ 6737aa651a6a4496d848f86de9b1e6b3a003256a01fRob Landley loopfiles(toys.optargs, do_bzcat); 674636b5cf46650d3aa3bbc9b29feb0e07589085e3fRob Landley} 67511d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley 67611d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landleystatic void do_bunzip2(int fd, char *name) 67711d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley{ 67811d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley int outfd = 1, rename = 0, len = strlen(name); 67911d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley char *tmp, *err, *dotbz = 0; 68011d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley 68111d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley // Trim off .bz or .bz2 extension 68211d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley dotbz = name+len-3; 68311d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley if ((len>3 && !strcmp(dotbz, ".bz")) || (len>4 && !strcmp(--dotbz, ".bz2"))) 68411d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley dotbz = 0; 68511d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley 68611d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley // For - no replace 68711d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley if (toys.optflags&FLAG_t) outfd = xopen("/dev/null", O_WRONLY); 68811d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley else if ((fd || strcmp(name, "-")) && !(toys.optflags&FLAG_c)) { 68911d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley if (toys.optflags&FLAG_k) { 69011d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley if (!dotbz || !access(name, X_OK)) { 69111d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley error_msg("%s exists", name); 69211d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley 69311d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley return; 69411d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley } 69511d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley } 69611d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley outfd = copy_tempfile(fd, name, &tmp); 69711d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley rename++; 69811d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley } 69911d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley 70011d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley if (toys.optflags&FLAG_v) printf("%s:", name); 70111d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley err = bunzipStream(fd, outfd); 70211d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley if (toys.optflags&FLAG_v) { 70311d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley printf("%s\n", err ? err : "ok"); 70411d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley toys.exitval |= !!err; 705d3a435e53c94ec25b4ae5fa2614f49ef8884e08aRob Landley } else if (err) error_msg_raw(err); 70611d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley 70711d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley // can't test outfd==1 because may have been called with stdin+stdout closed 70811d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley if (rename) { 70911d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley if (toys.optflags&FLAG_k) { 71011d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley free(tmp); 71111d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley tmp = 0; 71211d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley } else { 71311d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley if (dotbz) *dotbz = '.'; 714d3a435e53c94ec25b4ae5fa2614f49ef8884e08aRob Landley if (!unlink(name)) perror_msg_raw(name); 71511d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley } 71611d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley (err ? delete_tempfile : replace_tempfile)(-1, outfd, &tmp); 71711d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley } 71811d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley} 71911d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley 72011d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landleyvoid bunzip2_main(void) 72111d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley{ 72211d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley loopfiles(toys.optargs, do_bunzip2); 72311d2ff5ffac9382be0d6971b8ac84df21eca85dfRob Landley} 724