1 2#define BZ_NO_STDIO 3 4 5/*-------------------------------------------------------------*/ 6/*--- Private header file for the library. ---*/ 7/*--- bzlib_private.h ---*/ 8/*-------------------------------------------------------------*/ 9 10/*-- 11 This file is a part of bzip2 and/or libbzip2, a program and 12 library for lossless, block-sorting data compression. 13 14 Copyright (C) 1996-2004 Julian R Seward. All rights reserved. 15 16 Redistribution and use in source and binary forms, with or without 17 modification, are permitted provided that the following conditions 18 are met: 19 20 1. Redistributions of source code must retain the above copyright 21 notice, this list of conditions and the following disclaimer. 22 23 2. The origin of this software must not be misrepresented; you must 24 not claim that you wrote the original software. If you use this 25 software in a product, an acknowledgment in the product 26 documentation would be appreciated but is not required. 27 28 3. Altered source versions must be plainly marked as such, and must 29 not be misrepresented as being the original software. 30 31 4. The name of the author may not be used to endorse or promote 32 products derived from this software without specific prior written 33 permission. 34 35 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 36 OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 37 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 38 ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY 39 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 40 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 41 GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 42 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 43 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 44 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 45 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 46 47 Julian Seward, Cambridge, UK. 48 jseward@bzip.org 49 bzip2/libbzip2 version 1.0 of 21 March 2000 50 51 This program is based on (at least) the work of: 52 Mike Burrows 53 David Wheeler 54 Peter Fenwick 55 Alistair Moffat 56 Radford Neal 57 Ian H. Witten 58 Robert Sedgewick 59 Jon L. Bentley 60 61 For more information on these sources, see the manual. 62--*/ 63 64 65#ifndef _BZLIB_PRIVATE_H 66#define _BZLIB_PRIVATE_H 67 68#include <stdlib.h> 69 70#ifndef BZ_NO_STDIO 71#include <stdio.h> 72#include <ctype.h> 73#include <string.h> 74#endif 75 76 77/*-------------------------------------------------------------*/ 78/*--- Public header file for the library. ---*/ 79/*--- bzlib.h ---*/ 80/*-------------------------------------------------------------*/ 81 82/*-- 83 This file is a part of bzip2 and/or libbzip2, a program and 84 library for lossless, block-sorting data compression. 85 86 Copyright (C) 1996-2004 Julian R Seward. All rights reserved. 87 88 Redistribution and use in source and binary forms, with or without 89 modification, are permitted provided that the following conditions 90 are met: 91 92 1. Redistributions of source code must retain the above copyright 93 notice, this list of conditions and the following disclaimer. 94 95 2. The origin of this software must not be misrepresented; you must 96 not claim that you wrote the original software. If you use this 97 software in a product, an acknowledgment in the product 98 documentation would be appreciated but is not required. 99 100 3. Altered source versions must be plainly marked as such, and must 101 not be misrepresented as being the original software. 102 103 4. The name of the author may not be used to endorse or promote 104 products derived from this software without specific prior written 105 permission. 106 107 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 108 OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 109 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 110 ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY 111 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 112 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 113 GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 114 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 115 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 116 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 117 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 118 119 Julian Seward, Cambridge, UK. 120 jseward@bzip.org 121 bzip2/libbzip2 version 1.0 of 21 March 2000 122 123 This program is based on (at least) the work of: 124 Mike Burrows 125 David Wheeler 126 Peter Fenwick 127 Alistair Moffat 128 Radford Neal 129 Ian H. Witten 130 Robert Sedgewick 131 Jon L. Bentley 132 133 For more information on these sources, see the manual. 134--*/ 135 136 137#ifndef _BZLIB_H 138#define _BZLIB_H 139 140#ifdef __cplusplus 141extern "C" { 142#endif 143 144#define BZ_RUN 0 145#define BZ_FLUSH 1 146#define BZ_FINISH 2 147 148#define BZ_OK 0 149#define BZ_RUN_OK 1 150#define BZ_FLUSH_OK 2 151#define BZ_FINISH_OK 3 152#define BZ_STREAM_END 4 153#define BZ_SEQUENCE_ERROR (-1) 154#define BZ_PARAM_ERROR (-2) 155#define BZ_MEM_ERROR (-3) 156#define BZ_DATA_ERROR (-4) 157#define BZ_DATA_ERROR_MAGIC (-5) 158#define BZ_IO_ERROR (-6) 159#define BZ_UNEXPECTED_EOF (-7) 160#define BZ_OUTBUFF_FULL (-8) 161#define BZ_CONFIG_ERROR (-9) 162 163typedef 164 struct { 165 char *next_in; 166 unsigned int avail_in; 167 unsigned int total_in_lo32; 168 unsigned int total_in_hi32; 169 170 char *next_out; 171 unsigned int avail_out; 172 unsigned int total_out_lo32; 173 unsigned int total_out_hi32; 174 175 void *state; 176 177 void *(*bzalloc)(void *,int,int); 178 void (*bzfree)(void *,void *); 179 void *opaque; 180 } 181 bz_stream; 182 183 184#ifndef BZ_IMPORT 185#define BZ_EXPORT 186#endif 187 188#ifndef BZ_NO_STDIO 189/* Need a definitition for FILE */ 190#include <stdio.h> 191#endif 192 193#ifdef _WIN32 194# include <windows.h> 195# ifdef small 196 /* windows.h define small to char */ 197# undef small 198# endif 199# ifdef BZ_EXPORT 200# define BZ_API(func) WINAPI func 201# define BZ_EXTERN extern 202# else 203 /* import windows dll dynamically */ 204# define BZ_API(func) (WINAPI * func) 205# define BZ_EXTERN 206# endif 207#else 208# define BZ_API(func) func 209# define BZ_EXTERN extern 210#endif 211 212 213/*-- Core (low-level) library functions --*/ 214 215BZ_EXTERN int BZ_API(BZ2_bzCompressInit) ( 216 bz_stream* strm, 217 int blockSize100k, 218 int verbosity, 219 int workFactor 220 ); 221 222BZ_EXTERN int BZ_API(BZ2_bzCompress) ( 223 bz_stream* strm, 224 int action 225 ); 226 227BZ_EXTERN int BZ_API(BZ2_bzCompressEnd) ( 228 bz_stream* strm 229 ); 230 231BZ_EXTERN int BZ_API(BZ2_bzDecompressInit) ( 232 bz_stream *strm, 233 int verbosity, 234 int small 235 ); 236 237BZ_EXTERN int BZ_API(BZ2_bzDecompress) ( 238 bz_stream* strm 239 ); 240 241BZ_EXTERN int BZ_API(BZ2_bzDecompressEnd) ( 242 bz_stream *strm 243 ); 244 245 246 247/*-- High(er) level library functions --*/ 248 249#ifndef BZ_NO_STDIO 250#define BZ_MAX_UNUSED 5000 251 252typedef void BZFILE; 253 254BZ_EXTERN BZFILE* BZ_API(BZ2_bzReadOpen) ( 255 int* bzerror, 256 FILE* f, 257 int verbosity, 258 int small, 259 void* unused, 260 int nUnused 261 ); 262 263BZ_EXTERN void BZ_API(BZ2_bzReadClose) ( 264 int* bzerror, 265 BZFILE* b 266 ); 267 268BZ_EXTERN void BZ_API(BZ2_bzReadGetUnused) ( 269 int* bzerror, 270 BZFILE* b, 271 void** unused, 272 int* nUnused 273 ); 274 275BZ_EXTERN int BZ_API(BZ2_bzRead) ( 276 int* bzerror, 277 BZFILE* b, 278 void* buf, 279 int len 280 ); 281 282BZ_EXTERN BZFILE* BZ_API(BZ2_bzWriteOpen) ( 283 int* bzerror, 284 FILE* f, 285 int blockSize100k, 286 int verbosity, 287 int workFactor 288 ); 289 290BZ_EXTERN void BZ_API(BZ2_bzWrite) ( 291 int* bzerror, 292 BZFILE* b, 293 void* buf, 294 int len 295 ); 296 297BZ_EXTERN void BZ_API(BZ2_bzWriteClose) ( 298 int* bzerror, 299 BZFILE* b, 300 int abandon, 301 unsigned int* nbytes_in, 302 unsigned int* nbytes_out 303 ); 304 305BZ_EXTERN void BZ_API(BZ2_bzWriteClose64) ( 306 int* bzerror, 307 BZFILE* b, 308 int abandon, 309 unsigned int* nbytes_in_lo32, 310 unsigned int* nbytes_in_hi32, 311 unsigned int* nbytes_out_lo32, 312 unsigned int* nbytes_out_hi32 313 ); 314#endif 315 316 317/*-- Utility functions --*/ 318 319BZ_EXTERN int BZ_API(BZ2_bzBuffToBuffCompress) ( 320 char* dest, 321 unsigned int* destLen, 322 char* source, 323 unsigned int sourceLen, 324 int blockSize100k, 325 int verbosity, 326 int workFactor 327 ); 328 329BZ_EXTERN int BZ_API(BZ2_bzBuffToBuffDecompress) ( 330 char* dest, 331 unsigned int* destLen, 332 char* source, 333 unsigned int sourceLen, 334 int small, 335 int verbosity 336 ); 337 338 339/*-- 340 Code contributed by Yoshioka Tsuneo 341 (QWF00133@niftyserve.or.jp/tsuneo-y@is.aist-nara.ac.jp), 342 to support better zlib compatibility. 343 This code is not _officially_ part of libbzip2 (yet); 344 I haven't tested it, documented it, or considered the 345 threading-safeness of it. 346 If this code breaks, please contact both Yoshioka and me. 347--*/ 348 349BZ_EXTERN const char * BZ_API(BZ2_bzlibVersion) ( 350 void 351 ); 352 353#ifndef BZ_NO_STDIO 354BZ_EXTERN BZFILE * BZ_API(BZ2_bzopen) ( 355 const char *path, 356 const char *mode 357 ); 358 359BZ_EXTERN BZFILE * BZ_API(BZ2_bzdopen) ( 360 int fd, 361 const char *mode 362 ); 363 364BZ_EXTERN int BZ_API(BZ2_bzread) ( 365 BZFILE* b, 366 void* buf, 367 int len 368 ); 369 370BZ_EXTERN int BZ_API(BZ2_bzwrite) ( 371 BZFILE* b, 372 void* buf, 373 int len 374 ); 375 376BZ_EXTERN int BZ_API(BZ2_bzflush) ( 377 BZFILE* b 378 ); 379 380BZ_EXTERN void BZ_API(BZ2_bzclose) ( 381 BZFILE* b 382 ); 383 384BZ_EXTERN const char * BZ_API(BZ2_bzerror) ( 385 BZFILE *b, 386 int *errnum 387 ); 388#endif 389 390#ifdef __cplusplus 391} 392#endif 393 394#endif 395 396/*-------------------------------------------------------------*/ 397/*--- end bzlib.h ---*/ 398/*-------------------------------------------------------------*/ 399 400 401 402 403/*-- General stuff. --*/ 404 405#define BZ_VERSION "1.0.3, 17-Oct-2004" 406 407typedef char Char; 408typedef unsigned char Bool; 409typedef unsigned char UChar; 410typedef int Int32; 411typedef unsigned int UInt32; 412typedef short Int16; 413typedef unsigned short UInt16; 414 415#define True ((Bool)1) 416#define False ((Bool)0) 417 418#ifndef __GNUC__ 419#define __inline__ /* */ 420#endif 421 422#ifndef BZ_NO_STDIO 423extern void BZ2_bz__AssertH__fail ( int errcode ); 424#define AssertH(cond,errcode) \ 425 { if (!(cond)) BZ2_bz__AssertH__fail ( errcode ); } 426#if BZ_DEBUG 427#define AssertD(cond,msg) \ 428 { if (!(cond)) { \ 429 fprintf ( stderr, \ 430 "\n\nlibbzip2(debug build): internal error\n\t%s\n", msg );\ 431 exit(1); \ 432 }} 433#else 434#define AssertD(cond,msg) /* */ 435#endif 436#define VPrintf0(zf) \ 437 fprintf(stderr,zf) 438#define VPrintf1(zf,za1) \ 439 fprintf(stderr,zf,za1) 440#define VPrintf2(zf,za1,za2) \ 441 fprintf(stderr,zf,za1,za2) 442#define VPrintf3(zf,za1,za2,za3) \ 443 fprintf(stderr,zf,za1,za2,za3) 444#define VPrintf4(zf,za1,za2,za3,za4) \ 445 fprintf(stderr,zf,za1,za2,za3,za4) 446#define VPrintf5(zf,za1,za2,za3,za4,za5) \ 447 fprintf(stderr,zf,za1,za2,za3,za4,za5) 448#else 449extern void bz_internal_error ( int errcode ); 450#define AssertH(cond,errcode) \ 451 { if (!(cond)) bz_internal_error ( errcode ); } 452#define AssertD(cond,msg) /* */ 453#define VPrintf0(zf) \ 454 vexxx_printf(zf) 455#define VPrintf1(zf,za1) \ 456 vexxx_printf(zf,za1) 457#define VPrintf2(zf,za1,za2) \ 458 vexxx_printf(zf,za1,za2) 459#define VPrintf3(zf,za1,za2,za3) \ 460 vexxx_printf(zf,za1,za2,za3) 461#define VPrintf4(zf,za1,za2,za3,za4) \ 462 vexxx_printf(zf,za1,za2,za3,za4) 463#define VPrintf5(zf,za1,za2,za3,za4,za5) \ 464 vexxx_printf(zf,za1,za2,za3,za4,za5) 465#endif 466 467 468#define BZALLOC(nnn) (strm->bzalloc)(strm->opaque,(nnn),1) 469#define BZFREE(ppp) (strm->bzfree)(strm->opaque,(ppp)) 470 471 472/*-- Header bytes. --*/ 473 474#define BZ_HDR_B 0x42 /* 'B' */ 475#define BZ_HDR_Z 0x5a /* 'Z' */ 476#define BZ_HDR_h 0x68 /* 'h' */ 477#define BZ_HDR_0 0x30 /* '0' */ 478 479/*-- Constants for the back end. --*/ 480 481#define BZ_MAX_ALPHA_SIZE 258 482#define BZ_MAX_CODE_LEN 23 483 484#define BZ_RUNA 0 485#define BZ_RUNB 1 486 487#define BZ_N_GROUPS 6 488#define BZ_G_SIZE 50 489#define BZ_N_ITERS 4 490 491#define BZ_MAX_SELECTORS (2 + (900000 / BZ_G_SIZE)) 492 493 494 495/*-- Stuff for randomising repetitive blocks. --*/ 496 497extern Int32 BZ2_rNums[512]; 498 499#define BZ_RAND_DECLS \ 500 Int32 rNToGo; \ 501 Int32 rTPos \ 502 503#define BZ_RAND_INIT_MASK \ 504 s->rNToGo = 0; \ 505 s->rTPos = 0 \ 506 507#define BZ_RAND_MASK ((s->rNToGo == 1) ? 1 : 0) 508 509#define BZ_RAND_UPD_MASK \ 510 if (s->rNToGo == 0) { \ 511 s->rNToGo = BZ2_rNums[s->rTPos]; \ 512 s->rTPos++; \ 513 if (s->rTPos == 512) s->rTPos = 0; \ 514 } \ 515 s->rNToGo--; 516 517 518 519/*-- Stuff for doing CRCs. --*/ 520 521extern UInt32 BZ2_crc32Table[256]; 522 523#define BZ_INITIALISE_CRC(crcVar) \ 524{ \ 525 crcVar = 0xffffffffL; \ 526} 527 528#define BZ_FINALISE_CRC(crcVar) \ 529{ \ 530 crcVar = ~(crcVar); \ 531} 532 533#define BZ_UPDATE_CRC(crcVar,cha) \ 534{ \ 535 crcVar = (crcVar << 8) ^ \ 536 BZ2_crc32Table[(crcVar >> 24) ^ \ 537 ((UChar)cha)]; \ 538} 539 540 541 542/*-- States and modes for compression. --*/ 543 544#define BZ_M_IDLE 1 545#define BZ_M_RUNNING 2 546#define BZ_M_FLUSHING 3 547#define BZ_M_FINISHING 4 548 549#define BZ_S_OUTPUT 1 550#define BZ_S_INPUT 2 551 552#define BZ_N_RADIX 2 553#define BZ_N_QSORT 12 554#define BZ_N_SHELL 18 555#define BZ_N_OVERSHOOT (BZ_N_RADIX + BZ_N_QSORT + BZ_N_SHELL + 2) 556 557 558 559 560/*-- Structure holding all the compression-side stuff. --*/ 561 562typedef 563 struct { 564 /* pointer back to the struct bz_stream */ 565 bz_stream* strm; 566 567 /* mode this stream is in, and whether inputting */ 568 /* or outputting data */ 569 Int32 mode; 570 Int32 state; 571 572 /* remembers avail_in when flush/finish requested */ 573 UInt32 avail_in_expect; 574 575 /* for doing the block sorting */ 576 UInt32* arr1; 577 UInt32* arr2; 578 UInt32* ftab; 579 Int32 origPtr; 580 581 /* aliases for arr1 and arr2 */ 582 UInt32* ptr; 583 UChar* block; 584 UInt16* mtfv; 585 UChar* zbits; 586 587 /* for deciding when to use the fallback sorting algorithm */ 588 Int32 workFactor; 589 590 /* run-length-encoding of the input */ 591 UInt32 state_in_ch; 592 Int32 state_in_len; 593 BZ_RAND_DECLS; 594 595 /* input and output limits and current posns */ 596 Int32 nblock; 597 Int32 nblockMAX; 598 Int32 numZ; 599 Int32 state_out_pos; 600 601 /* map of bytes used in block */ 602 Int32 nInUse; 603 Bool inUse[256]; 604 UChar unseqToSeq[256]; 605 606 /* the buffer for bit stream creation */ 607 UInt32 bsBuff; 608 Int32 bsLive; 609 610 /* block and combined CRCs */ 611 UInt32 blockCRC; 612 UInt32 combinedCRC; 613 614 /* misc administratium */ 615 Int32 verbosity; 616 Int32 blockNo; 617 Int32 blockSize100k; 618 619 /* stuff for coding the MTF values */ 620 Int32 nMTF; 621 Int32 mtfFreq [BZ_MAX_ALPHA_SIZE]; 622 UChar selector [BZ_MAX_SELECTORS]; 623 UChar selectorMtf[BZ_MAX_SELECTORS]; 624 625 UChar len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; 626 Int32 code [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; 627 Int32 rfreq [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; 628 /* second dimension: only 3 needed; 4 makes index calculations faster */ 629 UInt32 len_pack[BZ_MAX_ALPHA_SIZE][4]; 630 631 } 632 EState; 633 634 635 636/*-- externs for compression. --*/ 637 638extern void 639BZ2_blockSort ( EState* ); 640 641extern void 642BZ2_compressBlock ( EState*, Bool ); 643 644extern void 645BZ2_bsInitWrite ( EState* ); 646 647extern void 648BZ2_hbAssignCodes ( Int32*, UChar*, Int32, Int32, Int32 ); 649 650extern void 651BZ2_hbMakeCodeLengths ( UChar*, Int32*, Int32, Int32 ); 652 653 654 655/*-- states for decompression. --*/ 656 657#define BZ_X_IDLE 1 658#define BZ_X_OUTPUT 2 659 660#define BZ_X_MAGIC_1 10 661#define BZ_X_MAGIC_2 11 662#define BZ_X_MAGIC_3 12 663#define BZ_X_MAGIC_4 13 664#define BZ_X_BLKHDR_1 14 665#define BZ_X_BLKHDR_2 15 666#define BZ_X_BLKHDR_3 16 667#define BZ_X_BLKHDR_4 17 668#define BZ_X_BLKHDR_5 18 669#define BZ_X_BLKHDR_6 19 670#define BZ_X_BCRC_1 20 671#define BZ_X_BCRC_2 21 672#define BZ_X_BCRC_3 22 673#define BZ_X_BCRC_4 23 674#define BZ_X_RANDBIT 24 675#define BZ_X_ORIGPTR_1 25 676#define BZ_X_ORIGPTR_2 26 677#define BZ_X_ORIGPTR_3 27 678#define BZ_X_MAPPING_1 28 679#define BZ_X_MAPPING_2 29 680#define BZ_X_SELECTOR_1 30 681#define BZ_X_SELECTOR_2 31 682#define BZ_X_SELECTOR_3 32 683#define BZ_X_CODING_1 33 684#define BZ_X_CODING_2 34 685#define BZ_X_CODING_3 35 686#define BZ_X_MTF_1 36 687#define BZ_X_MTF_2 37 688#define BZ_X_MTF_3 38 689#define BZ_X_MTF_4 39 690#define BZ_X_MTF_5 40 691#define BZ_X_MTF_6 41 692#define BZ_X_ENDHDR_2 42 693#define BZ_X_ENDHDR_3 43 694#define BZ_X_ENDHDR_4 44 695#define BZ_X_ENDHDR_5 45 696#define BZ_X_ENDHDR_6 46 697#define BZ_X_CCRC_1 47 698#define BZ_X_CCRC_2 48 699#define BZ_X_CCRC_3 49 700#define BZ_X_CCRC_4 50 701 702 703 704/*-- Constants for the fast MTF decoder. --*/ 705 706#define MTFA_SIZE 4096 707#define MTFL_SIZE 16 708 709 710 711/*-- Structure holding all the decompression-side stuff. --*/ 712 713typedef 714 struct { 715 /* pointer back to the struct bz_stream */ 716 bz_stream* strm; 717 718 /* state indicator for this stream */ 719 Int32 state; 720 721 /* for doing the final run-length decoding */ 722 UChar state_out_ch; 723 Int32 state_out_len; 724 Bool blockRandomised; 725 BZ_RAND_DECLS; 726 727 /* the buffer for bit stream reading */ 728 UInt32 bsBuff; 729 Int32 bsLive; 730 731 /* misc administratium */ 732 Int32 blockSize100k; 733 Bool smallDecompress; 734 Int32 currBlockNo; 735 Int32 verbosity; 736 737 /* for undoing the Burrows-Wheeler transform */ 738 Int32 origPtr; 739 UInt32 tPos; 740 Int32 k0; 741 Int32 unzftab[256]; 742 Int32 nblock_used; 743 Int32 cftab[257]; 744 Int32 cftabCopy[257]; 745 746 /* for undoing the Burrows-Wheeler transform (FAST) */ 747 UInt32 *tt; 748 749 /* for undoing the Burrows-Wheeler transform (SMALL) */ 750 UInt16 *ll16; 751 UChar *ll4; 752 753 /* stored and calculated CRCs */ 754 UInt32 storedBlockCRC; 755 UInt32 storedCombinedCRC; 756 UInt32 calculatedBlockCRC; 757 UInt32 calculatedCombinedCRC; 758 759 /* map of bytes used in block */ 760 Int32 nInUse; 761 Bool inUse[256]; 762 Bool inUse16[16]; 763 UChar seqToUnseq[256]; 764 765 /* for decoding the MTF values */ 766 UChar mtfa [MTFA_SIZE]; 767 Int32 mtfbase[256 / MTFL_SIZE]; 768 UChar selector [BZ_MAX_SELECTORS]; 769 UChar selectorMtf[BZ_MAX_SELECTORS]; 770 UChar len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; 771 772 Int32 limit [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; 773 Int32 base [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; 774 Int32 perm [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; 775 Int32 minLens[BZ_N_GROUPS]; 776 777 /* save area for scalars in the main decompress code */ 778 Int32 save_i; 779 Int32 save_j; 780 Int32 save_t; 781 Int32 save_alphaSize; 782 Int32 save_nGroups; 783 Int32 save_nSelectors; 784 Int32 save_EOB; 785 Int32 save_groupNo; 786 Int32 save_groupPos; 787 Int32 save_nextSym; 788 Int32 save_nblockMAX; 789 Int32 save_nblock; 790 Int32 save_es; 791 Int32 save_N; 792 Int32 save_curr; 793 Int32 save_zt; 794 Int32 save_zn; 795 Int32 save_zvec; 796 Int32 save_zj; 797 Int32 save_gSel; 798 Int32 save_gMinlen; 799 Int32* save_gLimit; 800 Int32* save_gBase; 801 Int32* save_gPerm; 802 803 } 804 DState; 805 806 807 808/*-- Macros for decompression. --*/ 809 810#define BZ_GET_FAST(cccc) \ 811 s->tPos = s->tt[s->tPos]; \ 812 cccc = (UChar)(s->tPos & 0xff); \ 813 s->tPos >>= 8; 814 815#define BZ_GET_FAST_C(cccc) \ 816 c_tPos = c_tt[c_tPos]; \ 817 cccc = (UChar)(c_tPos & 0xff); \ 818 c_tPos >>= 8; 819 820#define SET_LL4(i,n) \ 821 { if (((i) & 0x1) == 0) \ 822 s->ll4[(i) >> 1] = (s->ll4[(i) >> 1] & 0xf0) | (n); else \ 823 s->ll4[(i) >> 1] = (s->ll4[(i) >> 1] & 0x0f) | ((n) << 4); \ 824 } 825 826#define GET_LL4(i) \ 827 ((((UInt32)(s->ll4[(i) >> 1])) >> (((i) << 2) & 0x4)) & 0xF) 828 829#define SET_LL(i,n) \ 830 { s->ll16[i] = (UInt16)(n & 0x0000ffff); \ 831 SET_LL4(i, n >> 16); \ 832 } 833 834#define GET_LL(i) \ 835 (((UInt32)s->ll16[i]) | (GET_LL4(i) << 16)) 836 837#define BZ_GET_SMALL(cccc) \ 838 cccc = BZ2_indexIntoF ( s->tPos, s->cftab ); \ 839 s->tPos = GET_LL(s->tPos); 840 841 842/*-- externs for decompression. --*/ 843 844extern Int32 845BZ2_indexIntoF ( Int32, Int32* ); 846 847extern Int32 848BZ2_decompress ( DState* ); 849 850extern void 851BZ2_hbCreateDecodeTables ( Int32*, Int32*, Int32*, UChar*, 852 Int32, Int32, Int32 ); 853 854 855#endif 856 857 858/*-- BZ_NO_STDIO seems to make NULL disappear on some platforms. --*/ 859 860#ifdef BZ_NO_STDIO 861#ifndef NULL 862#define NULL 0 863#endif 864#endif 865 866 867/*-------------------------------------------------------------*/ 868/*--- end bzlib_private.h ---*/ 869/*-------------------------------------------------------------*/ 870 871 872/* Something which has the same size as void* on the host. That is, 873 it is 32 bits on a 32-bit host and 64 bits on a 64-bit host, and so 874 it can safely be coerced to and from a pointer type on the host 875 machine. */ 876typedef unsigned long HWord; 877typedef char HChar; 878typedef signed int Int; 879typedef unsigned int UInt; 880 881typedef signed long long int Long; 882typedef unsigned long long int ULong; 883 884 885///////////////////////////////////////////////////////////////////// 886///////////////////////////////////////////////////////////////////// 887 888//#include "/home/sewardj/VEX/trunk/pub/libvex_basictypes.h" 889 890static HWord (*serviceFn)(HWord,HWord) = 0; 891 892 893static char* my_strcpy ( char* dest, const char* src ) 894{ 895 char* dest_orig = dest; 896 while (*src) *dest++ = *src++; 897 *dest = 0; 898 return dest_orig; 899} 900 901static void* my_memcpy ( void *dest, const void *src, int sz ) 902{ 903 const char *s = (const char *)src; 904 char *d = (char *)dest; 905 906 while (sz--) 907 *d++ = *s++; 908 909 return dest; 910} 911 912static void* my_memmove( void *dst, const void *src, unsigned int len ) 913{ 914 register char *d; 915 register char *s; 916 if ( dst > src ) { 917 d = (char *)dst + len - 1; 918 s = (char *)src + len - 1; 919 while ( len >= 4 ) { 920 *d-- = *s--; 921 *d-- = *s--; 922 *d-- = *s--; 923 *d-- = *s--; 924 len -= 4; 925 } 926 while ( len-- ) { 927 *d-- = *s--; 928 } 929 } else if ( dst < src ) { 930 d = (char *)dst; 931 s = (char *)src; 932 while ( len >= 4 ) { 933 *d++ = *s++; 934 *d++ = *s++; 935 *d++ = *s++; 936 *d++ = *s++; 937 len -= 4; 938 } 939 while ( len-- ) { 940 *d++ = *s++; 941 } 942 } 943 return dst; 944} 945 946char* my_strcat ( char* dest, const char* src ) 947{ 948 char* dest_orig = dest; 949 while (*dest) dest++; 950 while (*src) *dest++ = *src++; 951 *dest = 0; 952 return dest_orig; 953} 954 955 956///////////////////////////////////////////////////////////////////// 957 958static void vexxx_log_bytes ( char* p, int n ) 959{ 960 int i; 961 for (i = 0; i < n; i++) 962 (*serviceFn)( 1, (int)p[i] ); 963} 964 965/*---------------------------------------------------------*/ 966/*--- vexxx_printf ---*/ 967/*---------------------------------------------------------*/ 968 969/* This should be the only <...> include in the entire VEX library. 970 New code for vexxx_util.c should go above this point. */ 971#include <stdarg.h> 972 973static HChar vexxx_toupper ( HChar c ) 974{ 975 if (c >= 'a' && c <= 'z') 976 return c + ('A' - 'a'); 977 else 978 return c; 979} 980 981static Int vexxx_strlen ( const HChar* str ) 982{ 983 Int i = 0; 984 while (str[i] != 0) i++; 985 return i; 986} 987 988Bool vexxx_streq ( const HChar* s1, const HChar* s2 ) 989{ 990 while (True) { 991 if (*s1 == 0 && *s2 == 0) 992 return True; 993 if (*s1 != *s2) 994 return False; 995 s1++; 996 s2++; 997 } 998} 999 1000/* Some flags. */ 1001#define VG_MSG_SIGNED 1 /* The value is signed. */ 1002#define VG_MSG_ZJUSTIFY 2 /* Must justify with '0'. */ 1003#define VG_MSG_LJUSTIFY 4 /* Must justify on the left. */ 1004#define VG_MSG_PAREN 8 /* Parenthesize if present (for %y) */ 1005#define VG_MSG_COMMA 16 /* Add commas to numbers (for %d, %u) */ 1006 1007/* Copy a string into the buffer. */ 1008static UInt 1009myvprintf_str ( void(*send)(HChar), Int flags, Int width, HChar* str, 1010 Bool capitalise ) 1011{ 1012# define MAYBE_TOUPPER(ch) (capitalise ? vexxx_toupper(ch) : (ch)) 1013 UInt ret = 0; 1014 Int i, extra; 1015 Int len = vexxx_strlen(str); 1016 1017 if (width == 0) { 1018 ret += len; 1019 for (i = 0; i < len; i++) 1020 send(MAYBE_TOUPPER(str[i])); 1021 return ret; 1022 } 1023 1024 if (len > width) { 1025 ret += width; 1026 for (i = 0; i < width; i++) 1027 send(MAYBE_TOUPPER(str[i])); 1028 return ret; 1029 } 1030 1031 extra = width - len; 1032 if (flags & VG_MSG_LJUSTIFY) { 1033 ret += extra; 1034 for (i = 0; i < extra; i++) 1035 send(' '); 1036 } 1037 ret += len; 1038 for (i = 0; i < len; i++) 1039 send(MAYBE_TOUPPER(str[i])); 1040 if (!(flags & VG_MSG_LJUSTIFY)) { 1041 ret += extra; 1042 for (i = 0; i < extra; i++) 1043 send(' '); 1044 } 1045 1046# undef MAYBE_TOUPPER 1047 1048 return ret; 1049} 1050 1051/* Write P into the buffer according to these args: 1052 * If SIGN is true, p is a signed. 1053 * BASE is the base. 1054 * If WITH_ZERO is true, '0' must be added. 1055 * WIDTH is the width of the field. 1056 */ 1057static UInt 1058myvprintf_int64 ( void(*send)(HChar), Int flags, Int base, Int width, ULong pL) 1059{ 1060 HChar buf[40]; 1061 Int ind = 0; 1062 Int i, nc = 0; 1063 Bool neg = False; 1064 HChar *digits = "0123456789ABCDEF"; 1065 UInt ret = 0; 1066 UInt p = (UInt)pL; 1067 1068 if (base < 2 || base > 16) 1069 return ret; 1070 1071 if ((flags & VG_MSG_SIGNED) && (Int)p < 0) { 1072 p = - (Int)p; 1073 neg = True; 1074 } 1075 1076 if (p == 0) 1077 buf[ind++] = '0'; 1078 else { 1079 while (p > 0) { 1080 if ((flags & VG_MSG_COMMA) && 10 == base && 1081 0 == (ind-nc) % 3 && 0 != ind) 1082 { 1083 buf[ind++] = ','; 1084 nc++; 1085 } 1086 buf[ind++] = digits[p % base]; 1087 p /= base; 1088 } 1089 } 1090 1091 if (neg) 1092 buf[ind++] = '-'; 1093 1094 if (width > 0 && !(flags & VG_MSG_LJUSTIFY)) { 1095 for(; ind < width; ind++) { 1096 //vassert(ind < 39); 1097 buf[ind] = ((flags & VG_MSG_ZJUSTIFY) ? '0': ' '); 1098 } 1099 } 1100 1101 /* Reverse copy to buffer. */ 1102 ret += ind; 1103 for (i = ind -1; i >= 0; i--) { 1104 send(buf[i]); 1105 } 1106 if (width > 0 && (flags & VG_MSG_LJUSTIFY)) { 1107 for(; ind < width; ind++) { 1108 ret++; 1109 send(' '); // Never pad with zeroes on RHS -- changes the value! 1110 } 1111 } 1112 return ret; 1113} 1114 1115 1116/* A simple vprintf(). */ 1117static 1118UInt vprintf_wrk ( void(*send)(HChar), const HChar *format, va_list vargs ) 1119{ 1120 UInt ret = 0; 1121 int i; 1122 int flags; 1123 int width; 1124 Bool is_long; 1125 1126 /* We assume that vargs has already been initialised by the 1127 caller, using va_start, and that the caller will similarly 1128 clean up with va_end. 1129 */ 1130 1131 for (i = 0; format[i] != 0; i++) { 1132 if (format[i] != '%') { 1133 send(format[i]); 1134 ret++; 1135 continue; 1136 } 1137 i++; 1138 /* A '%' has been found. Ignore a trailing %. */ 1139 if (format[i] == 0) 1140 break; 1141 if (format[i] == '%') { 1142 /* `%%' is replaced by `%'. */ 1143 send('%'); 1144 ret++; 1145 continue; 1146 } 1147 flags = 0; 1148 is_long = False; 1149 width = 0; /* length of the field. */ 1150 if (format[i] == '(') { 1151 flags |= VG_MSG_PAREN; 1152 i++; 1153 } 1154 /* If ',' follows '%', commas will be inserted. */ 1155 if (format[i] == ',') { 1156 flags |= VG_MSG_COMMA; 1157 i++; 1158 } 1159 /* If '-' follows '%', justify on the left. */ 1160 if (format[i] == '-') { 1161 flags |= VG_MSG_LJUSTIFY; 1162 i++; 1163 } 1164 /* If '0' follows '%', pads will be inserted. */ 1165 if (format[i] == '0') { 1166 flags |= VG_MSG_ZJUSTIFY; 1167 i++; 1168 } 1169 /* Compute the field length. */ 1170 while (format[i] >= '0' && format[i] <= '9') { 1171 width *= 10; 1172 width += format[i++] - '0'; 1173 } 1174 while (format[i] == 'l') { 1175 i++; 1176 is_long = True; 1177 } 1178 1179 switch (format[i]) { 1180 case 'd': /* %d */ 1181 flags |= VG_MSG_SIGNED; 1182 if (is_long) 1183 ret += myvprintf_int64(send, flags, 10, width, 1184 (ULong)(va_arg (vargs, Long))); 1185 else 1186 ret += myvprintf_int64(send, flags, 10, width, 1187 (ULong)(va_arg (vargs, Int))); 1188 break; 1189 case 'u': /* %u */ 1190 if (is_long) 1191 ret += myvprintf_int64(send, flags, 10, width, 1192 (ULong)(va_arg (vargs, ULong))); 1193 else 1194 ret += myvprintf_int64(send, flags, 10, width, 1195 (ULong)(va_arg (vargs, UInt))); 1196 break; 1197 case 'p': /* %p */ 1198 ret += 2; 1199 send('0'); 1200 send('x'); 1201 ret += myvprintf_int64(send, flags, 16, width, 1202 (ULong)((HWord)va_arg (vargs, void *))); 1203 break; 1204 case 'x': /* %x */ 1205 if (is_long) 1206 ret += myvprintf_int64(send, flags, 16, width, 1207 (ULong)(va_arg (vargs, ULong))); 1208 else 1209 ret += myvprintf_int64(send, flags, 16, width, 1210 (ULong)(va_arg (vargs, UInt))); 1211 break; 1212 case 'c': /* %c */ 1213 ret++; 1214 send((va_arg (vargs, int))); 1215 break; 1216 case 's': case 'S': { /* %s */ 1217 char *str = va_arg (vargs, char *); 1218 if (str == (char*) 0) str = "(null)"; 1219 ret += myvprintf_str(send, flags, width, str, 1220 (format[i]=='S')); 1221 break; 1222 } 1223# if 0 1224 case 'y': { /* %y - print symbol */ 1225 Addr a = va_arg(vargs, Addr); 1226 1227 HChar *name; 1228 if (VG_(get_fnname_w_offset)(a, &name)) { 1229 HChar buf[1 + VG_strlen(name) + 1 + 1]; 1230 if (flags & VG_MSG_PAREN) { 1231 VG_(sprintf)(str, "(%s)", name): 1232 } else { 1233 VG_(sprintf)(str, "%s", name): 1234 } 1235 ret += myvprintf_str(send, flags, width, buf, 0); 1236 } 1237 break; 1238 } 1239# endif 1240 default: 1241 break; 1242 } 1243 } 1244 return ret; 1245} 1246 1247 1248/* A general replacement for printf(). Note that only low-level 1249 debugging info should be sent via here. The official route is to 1250 to use vg_message(). This interface is deprecated. 1251*/ 1252static HChar myprintf_buf[1000]; 1253static Int n_myprintf_buf; 1254 1255static void add_to_myprintf_buf ( HChar c ) 1256{ 1257 if (c == '\n' || n_myprintf_buf >= 1000-10 /*paranoia*/ ) { 1258 (*vexxx_log_bytes)( myprintf_buf, vexxx_strlen(myprintf_buf) ); 1259 n_myprintf_buf = 0; 1260 myprintf_buf[n_myprintf_buf] = 0; 1261 } 1262 myprintf_buf[n_myprintf_buf++] = c; 1263 myprintf_buf[n_myprintf_buf] = 0; 1264} 1265 1266static UInt vexxx_printf ( const char *format, ... ) 1267{ 1268 UInt ret; 1269 va_list vargs; 1270 va_start(vargs,format); 1271 1272 n_myprintf_buf = 0; 1273 myprintf_buf[n_myprintf_buf] = 0; 1274 ret = vprintf_wrk ( add_to_myprintf_buf, format, vargs ); 1275 1276 if (n_myprintf_buf > 0) { 1277 (*vexxx_log_bytes)( myprintf_buf, n_myprintf_buf ); 1278 } 1279 1280 va_end(vargs); 1281 1282 return ret; 1283} 1284 1285/*---------------------------------------------------------------*/ 1286/*--- end vexxx_util.c ---*/ 1287/*---------------------------------------------------------------*/ 1288 1289 1290///////////////////////////////////////////////////////////////////// 1291///////////////////////////////////////////////////////////////////// 1292///////////////////////////////////////////////////////////////////// 1293///////////////////////////////////////////////////////////////////// 1294 1295 1296/*-------------------------------------------------------------*/ 1297/*--- Decompression machinery ---*/ 1298/*--- decompress.c ---*/ 1299/*-------------------------------------------------------------*/ 1300 1301/*-- 1302 This file is a part of bzip2 and/or libbzip2, a program and 1303 library for lossless, block-sorting data compression. 1304 1305 Copyright (C) 1996-2004 Julian R Seward. All rights reserved. 1306 1307 Redistribution and use in source and binary forms, with or without 1308 modification, are permitted provided that the following conditions 1309 are met: 1310 1311 1. Redistributions of source code must retain the above copyright 1312 notice, this list of conditions and the following disclaimer. 1313 1314 2. The origin of this software must not be misrepresented; you must 1315 not claim that you wrote the original software. If you use this 1316 software in a product, an acknowledgment in the product 1317 documentation would be appreciated but is not required. 1318 1319 3. Altered source versions must be plainly marked as such, and must 1320 not be misrepresented as being the original software. 1321 1322 4. The name of the author may not be used to endorse or promote 1323 products derived from this software without specific prior written 1324 permission. 1325 1326 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 1327 OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 1328 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 1329 ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY 1330 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 1331 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 1332 GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 1333 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 1334 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 1335 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 1336 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 1337 1338 Julian Seward, Cambridge, UK. 1339 jseward@bzip.org 1340 bzip2/libbzip2 version 1.0 of 21 March 2000 1341 1342 This program is based on (at least) the work of: 1343 Mike Burrows 1344 David Wheeler 1345 Peter Fenwick 1346 Alistair Moffat 1347 Radford Neal 1348 Ian H. Witten 1349 Robert Sedgewick 1350 Jon L. Bentley 1351 1352 For more information on these sources, see the manual. 1353--*/ 1354 1355 1356 1357 1358/*---------------------------------------------------*/ 1359static 1360void makeMaps_d ( DState* s ) 1361{ 1362 Int32 i; 1363 s->nInUse = 0; 1364 for (i = 0; i < 256; i++) 1365 if (s->inUse[i]) { 1366 s->seqToUnseq[s->nInUse] = i; 1367 s->nInUse++; 1368 } 1369} 1370 1371 1372/*---------------------------------------------------*/ 1373#define RETURN(rrr) \ 1374 { retVal = rrr; goto save_state_and_return; }; 1375 1376#define GET_BITS(lll,vvv,nnn) \ 1377 case lll: s->state = lll; \ 1378 while (True) { \ 1379 if (s->bsLive >= nnn) { \ 1380 UInt32 v; \ 1381 v = (s->bsBuff >> \ 1382 (s->bsLive-nnn)) & ((1 << nnn)-1); \ 1383 s->bsLive -= nnn; \ 1384 vvv = v; \ 1385 break; \ 1386 } \ 1387 if (s->strm->avail_in == 0) RETURN(BZ_OK); \ 1388 s->bsBuff \ 1389 = (s->bsBuff << 8) | \ 1390 ((UInt32) \ 1391 (*((UChar*)(s->strm->next_in)))); \ 1392 s->bsLive += 8; \ 1393 s->strm->next_in++; \ 1394 s->strm->avail_in--; \ 1395 s->strm->total_in_lo32++; \ 1396 if (s->strm->total_in_lo32 == 0) \ 1397 s->strm->total_in_hi32++; \ 1398 } 1399 1400#define GET_UCHAR(lll,uuu) \ 1401 GET_BITS(lll,uuu,8) 1402 1403#define GET_BIT(lll,uuu) \ 1404 GET_BITS(lll,uuu,1) 1405 1406/*---------------------------------------------------*/ 1407#define GET_MTF_VAL(label1,label2,lval) \ 1408{ \ 1409 if (groupPos == 0) { \ 1410 groupNo++; \ 1411 if (groupNo >= nSelectors) \ 1412 RETURN(BZ_DATA_ERROR); \ 1413 groupPos = BZ_G_SIZE; \ 1414 gSel = s->selector[groupNo]; \ 1415 gMinlen = s->minLens[gSel]; \ 1416 gLimit = &(s->limit[gSel][0]); \ 1417 gPerm = &(s->perm[gSel][0]); \ 1418 gBase = &(s->base[gSel][0]); \ 1419 } \ 1420 groupPos--; \ 1421 zn = gMinlen; \ 1422 GET_BITS(label1, zvec, zn); \ 1423 while (1) { \ 1424 if (zn > 20 /* the longest code */) \ 1425 RETURN(BZ_DATA_ERROR); \ 1426 if (zvec <= gLimit[zn]) break; \ 1427 zn++; \ 1428 GET_BIT(label2, zj); \ 1429 zvec = (zvec << 1) | zj; \ 1430 }; \ 1431 if (zvec - gBase[zn] < 0 \ 1432 || zvec - gBase[zn] >= BZ_MAX_ALPHA_SIZE) \ 1433 RETURN(BZ_DATA_ERROR); \ 1434 lval = gPerm[zvec - gBase[zn]]; \ 1435} 1436 1437 1438 1439/*---------------------------------------------------*/ 1440__inline__ Int32 BZ2_indexIntoF ( Int32 indx, Int32 *cftab ) 1441{ 1442 Int32 nb, na, mid; 1443 nb = 0; 1444 na = 256; 1445 do { 1446 mid = (nb + na) >> 1; 1447 if (indx >= cftab[mid]) nb = mid; else na = mid; 1448 } 1449 while (na - nb != 1); 1450 return nb; 1451} 1452 1453/*---------------------------------------------------*/ 1454Int32 BZ2_decompress ( DState* s ) 1455{ 1456 UChar uc; 1457 Int32 retVal; 1458 Int32 minLen, maxLen; 1459 bz_stream* strm = s->strm; 1460 1461 /* stuff that needs to be saved/restored */ 1462 Int32 i; 1463 Int32 j; 1464 Int32 t; 1465 Int32 alphaSize; 1466 Int32 nGroups; 1467 Int32 nSelectors; 1468 Int32 EOB; 1469 Int32 groupNo; 1470 Int32 groupPos; 1471 Int32 nextSym; 1472 Int32 nblockMAX; 1473 Int32 nblock; 1474 Int32 es; 1475 Int32 N; 1476 Int32 curr; 1477 Int32 zt; 1478 Int32 zn; 1479 Int32 zvec; 1480 Int32 zj; 1481 Int32 gSel; 1482 Int32 gMinlen; 1483 Int32* gLimit; 1484 Int32* gBase; 1485 Int32* gPerm; 1486 1487 if (s->state == BZ_X_MAGIC_1) { 1488 /*initialise the save area*/ 1489 s->save_i = 0; 1490 s->save_j = 0; 1491 s->save_t = 0; 1492 s->save_alphaSize = 0; 1493 s->save_nGroups = 0; 1494 s->save_nSelectors = 0; 1495 s->save_EOB = 0; 1496 s->save_groupNo = 0; 1497 s->save_groupPos = 0; 1498 s->save_nextSym = 0; 1499 s->save_nblockMAX = 0; 1500 s->save_nblock = 0; 1501 s->save_es = 0; 1502 s->save_N = 0; 1503 s->save_curr = 0; 1504 s->save_zt = 0; 1505 s->save_zn = 0; 1506 s->save_zvec = 0; 1507 s->save_zj = 0; 1508 s->save_gSel = 0; 1509 s->save_gMinlen = 0; 1510 s->save_gLimit = NULL; 1511 s->save_gBase = NULL; 1512 s->save_gPerm = NULL; 1513 } 1514 1515 /*restore from the save area*/ 1516 i = s->save_i; 1517 j = s->save_j; 1518 t = s->save_t; 1519 alphaSize = s->save_alphaSize; 1520 nGroups = s->save_nGroups; 1521 nSelectors = s->save_nSelectors; 1522 EOB = s->save_EOB; 1523 groupNo = s->save_groupNo; 1524 groupPos = s->save_groupPos; 1525 nextSym = s->save_nextSym; 1526 nblockMAX = s->save_nblockMAX; 1527 nblock = s->save_nblock; 1528 es = s->save_es; 1529 N = s->save_N; 1530 curr = s->save_curr; 1531 zt = s->save_zt; 1532 zn = s->save_zn; 1533 zvec = s->save_zvec; 1534 zj = s->save_zj; 1535 gSel = s->save_gSel; 1536 gMinlen = s->save_gMinlen; 1537 gLimit = s->save_gLimit; 1538 gBase = s->save_gBase; 1539 gPerm = s->save_gPerm; 1540 1541 retVal = BZ_OK; 1542 1543 switch (s->state) { 1544 1545 GET_UCHAR(BZ_X_MAGIC_1, uc); 1546 if (uc != BZ_HDR_B) RETURN(BZ_DATA_ERROR_MAGIC); 1547 1548 GET_UCHAR(BZ_X_MAGIC_2, uc); 1549 if (uc != BZ_HDR_Z) RETURN(BZ_DATA_ERROR_MAGIC); 1550 1551 GET_UCHAR(BZ_X_MAGIC_3, uc) 1552 if (uc != BZ_HDR_h) RETURN(BZ_DATA_ERROR_MAGIC); 1553 1554 GET_BITS(BZ_X_MAGIC_4, s->blockSize100k, 8) 1555 if (s->blockSize100k < (BZ_HDR_0 + 1) || 1556 s->blockSize100k > (BZ_HDR_0 + 9)) RETURN(BZ_DATA_ERROR_MAGIC); 1557 s->blockSize100k -= BZ_HDR_0; 1558 1559 if (s->smallDecompress) { 1560 s->ll16 = BZALLOC( s->blockSize100k * 100000 * sizeof(UInt16) ); 1561 s->ll4 = BZALLOC( 1562 ((1 + s->blockSize100k * 100000) >> 1) * sizeof(UChar) 1563 ); 1564 if (s->ll16 == NULL || s->ll4 == NULL) RETURN(BZ_MEM_ERROR); 1565 } else { 1566 s->tt = BZALLOC( s->blockSize100k * 100000 * sizeof(Int32) ); 1567 if (s->tt == NULL) RETURN(BZ_MEM_ERROR); 1568 } 1569 1570 GET_UCHAR(BZ_X_BLKHDR_1, uc); 1571 1572 if (uc == 0x17) goto endhdr_2; 1573 if (uc != 0x31) RETURN(BZ_DATA_ERROR); 1574 GET_UCHAR(BZ_X_BLKHDR_2, uc); 1575 if (uc != 0x41) RETURN(BZ_DATA_ERROR); 1576 GET_UCHAR(BZ_X_BLKHDR_3, uc); 1577 if (uc != 0x59) RETURN(BZ_DATA_ERROR); 1578 GET_UCHAR(BZ_X_BLKHDR_4, uc); 1579 if (uc != 0x26) RETURN(BZ_DATA_ERROR); 1580 GET_UCHAR(BZ_X_BLKHDR_5, uc); 1581 if (uc != 0x53) RETURN(BZ_DATA_ERROR); 1582 GET_UCHAR(BZ_X_BLKHDR_6, uc); 1583 if (uc != 0x59) RETURN(BZ_DATA_ERROR); 1584 1585 s->currBlockNo++; 1586 if (s->verbosity >= 2) 1587 VPrintf1 ( "\n [%d: huff+mtf ", s->currBlockNo ); 1588 1589 s->storedBlockCRC = 0; 1590 GET_UCHAR(BZ_X_BCRC_1, uc); 1591 s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc); 1592 GET_UCHAR(BZ_X_BCRC_2, uc); 1593 s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc); 1594 GET_UCHAR(BZ_X_BCRC_3, uc); 1595 s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc); 1596 GET_UCHAR(BZ_X_BCRC_4, uc); 1597 s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc); 1598 1599 GET_BITS(BZ_X_RANDBIT, s->blockRandomised, 1); 1600 1601 s->origPtr = 0; 1602 GET_UCHAR(BZ_X_ORIGPTR_1, uc); 1603 s->origPtr = (s->origPtr << 8) | ((Int32)uc); 1604 GET_UCHAR(BZ_X_ORIGPTR_2, uc); 1605 s->origPtr = (s->origPtr << 8) | ((Int32)uc); 1606 GET_UCHAR(BZ_X_ORIGPTR_3, uc); 1607 s->origPtr = (s->origPtr << 8) | ((Int32)uc); 1608 1609 if (s->origPtr < 0) 1610 RETURN(BZ_DATA_ERROR); 1611 if (s->origPtr > 10 + 100000*s->blockSize100k) 1612 RETURN(BZ_DATA_ERROR); 1613 1614 /*--- Receive the mapping table ---*/ 1615 for (i = 0; i < 16; i++) { 1616 GET_BIT(BZ_X_MAPPING_1, uc); 1617 if (uc == 1) 1618 s->inUse16[i] = True; else 1619 s->inUse16[i] = False; 1620 } 1621 1622 for (i = 0; i < 256; i++) s->inUse[i] = False; 1623 1624 for (i = 0; i < 16; i++) 1625 if (s->inUse16[i]) 1626 for (j = 0; j < 16; j++) { 1627 GET_BIT(BZ_X_MAPPING_2, uc); 1628 if (uc == 1) s->inUse[i * 16 + j] = True; 1629 } 1630 makeMaps_d ( s ); 1631 if (s->nInUse == 0) RETURN(BZ_DATA_ERROR); 1632 alphaSize = s->nInUse+2; 1633 1634 /*--- Now the selectors ---*/ 1635 GET_BITS(BZ_X_SELECTOR_1, nGroups, 3); 1636 if (nGroups < 2 || nGroups > 6) RETURN(BZ_DATA_ERROR); 1637 GET_BITS(BZ_X_SELECTOR_2, nSelectors, 15); 1638 if (nSelectors < 1) RETURN(BZ_DATA_ERROR); 1639 for (i = 0; i < nSelectors; i++) { 1640 j = 0; 1641 while (True) { 1642 GET_BIT(BZ_X_SELECTOR_3, uc); 1643 if (uc == 0) break; 1644 j++; 1645 if (j >= nGroups) RETURN(BZ_DATA_ERROR); 1646 } 1647 s->selectorMtf[i] = j; 1648 } 1649 1650 /*--- Undo the MTF values for the selectors. ---*/ 1651 { 1652 UChar pos[BZ_N_GROUPS], tmp, v; 1653 for (v = 0; v < nGroups; v++) pos[v] = v; 1654 1655 for (i = 0; i < nSelectors; i++) { 1656 v = s->selectorMtf[i]; 1657 tmp = pos[v]; 1658 while (v > 0) { pos[v] = pos[v-1]; v--; } 1659 pos[0] = tmp; 1660 s->selector[i] = tmp; 1661 } 1662 } 1663 1664 /*--- Now the coding tables ---*/ 1665 for (t = 0; t < nGroups; t++) { 1666 GET_BITS(BZ_X_CODING_1, curr, 5); 1667 for (i = 0; i < alphaSize; i++) { 1668 while (True) { 1669 if (curr < 1 || curr > 20) RETURN(BZ_DATA_ERROR); 1670 GET_BIT(BZ_X_CODING_2, uc); 1671 if (uc == 0) break; 1672 GET_BIT(BZ_X_CODING_3, uc); 1673 if (uc == 0) curr++; else curr--; 1674 } 1675 s->len[t][i] = curr; 1676 } 1677 } 1678 1679 /*--- Create the Huffman decoding tables ---*/ 1680 for (t = 0; t < nGroups; t++) { 1681 minLen = 32; 1682 maxLen = 0; 1683 for (i = 0; i < alphaSize; i++) { 1684 if (s->len[t][i] > maxLen) maxLen = s->len[t][i]; 1685 if (s->len[t][i] < minLen) minLen = s->len[t][i]; 1686 } 1687 BZ2_hbCreateDecodeTables ( 1688 &(s->limit[t][0]), 1689 &(s->base[t][0]), 1690 &(s->perm[t][0]), 1691 &(s->len[t][0]), 1692 minLen, maxLen, alphaSize 1693 ); 1694 s->minLens[t] = minLen; 1695 } 1696 1697 /*--- Now the MTF values ---*/ 1698 1699 EOB = s->nInUse+1; 1700 nblockMAX = 100000 * s->blockSize100k; 1701 groupNo = -1; 1702 groupPos = 0; 1703 1704 for (i = 0; i <= 255; i++) s->unzftab[i] = 0; 1705 1706 /*-- MTF init --*/ 1707 { 1708 Int32 ii, jj, kk; 1709 kk = MTFA_SIZE-1; 1710 for (ii = 256 / MTFL_SIZE - 1; ii >= 0; ii--) { 1711 for (jj = MTFL_SIZE-1; jj >= 0; jj--) { 1712 s->mtfa[kk] = (UChar)(ii * MTFL_SIZE + jj); 1713 kk--; 1714 } 1715 s->mtfbase[ii] = kk + 1; 1716 } 1717 } 1718 /*-- end MTF init --*/ 1719 1720 nblock = 0; 1721 GET_MTF_VAL(BZ_X_MTF_1, BZ_X_MTF_2, nextSym); 1722 1723 while (True) { 1724 1725 if (nextSym == EOB) break; 1726 1727 if (nextSym == BZ_RUNA || nextSym == BZ_RUNB) { 1728 1729 es = -1; 1730 N = 1; 1731 do { 1732 if (nextSym == BZ_RUNA) es = es + (0+1) * N; else 1733 if (nextSym == BZ_RUNB) es = es + (1+1) * N; 1734 N = N * 2; 1735 GET_MTF_VAL(BZ_X_MTF_3, BZ_X_MTF_4, nextSym); 1736 } 1737 while (nextSym == BZ_RUNA || nextSym == BZ_RUNB); 1738 1739 es++; 1740 uc = s->seqToUnseq[ s->mtfa[s->mtfbase[0]] ]; 1741 s->unzftab[uc] += es; 1742 1743 if (s->smallDecompress) 1744 while (es > 0) { 1745 if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR); 1746 s->ll16[nblock] = (UInt16)uc; 1747 nblock++; 1748 es--; 1749 } 1750 else 1751 while (es > 0) { 1752 if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR); 1753 s->tt[nblock] = (UInt32)uc; 1754 nblock++; 1755 es--; 1756 }; 1757 1758 continue; 1759 1760 } else { 1761 1762 if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR); 1763 1764 /*-- uc = MTF ( nextSym-1 ) --*/ 1765 { 1766 Int32 ii, jj, kk, pp, lno, off; 1767 UInt32 nn; 1768 nn = (UInt32)(nextSym - 1); 1769 1770 if (nn < MTFL_SIZE) { 1771 /* avoid general-case expense */ 1772 pp = s->mtfbase[0]; 1773 uc = s->mtfa[pp+nn]; 1774 while (nn > 3) { 1775 Int32 z = pp+nn; 1776 s->mtfa[(z) ] = s->mtfa[(z)-1]; 1777 s->mtfa[(z)-1] = s->mtfa[(z)-2]; 1778 s->mtfa[(z)-2] = s->mtfa[(z)-3]; 1779 s->mtfa[(z)-3] = s->mtfa[(z)-4]; 1780 nn -= 4; 1781 } 1782 while (nn > 0) { 1783 s->mtfa[(pp+nn)] = s->mtfa[(pp+nn)-1]; nn--; 1784 }; 1785 s->mtfa[pp] = uc; 1786 } else { 1787 /* general case */ 1788 lno = nn / MTFL_SIZE; 1789 off = nn % MTFL_SIZE; 1790 pp = s->mtfbase[lno] + off; 1791 uc = s->mtfa[pp]; 1792 while (pp > s->mtfbase[lno]) { 1793 s->mtfa[pp] = s->mtfa[pp-1]; pp--; 1794 }; 1795 s->mtfbase[lno]++; 1796 while (lno > 0) { 1797 s->mtfbase[lno]--; 1798 s->mtfa[s->mtfbase[lno]] 1799 = s->mtfa[s->mtfbase[lno-1] + MTFL_SIZE - 1]; 1800 lno--; 1801 } 1802 s->mtfbase[0]--; 1803 s->mtfa[s->mtfbase[0]] = uc; 1804 if (s->mtfbase[0] == 0) { 1805 kk = MTFA_SIZE-1; 1806 for (ii = 256 / MTFL_SIZE-1; ii >= 0; ii--) { 1807 for (jj = MTFL_SIZE-1; jj >= 0; jj--) { 1808 s->mtfa[kk] = s->mtfa[s->mtfbase[ii] + jj]; 1809 kk--; 1810 } 1811 s->mtfbase[ii] = kk + 1; 1812 } 1813 } 1814 } 1815 } 1816 /*-- end uc = MTF ( nextSym-1 ) --*/ 1817 1818 s->unzftab[s->seqToUnseq[uc]]++; 1819 if (s->smallDecompress) 1820 s->ll16[nblock] = (UInt16)(s->seqToUnseq[uc]); else 1821 s->tt[nblock] = (UInt32)(s->seqToUnseq[uc]); 1822 nblock++; 1823 1824 GET_MTF_VAL(BZ_X_MTF_5, BZ_X_MTF_6, nextSym); 1825 continue; 1826 } 1827 } 1828 1829 /* Now we know what nblock is, we can do a better sanity 1830 check on s->origPtr. 1831 */ 1832 if (s->origPtr < 0 || s->origPtr >= nblock) 1833 RETURN(BZ_DATA_ERROR); 1834 1835 /*-- Set up cftab to facilitate generation of T^(-1) --*/ 1836 s->cftab[0] = 0; 1837 for (i = 1; i <= 256; i++) s->cftab[i] = s->unzftab[i-1]; 1838 for (i = 1; i <= 256; i++) s->cftab[i] += s->cftab[i-1]; 1839 for (i = 0; i <= 256; i++) { 1840 if (s->cftab[i] < 0 || s->cftab[i] > nblock) { 1841 /* s->cftab[i] can legitimately be == nblock */ 1842 RETURN(BZ_DATA_ERROR); 1843 } 1844 } 1845 1846 s->state_out_len = 0; 1847 s->state_out_ch = 0; 1848 BZ_INITIALISE_CRC ( s->calculatedBlockCRC ); 1849 s->state = BZ_X_OUTPUT; 1850 if (s->verbosity >= 2) VPrintf0 ( "rt+rld" ); 1851 1852 if (s->smallDecompress) { 1853 1854 /*-- Make a copy of cftab, used in generation of T --*/ 1855 for (i = 0; i <= 256; i++) s->cftabCopy[i] = s->cftab[i]; 1856 1857 /*-- compute the T vector --*/ 1858 for (i = 0; i < nblock; i++) { 1859 uc = (UChar)(s->ll16[i]); 1860 SET_LL(i, s->cftabCopy[uc]); 1861 s->cftabCopy[uc]++; 1862 } 1863 1864 /*-- Compute T^(-1) by pointer reversal on T --*/ 1865 i = s->origPtr; 1866 j = GET_LL(i); 1867 do { 1868 Int32 tmp = GET_LL(j); 1869 SET_LL(j, i); 1870 i = j; 1871 j = tmp; 1872 } 1873 while (i != s->origPtr); 1874 1875 s->tPos = s->origPtr; 1876 s->nblock_used = 0; 1877 if (s->blockRandomised) { 1878 BZ_RAND_INIT_MASK; 1879 BZ_GET_SMALL(s->k0); s->nblock_used++; 1880 BZ_RAND_UPD_MASK; s->k0 ^= BZ_RAND_MASK; 1881 } else { 1882 BZ_GET_SMALL(s->k0); s->nblock_used++; 1883 } 1884 1885 } else { 1886 1887 /*-- compute the T^(-1) vector --*/ 1888 for (i = 0; i < nblock; i++) { 1889 uc = (UChar)(s->tt[i] & 0xff); 1890 s->tt[s->cftab[uc]] |= (i << 8); 1891 s->cftab[uc]++; 1892 } 1893 1894 s->tPos = s->tt[s->origPtr] >> 8; 1895 s->nblock_used = 0; 1896 if (s->blockRandomised) { 1897 BZ_RAND_INIT_MASK; 1898 BZ_GET_FAST(s->k0); s->nblock_used++; 1899 BZ_RAND_UPD_MASK; s->k0 ^= BZ_RAND_MASK; 1900 } else { 1901 BZ_GET_FAST(s->k0); s->nblock_used++; 1902 } 1903 1904 } 1905 1906 RETURN(BZ_OK); 1907 1908 1909 1910 endhdr_2: 1911 1912 GET_UCHAR(BZ_X_ENDHDR_2, uc); 1913 if (uc != 0x72) RETURN(BZ_DATA_ERROR); 1914 GET_UCHAR(BZ_X_ENDHDR_3, uc); 1915 if (uc != 0x45) RETURN(BZ_DATA_ERROR); 1916 GET_UCHAR(BZ_X_ENDHDR_4, uc); 1917 if (uc != 0x38) RETURN(BZ_DATA_ERROR); 1918 GET_UCHAR(BZ_X_ENDHDR_5, uc); 1919 if (uc != 0x50) RETURN(BZ_DATA_ERROR); 1920 GET_UCHAR(BZ_X_ENDHDR_6, uc); 1921 if (uc != 0x90) RETURN(BZ_DATA_ERROR); 1922 1923 s->storedCombinedCRC = 0; 1924 GET_UCHAR(BZ_X_CCRC_1, uc); 1925 s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc); 1926 GET_UCHAR(BZ_X_CCRC_2, uc); 1927 s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc); 1928 GET_UCHAR(BZ_X_CCRC_3, uc); 1929 s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc); 1930 GET_UCHAR(BZ_X_CCRC_4, uc); 1931 s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc); 1932 1933 s->state = BZ_X_IDLE; 1934 RETURN(BZ_STREAM_END); 1935 1936 default: AssertH ( False, 4001 ); 1937 } 1938 1939 AssertH ( False, 4002 ); 1940 1941 save_state_and_return: 1942 1943 s->save_i = i; 1944 s->save_j = j; 1945 s->save_t = t; 1946 s->save_alphaSize = alphaSize; 1947 s->save_nGroups = nGroups; 1948 s->save_nSelectors = nSelectors; 1949 s->save_EOB = EOB; 1950 s->save_groupNo = groupNo; 1951 s->save_groupPos = groupPos; 1952 s->save_nextSym = nextSym; 1953 s->save_nblockMAX = nblockMAX; 1954 s->save_nblock = nblock; 1955 s->save_es = es; 1956 s->save_N = N; 1957 s->save_curr = curr; 1958 s->save_zt = zt; 1959 s->save_zn = zn; 1960 s->save_zvec = zvec; 1961 s->save_zj = zj; 1962 s->save_gSel = gSel; 1963 s->save_gMinlen = gMinlen; 1964 s->save_gLimit = gLimit; 1965 s->save_gBase = gBase; 1966 s->save_gPerm = gPerm; 1967 1968 return retVal; 1969} 1970 1971 1972/*-------------------------------------------------------------*/ 1973/*--- end decompress.c ---*/ 1974/*-------------------------------------------------------------*/ 1975 1976/*-------------------------------------------------------------*/ 1977/*--- Block sorting machinery ---*/ 1978/*--- blocksort.c ---*/ 1979/*-------------------------------------------------------------*/ 1980 1981/*-- 1982 This file is a part of bzip2 and/or libbzip2, a program and 1983 library for lossless, block-sorting data compression. 1984 1985 Copyright (C) 1996-2004 Julian R Seward. All rights reserved. 1986 1987 Redistribution and use in source and binary forms, with or without 1988 modification, are permitted provided that the following conditions 1989 are met: 1990 1991 1. Redistributions of source code must retain the above copyright 1992 notice, this list of conditions and the following disclaimer. 1993 1994 2. The origin of this software must not be misrepresented; you must 1995 not claim that you wrote the original software. If you use this 1996 software in a product, an acknowledgment in the product 1997 documentation would be appreciated but is not required. 1998 1999 3. Altered source versions must be plainly marked as such, and must 2000 not be misrepresented as being the original software. 2001 2002 4. The name of the author may not be used to endorse or promote 2003 products derived from this software without specific prior written 2004 permission. 2005 2006 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 2007 OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 2008 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 2009 ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY 2010 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 2011 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 2012 GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 2013 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 2014 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 2015 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 2016 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 2017 2018 Julian Seward, Cambridge, UK. 2019 jseward@bzip.org 2020 bzip2/libbzip2 version 1.0 of 21 March 2000 2021 2022 This program is based on (at least) the work of: 2023 Mike Burrows 2024 David Wheeler 2025 Peter Fenwick 2026 Alistair Moffat 2027 Radford Neal 2028 Ian H. Witten 2029 Robert Sedgewick 2030 Jon L. Bentley 2031 2032 For more information on these sources, see the manual. 2033 2034 To get some idea how the block sorting algorithms in this file 2035 work, read my paper 2036 On the Performance of BWT Sorting Algorithms 2037 in Proceedings of the IEEE Data Compression Conference 2000, 2038 Snowbird, Utah, USA, 27-30 March 2000. The main sort in this 2039 file implements the algorithm called cache in the paper. 2040--*/ 2041 2042 2043 2044/*---------------------------------------------*/ 2045/*--- Fallback O(N log(N)^2) sorting ---*/ 2046/*--- algorithm, for repetitive blocks ---*/ 2047/*---------------------------------------------*/ 2048 2049/*---------------------------------------------*/ 2050static 2051__inline__ 2052void fallbackSimpleSort ( UInt32* fmap, 2053 UInt32* eclass, 2054 Int32 lo, 2055 Int32 hi ) 2056{ 2057 Int32 i, j, tmp; 2058 UInt32 ec_tmp; 2059 2060 if (lo == hi) return; 2061 2062 if (hi - lo > 3) { 2063 for ( i = hi-4; i >= lo; i-- ) { 2064 tmp = fmap[i]; 2065 ec_tmp = eclass[tmp]; 2066 for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 ) 2067 fmap[j-4] = fmap[j]; 2068 fmap[j-4] = tmp; 2069 } 2070 } 2071 2072 for ( i = hi-1; i >= lo; i-- ) { 2073 tmp = fmap[i]; 2074 ec_tmp = eclass[tmp]; 2075 for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ ) 2076 fmap[j-1] = fmap[j]; 2077 fmap[j-1] = tmp; 2078 } 2079} 2080 2081 2082/*---------------------------------------------*/ 2083#define fswap(zz1, zz2) \ 2084 { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; } 2085 2086#define fvswap(zzp1, zzp2, zzn) \ 2087{ \ 2088 Int32 yyp1 = (zzp1); \ 2089 Int32 yyp2 = (zzp2); \ 2090 Int32 yyn = (zzn); \ 2091 while (yyn > 0) { \ 2092 fswap(fmap[yyp1], fmap[yyp2]); \ 2093 yyp1++; yyp2++; yyn--; \ 2094 } \ 2095} 2096 2097 2098#define fmin(a,b) ((a) < (b)) ? (a) : (b) 2099 2100#define fpush(lz,hz) { stackLo[sp] = lz; \ 2101 stackHi[sp] = hz; \ 2102 sp++; } 2103 2104#define fpop(lz,hz) { sp--; \ 2105 lz = stackLo[sp]; \ 2106 hz = stackHi[sp]; } 2107 2108#define FALLBACK_QSORT_SMALL_THRESH 10 2109#define FALLBACK_QSORT_STACK_SIZE 100 2110 2111 2112static 2113void fallbackQSort3 ( UInt32* fmap, 2114 UInt32* eclass, 2115 Int32 loSt, 2116 Int32 hiSt ) 2117{ 2118 Int32 unLo, unHi, ltLo, gtHi, n, m; 2119 Int32 sp, lo, hi; 2120 UInt32 med, r, r3; 2121 Int32 stackLo[FALLBACK_QSORT_STACK_SIZE]; 2122 Int32 stackHi[FALLBACK_QSORT_STACK_SIZE]; 2123 2124 r = 0; 2125 2126 sp = 0; 2127 fpush ( loSt, hiSt ); 2128 2129 while (sp > 0) { 2130 2131 AssertH ( sp < FALLBACK_QSORT_STACK_SIZE, 1004 ); 2132 2133 fpop ( lo, hi ); 2134 if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) { 2135 fallbackSimpleSort ( fmap, eclass, lo, hi ); 2136 continue; 2137 } 2138 2139 /* Random partitioning. Median of 3 sometimes fails to 2140 avoid bad cases. Median of 9 seems to help but 2141 looks rather expensive. This too seems to work but 2142 is cheaper. Guidance for the magic constants 2143 7621 and 32768 is taken from Sedgewick's algorithms 2144 book, chapter 35. 2145 */ 2146 r = ((r * 7621) + 1) % 32768; 2147 r3 = r % 3; 2148 if (r3 == 0) med = eclass[fmap[lo]]; else 2149 if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else 2150 med = eclass[fmap[hi]]; 2151 2152 unLo = ltLo = lo; 2153 unHi = gtHi = hi; 2154 2155 while (1) { 2156 while (1) { 2157 if (unLo > unHi) break; 2158 n = (Int32)eclass[fmap[unLo]] - (Int32)med; 2159 if (n == 0) { 2160 fswap(fmap[unLo], fmap[ltLo]); 2161 ltLo++; unLo++; 2162 continue; 2163 }; 2164 if (n > 0) break; 2165 unLo++; 2166 } 2167 while (1) { 2168 if (unLo > unHi) break; 2169 n = (Int32)eclass[fmap[unHi]] - (Int32)med; 2170 if (n == 0) { 2171 fswap(fmap[unHi], fmap[gtHi]); 2172 gtHi--; unHi--; 2173 continue; 2174 }; 2175 if (n < 0) break; 2176 unHi--; 2177 } 2178 if (unLo > unHi) break; 2179 fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--; 2180 } 2181 2182 AssertD ( unHi == unLo-1, "fallbackQSort3(2)" ); 2183 2184 if (gtHi < ltLo) continue; 2185 2186 n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n); 2187 m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m); 2188 2189 n = lo + unLo - ltLo - 1; 2190 m = hi - (gtHi - unHi) + 1; 2191 2192 if (n - lo > hi - m) { 2193 fpush ( lo, n ); 2194 fpush ( m, hi ); 2195 } else { 2196 fpush ( m, hi ); 2197 fpush ( lo, n ); 2198 } 2199 } 2200} 2201 2202#undef fmin 2203#undef fpush 2204#undef fpop 2205#undef fswap 2206#undef fvswap 2207#undef FALLBACK_QSORT_SMALL_THRESH 2208#undef FALLBACK_QSORT_STACK_SIZE 2209 2210 2211/*---------------------------------------------*/ 2212/* Pre: 2213 nblock > 0 2214 eclass exists for [0 .. nblock-1] 2215 ((UChar*)eclass) [0 .. nblock-1] holds block 2216 ptr exists for [0 .. nblock-1] 2217 2218 Post: 2219 ((UChar*)eclass) [0 .. nblock-1] holds block 2220 All other areas of eclass destroyed 2221 fmap [0 .. nblock-1] holds sorted order 2222 bhtab [ 0 .. 2+(nblock/32) ] destroyed 2223*/ 2224 2225#define SET_BH(zz) bhtab[(zz) >> 5] |= (1 << ((zz) & 31)) 2226#define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31)) 2227#define ISSET_BH(zz) (bhtab[(zz) >> 5] & (1 << ((zz) & 31))) 2228#define WORD_BH(zz) bhtab[(zz) >> 5] 2229#define UNALIGNED_BH(zz) ((zz) & 0x01f) 2230 2231static 2232void fallbackSort ( UInt32* fmap, 2233 UInt32* eclass, 2234 UInt32* bhtab, 2235 Int32 nblock, 2236 Int32 verb ) 2237{ 2238 Int32 ftab[257]; 2239 Int32 ftabCopy[256]; 2240 Int32 H, i, j, k, l, r, cc, cc1; 2241 Int32 nNotDone; 2242 Int32 nBhtab; 2243 UChar* eclass8 = (UChar*)eclass; 2244 2245 /*-- 2246 Initial 1-char radix sort to generate 2247 initial fmap and initial BH bits. 2248 --*/ 2249 if (verb >= 4) 2250 VPrintf0 ( " bucket sorting ...\n" ); 2251 for (i = 0; i < 257; i++) ftab[i] = 0; 2252 for (i = 0; i < nblock; i++) ftab[eclass8[i]]++; 2253 for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i]; 2254 for (i = 1; i < 257; i++) ftab[i] += ftab[i-1]; 2255 2256 for (i = 0; i < nblock; i++) { 2257 j = eclass8[i]; 2258 k = ftab[j] - 1; 2259 ftab[j] = k; 2260 fmap[k] = i; 2261 } 2262 2263 nBhtab = 2 + (nblock / 32); 2264 for (i = 0; i < nBhtab; i++) bhtab[i] = 0; 2265 for (i = 0; i < 256; i++) SET_BH(ftab[i]); 2266 2267 /*-- 2268 Inductively refine the buckets. Kind-of an 2269 "exponential radix sort" (!), inspired by the 2270 Manber-Myers suffix array construction algorithm. 2271 --*/ 2272 2273 /*-- set sentinel bits for block-end detection --*/ 2274 for (i = 0; i < 32; i++) { 2275 SET_BH(nblock + 2*i); 2276 CLEAR_BH(nblock + 2*i + 1); 2277 } 2278 2279 /*-- the log(N) loop --*/ 2280 H = 1; 2281 while (1) { 2282 2283 if (verb >= 4) 2284 VPrintf1 ( " depth %6d has ", H ); 2285 2286 j = 0; 2287 for (i = 0; i < nblock; i++) { 2288 if (ISSET_BH(i)) j = i; 2289 k = fmap[i] - H; if (k < 0) k += nblock; 2290 eclass[k] = j; 2291 } 2292 2293 nNotDone = 0; 2294 r = -1; 2295 while (1) { 2296 2297 /*-- find the next non-singleton bucket --*/ 2298 k = r + 1; 2299 while (ISSET_BH(k) && UNALIGNED_BH(k)) k++; 2300 if (ISSET_BH(k)) { 2301 while (WORD_BH(k) == 0xffffffff) k += 32; 2302 while (ISSET_BH(k)) k++; 2303 } 2304 l = k - 1; 2305 if (l >= nblock) break; 2306 while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++; 2307 if (!ISSET_BH(k)) { 2308 while (WORD_BH(k) == 0x00000000) k += 32; 2309 while (!ISSET_BH(k)) k++; 2310 } 2311 r = k - 1; 2312 if (r >= nblock) break; 2313 2314 /*-- now [l, r] bracket current bucket --*/ 2315 if (r > l) { 2316 nNotDone += (r - l + 1); 2317 fallbackQSort3 ( fmap, eclass, l, r ); 2318 2319 /*-- scan bucket and generate header bits-- */ 2320 cc = -1; 2321 for (i = l; i <= r; i++) { 2322 cc1 = eclass[fmap[i]]; 2323 if (cc != cc1) { SET_BH(i); cc = cc1; }; 2324 } 2325 } 2326 } 2327 2328 if (verb >= 4) 2329 VPrintf1 ( "%6d unresolved strings\n", nNotDone ); 2330 2331 H *= 2; 2332 if (H > nblock || nNotDone == 0) break; 2333 } 2334 2335 /*-- 2336 Reconstruct the original block in 2337 eclass8 [0 .. nblock-1], since the 2338 previous phase destroyed it. 2339 --*/ 2340 if (verb >= 4) 2341 VPrintf0 ( " reconstructing block ...\n" ); 2342 j = 0; 2343 for (i = 0; i < nblock; i++) { 2344 while (ftabCopy[j] == 0) j++; 2345 ftabCopy[j]--; 2346 eclass8[fmap[i]] = (UChar)j; 2347 } 2348 AssertH ( j < 256, 1005 ); 2349} 2350 2351#undef SET_BH 2352#undef CLEAR_BH 2353#undef ISSET_BH 2354#undef WORD_BH 2355#undef UNALIGNED_BH 2356 2357 2358/*---------------------------------------------*/ 2359/*--- The main, O(N^2 log(N)) sorting ---*/ 2360/*--- algorithm. Faster for "normal" ---*/ 2361/*--- non-repetitive blocks. ---*/ 2362/*---------------------------------------------*/ 2363 2364/*---------------------------------------------*/ 2365static 2366__inline__ 2367Bool mainGtU ( UInt32 i1, 2368 UInt32 i2, 2369 UChar* block, 2370 UInt16* quadrant, 2371 UInt32 nblock, 2372 Int32* budget ) 2373{ 2374 Int32 k; 2375 UChar c1, c2; 2376 UInt16 s1, s2; 2377 2378 AssertD ( i1 != i2, "mainGtU" ); 2379 /* 1 */ 2380 c1 = block[i1]; c2 = block[i2]; 2381 if (c1 != c2) return (c1 > c2); 2382 i1++; i2++; 2383 /* 2 */ 2384 c1 = block[i1]; c2 = block[i2]; 2385 if (c1 != c2) return (c1 > c2); 2386 i1++; i2++; 2387 /* 3 */ 2388 c1 = block[i1]; c2 = block[i2]; 2389 if (c1 != c2) return (c1 > c2); 2390 i1++; i2++; 2391 /* 4 */ 2392 c1 = block[i1]; c2 = block[i2]; 2393 if (c1 != c2) return (c1 > c2); 2394 i1++; i2++; 2395 /* 5 */ 2396 c1 = block[i1]; c2 = block[i2]; 2397 if (c1 != c2) return (c1 > c2); 2398 i1++; i2++; 2399 /* 6 */ 2400 c1 = block[i1]; c2 = block[i2]; 2401 if (c1 != c2) return (c1 > c2); 2402 i1++; i2++; 2403 /* 7 */ 2404 c1 = block[i1]; c2 = block[i2]; 2405 if (c1 != c2) return (c1 > c2); 2406 i1++; i2++; 2407 /* 8 */ 2408 c1 = block[i1]; c2 = block[i2]; 2409 if (c1 != c2) return (c1 > c2); 2410 i1++; i2++; 2411 /* 9 */ 2412 c1 = block[i1]; c2 = block[i2]; 2413 if (c1 != c2) return (c1 > c2); 2414 i1++; i2++; 2415 /* 10 */ 2416 c1 = block[i1]; c2 = block[i2]; 2417 if (c1 != c2) return (c1 > c2); 2418 i1++; i2++; 2419 /* 11 */ 2420 c1 = block[i1]; c2 = block[i2]; 2421 if (c1 != c2) return (c1 > c2); 2422 i1++; i2++; 2423 /* 12 */ 2424 c1 = block[i1]; c2 = block[i2]; 2425 if (c1 != c2) return (c1 > c2); 2426 i1++; i2++; 2427 2428 k = nblock + 8; 2429 2430 do { 2431 /* 1 */ 2432 c1 = block[i1]; c2 = block[i2]; 2433 if (c1 != c2) return (c1 > c2); 2434 s1 = quadrant[i1]; s2 = quadrant[i2]; 2435 if (s1 != s2) return (s1 > s2); 2436 i1++; i2++; 2437 /* 2 */ 2438 c1 = block[i1]; c2 = block[i2]; 2439 if (c1 != c2) return (c1 > c2); 2440 s1 = quadrant[i1]; s2 = quadrant[i2]; 2441 if (s1 != s2) return (s1 > s2); 2442 i1++; i2++; 2443 /* 3 */ 2444 c1 = block[i1]; c2 = block[i2]; 2445 if (c1 != c2) return (c1 > c2); 2446 s1 = quadrant[i1]; s2 = quadrant[i2]; 2447 if (s1 != s2) return (s1 > s2); 2448 i1++; i2++; 2449 /* 4 */ 2450 c1 = block[i1]; c2 = block[i2]; 2451 if (c1 != c2) return (c1 > c2); 2452 s1 = quadrant[i1]; s2 = quadrant[i2]; 2453 if (s1 != s2) return (s1 > s2); 2454 i1++; i2++; 2455 /* 5 */ 2456 c1 = block[i1]; c2 = block[i2]; 2457 if (c1 != c2) return (c1 > c2); 2458 s1 = quadrant[i1]; s2 = quadrant[i2]; 2459 if (s1 != s2) return (s1 > s2); 2460 i1++; i2++; 2461 /* 6 */ 2462 c1 = block[i1]; c2 = block[i2]; 2463 if (c1 != c2) return (c1 > c2); 2464 s1 = quadrant[i1]; s2 = quadrant[i2]; 2465 if (s1 != s2) return (s1 > s2); 2466 i1++; i2++; 2467 /* 7 */ 2468 c1 = block[i1]; c2 = block[i2]; 2469 if (c1 != c2) return (c1 > c2); 2470 s1 = quadrant[i1]; s2 = quadrant[i2]; 2471 if (s1 != s2) return (s1 > s2); 2472 i1++; i2++; 2473 /* 8 */ 2474 c1 = block[i1]; c2 = block[i2]; 2475 if (c1 != c2) return (c1 > c2); 2476 s1 = quadrant[i1]; s2 = quadrant[i2]; 2477 if (s1 != s2) return (s1 > s2); 2478 i1++; i2++; 2479 2480 if (i1 >= nblock) i1 -= nblock; 2481 if (i2 >= nblock) i2 -= nblock; 2482 2483 k -= 8; 2484 (*budget)--; 2485 } 2486 while (k >= 0); 2487 2488 return False; 2489} 2490 2491 2492/*---------------------------------------------*/ 2493/*-- 2494 Knuth's increments seem to work better 2495 than Incerpi-Sedgewick here. Possibly 2496 because the number of elems to sort is 2497 usually small, typically <= 20. 2498--*/ 2499static 2500Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280, 2501 9841, 29524, 88573, 265720, 2502 797161, 2391484 }; 2503 2504static 2505void mainSimpleSort ( UInt32* ptr, 2506 UChar* block, 2507 UInt16* quadrant, 2508 Int32 nblock, 2509 Int32 lo, 2510 Int32 hi, 2511 Int32 d, 2512 Int32* budget ) 2513{ 2514 Int32 i, j, h, bigN, hp; 2515 UInt32 v; 2516 2517 bigN = hi - lo + 1; 2518 if (bigN < 2) return; 2519 2520 hp = 0; 2521 while (incs[hp] < bigN) hp++; 2522 hp--; 2523 2524 for (; hp >= 0; hp--) { 2525 h = incs[hp]; 2526 2527 i = lo + h; 2528 while (True) { 2529 2530 /*-- copy 1 --*/ 2531 if (i > hi) break; 2532 v = ptr[i]; 2533 j = i; 2534 while ( mainGtU ( 2535 ptr[j-h]+d, v+d, block, quadrant, nblock, budget 2536 ) ) { 2537 ptr[j] = ptr[j-h]; 2538 j = j - h; 2539 if (j <= (lo + h - 1)) break; 2540 } 2541 ptr[j] = v; 2542 i++; 2543 2544 /*-- copy 2 --*/ 2545 if (i > hi) break; 2546 v = ptr[i]; 2547 j = i; 2548 while ( mainGtU ( 2549 ptr[j-h]+d, v+d, block, quadrant, nblock, budget 2550 ) ) { 2551 ptr[j] = ptr[j-h]; 2552 j = j - h; 2553 if (j <= (lo + h - 1)) break; 2554 } 2555 ptr[j] = v; 2556 i++; 2557 2558 /*-- copy 3 --*/ 2559 if (i > hi) break; 2560 v = ptr[i]; 2561 j = i; 2562 while ( mainGtU ( 2563 ptr[j-h]+d, v+d, block, quadrant, nblock, budget 2564 ) ) { 2565 ptr[j] = ptr[j-h]; 2566 j = j - h; 2567 if (j <= (lo + h - 1)) break; 2568 } 2569 ptr[j] = v; 2570 i++; 2571 2572 if (*budget < 0) return; 2573 } 2574 } 2575} 2576 2577 2578/*---------------------------------------------*/ 2579/*-- 2580 The following is an implementation of 2581 an elegant 3-way quicksort for strings, 2582 described in a paper "Fast Algorithms for 2583 Sorting and Searching Strings", by Robert 2584 Sedgewick and Jon L. Bentley. 2585--*/ 2586 2587#define mswap(zz1, zz2) \ 2588 { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; } 2589 2590#define mvswap(zzp1, zzp2, zzn) \ 2591{ \ 2592 Int32 yyp1 = (zzp1); \ 2593 Int32 yyp2 = (zzp2); \ 2594 Int32 yyn = (zzn); \ 2595 while (yyn > 0) { \ 2596 mswap(ptr[yyp1], ptr[yyp2]); \ 2597 yyp1++; yyp2++; yyn--; \ 2598 } \ 2599} 2600 2601static 2602__inline__ 2603UChar mmed3 ( UChar a, UChar b, UChar c ) 2604{ 2605 UChar t; 2606 if (a > b) { t = a; a = b; b = t; }; 2607 if (b > c) { 2608 b = c; 2609 if (a > b) b = a; 2610 } 2611 return b; 2612} 2613 2614#define mmin(a,b) ((a) < (b)) ? (a) : (b) 2615 2616#define mpush(lz,hz,dz) { stackLo[sp] = lz; \ 2617 stackHi[sp] = hz; \ 2618 stackD [sp] = dz; \ 2619 sp++; } 2620 2621#define mpop(lz,hz,dz) { sp--; \ 2622 lz = stackLo[sp]; \ 2623 hz = stackHi[sp]; \ 2624 dz = stackD [sp]; } 2625 2626 2627#define mnextsize(az) (nextHi[az]-nextLo[az]) 2628 2629#define mnextswap(az,bz) \ 2630 { Int32 tz; \ 2631 tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \ 2632 tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \ 2633 tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; } 2634 2635 2636#define MAIN_QSORT_SMALL_THRESH 20 2637#define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT) 2638#define MAIN_QSORT_STACK_SIZE 100 2639 2640static 2641void mainQSort3 ( UInt32* ptr, 2642 UChar* block, 2643 UInt16* quadrant, 2644 Int32 nblock, 2645 Int32 loSt, 2646 Int32 hiSt, 2647 Int32 dSt, 2648 Int32* budget ) 2649{ 2650 Int32 unLo, unHi, ltLo, gtHi, n, m, med; 2651 Int32 sp, lo, hi, d; 2652 2653 Int32 stackLo[MAIN_QSORT_STACK_SIZE]; 2654 Int32 stackHi[MAIN_QSORT_STACK_SIZE]; 2655 Int32 stackD [MAIN_QSORT_STACK_SIZE]; 2656 2657 Int32 nextLo[3]; 2658 Int32 nextHi[3]; 2659 Int32 nextD [3]; 2660 2661 sp = 0; 2662 mpush ( loSt, hiSt, dSt ); 2663 2664 while (sp > 0) { 2665 2666 AssertH ( sp < MAIN_QSORT_STACK_SIZE, 1001 ); 2667 2668 mpop ( lo, hi, d ); 2669 if (hi - lo < MAIN_QSORT_SMALL_THRESH || 2670 d > MAIN_QSORT_DEPTH_THRESH) { 2671 mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget ); 2672 if (*budget < 0) return; 2673 continue; 2674 } 2675 2676 med = (Int32) 2677 mmed3 ( block[ptr[ lo ]+d], 2678 block[ptr[ hi ]+d], 2679 block[ptr[ (lo+hi)>>1 ]+d] ); 2680 2681 unLo = ltLo = lo; 2682 unHi = gtHi = hi; 2683 2684 while (True) { 2685 while (True) { 2686 if (unLo > unHi) break; 2687 n = ((Int32)block[ptr[unLo]+d]) - med; 2688 if (n == 0) { 2689 mswap(ptr[unLo], ptr[ltLo]); 2690 ltLo++; unLo++; continue; 2691 }; 2692 if (n > 0) break; 2693 unLo++; 2694 } 2695 while (True) { 2696 if (unLo > unHi) break; 2697 n = ((Int32)block[ptr[unHi]+d]) - med; 2698 if (n == 0) { 2699 mswap(ptr[unHi], ptr[gtHi]); 2700 gtHi--; unHi--; continue; 2701 }; 2702 if (n < 0) break; 2703 unHi--; 2704 } 2705 if (unLo > unHi) break; 2706 mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--; 2707 } 2708 2709 AssertD ( unHi == unLo-1, "mainQSort3(2)" ); 2710 2711 if (gtHi < ltLo) { 2712 mpush(lo, hi, d+1 ); 2713 continue; 2714 } 2715 2716 n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n); 2717 m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m); 2718 2719 n = lo + unLo - ltLo - 1; 2720 m = hi - (gtHi - unHi) + 1; 2721 2722 nextLo[0] = lo; nextHi[0] = n; nextD[0] = d; 2723 nextLo[1] = m; nextHi[1] = hi; nextD[1] = d; 2724 nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1; 2725 2726 if (mnextsize(0) < mnextsize(1)) mnextswap(0,1); 2727 if (mnextsize(1) < mnextsize(2)) mnextswap(1,2); 2728 if (mnextsize(0) < mnextsize(1)) mnextswap(0,1); 2729 2730 AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" ); 2731 AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" ); 2732 2733 mpush (nextLo[0], nextHi[0], nextD[0]); 2734 mpush (nextLo[1], nextHi[1], nextD[1]); 2735 mpush (nextLo[2], nextHi[2], nextD[2]); 2736 } 2737} 2738 2739#undef mswap 2740#undef mvswap 2741#undef mpush 2742#undef mpop 2743#undef mmin 2744#undef mnextsize 2745#undef mnextswap 2746#undef MAIN_QSORT_SMALL_THRESH 2747#undef MAIN_QSORT_DEPTH_THRESH 2748#undef MAIN_QSORT_STACK_SIZE 2749 2750 2751/*---------------------------------------------*/ 2752/* Pre: 2753 nblock > N_OVERSHOOT 2754 block32 exists for [0 .. nblock-1 +N_OVERSHOOT] 2755 ((UChar*)block32) [0 .. nblock-1] holds block 2756 ptr exists for [0 .. nblock-1] 2757 2758 Post: 2759 ((UChar*)block32) [0 .. nblock-1] holds block 2760 All other areas of block32 destroyed 2761 ftab [0 .. 65536 ] destroyed 2762 ptr [0 .. nblock-1] holds sorted order 2763 if (*budget < 0), sorting was abandoned 2764*/ 2765 2766#define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8]) 2767#define SETMASK (1 << 21) 2768#define CLEARMASK (~(SETMASK)) 2769 2770static 2771void mainSort ( UInt32* ptr, 2772 UChar* block, 2773 UInt16* quadrant, 2774 UInt32* ftab, 2775 Int32 nblock, 2776 Int32 verb, 2777 Int32* budget ) 2778{ 2779 Int32 i, j, k, ss, sb; 2780 Int32 runningOrder[256]; 2781 Bool bigDone[256]; 2782 Int32 copyStart[256]; 2783 Int32 copyEnd [256]; 2784 UChar c1; 2785 Int32 numQSorted; 2786 UInt16 s; 2787 if (verb >= 4) VPrintf0 ( " main sort initialise ...\n" ); 2788 2789 /*-- set up the 2-byte frequency table --*/ 2790 for (i = 65536; i >= 0; i--) ftab[i] = 0; 2791 2792 j = block[0] << 8; 2793 i = nblock-1; 2794 for (; i >= 3; i -= 4) { 2795 quadrant[i] = 0; 2796 j = (j >> 8) | ( ((UInt16)block[i]) << 8); 2797 ftab[j]++; 2798 quadrant[i-1] = 0; 2799 j = (j >> 8) | ( ((UInt16)block[i-1]) << 8); 2800 ftab[j]++; 2801 quadrant[i-2] = 0; 2802 j = (j >> 8) | ( ((UInt16)block[i-2]) << 8); 2803 ftab[j]++; 2804 quadrant[i-3] = 0; 2805 j = (j >> 8) | ( ((UInt16)block[i-3]) << 8); 2806 ftab[j]++; 2807 } 2808 for (; i >= 0; i--) { 2809 quadrant[i] = 0; 2810 j = (j >> 8) | ( ((UInt16)block[i]) << 8); 2811 ftab[j]++; 2812 } 2813 2814 /*-- (emphasises close relationship of block & quadrant) --*/ 2815 for (i = 0; i < BZ_N_OVERSHOOT; i++) { 2816 block [nblock+i] = block[i]; 2817 quadrant[nblock+i] = 0; 2818 } 2819 2820 if (verb >= 4) VPrintf0 ( " bucket sorting ...\n" ); 2821 2822 /*-- Complete the initial radix sort --*/ 2823 for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1]; 2824 2825 s = block[0] << 8; 2826 i = nblock-1; 2827 for (; i >= 3; i -= 4) { 2828 s = (s >> 8) | (block[i] << 8); 2829 j = ftab[s] -1; 2830 ftab[s] = j; 2831 ptr[j] = i; 2832 s = (s >> 8) | (block[i-1] << 8); 2833 j = ftab[s] -1; 2834 ftab[s] = j; 2835 ptr[j] = i-1; 2836 s = (s >> 8) | (block[i-2] << 8); 2837 j = ftab[s] -1; 2838 ftab[s] = j; 2839 ptr[j] = i-2; 2840 s = (s >> 8) | (block[i-3] << 8); 2841 j = ftab[s] -1; 2842 ftab[s] = j; 2843 ptr[j] = i-3; 2844 } 2845 for (; i >= 0; i--) { 2846 s = (s >> 8) | (block[i] << 8); 2847 j = ftab[s] -1; 2848 ftab[s] = j; 2849 ptr[j] = i; 2850 } 2851 2852 /*-- 2853 Now ftab contains the first loc of every small bucket. 2854 Calculate the running order, from smallest to largest 2855 big bucket. 2856 --*/ 2857 for (i = 0; i <= 255; i++) { 2858 bigDone [i] = False; 2859 runningOrder[i] = i; 2860 } 2861 2862 { 2863 Int32 vv; 2864 Int32 h = 1; 2865 do h = 3 * h + 1; while (h <= 256); 2866 do { 2867 h = h / 3; 2868 for (i = h; i <= 255; i++) { 2869 vv = runningOrder[i]; 2870 j = i; 2871 while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) { 2872 runningOrder[j] = runningOrder[j-h]; 2873 j = j - h; 2874 if (j <= (h - 1)) goto zero; 2875 } 2876 zero: 2877 runningOrder[j] = vv; 2878 } 2879 } while (h != 1); 2880 } 2881 2882 /*-- 2883 The main sorting loop. 2884 --*/ 2885 2886 numQSorted = 0; 2887 2888 for (i = 0; i <= 255; i++) { 2889 2890 /*-- 2891 Process big buckets, starting with the least full. 2892 Basically this is a 3-step process in which we call 2893 mainQSort3 to sort the small buckets [ss, j], but 2894 also make a big effort to avoid the calls if we can. 2895 --*/ 2896 ss = runningOrder[i]; 2897 2898 /*-- 2899 Step 1: 2900 Complete the big bucket [ss] by quicksorting 2901 any unsorted small buckets [ss, j], for j != ss. 2902 Hopefully previous pointer-scanning phases have already 2903 completed many of the small buckets [ss, j], so 2904 we don't have to sort them at all. 2905 --*/ 2906 for (j = 0; j <= 255; j++) { 2907 if (j != ss) { 2908 sb = (ss << 8) + j; 2909 if ( ! (ftab[sb] & SETMASK) ) { 2910 Int32 lo = ftab[sb] & CLEARMASK; 2911 Int32 hi = (ftab[sb+1] & CLEARMASK) - 1; 2912 if (hi > lo) { 2913 if (verb >= 4) 2914 VPrintf4 ( " qsort [0x%x, 0x%x] " 2915 "done %d this %d\n", 2916 ss, j, numQSorted, hi - lo + 1 ); 2917 mainQSort3 ( 2918 ptr, block, quadrant, nblock, 2919 lo, hi, BZ_N_RADIX, budget 2920 ); 2921 numQSorted += (hi - lo + 1); 2922 if (*budget < 0) return; 2923 } 2924 } 2925 ftab[sb] |= SETMASK; 2926 } 2927 } 2928 2929 AssertH ( !bigDone[ss], 1006 ); 2930 2931 /*-- 2932 Step 2: 2933 Now scan this big bucket [ss] so as to synthesise the 2934 sorted order for small buckets [t, ss] for all t, 2935 including, magically, the bucket [ss,ss] too. 2936 This will avoid doing Real Work in subsequent Step 1's. 2937 --*/ 2938 { 2939 for (j = 0; j <= 255; j++) { 2940 copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK; 2941 copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1; 2942 } 2943 for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) { 2944 k = ptr[j]-1; if (k < 0) k += nblock; 2945 c1 = block[k]; 2946 if (!bigDone[c1]) 2947 ptr[ copyStart[c1]++ ] = k; 2948 } 2949 for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) { 2950 k = ptr[j]-1; if (k < 0) k += nblock; 2951 c1 = block[k]; 2952 if (!bigDone[c1]) 2953 ptr[ copyEnd[c1]-- ] = k; 2954 } 2955 } 2956 2957 AssertH ( (copyStart[ss]-1 == copyEnd[ss]) 2958 || 2959 /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1. 2960 Necessity for this case is demonstrated by compressing 2961 a sequence of approximately 48.5 million of character 2962 251; 1.0.0/1.0.1 will then die here. */ 2963 (copyStart[ss] == 0 && copyEnd[ss] == nblock-1), 2964 1007 ) 2965 2966 for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK; 2967 2968 /*-- 2969 Step 3: 2970 The [ss] big bucket is now done. Record this fact, 2971 and update the quadrant descriptors. Remember to 2972 update quadrants in the overshoot area too, if 2973 necessary. The "if (i < 255)" test merely skips 2974 this updating for the last bucket processed, since 2975 updating for the last bucket is pointless. 2976 2977 The quadrant array provides a way to incrementally 2978 cache sort orderings, as they appear, so as to 2979 make subsequent comparisons in fullGtU() complete 2980 faster. For repetitive blocks this makes a big 2981 difference (but not big enough to be able to avoid 2982 the fallback sorting mechanism, exponential radix sort). 2983 2984 The precise meaning is: at all times: 2985 2986 for 0 <= i < nblock and 0 <= j <= nblock 2987 2988 if block[i] != block[j], 2989 2990 then the relative values of quadrant[i] and 2991 quadrant[j] are meaningless. 2992 2993 else { 2994 if quadrant[i] < quadrant[j] 2995 then the string starting at i lexicographically 2996 precedes the string starting at j 2997 2998 else if quadrant[i] > quadrant[j] 2999 then the string starting at j lexicographically 3000 precedes the string starting at i 3001 3002 else 3003 the relative ordering of the strings starting 3004 at i and j has not yet been determined. 3005 } 3006 --*/ 3007 bigDone[ss] = True; 3008 3009 if (i < 255) { 3010 Int32 bbStart = ftab[ss << 8] & CLEARMASK; 3011 Int32 bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart; 3012 Int32 shifts = 0; 3013 3014 while ((bbSize >> shifts) > 65534) shifts++; 3015 3016 for (j = bbSize-1; j >= 0; j--) { 3017 Int32 a2update = ptr[bbStart + j]; 3018 UInt16 qVal = (UInt16)(j >> shifts); 3019 quadrant[a2update] = qVal; 3020 if (a2update < BZ_N_OVERSHOOT) 3021 quadrant[a2update + nblock] = qVal; 3022 } 3023 AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 ); 3024 } 3025 3026 } 3027 3028 if (verb >= 4) 3029 VPrintf3 ( " %d pointers, %d sorted, %d scanned\n", 3030 nblock, numQSorted, nblock - numQSorted ); 3031} 3032 3033#undef BIGFREQ 3034#undef SETMASK 3035#undef CLEARMASK 3036 3037 3038/*---------------------------------------------*/ 3039/* Pre: 3040 nblock > 0 3041 arr2 exists for [0 .. nblock-1 +N_OVERSHOOT] 3042 ((UChar*)arr2) [0 .. nblock-1] holds block 3043 arr1 exists for [0 .. nblock-1] 3044 3045 Post: 3046 ((UChar*)arr2) [0 .. nblock-1] holds block 3047 All other areas of block destroyed 3048 ftab [ 0 .. 65536 ] destroyed 3049 arr1 [0 .. nblock-1] holds sorted order 3050*/ 3051void BZ2_blockSort ( EState* s ) 3052{ 3053 UInt32* ptr = s->ptr; 3054 UChar* block = s->block; 3055 UInt32* ftab = s->ftab; 3056 Int32 nblock = s->nblock; 3057 Int32 verb = s->verbosity; 3058 Int32 wfact = s->workFactor; 3059 UInt16* quadrant; 3060 Int32 budget; 3061 Int32 budgetInit; 3062 Int32 i; 3063 3064 if (nblock < /* 10000 */1000 ) { 3065 fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb ); 3066 } else { 3067 /* Calculate the location for quadrant, remembering to get 3068 the alignment right. Assumes that &(block[0]) is at least 3069 2-byte aligned -- this should be ok since block is really 3070 the first section of arr2. 3071 */ 3072 i = nblock+BZ_N_OVERSHOOT; 3073 if (i & 1) i++; 3074 quadrant = (UInt16*)(&(block[i])); 3075 3076 /* (wfact-1) / 3 puts the default-factor-30 3077 transition point at very roughly the same place as 3078 with v0.1 and v0.9.0. 3079 Not that it particularly matters any more, since the 3080 resulting compressed stream is now the same regardless 3081 of whether or not we use the main sort or fallback sort. 3082 */ 3083 if (wfact < 1 ) wfact = 1; 3084 if (wfact > 100) wfact = 100; 3085 budgetInit = nblock * ((wfact-1) / 3); 3086 budget = budgetInit; 3087 3088 mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget ); 3089 if (0 && verb >= 3) 3090 VPrintf3 ( " %d work, %d block, ratio %5.2f\n", 3091 budgetInit - budget, 3092 nblock, 3093 (float)(budgetInit - budget) / 3094 (float)(nblock==0 ? 1 : nblock) ); 3095 if (budget < 0) { 3096 if (verb >= 2) 3097 VPrintf0 ( " too repetitive; using fallback" 3098 " sorting algorithm\n" ); 3099 fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb ); 3100 } 3101 } 3102 3103 s->origPtr = -1; 3104 for (i = 0; i < s->nblock; i++) 3105 if (ptr[i] == 0) 3106 { s->origPtr = i; break; }; 3107 3108 AssertH( s->origPtr != -1, 1003 ); 3109} 3110 3111 3112/*-------------------------------------------------------------*/ 3113/*--- end blocksort.c ---*/ 3114/*-------------------------------------------------------------*/ 3115 3116/*-------------------------------------------------------------*/ 3117/*--- Huffman coding low-level stuff ---*/ 3118/*--- huffman.c ---*/ 3119/*-------------------------------------------------------------*/ 3120 3121/*-- 3122 This file is a part of bzip2 and/or libbzip2, a program and 3123 library for lossless, block-sorting data compression. 3124 3125 Copyright (C) 1996-2004 Julian R Seward. All rights reserved. 3126 3127 Redistribution and use in source and binary forms, with or without 3128 modification, are permitted provided that the following conditions 3129 are met: 3130 3131 1. Redistributions of source code must retain the above copyright 3132 notice, this list of conditions and the following disclaimer. 3133 3134 2. The origin of this software must not be misrepresented; you must 3135 not claim that you wrote the original software. If you use this 3136 software in a product, an acknowledgment in the product 3137 documentation would be appreciated but is not required. 3138 3139 3. Altered source versions must be plainly marked as such, and must 3140 not be misrepresented as being the original software. 3141 3142 4. The name of the author may not be used to endorse or promote 3143 products derived from this software without specific prior written 3144 permission. 3145 3146 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 3147 OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 3148 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 3149 ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY 3150 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 3151 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 3152 GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 3153 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 3154 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 3155 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 3156 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 3157 3158 Julian Seward, Cambridge, UK. 3159 jseward@bzip.org 3160 bzip2/libbzip2 version 1.0 of 21 March 2000 3161 3162 This program is based on (at least) the work of: 3163 Mike Burrows 3164 David Wheeler 3165 Peter Fenwick 3166 Alistair Moffat 3167 Radford Neal 3168 Ian H. Witten 3169 Robert Sedgewick 3170 Jon L. Bentley 3171 3172 For more information on these sources, see the manual. 3173--*/ 3174 3175 3176 3177/*---------------------------------------------------*/ 3178#define WEIGHTOF(zz0) ((zz0) & 0xffffff00) 3179#define DEPTHOF(zz1) ((zz1) & 0x000000ff) 3180#define MYMAX(zz2,zz3) ((zz2) > (zz3) ? (zz2) : (zz3)) 3181 3182#define ADDWEIGHTS(zw1,zw2) \ 3183 (WEIGHTOF(zw1)+WEIGHTOF(zw2)) | \ 3184 (1 + MYMAX(DEPTHOF(zw1),DEPTHOF(zw2))) 3185 3186#define UPHEAP(z) \ 3187{ \ 3188 Int32 zz, tmp; \ 3189 zz = z; tmp = heap[zz]; \ 3190 while (weight[tmp] < weight[heap[zz >> 1]]) { \ 3191 heap[zz] = heap[zz >> 1]; \ 3192 zz >>= 1; \ 3193 } \ 3194 heap[zz] = tmp; \ 3195} 3196 3197#define DOWNHEAP(z) \ 3198{ \ 3199 Int32 zz, yy, tmp; \ 3200 zz = z; tmp = heap[zz]; \ 3201 while (True) { \ 3202 yy = zz << 1; \ 3203 if (yy > nHeap) break; \ 3204 if (yy < nHeap && \ 3205 weight[heap[yy+1]] < weight[heap[yy]]) \ 3206 yy++; \ 3207 if (weight[tmp] < weight[heap[yy]]) break; \ 3208 heap[zz] = heap[yy]; \ 3209 zz = yy; \ 3210 } \ 3211 heap[zz] = tmp; \ 3212} 3213 3214 3215/*---------------------------------------------------*/ 3216void BZ2_hbMakeCodeLengths ( UChar *len, 3217 Int32 *freq, 3218 Int32 alphaSize, 3219 Int32 maxLen ) 3220{ 3221 /*-- 3222 Nodes and heap entries run from 1. Entry 0 3223 for both the heap and nodes is a sentinel. 3224 --*/ 3225 Int32 nNodes, nHeap, n1, n2, i, j, k; 3226 Bool tooLong; 3227 3228 Int32 heap [ BZ_MAX_ALPHA_SIZE + 2 ]; 3229 Int32 weight [ BZ_MAX_ALPHA_SIZE * 2 ]; 3230 Int32 parent [ BZ_MAX_ALPHA_SIZE * 2 ]; 3231 3232 for (i = 0; i < alphaSize; i++) 3233 weight[i+1] = (freq[i] == 0 ? 1 : freq[i]) << 8; 3234 3235 while (True) { 3236 3237 nNodes = alphaSize; 3238 nHeap = 0; 3239 3240 heap[0] = 0; 3241 weight[0] = 0; 3242 parent[0] = -2; 3243 3244 for (i = 1; i <= alphaSize; i++) { 3245 parent[i] = -1; 3246 nHeap++; 3247 heap[nHeap] = i; 3248 UPHEAP(nHeap); 3249 } 3250 3251 AssertH( nHeap < (BZ_MAX_ALPHA_SIZE+2), 2001 ); 3252 3253 while (nHeap > 1) { 3254 n1 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1); 3255 n2 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1); 3256 nNodes++; 3257 parent[n1] = parent[n2] = nNodes; 3258 weight[nNodes] = ADDWEIGHTS(weight[n1], weight[n2]); 3259 parent[nNodes] = -1; 3260 nHeap++; 3261 heap[nHeap] = nNodes; 3262 UPHEAP(nHeap); 3263 } 3264 3265 AssertH( nNodes < (BZ_MAX_ALPHA_SIZE * 2), 2002 ); 3266 3267 tooLong = False; 3268 for (i = 1; i <= alphaSize; i++) { 3269 j = 0; 3270 k = i; 3271 while (parent[k] >= 0) { k = parent[k]; j++; } 3272 len[i-1] = j; 3273 if (j > maxLen) tooLong = True; 3274 } 3275 3276 if (! tooLong) break; 3277 3278 /* 17 Oct 04: keep-going condition for the following loop used 3279 to be 'i < alphaSize', which missed the last element, 3280 theoretically leading to the possibility of the compressor 3281 looping. However, this count-scaling step is only needed if 3282 one of the generated Huffman code words is longer than 3283 maxLen, which up to and including version 1.0.2 was 20 bits, 3284 which is extremely unlikely. In version 1.0.3 maxLen was 3285 changed to 17 bits, which has minimal effect on compression 3286 ratio, but does mean this scaling step is used from time to 3287 time, enough to verify that it works. 3288 3289 This means that bzip2-1.0.3 and later will only produce 3290 Huffman codes with a maximum length of 17 bits. However, in 3291 order to preserve backwards compatibility with bitstreams 3292 produced by versions pre-1.0.3, the decompressor must still 3293 handle lengths of up to 20. */ 3294 3295 for (i = 1; i <= alphaSize; i++) { 3296 j = weight[i] >> 8; 3297 j = 1 + (j / 2); 3298 weight[i] = j << 8; 3299 } 3300 } 3301} 3302 3303 3304/*---------------------------------------------------*/ 3305void BZ2_hbAssignCodes ( Int32 *code, 3306 UChar *length, 3307 Int32 minLen, 3308 Int32 maxLen, 3309 Int32 alphaSize ) 3310{ 3311 Int32 n, vec, i; 3312 3313 vec = 0; 3314 for (n = minLen; n <= maxLen; n++) { 3315 for (i = 0; i < alphaSize; i++) 3316 if (length[i] == n) { code[i] = vec; vec++; }; 3317 vec <<= 1; 3318 } 3319} 3320 3321 3322/*---------------------------------------------------*/ 3323void BZ2_hbCreateDecodeTables ( Int32 *limit, 3324 Int32 *base, 3325 Int32 *perm, 3326 UChar *length, 3327 Int32 minLen, 3328 Int32 maxLen, 3329 Int32 alphaSize ) 3330{ 3331 Int32 pp, i, j, vec; 3332 3333 pp = 0; 3334 for (i = minLen; i <= maxLen; i++) 3335 for (j = 0; j < alphaSize; j++) 3336 if (length[j] == i) { perm[pp] = j; pp++; }; 3337 3338 for (i = 0; i < BZ_MAX_CODE_LEN; i++) base[i] = 0; 3339 for (i = 0; i < alphaSize; i++) base[length[i]+1]++; 3340 3341 for (i = 1; i < BZ_MAX_CODE_LEN; i++) base[i] += base[i-1]; 3342 3343 for (i = 0; i < BZ_MAX_CODE_LEN; i++) limit[i] = 0; 3344 vec = 0; 3345 3346 for (i = minLen; i <= maxLen; i++) { 3347 vec += (base[i+1] - base[i]); 3348 limit[i] = vec-1; 3349 vec <<= 1; 3350 } 3351 for (i = minLen + 1; i <= maxLen; i++) 3352 base[i] = ((limit[i-1] + 1) << 1) - base[i]; 3353} 3354 3355 3356/*-------------------------------------------------------------*/ 3357/*--- end huffman.c ---*/ 3358/*-------------------------------------------------------------*/ 3359 3360/*-------------------------------------------------------------*/ 3361/*--- Compression machinery (not incl block sorting) ---*/ 3362/*--- compress.c ---*/ 3363/*-------------------------------------------------------------*/ 3364 3365/*-- 3366 This file is a part of bzip2 and/or libbzip2, a program and 3367 library for lossless, block-sorting data compression. 3368 3369 Copyright (C) 1996-2004 Julian R Seward. All rights reserved. 3370 3371 Redistribution and use in source and binary forms, with or without 3372 modification, are permitted provided that the following conditions 3373 are met: 3374 3375 1. Redistributions of source code must retain the above copyright 3376 notice, this list of conditions and the following disclaimer. 3377 3378 2. The origin of this software must not be misrepresented; you must 3379 not claim that you wrote the original software. If you use this 3380 software in a product, an acknowledgment in the product 3381 documentation would be appreciated but is not required. 3382 3383 3. Altered source versions must be plainly marked as such, and must 3384 not be misrepresented as being the original software. 3385 3386 4. The name of the author may not be used to endorse or promote 3387 products derived from this software without specific prior written 3388 permission. 3389 3390 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 3391 OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 3392 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 3393 ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY 3394 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 3395 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 3396 GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 3397 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 3398 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 3399 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 3400 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 3401 3402 Julian Seward, Cambridge, UK. 3403 jseward@bzip.org 3404 bzip2/libbzip2 version 1.0 of 21 March 2000 3405 3406 This program is based on (at least) the work of: 3407 Mike Burrows 3408 David Wheeler 3409 Peter Fenwick 3410 Alistair Moffat 3411 Radford Neal 3412 Ian H. Witten 3413 Robert Sedgewick 3414 Jon L. Bentley 3415 3416 For more information on these sources, see the manual. 3417--*/ 3418 3419/*-- 3420 CHANGES 3421 ~~~~~~~ 3422 0.9.0 -- original version. 3423 3424 0.9.0a/b -- no changes in this file. 3425 3426 0.9.0c 3427 * changed setting of nGroups in sendMTFValues() so as to 3428 do a bit better on small files 3429--*/ 3430 3431 3432 3433/*---------------------------------------------------*/ 3434/*--- Bit stream I/O ---*/ 3435/*---------------------------------------------------*/ 3436 3437/*---------------------------------------------------*/ 3438void BZ2_bsInitWrite ( EState* s ) 3439{ 3440 s->bsLive = 0; 3441 s->bsBuff = 0; 3442} 3443 3444 3445/*---------------------------------------------------*/ 3446static 3447void bsFinishWrite ( EState* s ) 3448{ 3449 while (s->bsLive > 0) { 3450 s->zbits[s->numZ] = (UChar)(s->bsBuff >> 24); 3451 s->numZ++; 3452 s->bsBuff <<= 8; 3453 s->bsLive -= 8; 3454 } 3455} 3456 3457 3458/*---------------------------------------------------*/ 3459#define bsNEEDW(nz) \ 3460{ \ 3461 while (s->bsLive >= 8) { \ 3462 s->zbits[s->numZ] \ 3463 = (UChar)(s->bsBuff >> 24); \ 3464 s->numZ++; \ 3465 s->bsBuff <<= 8; \ 3466 s->bsLive -= 8; \ 3467 } \ 3468} 3469 3470 3471/*---------------------------------------------------*/ 3472static 3473__inline__ 3474void bsW ( EState* s, Int32 n, UInt32 v ) 3475{ 3476 bsNEEDW ( n ); 3477 s->bsBuff |= (v << (32 - s->bsLive - n)); 3478 s->bsLive += n; 3479} 3480 3481 3482/*---------------------------------------------------*/ 3483static 3484void bsPutUInt32 ( EState* s, UInt32 u ) 3485{ 3486 bsW ( s, 8, (u >> 24) & 0xffL ); 3487 bsW ( s, 8, (u >> 16) & 0xffL ); 3488 bsW ( s, 8, (u >> 8) & 0xffL ); 3489 bsW ( s, 8, u & 0xffL ); 3490} 3491 3492 3493/*---------------------------------------------------*/ 3494static 3495void bsPutUChar ( EState* s, UChar c ) 3496{ 3497 bsW( s, 8, (UInt32)c ); 3498} 3499 3500 3501/*---------------------------------------------------*/ 3502/*--- The back end proper ---*/ 3503/*---------------------------------------------------*/ 3504 3505/*---------------------------------------------------*/ 3506static 3507void makeMaps_e ( EState* s ) 3508{ 3509 Int32 i; 3510 s->nInUse = 0; 3511 for (i = 0; i < 256; i++) 3512 if (s->inUse[i]) { 3513 s->unseqToSeq[i] = s->nInUse; 3514 s->nInUse++; 3515 } 3516} 3517 3518 3519/*---------------------------------------------------*/ 3520static 3521void generateMTFValues ( EState* s ) 3522{ 3523 UChar yy[256]; 3524 Int32 i, j; 3525 Int32 zPend; 3526 Int32 wr; 3527 Int32 EOB; 3528 3529 /* 3530 After sorting (eg, here), 3531 s->arr1 [ 0 .. s->nblock-1 ] holds sorted order, 3532 and 3533 ((UChar*)s->arr2) [ 0 .. s->nblock-1 ] 3534 holds the original block data. 3535 3536 The first thing to do is generate the MTF values, 3537 and put them in 3538 ((UInt16*)s->arr1) [ 0 .. s->nblock-1 ]. 3539 Because there are strictly fewer or equal MTF values 3540 than block values, ptr values in this area are overwritten 3541 with MTF values only when they are no longer needed. 3542 3543 The final compressed bitstream is generated into the 3544 area starting at 3545 (UChar*) (&((UChar*)s->arr2)[s->nblock]) 3546 3547 These storage aliases are set up in bzCompressInit(), 3548 except for the last one, which is arranged in 3549 compressBlock(). 3550 */ 3551 UInt32* ptr = s->ptr; 3552 UChar* block = s->block; 3553 UInt16* mtfv = s->mtfv; 3554 3555 makeMaps_e ( s ); 3556 EOB = s->nInUse+1; 3557 3558 for (i = 0; i <= EOB; i++) s->mtfFreq[i] = 0; 3559 3560 wr = 0; 3561 zPend = 0; 3562 for (i = 0; i < s->nInUse; i++) yy[i] = (UChar) i; 3563 3564 for (i = 0; i < s->nblock; i++) { 3565 UChar ll_i; 3566 AssertD ( wr <= i, "generateMTFValues(1)" ); 3567 j = ptr[i]-1; if (j < 0) j += s->nblock; 3568 ll_i = s->unseqToSeq[block[j]]; 3569 AssertD ( ll_i < s->nInUse, "generateMTFValues(2a)" ); 3570 3571 if (yy[0] == ll_i) { 3572 zPend++; 3573 } else { 3574 3575 if (zPend > 0) { 3576 zPend--; 3577 while (True) { 3578 if (zPend & 1) { 3579 mtfv[wr] = BZ_RUNB; wr++; 3580 s->mtfFreq[BZ_RUNB]++; 3581 } else { 3582 mtfv[wr] = BZ_RUNA; wr++; 3583 s->mtfFreq[BZ_RUNA]++; 3584 } 3585 if (zPend < 2) break; 3586 zPend = (zPend - 2) / 2; 3587 }; 3588 zPend = 0; 3589 } 3590 { 3591 register UChar rtmp; 3592 register UChar* ryy_j; 3593 register UChar rll_i; 3594 rtmp = yy[1]; 3595 yy[1] = yy[0]; 3596 ryy_j = &(yy[1]); 3597 rll_i = ll_i; 3598 while ( rll_i != rtmp ) { 3599 register UChar rtmp2; 3600 ryy_j++; 3601 rtmp2 = rtmp; 3602 rtmp = *ryy_j; 3603 *ryy_j = rtmp2; 3604 }; 3605 yy[0] = rtmp; 3606 j = ryy_j - &(yy[0]); 3607 mtfv[wr] = j+1; wr++; s->mtfFreq[j+1]++; 3608 } 3609 3610 } 3611 } 3612 3613 if (zPend > 0) { 3614 zPend--; 3615 while (True) { 3616 if (zPend & 1) { 3617 mtfv[wr] = BZ_RUNB; wr++; 3618 s->mtfFreq[BZ_RUNB]++; 3619 } else { 3620 mtfv[wr] = BZ_RUNA; wr++; 3621 s->mtfFreq[BZ_RUNA]++; 3622 } 3623 if (zPend < 2) break; 3624 zPend = (zPend - 2) / 2; 3625 }; 3626 zPend = 0; 3627 } 3628 3629 mtfv[wr] = EOB; wr++; s->mtfFreq[EOB]++; 3630 3631 s->nMTF = wr; 3632} 3633 3634 3635/*---------------------------------------------------*/ 3636#define BZ_LESSER_ICOST 0 3637#define BZ_GREATER_ICOST 15 3638 3639static 3640void sendMTFValues ( EState* s ) 3641{ 3642 Int32 v, t, i, j, gs, ge, totc, bt, bc, iter; 3643 Int32 nSelectors, alphaSize, minLen, maxLen, selCtr; 3644 Int32 nGroups, nBytes; 3645 3646 /*-- 3647 UChar len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; 3648 is a global since the decoder also needs it. 3649 3650 Int32 code[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; 3651 Int32 rfreq[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; 3652 are also globals only used in this proc. 3653 Made global to keep stack frame size small. 3654 --*/ 3655 3656 3657 UInt16 cost[BZ_N_GROUPS]; 3658 Int32 fave[BZ_N_GROUPS]; 3659 3660 UInt16* mtfv = s->mtfv; 3661 3662 if (s->verbosity >= 3) 3663 VPrintf3( " %d in block, %d after MTF & 1-2 coding, " 3664 "%d+2 syms in use\n", 3665 s->nblock, s->nMTF, s->nInUse ); 3666 3667 alphaSize = s->nInUse+2; 3668 for (t = 0; t < BZ_N_GROUPS; t++) 3669 for (v = 0; v < alphaSize; v++) 3670 s->len[t][v] = BZ_GREATER_ICOST; 3671 3672 /*--- Decide how many coding tables to use ---*/ 3673 AssertH ( s->nMTF > 0, 3001 ); 3674 if (s->nMTF < 200) nGroups = 2; else 3675 if (s->nMTF < 600) nGroups = 3; else 3676 if (s->nMTF < 1200) nGroups = 4; else 3677 if (s->nMTF < 2400) nGroups = 5; else 3678 nGroups = 6; 3679 3680 /*--- Generate an initial set of coding tables ---*/ 3681 { 3682 Int32 nPart, remF, tFreq, aFreq; 3683 3684 nPart = nGroups; 3685 remF = s->nMTF; 3686 gs = 0; 3687 while (nPart > 0) { 3688 tFreq = remF / nPart; 3689 ge = gs-1; 3690 aFreq = 0; 3691 while (aFreq < tFreq && ge < alphaSize-1) { 3692 ge++; 3693 aFreq += s->mtfFreq[ge]; 3694 } 3695 3696 if (ge > gs 3697 && nPart != nGroups && nPart != 1 3698 && ((nGroups-nPart) % 2 == 1)) { 3699 aFreq -= s->mtfFreq[ge]; 3700 ge--; 3701 } 3702 3703 if (0 && s->verbosity >= 3) 3704 VPrintf5( " initial group %d, [%d .. %d], " 3705 "has %d syms (%4.1f%%)\n", 3706 nPart, gs, ge, aFreq, 3707 (100.0 * (float)aFreq) / (float)(s->nMTF) ); 3708 3709 for (v = 0; v < alphaSize; v++) 3710 if (v >= gs && v <= ge) 3711 s->len[nPart-1][v] = BZ_LESSER_ICOST; else 3712 s->len[nPart-1][v] = BZ_GREATER_ICOST; 3713 3714 nPart--; 3715 gs = ge+1; 3716 remF -= aFreq; 3717 } 3718 } 3719 3720 /*--- 3721 Iterate up to BZ_N_ITERS times to improve the tables. 3722 ---*/ 3723 for (iter = 0; iter < BZ_N_ITERS; iter++) { 3724 3725 for (t = 0; t < nGroups; t++) fave[t] = 0; 3726 3727 for (t = 0; t < nGroups; t++) 3728 for (v = 0; v < alphaSize; v++) 3729 s->rfreq[t][v] = 0; 3730 3731 /*--- 3732 Set up an auxiliary length table which is used to fast-track 3733 the common case (nGroups == 6). 3734 ---*/ 3735 if (nGroups == 6) { 3736 for (v = 0; v < alphaSize; v++) { 3737 s->len_pack[v][0] = (s->len[1][v] << 16) | s->len[0][v]; 3738 s->len_pack[v][1] = (s->len[3][v] << 16) | s->len[2][v]; 3739 s->len_pack[v][2] = (s->len[5][v] << 16) | s->len[4][v]; 3740 } 3741 } 3742 3743 nSelectors = 0; 3744 totc = 0; 3745 gs = 0; 3746 while (True) { 3747 3748 /*--- Set group start & end marks. --*/ 3749 if (gs >= s->nMTF) break; 3750 ge = gs + BZ_G_SIZE - 1; 3751 if (ge >= s->nMTF) ge = s->nMTF-1; 3752 3753 /*-- 3754 Calculate the cost of this group as coded 3755 by each of the coding tables. 3756 --*/ 3757 for (t = 0; t < nGroups; t++) cost[t] = 0; 3758 3759 if (nGroups == 6 && 50 == ge-gs+1) { 3760 /*--- fast track the common case ---*/ 3761 register UInt32 cost01, cost23, cost45; 3762 register UInt16 icv; 3763 cost01 = cost23 = cost45 = 0; 3764 3765# define BZ_ITER(nn) \ 3766 icv = mtfv[gs+(nn)]; \ 3767 cost01 += s->len_pack[icv][0]; \ 3768 cost23 += s->len_pack[icv][1]; \ 3769 cost45 += s->len_pack[icv][2]; \ 3770 3771 BZ_ITER(0); BZ_ITER(1); BZ_ITER(2); BZ_ITER(3); BZ_ITER(4); 3772 BZ_ITER(5); BZ_ITER(6); BZ_ITER(7); BZ_ITER(8); BZ_ITER(9); 3773 BZ_ITER(10); BZ_ITER(11); BZ_ITER(12); BZ_ITER(13); BZ_ITER(14); 3774 BZ_ITER(15); BZ_ITER(16); BZ_ITER(17); BZ_ITER(18); BZ_ITER(19); 3775 BZ_ITER(20); BZ_ITER(21); BZ_ITER(22); BZ_ITER(23); BZ_ITER(24); 3776 BZ_ITER(25); BZ_ITER(26); BZ_ITER(27); BZ_ITER(28); BZ_ITER(29); 3777 BZ_ITER(30); BZ_ITER(31); BZ_ITER(32); BZ_ITER(33); BZ_ITER(34); 3778 BZ_ITER(35); BZ_ITER(36); BZ_ITER(37); BZ_ITER(38); BZ_ITER(39); 3779 BZ_ITER(40); BZ_ITER(41); BZ_ITER(42); BZ_ITER(43); BZ_ITER(44); 3780 BZ_ITER(45); BZ_ITER(46); BZ_ITER(47); BZ_ITER(48); BZ_ITER(49); 3781 3782# undef BZ_ITER 3783 3784 cost[0] = cost01 & 0xffff; cost[1] = cost01 >> 16; 3785 cost[2] = cost23 & 0xffff; cost[3] = cost23 >> 16; 3786 cost[4] = cost45 & 0xffff; cost[5] = cost45 >> 16; 3787 3788 } else { 3789 /*--- slow version which correctly handles all situations ---*/ 3790 for (i = gs; i <= ge; i++) { 3791 UInt16 icv = mtfv[i]; 3792 for (t = 0; t < nGroups; t++) cost[t] += s->len[t][icv]; 3793 } 3794 } 3795 3796 /*-- 3797 Find the coding table which is best for this group, 3798 and record its identity in the selector table. 3799 --*/ 3800 bc = 999999999; bt = -1; 3801 for (t = 0; t < nGroups; t++) 3802 if (cost[t] < bc) { bc = cost[t]; bt = t; }; 3803 totc += bc; 3804 fave[bt]++; 3805 s->selector[nSelectors] = bt; 3806 nSelectors++; 3807 3808 /*-- 3809 Increment the symbol frequencies for the selected table. 3810 --*/ 3811 if (nGroups == 6 && 50 == ge-gs+1) { 3812 /*--- fast track the common case ---*/ 3813 3814# define BZ_ITUR(nn) s->rfreq[bt][ mtfv[gs+(nn)] ]++ 3815 3816 BZ_ITUR(0); BZ_ITUR(1); BZ_ITUR(2); BZ_ITUR(3); BZ_ITUR(4); 3817 BZ_ITUR(5); BZ_ITUR(6); BZ_ITUR(7); BZ_ITUR(8); BZ_ITUR(9); 3818 BZ_ITUR(10); BZ_ITUR(11); BZ_ITUR(12); BZ_ITUR(13); BZ_ITUR(14); 3819 BZ_ITUR(15); BZ_ITUR(16); BZ_ITUR(17); BZ_ITUR(18); BZ_ITUR(19); 3820 BZ_ITUR(20); BZ_ITUR(21); BZ_ITUR(22); BZ_ITUR(23); BZ_ITUR(24); 3821 BZ_ITUR(25); BZ_ITUR(26); BZ_ITUR(27); BZ_ITUR(28); BZ_ITUR(29); 3822 BZ_ITUR(30); BZ_ITUR(31); BZ_ITUR(32); BZ_ITUR(33); BZ_ITUR(34); 3823 BZ_ITUR(35); BZ_ITUR(36); BZ_ITUR(37); BZ_ITUR(38); BZ_ITUR(39); 3824 BZ_ITUR(40); BZ_ITUR(41); BZ_ITUR(42); BZ_ITUR(43); BZ_ITUR(44); 3825 BZ_ITUR(45); BZ_ITUR(46); BZ_ITUR(47); BZ_ITUR(48); BZ_ITUR(49); 3826 3827# undef BZ_ITUR 3828 3829 } else { 3830 /*--- slow version which correctly handles all situations ---*/ 3831 for (i = gs; i <= ge; i++) 3832 s->rfreq[bt][ mtfv[i] ]++; 3833 } 3834 3835 gs = ge+1; 3836 } 3837 if (s->verbosity >= 3) { 3838 VPrintf2 ( " pass %d: size is %d, grp uses are ", 3839 iter+1, totc/8 ); 3840 for (t = 0; t < nGroups; t++) 3841 VPrintf1 ( "%d ", fave[t] ); 3842 VPrintf0 ( "\n" ); 3843 } 3844 3845 /*-- 3846 Recompute the tables based on the accumulated frequencies. 3847 --*/ 3848 /* maxLen was changed from 20 to 17 in bzip2-1.0.3. See 3849 comment in huffman.c for details. */ 3850 for (t = 0; t < nGroups; t++) 3851 BZ2_hbMakeCodeLengths ( &(s->len[t][0]), &(s->rfreq[t][0]), 3852 alphaSize, 17 /*20*/ ); 3853 } 3854 3855 3856 AssertH( nGroups < 8, 3002 ); 3857 AssertH( nSelectors < 32768 && 3858 nSelectors <= (2 + (900000 / BZ_G_SIZE)), 3859 3003 ); 3860 3861 3862 /*--- Compute MTF values for the selectors. ---*/ 3863 { 3864 UChar pos[BZ_N_GROUPS], ll_i, tmp2, tmp; 3865 for (i = 0; i < nGroups; i++) pos[i] = i; 3866 for (i = 0; i < nSelectors; i++) { 3867 ll_i = s->selector[i]; 3868 j = 0; 3869 tmp = pos[j]; 3870 while ( ll_i != tmp ) { 3871 j++; 3872 tmp2 = tmp; 3873 tmp = pos[j]; 3874 pos[j] = tmp2; 3875 }; 3876 pos[0] = tmp; 3877 s->selectorMtf[i] = j; 3878 } 3879 }; 3880 3881 /*--- Assign actual codes for the tables. --*/ 3882 for (t = 0; t < nGroups; t++) { 3883 minLen = 32; 3884 maxLen = 0; 3885 for (i = 0; i < alphaSize; i++) { 3886 if (s->len[t][i] > maxLen) maxLen = s->len[t][i]; 3887 if (s->len[t][i] < minLen) minLen = s->len[t][i]; 3888 } 3889 AssertH ( !(maxLen > 17 /*20*/ ), 3004 ); 3890 AssertH ( !(minLen < 1), 3005 ); 3891 BZ2_hbAssignCodes ( &(s->code[t][0]), &(s->len[t][0]), 3892 minLen, maxLen, alphaSize ); 3893 } 3894 3895 /*--- Transmit the mapping table. ---*/ 3896 { 3897 Bool inUse16[16]; 3898 for (i = 0; i < 16; i++) { 3899 inUse16[i] = False; 3900 for (j = 0; j < 16; j++) 3901 if (s->inUse[i * 16 + j]) inUse16[i] = True; 3902 } 3903 3904 nBytes = s->numZ; 3905 for (i = 0; i < 16; i++) 3906 if (inUse16[i]) bsW(s,1,1); else bsW(s,1,0); 3907 3908 for (i = 0; i < 16; i++) 3909 if (inUse16[i]) 3910 for (j = 0; j < 16; j++) { 3911 if (s->inUse[i * 16 + j]) bsW(s,1,1); else bsW(s,1,0); 3912 } 3913 3914 if (s->verbosity >= 3) 3915 VPrintf1( " bytes: mapping %d, ", s->numZ-nBytes ); 3916 } 3917 3918 /*--- Now the selectors. ---*/ 3919 nBytes = s->numZ; 3920 bsW ( s, 3, nGroups ); 3921 bsW ( s, 15, nSelectors ); 3922 for (i = 0; i < nSelectors; i++) { 3923 for (j = 0; j < s->selectorMtf[i]; j++) bsW(s,1,1); 3924 bsW(s,1,0); 3925 } 3926 if (s->verbosity >= 3) 3927 VPrintf1( "selectors %d, ", s->numZ-nBytes ); 3928 3929 /*--- Now the coding tables. ---*/ 3930 nBytes = s->numZ; 3931 3932 for (t = 0; t < nGroups; t++) { 3933 Int32 curr = s->len[t][0]; 3934 bsW ( s, 5, curr ); 3935 for (i = 0; i < alphaSize; i++) { 3936 while (curr < s->len[t][i]) { bsW(s,2,2); curr++; /* 10 */ }; 3937 while (curr > s->len[t][i]) { bsW(s,2,3); curr--; /* 11 */ }; 3938 bsW ( s, 1, 0 ); 3939 } 3940 } 3941 3942 if (s->verbosity >= 3) 3943 VPrintf1 ( "code lengths %d, ", s->numZ-nBytes ); 3944 3945 /*--- And finally, the block data proper ---*/ 3946 nBytes = s->numZ; 3947 selCtr = 0; 3948 gs = 0; 3949 while (True) { 3950 if (gs >= s->nMTF) break; 3951 ge = gs + BZ_G_SIZE - 1; 3952 if (ge >= s->nMTF) ge = s->nMTF-1; 3953 AssertH ( s->selector[selCtr] < nGroups, 3006 ); 3954 3955 if (nGroups == 6 && 50 == ge-gs+1) { 3956 /*--- fast track the common case ---*/ 3957 UInt16 mtfv_i; 3958 UChar* s_len_sel_selCtr 3959 = &(s->len[s->selector[selCtr]][0]); 3960 Int32* s_code_sel_selCtr 3961 = &(s->code[s->selector[selCtr]][0]); 3962 3963# define BZ_ITAH(nn) \ 3964 mtfv_i = mtfv[gs+(nn)]; \ 3965 bsW ( s, \ 3966 s_len_sel_selCtr[mtfv_i], \ 3967 s_code_sel_selCtr[mtfv_i] ) 3968 3969 BZ_ITAH(0); BZ_ITAH(1); BZ_ITAH(2); BZ_ITAH(3); BZ_ITAH(4); 3970 BZ_ITAH(5); BZ_ITAH(6); BZ_ITAH(7); BZ_ITAH(8); BZ_ITAH(9); 3971 BZ_ITAH(10); BZ_ITAH(11); BZ_ITAH(12); BZ_ITAH(13); BZ_ITAH(14); 3972 BZ_ITAH(15); BZ_ITAH(16); BZ_ITAH(17); BZ_ITAH(18); BZ_ITAH(19); 3973 BZ_ITAH(20); BZ_ITAH(21); BZ_ITAH(22); BZ_ITAH(23); BZ_ITAH(24); 3974 BZ_ITAH(25); BZ_ITAH(26); BZ_ITAH(27); BZ_ITAH(28); BZ_ITAH(29); 3975 BZ_ITAH(30); BZ_ITAH(31); BZ_ITAH(32); BZ_ITAH(33); BZ_ITAH(34); 3976 BZ_ITAH(35); BZ_ITAH(36); BZ_ITAH(37); BZ_ITAH(38); BZ_ITAH(39); 3977 BZ_ITAH(40); BZ_ITAH(41); BZ_ITAH(42); BZ_ITAH(43); BZ_ITAH(44); 3978 BZ_ITAH(45); BZ_ITAH(46); BZ_ITAH(47); BZ_ITAH(48); BZ_ITAH(49); 3979 3980# undef BZ_ITAH 3981 3982 } else { 3983 /*--- slow version which correctly handles all situations ---*/ 3984 for (i = gs; i <= ge; i++) { 3985 bsW ( s, 3986 s->len [s->selector[selCtr]] [mtfv[i]], 3987 s->code [s->selector[selCtr]] [mtfv[i]] ); 3988 } 3989 } 3990 3991 3992 gs = ge+1; 3993 selCtr++; 3994 } 3995 AssertH( selCtr == nSelectors, 3007 ); 3996 3997 if (s->verbosity >= 3) 3998 VPrintf1( "codes %d\n", s->numZ-nBytes ); 3999} 4000 4001 4002/*---------------------------------------------------*/ 4003void BZ2_compressBlock ( EState* s, Bool is_last_block ) 4004{ 4005 if (s->nblock > 0) { 4006 4007 BZ_FINALISE_CRC ( s->blockCRC ); 4008 s->combinedCRC = (s->combinedCRC << 1) | (s->combinedCRC >> 31); 4009 s->combinedCRC ^= s->blockCRC; 4010 if (s->blockNo > 1) s->numZ = 0; 4011 4012 if (s->verbosity >= 2) 4013 VPrintf4( " block %d: crc = 0x%08x, " 4014 "combined CRC = 0x%08x, size = %d\n", 4015 s->blockNo, s->blockCRC, s->combinedCRC, s->nblock ); 4016 4017 BZ2_blockSort ( s ); 4018 } 4019 4020 s->zbits = (UChar*) (&((UChar*)s->arr2)[s->nblock]); 4021 4022 /*-- If this is the first block, create the stream header. --*/ 4023 if (s->blockNo == 1) { 4024 BZ2_bsInitWrite ( s ); 4025 bsPutUChar ( s, BZ_HDR_B ); 4026 bsPutUChar ( s, BZ_HDR_Z ); 4027 bsPutUChar ( s, BZ_HDR_h ); 4028 bsPutUChar ( s, (UChar)(BZ_HDR_0 + s->blockSize100k) ); 4029 } 4030 4031 if (s->nblock > 0) { 4032 4033 bsPutUChar ( s, 0x31 ); bsPutUChar ( s, 0x41 ); 4034 bsPutUChar ( s, 0x59 ); bsPutUChar ( s, 0x26 ); 4035 bsPutUChar ( s, 0x53 ); bsPutUChar ( s, 0x59 ); 4036 4037 /*-- Now the block's CRC, so it is in a known place. --*/ 4038 bsPutUInt32 ( s, s->blockCRC ); 4039 4040 /*-- 4041 Now a single bit indicating (non-)randomisation. 4042 As of version 0.9.5, we use a better sorting algorithm 4043 which makes randomisation unnecessary. So always set 4044 the randomised bit to 'no'. Of course, the decoder 4045 still needs to be able to handle randomised blocks 4046 so as to maintain backwards compatibility with 4047 older versions of bzip2. 4048 --*/ 4049 bsW(s,1,0); 4050 4051 bsW ( s, 24, s->origPtr ); 4052 generateMTFValues ( s ); 4053 sendMTFValues ( s ); 4054 } 4055 4056 4057 /*-- If this is the last block, add the stream trailer. --*/ 4058 if (is_last_block) { 4059 4060 bsPutUChar ( s, 0x17 ); bsPutUChar ( s, 0x72 ); 4061 bsPutUChar ( s, 0x45 ); bsPutUChar ( s, 0x38 ); 4062 bsPutUChar ( s, 0x50 ); bsPutUChar ( s, 0x90 ); 4063 bsPutUInt32 ( s, s->combinedCRC ); 4064 if (s->verbosity >= 2) 4065 VPrintf1( " final combined CRC = 0x%08x\n ", s->combinedCRC ); 4066 bsFinishWrite ( s ); 4067 } 4068} 4069 4070 4071/*-------------------------------------------------------------*/ 4072/*--- end compress.c ---*/ 4073/*-------------------------------------------------------------*/ 4074 4075 4076/*-------------------------------------------------------------*/ 4077/*--- Table for randomising repetitive blocks ---*/ 4078/*--- randtable.c ---*/ 4079/*-------------------------------------------------------------*/ 4080 4081/*-- 4082 This file is a part of bzip2 and/or libbzip2, a program and 4083 library for lossless, block-sorting data compression. 4084 4085 Copyright (C) 1996-2004 Julian R Seward. All rights reserved. 4086 4087 Redistribution and use in source and binary forms, with or without 4088 modification, are permitted provided that the following conditions 4089 are met: 4090 4091 1. Redistributions of source code must retain the above copyright 4092 notice, this list of conditions and the following disclaimer. 4093 4094 2. The origin of this software must not be misrepresented; you must 4095 not claim that you wrote the original software. If you use this 4096 software in a product, an acknowledgment in the product 4097 documentation would be appreciated but is not required. 4098 4099 3. Altered source versions must be plainly marked as such, and must 4100 not be misrepresented as being the original software. 4101 4102 4. The name of the author may not be used to endorse or promote 4103 products derived from this software without specific prior written 4104 permission. 4105 4106 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 4107 OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 4108 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 4109 ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY 4110 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 4111 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 4112 GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 4113 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 4114 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 4115 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 4116 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 4117 4118 Julian Seward, Cambridge, UK. 4119 jseward@bzip.org 4120 bzip2/libbzip2 version 1.0 of 21 March 2000 4121 4122 This program is based on (at least) the work of: 4123 Mike Burrows 4124 David Wheeler 4125 Peter Fenwick 4126 Alistair Moffat 4127 Radford Neal 4128 Ian H. Witten 4129 Robert Sedgewick 4130 Jon L. Bentley 4131 4132 For more information on these sources, see the manual. 4133--*/ 4134 4135 4136 4137 4138/*---------------------------------------------*/ 4139Int32 BZ2_rNums[512] = { 4140 619, 720, 127, 481, 931, 816, 813, 233, 566, 247, 4141 985, 724, 205, 454, 863, 491, 741, 242, 949, 214, 4142 733, 859, 335, 708, 621, 574, 73, 654, 730, 472, 4143 419, 436, 278, 496, 867, 210, 399, 680, 480, 51, 4144 878, 465, 811, 169, 869, 675, 611, 697, 867, 561, 4145 862, 687, 507, 283, 482, 129, 807, 591, 733, 623, 4146 150, 238, 59, 379, 684, 877, 625, 169, 643, 105, 4147 170, 607, 520, 932, 727, 476, 693, 425, 174, 647, 4148 73, 122, 335, 530, 442, 853, 695, 249, 445, 515, 4149 909, 545, 703, 919, 874, 474, 882, 500, 594, 612, 4150 641, 801, 220, 162, 819, 984, 589, 513, 495, 799, 4151 161, 604, 958, 533, 221, 400, 386, 867, 600, 782, 4152 382, 596, 414, 171, 516, 375, 682, 485, 911, 276, 4153 98, 553, 163, 354, 666, 933, 424, 341, 533, 870, 4154 227, 730, 475, 186, 263, 647, 537, 686, 600, 224, 4155 469, 68, 770, 919, 190, 373, 294, 822, 808, 206, 4156 184, 943, 795, 384, 383, 461, 404, 758, 839, 887, 4157 715, 67, 618, 276, 204, 918, 873, 777, 604, 560, 4158 951, 160, 578, 722, 79, 804, 96, 409, 713, 940, 4159 652, 934, 970, 447, 318, 353, 859, 672, 112, 785, 4160 645, 863, 803, 350, 139, 93, 354, 99, 820, 908, 4161 609, 772, 154, 274, 580, 184, 79, 626, 630, 742, 4162 653, 282, 762, 623, 680, 81, 927, 626, 789, 125, 4163 411, 521, 938, 300, 821, 78, 343, 175, 128, 250, 4164 170, 774, 972, 275, 999, 639, 495, 78, 352, 126, 4165 857, 956, 358, 619, 580, 124, 737, 594, 701, 612, 4166 669, 112, 134, 694, 363, 992, 809, 743, 168, 974, 4167 944, 375, 748, 52, 600, 747, 642, 182, 862, 81, 4168 344, 805, 988, 739, 511, 655, 814, 334, 249, 515, 4169 897, 955, 664, 981, 649, 113, 974, 459, 893, 228, 4170 433, 837, 553, 268, 926, 240, 102, 654, 459, 51, 4171 686, 754, 806, 760, 493, 403, 415, 394, 687, 700, 4172 946, 670, 656, 610, 738, 392, 760, 799, 887, 653, 4173 978, 321, 576, 617, 626, 502, 894, 679, 243, 440, 4174 680, 879, 194, 572, 640, 724, 926, 56, 204, 700, 4175 707, 151, 457, 449, 797, 195, 791, 558, 945, 679, 4176 297, 59, 87, 824, 713, 663, 412, 693, 342, 606, 4177 134, 108, 571, 364, 631, 212, 174, 643, 304, 329, 4178 343, 97, 430, 751, 497, 314, 983, 374, 822, 928, 4179 140, 206, 73, 263, 980, 736, 876, 478, 430, 305, 4180 170, 514, 364, 692, 829, 82, 855, 953, 676, 246, 4181 369, 970, 294, 750, 807, 827, 150, 790, 288, 923, 4182 804, 378, 215, 828, 592, 281, 565, 555, 710, 82, 4183 896, 831, 547, 261, 524, 462, 293, 465, 502, 56, 4184 661, 821, 976, 991, 658, 869, 905, 758, 745, 193, 4185 768, 550, 608, 933, 378, 286, 215, 979, 792, 961, 4186 61, 688, 793, 644, 986, 403, 106, 366, 905, 644, 4187 372, 567, 466, 434, 645, 210, 389, 550, 919, 135, 4188 780, 773, 635, 389, 707, 100, 626, 958, 165, 504, 4189 920, 176, 193, 713, 857, 265, 203, 50, 668, 108, 4190 645, 990, 626, 197, 510, 357, 358, 850, 858, 364, 4191 936, 638 4192}; 4193 4194 4195/*-------------------------------------------------------------*/ 4196/*--- end randtable.c ---*/ 4197/*-------------------------------------------------------------*/ 4198 4199/*-------------------------------------------------------------*/ 4200/*--- Table for doing CRCs ---*/ 4201/*--- crctable.c ---*/ 4202/*-------------------------------------------------------------*/ 4203 4204/*-- 4205 This file is a part of bzip2 and/or libbzip2, a program and 4206 library for lossless, block-sorting data compression. 4207 4208 Copyright (C) 1996-2004 Julian R Seward. All rights reserved. 4209 4210 Redistribution and use in source and binary forms, with or without 4211 modification, are permitted provided that the following conditions 4212 are met: 4213 4214 1. Redistributions of source code must retain the above copyright 4215 notice, this list of conditions and the following disclaimer. 4216 4217 2. The origin of this software must not be misrepresented; you must 4218 not claim that you wrote the original software. If you use this 4219 software in a product, an acknowledgment in the product 4220 documentation would be appreciated but is not required. 4221 4222 3. Altered source versions must be plainly marked as such, and must 4223 not be misrepresented as being the original software. 4224 4225 4. The name of the author may not be used to endorse or promote 4226 products derived from this software without specific prior written 4227 permission. 4228 4229 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 4230 OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 4231 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 4232 ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY 4233 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 4234 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 4235 GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 4236 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 4237 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 4238 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 4239 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 4240 4241 Julian Seward, Cambridge, UK. 4242 jseward@bzip.org 4243 bzip2/libbzip2 version 1.0 of 21 March 2000 4244 4245 This program is based on (at least) the work of: 4246 Mike Burrows 4247 David Wheeler 4248 Peter Fenwick 4249 Alistair Moffat 4250 Radford Neal 4251 Ian H. Witten 4252 Robert Sedgewick 4253 Jon L. Bentley 4254 4255 For more information on these sources, see the manual. 4256--*/ 4257 4258 4259 4260 4261 4262/*-- 4263 I think this is an implementation of the AUTODIN-II, 4264 Ethernet & FDDI 32-bit CRC standard. Vaguely derived 4265 from code by Rob Warnock, in Section 51 of the 4266 comp.compression FAQ. 4267--*/ 4268 4269UInt32 BZ2_crc32Table[256] = { 4270 4271 /*-- Ugly, innit? --*/ 4272 4273 0x00000000L, 0x04c11db7L, 0x09823b6eL, 0x0d4326d9L, 4274 0x130476dcL, 0x17c56b6bL, 0x1a864db2L, 0x1e475005L, 4275 0x2608edb8L, 0x22c9f00fL, 0x2f8ad6d6L, 0x2b4bcb61L, 4276 0x350c9b64L, 0x31cd86d3L, 0x3c8ea00aL, 0x384fbdbdL, 4277 0x4c11db70L, 0x48d0c6c7L, 0x4593e01eL, 0x4152fda9L, 4278 0x5f15adacL, 0x5bd4b01bL, 0x569796c2L, 0x52568b75L, 4279 0x6a1936c8L, 0x6ed82b7fL, 0x639b0da6L, 0x675a1011L, 4280 0x791d4014L, 0x7ddc5da3L, 0x709f7b7aL, 0x745e66cdL, 4281 0x9823b6e0L, 0x9ce2ab57L, 0x91a18d8eL, 0x95609039L, 4282 0x8b27c03cL, 0x8fe6dd8bL, 0x82a5fb52L, 0x8664e6e5L, 4283 0xbe2b5b58L, 0xbaea46efL, 0xb7a96036L, 0xb3687d81L, 4284 0xad2f2d84L, 0xa9ee3033L, 0xa4ad16eaL, 0xa06c0b5dL, 4285 0xd4326d90L, 0xd0f37027L, 0xddb056feL, 0xd9714b49L, 4286 0xc7361b4cL, 0xc3f706fbL, 0xceb42022L, 0xca753d95L, 4287 0xf23a8028L, 0xf6fb9d9fL, 0xfbb8bb46L, 0xff79a6f1L, 4288 0xe13ef6f4L, 0xe5ffeb43L, 0xe8bccd9aL, 0xec7dd02dL, 4289 0x34867077L, 0x30476dc0L, 0x3d044b19L, 0x39c556aeL, 4290 0x278206abL, 0x23431b1cL, 0x2e003dc5L, 0x2ac12072L, 4291 0x128e9dcfL, 0x164f8078L, 0x1b0ca6a1L, 0x1fcdbb16L, 4292 0x018aeb13L, 0x054bf6a4L, 0x0808d07dL, 0x0cc9cdcaL, 4293 0x7897ab07L, 0x7c56b6b0L, 0x71159069L, 0x75d48ddeL, 4294 0x6b93dddbL, 0x6f52c06cL, 0x6211e6b5L, 0x66d0fb02L, 4295 0x5e9f46bfL, 0x5a5e5b08L, 0x571d7dd1L, 0x53dc6066L, 4296 0x4d9b3063L, 0x495a2dd4L, 0x44190b0dL, 0x40d816baL, 4297 0xaca5c697L, 0xa864db20L, 0xa527fdf9L, 0xa1e6e04eL, 4298 0xbfa1b04bL, 0xbb60adfcL, 0xb6238b25L, 0xb2e29692L, 4299 0x8aad2b2fL, 0x8e6c3698L, 0x832f1041L, 0x87ee0df6L, 4300 0x99a95df3L, 0x9d684044L, 0x902b669dL, 0x94ea7b2aL, 4301 0xe0b41de7L, 0xe4750050L, 0xe9362689L, 0xedf73b3eL, 4302 0xf3b06b3bL, 0xf771768cL, 0xfa325055L, 0xfef34de2L, 4303 0xc6bcf05fL, 0xc27dede8L, 0xcf3ecb31L, 0xcbffd686L, 4304 0xd5b88683L, 0xd1799b34L, 0xdc3abdedL, 0xd8fba05aL, 4305 0x690ce0eeL, 0x6dcdfd59L, 0x608edb80L, 0x644fc637L, 4306 0x7a089632L, 0x7ec98b85L, 0x738aad5cL, 0x774bb0ebL, 4307 0x4f040d56L, 0x4bc510e1L, 0x46863638L, 0x42472b8fL, 4308 0x5c007b8aL, 0x58c1663dL, 0x558240e4L, 0x51435d53L, 4309 0x251d3b9eL, 0x21dc2629L, 0x2c9f00f0L, 0x285e1d47L, 4310 0x36194d42L, 0x32d850f5L, 0x3f9b762cL, 0x3b5a6b9bL, 4311 0x0315d626L, 0x07d4cb91L, 0x0a97ed48L, 0x0e56f0ffL, 4312 0x1011a0faL, 0x14d0bd4dL, 0x19939b94L, 0x1d528623L, 4313 0xf12f560eL, 0xf5ee4bb9L, 0xf8ad6d60L, 0xfc6c70d7L, 4314 0xe22b20d2L, 0xe6ea3d65L, 0xeba91bbcL, 0xef68060bL, 4315 0xd727bbb6L, 0xd3e6a601L, 0xdea580d8L, 0xda649d6fL, 4316 0xc423cd6aL, 0xc0e2d0ddL, 0xcda1f604L, 0xc960ebb3L, 4317 0xbd3e8d7eL, 0xb9ff90c9L, 0xb4bcb610L, 0xb07daba7L, 4318 0xae3afba2L, 0xaafbe615L, 0xa7b8c0ccL, 0xa379dd7bL, 4319 0x9b3660c6L, 0x9ff77d71L, 0x92b45ba8L, 0x9675461fL, 4320 0x8832161aL, 0x8cf30badL, 0x81b02d74L, 0x857130c3L, 4321 0x5d8a9099L, 0x594b8d2eL, 0x5408abf7L, 0x50c9b640L, 4322 0x4e8ee645L, 0x4a4ffbf2L, 0x470cdd2bL, 0x43cdc09cL, 4323 0x7b827d21L, 0x7f436096L, 0x7200464fL, 0x76c15bf8L, 4324 0x68860bfdL, 0x6c47164aL, 0x61043093L, 0x65c52d24L, 4325 0x119b4be9L, 0x155a565eL, 0x18197087L, 0x1cd86d30L, 4326 0x029f3d35L, 0x065e2082L, 0x0b1d065bL, 0x0fdc1becL, 4327 0x3793a651L, 0x3352bbe6L, 0x3e119d3fL, 0x3ad08088L, 4328 0x2497d08dL, 0x2056cd3aL, 0x2d15ebe3L, 0x29d4f654L, 4329 0xc5a92679L, 0xc1683bceL, 0xcc2b1d17L, 0xc8ea00a0L, 4330 0xd6ad50a5L, 0xd26c4d12L, 0xdf2f6bcbL, 0xdbee767cL, 4331 0xe3a1cbc1L, 0xe760d676L, 0xea23f0afL, 0xeee2ed18L, 4332 0xf0a5bd1dL, 0xf464a0aaL, 0xf9278673L, 0xfde69bc4L, 4333 0x89b8fd09L, 0x8d79e0beL, 0x803ac667L, 0x84fbdbd0L, 4334 0x9abc8bd5L, 0x9e7d9662L, 0x933eb0bbL, 0x97ffad0cL, 4335 0xafb010b1L, 0xab710d06L, 0xa6322bdfL, 0xa2f33668L, 4336 0xbcb4666dL, 0xb8757bdaL, 0xb5365d03L, 0xb1f740b4L 4337}; 4338 4339 4340/*-------------------------------------------------------------*/ 4341/*--- end crctable.c ---*/ 4342/*-------------------------------------------------------------*/ 4343 4344/*-------------------------------------------------------------*/ 4345/*--- Library top-level functions. ---*/ 4346/*--- bzlib.c ---*/ 4347/*-------------------------------------------------------------*/ 4348 4349/*-- 4350 This file is a part of bzip2 and/or libbzip2, a program and 4351 library for lossless, block-sorting data compression. 4352 4353 Copyright (C) 1996-2004 Julian R Seward. All rights reserved. 4354 4355 Redistribution and use in source and binary forms, with or without 4356 modification, are permitted provided that the following conditions 4357 are met: 4358 4359 1. Redistributions of source code must retain the above copyright 4360 notice, this list of conditions and the following disclaimer. 4361 4362 2. The origin of this software must not be misrepresented; you must 4363 not claim that you wrote the original software. If you use this 4364 software in a product, an acknowledgment in the product 4365 documentation would be appreciated but is not required. 4366 4367 3. Altered source versions must be plainly marked as such, and must 4368 not be misrepresented as being the original software. 4369 4370 4. The name of the author may not be used to endorse or promote 4371 products derived from this software without specific prior written 4372 permission. 4373 4374 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 4375 OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 4376 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 4377 ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY 4378 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 4379 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 4380 GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 4381 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 4382 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 4383 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 4384 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 4385 4386 Julian Seward, Cambridge, UK. 4387 jseward@bzip.org 4388 bzip2/libbzip2 version 1.0 of 21 March 2000 4389 4390 This program is based on (at least) the work of: 4391 Mike Burrows 4392 David Wheeler 4393 Peter Fenwick 4394 Alistair Moffat 4395 Radford Neal 4396 Ian H. Witten 4397 Robert Sedgewick 4398 Jon L. Bentley 4399 4400 For more information on these sources, see the manual. 4401--*/ 4402 4403/*-- 4404 CHANGES 4405 ~~~~~~~ 4406 0.9.0 -- original version. 4407 4408 0.9.0a/b -- no changes in this file. 4409 4410 0.9.0c 4411 * made zero-length BZ_FLUSH work correctly in bzCompress(). 4412 * fixed bzWrite/bzRead to ignore zero-length requests. 4413 * fixed bzread to correctly handle read requests after EOF. 4414 * wrong parameter order in call to bzDecompressInit in 4415 bzBuffToBuffDecompress. Fixed. 4416--*/ 4417 4418 4419 4420/*---------------------------------------------------*/ 4421/*--- Compression stuff ---*/ 4422/*---------------------------------------------------*/ 4423 4424 4425/*---------------------------------------------------*/ 4426void BZ2_bz__AssertH__fail ( int errcode ) 4427{ 4428 vexxx_printf("BZ2_bz__AssertH__fail(%d) called, exiting\n", errcode); 4429 (*serviceFn)(0,0); 4430} 4431 4432void bz_internal_error ( int errcode ) 4433{ 4434 vexxx_printf("bz_internal_error called, exiting\n", errcode); 4435 (*serviceFn)(0,0); 4436} 4437 4438/*---------------------------------------------------*/ 4439static 4440int bz_config_ok ( void ) 4441{ 4442 if (sizeof(int) != 4) return 0; 4443 if (sizeof(short) != 2) return 0; 4444 if (sizeof(char) != 1) return 0; 4445 return 1; 4446} 4447 4448 4449/*---------------------------------------------------*/ 4450static 4451void* default_bzalloc ( void* opaque, Int32 items, Int32 size ) 4452{ 4453 void* v = (void*) (*serviceFn)(2, items * size ); 4454 return v; 4455} 4456 4457static 4458void default_bzfree ( void* opaque, void* addr ) 4459{ 4460 if (addr != NULL) (*serviceFn)( 3, (HWord)addr ); 4461} 4462 4463 4464/*---------------------------------------------------*/ 4465static 4466void prepare_new_block ( EState* s ) 4467{ 4468 Int32 i; 4469 s->nblock = 0; 4470 s->numZ = 0; 4471 s->state_out_pos = 0; 4472 BZ_INITIALISE_CRC ( s->blockCRC ); 4473 for (i = 0; i < 256; i++) s->inUse[i] = False; 4474 s->blockNo++; 4475} 4476 4477 4478/*---------------------------------------------------*/ 4479static 4480void init_RL ( EState* s ) 4481{ 4482 s->state_in_ch = 256; 4483 s->state_in_len = 0; 4484} 4485 4486 4487static 4488Bool isempty_RL ( EState* s ) 4489{ 4490 if (s->state_in_ch < 256 && s->state_in_len > 0) 4491 return False; else 4492 return True; 4493} 4494 4495 4496/*---------------------------------------------------*/ 4497int BZ_API(BZ2_bzCompressInit) 4498 ( bz_stream* strm, 4499 int blockSize100k, 4500 int verbosity, 4501 int workFactor ) 4502{ 4503 Int32 n; 4504 EState* s; 4505 4506 if (!bz_config_ok()) return BZ_CONFIG_ERROR; 4507 4508 if (strm == NULL || 4509 blockSize100k < 1 || blockSize100k > 9 || 4510 workFactor < 0 || workFactor > 250) 4511 return BZ_PARAM_ERROR; 4512 4513 if (workFactor == 0) workFactor = 30; 4514 if (strm->bzalloc == NULL) strm->bzalloc = default_bzalloc; 4515 if (strm->bzfree == NULL) strm->bzfree = default_bzfree; 4516 4517 s = BZALLOC( sizeof(EState) ); 4518 if (s == NULL) return BZ_MEM_ERROR; 4519 s->strm = strm; 4520 4521 s->arr1 = NULL; 4522 s->arr2 = NULL; 4523 s->ftab = NULL; 4524 4525 n = 100000 * blockSize100k; 4526 s->arr1 = BZALLOC( n * sizeof(UInt32) ); 4527 s->arr2 = BZALLOC( (n+BZ_N_OVERSHOOT) * sizeof(UInt32) ); 4528 s->ftab = BZALLOC( 65537 * sizeof(UInt32) ); 4529 4530 if (s->arr1 == NULL || s->arr2 == NULL || s->ftab == NULL) { 4531 if (s->arr1 != NULL) BZFREE(s->arr1); 4532 if (s->arr2 != NULL) BZFREE(s->arr2); 4533 if (s->ftab != NULL) BZFREE(s->ftab); 4534 if (s != NULL) BZFREE(s); 4535 return BZ_MEM_ERROR; 4536 } 4537 4538 s->blockNo = 0; 4539 s->state = BZ_S_INPUT; 4540 s->mode = BZ_M_RUNNING; 4541 s->combinedCRC = 0; 4542 s->blockSize100k = blockSize100k; 4543 s->nblockMAX = 100000 * blockSize100k - 19; 4544 s->verbosity = verbosity; 4545 s->workFactor = workFactor; 4546 4547 s->block = (UChar*)s->arr2; 4548 s->mtfv = (UInt16*)s->arr1; 4549 s->zbits = NULL; 4550 s->ptr = (UInt32*)s->arr1; 4551 4552 strm->state = s; 4553 strm->total_in_lo32 = 0; 4554 strm->total_in_hi32 = 0; 4555 strm->total_out_lo32 = 0; 4556 strm->total_out_hi32 = 0; 4557 init_RL ( s ); 4558 prepare_new_block ( s ); 4559 return BZ_OK; 4560} 4561 4562 4563/*---------------------------------------------------*/ 4564static 4565void add_pair_to_block ( EState* s ) 4566{ 4567 Int32 i; 4568 UChar ch = (UChar)(s->state_in_ch); 4569 for (i = 0; i < s->state_in_len; i++) { 4570 BZ_UPDATE_CRC( s->blockCRC, ch ); 4571 } 4572 s->inUse[s->state_in_ch] = True; 4573 switch (s->state_in_len) { 4574 case 1: 4575 s->block[s->nblock] = (UChar)ch; s->nblock++; 4576 break; 4577 case 2: 4578 s->block[s->nblock] = (UChar)ch; s->nblock++; 4579 s->block[s->nblock] = (UChar)ch; s->nblock++; 4580 break; 4581 case 3: 4582 s->block[s->nblock] = (UChar)ch; s->nblock++; 4583 s->block[s->nblock] = (UChar)ch; s->nblock++; 4584 s->block[s->nblock] = (UChar)ch; s->nblock++; 4585 break; 4586 default: 4587 s->inUse[s->state_in_len-4] = True; 4588 s->block[s->nblock] = (UChar)ch; s->nblock++; 4589 s->block[s->nblock] = (UChar)ch; s->nblock++; 4590 s->block[s->nblock] = (UChar)ch; s->nblock++; 4591 s->block[s->nblock] = (UChar)ch; s->nblock++; 4592 s->block[s->nblock] = ((UChar)(s->state_in_len-4)); 4593 s->nblock++; 4594 break; 4595 } 4596} 4597 4598 4599/*---------------------------------------------------*/ 4600static 4601void flush_RL ( EState* s ) 4602{ 4603 if (s->state_in_ch < 256) add_pair_to_block ( s ); 4604 init_RL ( s ); 4605} 4606 4607 4608/*---------------------------------------------------*/ 4609#define ADD_CHAR_TO_BLOCK(zs,zchh0) \ 4610{ \ 4611 UInt32 zchh = (UInt32)(zchh0); \ 4612 /*-- fast track the common case --*/ \ 4613 if (zchh != zs->state_in_ch && \ 4614 zs->state_in_len == 1) { \ 4615 UChar ch = (UChar)(zs->state_in_ch); \ 4616 BZ_UPDATE_CRC( zs->blockCRC, ch ); \ 4617 zs->inUse[zs->state_in_ch] = True; \ 4618 zs->block[zs->nblock] = (UChar)ch; \ 4619 zs->nblock++; \ 4620 zs->state_in_ch = zchh; \ 4621 } \ 4622 else \ 4623 /*-- general, uncommon cases --*/ \ 4624 if (zchh != zs->state_in_ch || \ 4625 zs->state_in_len == 255) { \ 4626 if (zs->state_in_ch < 256) \ 4627 add_pair_to_block ( zs ); \ 4628 zs->state_in_ch = zchh; \ 4629 zs->state_in_len = 1; \ 4630 } else { \ 4631 zs->state_in_len++; \ 4632 } \ 4633} 4634 4635 4636/*---------------------------------------------------*/ 4637static 4638Bool copy_input_until_stop ( EState* s ) 4639{ 4640 Bool progress_in = False; 4641 4642 if (s->mode == BZ_M_RUNNING) { 4643 4644 /*-- fast track the common case --*/ 4645 while (True) { 4646 /*-- block full? --*/ 4647 if (s->nblock >= s->nblockMAX) break; 4648 /*-- no input? --*/ 4649 if (s->strm->avail_in == 0) break; 4650 progress_in = True; 4651 ADD_CHAR_TO_BLOCK ( s, (UInt32)(*((UChar*)(s->strm->next_in))) ); 4652 s->strm->next_in++; 4653 s->strm->avail_in--; 4654 s->strm->total_in_lo32++; 4655 if (s->strm->total_in_lo32 == 0) s->strm->total_in_hi32++; 4656 } 4657 4658 } else { 4659 4660 /*-- general, uncommon case --*/ 4661 while (True) { 4662 /*-- block full? --*/ 4663 if (s->nblock >= s->nblockMAX) break; 4664 /*-- no input? --*/ 4665 if (s->strm->avail_in == 0) break; 4666 /*-- flush/finish end? --*/ 4667 if (s->avail_in_expect == 0) break; 4668 progress_in = True; 4669 ADD_CHAR_TO_BLOCK ( s, (UInt32)(*((UChar*)(s->strm->next_in))) ); 4670 s->strm->next_in++; 4671 s->strm->avail_in--; 4672 s->strm->total_in_lo32++; 4673 if (s->strm->total_in_lo32 == 0) s->strm->total_in_hi32++; 4674 s->avail_in_expect--; 4675 } 4676 } 4677 return progress_in; 4678} 4679 4680 4681/*---------------------------------------------------*/ 4682static 4683Bool copy_output_until_stop ( EState* s ) 4684{ 4685 Bool progress_out = False; 4686 4687 while (True) { 4688 4689 /*-- no output space? --*/ 4690 if (s->strm->avail_out == 0) break; 4691 4692 /*-- block done? --*/ 4693 if (s->state_out_pos >= s->numZ) break; 4694 4695 progress_out = True; 4696 *(s->strm->next_out) = s->zbits[s->state_out_pos]; 4697 s->state_out_pos++; 4698 s->strm->avail_out--; 4699 s->strm->next_out++; 4700 s->strm->total_out_lo32++; 4701 if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++; 4702 } 4703 4704 return progress_out; 4705} 4706 4707 4708/*---------------------------------------------------*/ 4709static 4710Bool handle_compress ( bz_stream* strm ) 4711{ 4712 Bool progress_in = False; 4713 Bool progress_out = False; 4714 EState* s = strm->state; 4715 4716 while (True) { 4717 4718 if (s->state == BZ_S_OUTPUT) { 4719 progress_out |= copy_output_until_stop ( s ); 4720 if (s->state_out_pos < s->numZ) break; 4721 if (s->mode == BZ_M_FINISHING && 4722 s->avail_in_expect == 0 && 4723 isempty_RL(s)) break; 4724 prepare_new_block ( s ); 4725 s->state = BZ_S_INPUT; 4726 if (s->mode == BZ_M_FLUSHING && 4727 s->avail_in_expect == 0 && 4728 isempty_RL(s)) break; 4729 } 4730 4731 if (s->state == BZ_S_INPUT) { 4732 progress_in |= copy_input_until_stop ( s ); 4733 if (s->mode != BZ_M_RUNNING && s->avail_in_expect == 0) { 4734 flush_RL ( s ); 4735 BZ2_compressBlock ( s, (Bool)(s->mode == BZ_M_FINISHING) ); 4736 s->state = BZ_S_OUTPUT; 4737 } 4738 else 4739 if (s->nblock >= s->nblockMAX) { 4740 BZ2_compressBlock ( s, False ); 4741 s->state = BZ_S_OUTPUT; 4742 } 4743 else 4744 if (s->strm->avail_in == 0) { 4745 break; 4746 } 4747 } 4748 4749 } 4750 4751 return progress_in || progress_out; 4752} 4753 4754 4755/*---------------------------------------------------*/ 4756int BZ_API(BZ2_bzCompress) ( bz_stream *strm, int action ) 4757{ 4758 Bool progress; 4759 EState* s; 4760 if (strm == NULL) return BZ_PARAM_ERROR; 4761 s = strm->state; 4762 if (s == NULL) return BZ_PARAM_ERROR; 4763 if (s->strm != strm) return BZ_PARAM_ERROR; 4764 4765 preswitch: 4766 switch (s->mode) { 4767 4768 case BZ_M_IDLE: 4769 return BZ_SEQUENCE_ERROR; 4770 4771 case BZ_M_RUNNING: 4772 if (action == BZ_RUN) { 4773 progress = handle_compress ( strm ); 4774 return progress ? BZ_RUN_OK : BZ_PARAM_ERROR; 4775 } 4776 else 4777 if (action == BZ_FLUSH) { 4778 s->avail_in_expect = strm->avail_in; 4779 s->mode = BZ_M_FLUSHING; 4780 goto preswitch; 4781 } 4782 else 4783 if (action == BZ_FINISH) { 4784 s->avail_in_expect = strm->avail_in; 4785 s->mode = BZ_M_FINISHING; 4786 goto preswitch; 4787 } 4788 else 4789 return BZ_PARAM_ERROR; 4790 4791 case BZ_M_FLUSHING: 4792 if (action != BZ_FLUSH) return BZ_SEQUENCE_ERROR; 4793 if (s->avail_in_expect != s->strm->avail_in) 4794 return BZ_SEQUENCE_ERROR; 4795 progress = handle_compress ( strm ); 4796 if (s->avail_in_expect > 0 || !isempty_RL(s) || 4797 s->state_out_pos < s->numZ) return BZ_FLUSH_OK; 4798 s->mode = BZ_M_RUNNING; 4799 return BZ_RUN_OK; 4800 4801 case BZ_M_FINISHING: 4802 if (action != BZ_FINISH) return BZ_SEQUENCE_ERROR; 4803 if (s->avail_in_expect != s->strm->avail_in) 4804 return BZ_SEQUENCE_ERROR; 4805 progress = handle_compress ( strm ); 4806 if (!progress) return BZ_SEQUENCE_ERROR; 4807 if (s->avail_in_expect > 0 || !isempty_RL(s) || 4808 s->state_out_pos < s->numZ) return BZ_FINISH_OK; 4809 s->mode = BZ_M_IDLE; 4810 return BZ_STREAM_END; 4811 } 4812 return BZ_OK; /*--not reached--*/ 4813} 4814 4815 4816/*---------------------------------------------------*/ 4817int BZ_API(BZ2_bzCompressEnd) ( bz_stream *strm ) 4818{ 4819 EState* s; 4820 if (strm == NULL) return BZ_PARAM_ERROR; 4821 s = strm->state; 4822 if (s == NULL) return BZ_PARAM_ERROR; 4823 if (s->strm != strm) return BZ_PARAM_ERROR; 4824 4825 if (s->arr1 != NULL) BZFREE(s->arr1); 4826 if (s->arr2 != NULL) BZFREE(s->arr2); 4827 if (s->ftab != NULL) BZFREE(s->ftab); 4828 BZFREE(strm->state); 4829 4830 strm->state = NULL; 4831 4832 return BZ_OK; 4833} 4834 4835 4836/*---------------------------------------------------*/ 4837/*--- Decompression stuff ---*/ 4838/*---------------------------------------------------*/ 4839 4840/*---------------------------------------------------*/ 4841int BZ_API(BZ2_bzDecompressInit) 4842 ( bz_stream* strm, 4843 int verbosity, 4844 int small ) 4845{ 4846 DState* s; 4847 4848 if (!bz_config_ok()) return BZ_CONFIG_ERROR; 4849 4850 if (strm == NULL) return BZ_PARAM_ERROR; 4851 if (small != 0 && small != 1) return BZ_PARAM_ERROR; 4852 if (verbosity < 0 || verbosity > 4) return BZ_PARAM_ERROR; 4853 4854 if (strm->bzalloc == NULL) strm->bzalloc = default_bzalloc; 4855 if (strm->bzfree == NULL) strm->bzfree = default_bzfree; 4856 4857 s = BZALLOC( sizeof(DState) ); 4858 if (s == NULL) return BZ_MEM_ERROR; 4859 s->strm = strm; 4860 strm->state = s; 4861 s->state = BZ_X_MAGIC_1; 4862 s->bsLive = 0; 4863 s->bsBuff = 0; 4864 s->calculatedCombinedCRC = 0; 4865 strm->total_in_lo32 = 0; 4866 strm->total_in_hi32 = 0; 4867 strm->total_out_lo32 = 0; 4868 strm->total_out_hi32 = 0; 4869 s->smallDecompress = (Bool)small; 4870 s->ll4 = NULL; 4871 s->ll16 = NULL; 4872 s->tt = NULL; 4873 s->currBlockNo = 0; 4874 s->verbosity = verbosity; 4875 4876 return BZ_OK; 4877} 4878 4879 4880/*---------------------------------------------------*/ 4881/* Return True iff data corruption is discovered. 4882 Returns False if there is no problem. 4883*/ 4884static 4885Bool unRLE_obuf_to_output_FAST ( DState* s ) 4886{ 4887 UChar k1; 4888 4889 if (s->blockRandomised) { 4890 4891 while (True) { 4892 /* try to finish existing run */ 4893 while (True) { 4894 if (s->strm->avail_out == 0) return False; 4895 if (s->state_out_len == 0) break; 4896 *( (UChar*)(s->strm->next_out) ) = s->state_out_ch; 4897 BZ_UPDATE_CRC ( s->calculatedBlockCRC, s->state_out_ch ); 4898 s->state_out_len--; 4899 s->strm->next_out++; 4900 s->strm->avail_out--; 4901 s->strm->total_out_lo32++; 4902 if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++; 4903 } 4904 4905 /* can a new run be started? */ 4906 if (s->nblock_used == s->save_nblock+1) return False; 4907 4908 /* Only caused by corrupt data stream? */ 4909 if (s->nblock_used > s->save_nblock+1) 4910 return True; 4911 4912 s->state_out_len = 1; 4913 s->state_out_ch = s->k0; 4914 BZ_GET_FAST(k1); BZ_RAND_UPD_MASK; 4915 k1 ^= BZ_RAND_MASK; s->nblock_used++; 4916 if (s->nblock_used == s->save_nblock+1) continue; 4917 if (k1 != s->k0) { s->k0 = k1; continue; }; 4918 4919 s->state_out_len = 2; 4920 BZ_GET_FAST(k1); BZ_RAND_UPD_MASK; 4921 k1 ^= BZ_RAND_MASK; s->nblock_used++; 4922 if (s->nblock_used == s->save_nblock+1) continue; 4923 if (k1 != s->k0) { s->k0 = k1; continue; }; 4924 4925 s->state_out_len = 3; 4926 BZ_GET_FAST(k1); BZ_RAND_UPD_MASK; 4927 k1 ^= BZ_RAND_MASK; s->nblock_used++; 4928 if (s->nblock_used == s->save_nblock+1) continue; 4929 if (k1 != s->k0) { s->k0 = k1; continue; }; 4930 4931 BZ_GET_FAST(k1); BZ_RAND_UPD_MASK; 4932 k1 ^= BZ_RAND_MASK; s->nblock_used++; 4933 s->state_out_len = ((Int32)k1) + 4; 4934 BZ_GET_FAST(s->k0); BZ_RAND_UPD_MASK; 4935 s->k0 ^= BZ_RAND_MASK; s->nblock_used++; 4936 } 4937 4938 } else { 4939 4940 /* restore */ 4941 UInt32 c_calculatedBlockCRC = s->calculatedBlockCRC; 4942 UChar c_state_out_ch = s->state_out_ch; 4943 Int32 c_state_out_len = s->state_out_len; 4944 Int32 c_nblock_used = s->nblock_used; 4945 Int32 c_k0 = s->k0; 4946 UInt32* c_tt = s->tt; 4947 UInt32 c_tPos = s->tPos; 4948 char* cs_next_out = s->strm->next_out; 4949 unsigned int cs_avail_out = s->strm->avail_out; 4950 /* end restore */ 4951 4952 UInt32 avail_out_INIT = cs_avail_out; 4953 Int32 s_save_nblockPP = s->save_nblock+1; 4954 unsigned int total_out_lo32_old; 4955 4956 while (True) { 4957 4958 /* try to finish existing run */ 4959 if (c_state_out_len > 0) { 4960 while (True) { 4961 if (cs_avail_out == 0) goto return_notr; 4962 if (c_state_out_len == 1) break; 4963 *( (UChar*)(cs_next_out) ) = c_state_out_ch; 4964 BZ_UPDATE_CRC ( c_calculatedBlockCRC, c_state_out_ch ); 4965 c_state_out_len--; 4966 cs_next_out++; 4967 cs_avail_out--; 4968 } 4969 s_state_out_len_eq_one: 4970 { 4971 if (cs_avail_out == 0) { 4972 c_state_out_len = 1; goto return_notr; 4973 }; 4974 *( (UChar*)(cs_next_out) ) = c_state_out_ch; 4975 BZ_UPDATE_CRC ( c_calculatedBlockCRC, c_state_out_ch ); 4976 cs_next_out++; 4977 cs_avail_out--; 4978 } 4979 } 4980 /* Only caused by corrupt data stream? */ 4981 if (c_nblock_used > s_save_nblockPP) 4982 return True; 4983 4984 /* can a new run be started? */ 4985 if (c_nblock_used == s_save_nblockPP) { 4986 c_state_out_len = 0; goto return_notr; 4987 }; 4988 c_state_out_ch = c_k0; 4989 BZ_GET_FAST_C(k1); c_nblock_used++; 4990 if (k1 != c_k0) { 4991 c_k0 = k1; goto s_state_out_len_eq_one; 4992 }; 4993 if (c_nblock_used == s_save_nblockPP) 4994 goto s_state_out_len_eq_one; 4995 4996 c_state_out_len = 2; 4997 BZ_GET_FAST_C(k1); c_nblock_used++; 4998 if (c_nblock_used == s_save_nblockPP) continue; 4999 if (k1 != c_k0) { c_k0 = k1; continue; }; 5000 5001 c_state_out_len = 3; 5002 BZ_GET_FAST_C(k1); c_nblock_used++; 5003 if (c_nblock_used == s_save_nblockPP) continue; 5004 if (k1 != c_k0) { c_k0 = k1; continue; }; 5005 5006 BZ_GET_FAST_C(k1); c_nblock_used++; 5007 c_state_out_len = ((Int32)k1) + 4; 5008 BZ_GET_FAST_C(c_k0); c_nblock_used++; 5009 } 5010 5011 return_notr: 5012 total_out_lo32_old = s->strm->total_out_lo32; 5013 s->strm->total_out_lo32 += (avail_out_INIT - cs_avail_out); 5014 if (s->strm->total_out_lo32 < total_out_lo32_old) 5015 s->strm->total_out_hi32++; 5016 5017 /* save */ 5018 s->calculatedBlockCRC = c_calculatedBlockCRC; 5019 s->state_out_ch = c_state_out_ch; 5020 s->state_out_len = c_state_out_len; 5021 s->nblock_used = c_nblock_used; 5022 s->k0 = c_k0; 5023 s->tt = c_tt; 5024 s->tPos = c_tPos; 5025 s->strm->next_out = cs_next_out; 5026 s->strm->avail_out = cs_avail_out; 5027 /* end save */ 5028 } 5029 return False; 5030} 5031 5032 5033 5034/*---------------------------------------------------*/ 5035/* Return True iff data corruption is discovered. 5036 Returns False if there is no problem. 5037*/ 5038static 5039Bool unRLE_obuf_to_output_SMALL ( DState* s ) 5040{ 5041 UChar k1; 5042 5043 if (s->blockRandomised) { 5044 5045 while (True) { 5046 /* try to finish existing run */ 5047 while (True) { 5048 if (s->strm->avail_out == 0) return False; 5049 if (s->state_out_len == 0) break; 5050 *( (UChar*)(s->strm->next_out) ) = s->state_out_ch; 5051 BZ_UPDATE_CRC ( s->calculatedBlockCRC, s->state_out_ch ); 5052 s->state_out_len--; 5053 s->strm->next_out++; 5054 s->strm->avail_out--; 5055 s->strm->total_out_lo32++; 5056 if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++; 5057 } 5058 5059 /* can a new run be started? */ 5060 if (s->nblock_used == s->save_nblock+1) return False; 5061 5062 /* Only caused by corrupt data stream? */ 5063 if (s->nblock_used > s->save_nblock+1) 5064 return True; 5065 5066 s->state_out_len = 1; 5067 s->state_out_ch = s->k0; 5068 BZ_GET_SMALL(k1); BZ_RAND_UPD_MASK; 5069 k1 ^= BZ_RAND_MASK; s->nblock_used++; 5070 if (s->nblock_used == s->save_nblock+1) continue; 5071 if (k1 != s->k0) { s->k0 = k1; continue; }; 5072 5073 s->state_out_len = 2; 5074 BZ_GET_SMALL(k1); BZ_RAND_UPD_MASK; 5075 k1 ^= BZ_RAND_MASK; s->nblock_used++; 5076 if (s->nblock_used == s->save_nblock+1) continue; 5077 if (k1 != s->k0) { s->k0 = k1; continue; }; 5078 5079 s->state_out_len = 3; 5080 BZ_GET_SMALL(k1); BZ_RAND_UPD_MASK; 5081 k1 ^= BZ_RAND_MASK; s->nblock_used++; 5082 if (s->nblock_used == s->save_nblock+1) continue; 5083 if (k1 != s->k0) { s->k0 = k1; continue; }; 5084 5085 BZ_GET_SMALL(k1); BZ_RAND_UPD_MASK; 5086 k1 ^= BZ_RAND_MASK; s->nblock_used++; 5087 s->state_out_len = ((Int32)k1) + 4; 5088 BZ_GET_SMALL(s->k0); BZ_RAND_UPD_MASK; 5089 s->k0 ^= BZ_RAND_MASK; s->nblock_used++; 5090 } 5091 5092 } else { 5093 5094 while (True) { 5095 /* try to finish existing run */ 5096 while (True) { 5097 if (s->strm->avail_out == 0) return False; 5098 if (s->state_out_len == 0) break; 5099 *( (UChar*)(s->strm->next_out) ) = s->state_out_ch; 5100 BZ_UPDATE_CRC ( s->calculatedBlockCRC, s->state_out_ch ); 5101 s->state_out_len--; 5102 s->strm->next_out++; 5103 s->strm->avail_out--; 5104 s->strm->total_out_lo32++; 5105 if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++; 5106 } 5107 5108 /* can a new run be started? */ 5109 if (s->nblock_used == s->save_nblock+1) return False; 5110 5111 /* Only caused by corrupt data stream? */ 5112 if (s->nblock_used > s->save_nblock+1) 5113 return True; 5114 5115 s->state_out_len = 1; 5116 s->state_out_ch = s->k0; 5117 BZ_GET_SMALL(k1); s->nblock_used++; 5118 if (s->nblock_used == s->save_nblock+1) continue; 5119 if (k1 != s->k0) { s->k0 = k1; continue; }; 5120 5121 s->state_out_len = 2; 5122 BZ_GET_SMALL(k1); s->nblock_used++; 5123 if (s->nblock_used == s->save_nblock+1) continue; 5124 if (k1 != s->k0) { s->k0 = k1; continue; }; 5125 5126 s->state_out_len = 3; 5127 BZ_GET_SMALL(k1); s->nblock_used++; 5128 if (s->nblock_used == s->save_nblock+1) continue; 5129 if (k1 != s->k0) { s->k0 = k1; continue; }; 5130 5131 BZ_GET_SMALL(k1); s->nblock_used++; 5132 s->state_out_len = ((Int32)k1) + 4; 5133 BZ_GET_SMALL(s->k0); s->nblock_used++; 5134 } 5135 5136 } 5137} 5138 5139 5140/*---------------------------------------------------*/ 5141int BZ_API(BZ2_bzDecompress) ( bz_stream *strm ) 5142{ 5143 Bool corrupt; 5144 DState* s; 5145 if (strm == NULL) return BZ_PARAM_ERROR; 5146 s = strm->state; 5147 if (s == NULL) return BZ_PARAM_ERROR; 5148 if (s->strm != strm) return BZ_PARAM_ERROR; 5149 5150 while (True) { 5151 if (s->state == BZ_X_IDLE) return BZ_SEQUENCE_ERROR; 5152 if (s->state == BZ_X_OUTPUT) { 5153 if (s->smallDecompress) 5154 corrupt = unRLE_obuf_to_output_SMALL ( s ); else 5155 corrupt = unRLE_obuf_to_output_FAST ( s ); 5156 if (corrupt) return BZ_DATA_ERROR; 5157 if (s->nblock_used == s->save_nblock+1 && s->state_out_len == 0) { 5158 BZ_FINALISE_CRC ( s->calculatedBlockCRC ); 5159 if (s->verbosity >= 3) 5160 VPrintf2 ( " {0x%08x, 0x%08x}", s->storedBlockCRC, 5161 s->calculatedBlockCRC ); 5162 if (s->verbosity >= 2) VPrintf0 ( "]" ); 5163 if (s->calculatedBlockCRC != s->storedBlockCRC) 5164 return BZ_DATA_ERROR; 5165 s->calculatedCombinedCRC 5166 = (s->calculatedCombinedCRC << 1) | 5167 (s->calculatedCombinedCRC >> 31); 5168 s->calculatedCombinedCRC ^= s->calculatedBlockCRC; 5169 s->state = BZ_X_BLKHDR_1; 5170 } else { 5171 return BZ_OK; 5172 } 5173 } 5174 if (s->state >= BZ_X_MAGIC_1) { 5175 Int32 r = BZ2_decompress ( s ); 5176 if (r == BZ_STREAM_END) { 5177 if (s->verbosity >= 3) 5178 VPrintf2 ( "\n combined CRCs: stored = 0x%08x, computed = 0x%08x", 5179 s->storedCombinedCRC, s->calculatedCombinedCRC ); 5180 if (s->calculatedCombinedCRC != s->storedCombinedCRC) 5181 return BZ_DATA_ERROR; 5182 return r; 5183 } 5184 if (s->state != BZ_X_OUTPUT) return r; 5185 } 5186 } 5187 5188 AssertH ( 0, 6001 ); 5189 5190 return 0; /*NOTREACHED*/ 5191} 5192 5193 5194/*---------------------------------------------------*/ 5195int BZ_API(BZ2_bzDecompressEnd) ( bz_stream *strm ) 5196{ 5197 DState* s; 5198 if (strm == NULL) return BZ_PARAM_ERROR; 5199 s = strm->state; 5200 if (s == NULL) return BZ_PARAM_ERROR; 5201 if (s->strm != strm) return BZ_PARAM_ERROR; 5202 5203 if (s->tt != NULL) BZFREE(s->tt); 5204 if (s->ll16 != NULL) BZFREE(s->ll16); 5205 if (s->ll4 != NULL) BZFREE(s->ll4); 5206 5207 BZFREE(strm->state); 5208 strm->state = NULL; 5209 5210 return BZ_OK; 5211} 5212 5213 5214#ifndef BZ_NO_STDIO 5215/*---------------------------------------------------*/ 5216/*--- File I/O stuff ---*/ 5217/*---------------------------------------------------*/ 5218 5219#define BZ_SETERR(eee) \ 5220{ \ 5221 if (bzerror != NULL) *bzerror = eee; \ 5222 if (bzf != NULL) bzf->lastErr = eee; \ 5223} 5224 5225typedef 5226 struct { 5227 FILE* handle; 5228 Char buf[BZ_MAX_UNUSED]; 5229 Int32 bufN; 5230 Bool writing; 5231 bz_stream strm; 5232 Int32 lastErr; 5233 Bool initialisedOk; 5234 } 5235 bzFile; 5236 5237 5238/*---------------------------------------------*/ 5239static Bool myfeof ( FILE* f ) 5240{ 5241 Int32 c = fgetc ( f ); 5242 if (c == EOF) return True; 5243 ungetc ( c, f ); 5244 return False; 5245} 5246 5247 5248/*---------------------------------------------------*/ 5249BZFILE* BZ_API(BZ2_bzWriteOpen) 5250 ( int* bzerror, 5251 FILE* f, 5252 int blockSize100k, 5253 int verbosity, 5254 int workFactor ) 5255{ 5256 Int32 ret; 5257 bzFile* bzf = NULL; 5258 5259 BZ_SETERR(BZ_OK); 5260 5261 if (f == NULL || 5262 (blockSize100k < 1 || blockSize100k > 9) || 5263 (workFactor < 0 || workFactor > 250) || 5264 (verbosity < 0 || verbosity > 4)) 5265 { BZ_SETERR(BZ_PARAM_ERROR); return NULL; }; 5266 5267 if (ferror(f)) 5268 { BZ_SETERR(BZ_IO_ERROR); return NULL; }; 5269 5270 bzf = malloc ( sizeof(bzFile) ); 5271 if (bzf == NULL) 5272 { BZ_SETERR(BZ_MEM_ERROR); return NULL; }; 5273 5274 BZ_SETERR(BZ_OK); 5275 bzf->initialisedOk = False; 5276 bzf->bufN = 0; 5277 bzf->handle = f; 5278 bzf->writing = True; 5279 bzf->strm.bzalloc = NULL; 5280 bzf->strm.bzfree = NULL; 5281 bzf->strm.opaque = NULL; 5282 5283 if (workFactor == 0) workFactor = 30; 5284 ret = BZ2_bzCompressInit ( &(bzf->strm), blockSize100k, 5285 verbosity, workFactor ); 5286 if (ret != BZ_OK) 5287 { BZ_SETERR(ret); free(bzf); return NULL; }; 5288 5289 bzf->strm.avail_in = 0; 5290 bzf->initialisedOk = True; 5291 return bzf; 5292} 5293 5294 5295 5296/*---------------------------------------------------*/ 5297void BZ_API(BZ2_bzWrite) 5298 ( int* bzerror, 5299 BZFILE* b, 5300 void* buf, 5301 int len ) 5302{ 5303 Int32 n, n2, ret; 5304 bzFile* bzf = (bzFile*)b; 5305 5306 BZ_SETERR(BZ_OK); 5307 if (bzf == NULL || buf == NULL || len < 0) 5308 { BZ_SETERR(BZ_PARAM_ERROR); return; }; 5309 if (!(bzf->writing)) 5310 { BZ_SETERR(BZ_SEQUENCE_ERROR); return; }; 5311 if (ferror(bzf->handle)) 5312 { BZ_SETERR(BZ_IO_ERROR); return; }; 5313 5314 if (len == 0) 5315 { BZ_SETERR(BZ_OK); return; }; 5316 5317 bzf->strm.avail_in = len; 5318 bzf->strm.next_in = buf; 5319 5320 while (True) { 5321 bzf->strm.avail_out = BZ_MAX_UNUSED; 5322 bzf->strm.next_out = bzf->buf; 5323 ret = BZ2_bzCompress ( &(bzf->strm), BZ_RUN ); 5324 if (ret != BZ_RUN_OK) 5325 { BZ_SETERR(ret); return; }; 5326 5327 if (bzf->strm.avail_out < BZ_MAX_UNUSED) { 5328 n = BZ_MAX_UNUSED - bzf->strm.avail_out; 5329 n2 = fwrite ( (void*)(bzf->buf), sizeof(UChar), 5330 n, bzf->handle ); 5331 if (n != n2 || ferror(bzf->handle)) 5332 { BZ_SETERR(BZ_IO_ERROR); return; }; 5333 } 5334 5335 if (bzf->strm.avail_in == 0) 5336 { BZ_SETERR(BZ_OK); return; }; 5337 } 5338} 5339 5340 5341/*---------------------------------------------------*/ 5342void BZ_API(BZ2_bzWriteClose) 5343 ( int* bzerror, 5344 BZFILE* b, 5345 int abandon, 5346 unsigned int* nbytes_in, 5347 unsigned int* nbytes_out ) 5348{ 5349 BZ2_bzWriteClose64 ( bzerror, b, abandon, 5350 nbytes_in, NULL, nbytes_out, NULL ); 5351} 5352 5353 5354void BZ_API(BZ2_bzWriteClose64) 5355 ( int* bzerror, 5356 BZFILE* b, 5357 int abandon, 5358 unsigned int* nbytes_in_lo32, 5359 unsigned int* nbytes_in_hi32, 5360 unsigned int* nbytes_out_lo32, 5361 unsigned int* nbytes_out_hi32 ) 5362{ 5363 Int32 n, n2, ret; 5364 bzFile* bzf = (bzFile*)b; 5365 5366 if (bzf == NULL) 5367 { BZ_SETERR(BZ_OK); return; }; 5368 if (!(bzf->writing)) 5369 { BZ_SETERR(BZ_SEQUENCE_ERROR); return; }; 5370 if (ferror(bzf->handle)) 5371 { BZ_SETERR(BZ_IO_ERROR); return; }; 5372 5373 if (nbytes_in_lo32 != NULL) *nbytes_in_lo32 = 0; 5374 if (nbytes_in_hi32 != NULL) *nbytes_in_hi32 = 0; 5375 if (nbytes_out_lo32 != NULL) *nbytes_out_lo32 = 0; 5376 if (nbytes_out_hi32 != NULL) *nbytes_out_hi32 = 0; 5377 5378 if ((!abandon) && bzf->lastErr == BZ_OK) { 5379 while (True) { 5380 bzf->strm.avail_out = BZ_MAX_UNUSED; 5381 bzf->strm.next_out = bzf->buf; 5382 ret = BZ2_bzCompress ( &(bzf->strm), BZ_FINISH ); 5383 if (ret != BZ_FINISH_OK && ret != BZ_STREAM_END) 5384 { BZ_SETERR(ret); return; }; 5385 5386 if (bzf->strm.avail_out < BZ_MAX_UNUSED) { 5387 n = BZ_MAX_UNUSED - bzf->strm.avail_out; 5388 n2 = fwrite ( (void*)(bzf->buf), sizeof(UChar), 5389 n, bzf->handle ); 5390 if (n != n2 || ferror(bzf->handle)) 5391 { BZ_SETERR(BZ_IO_ERROR); return; }; 5392 } 5393 5394 if (ret == BZ_STREAM_END) break; 5395 } 5396 } 5397 5398 if ( !abandon && !ferror ( bzf->handle ) ) { 5399 fflush ( bzf->handle ); 5400 if (ferror(bzf->handle)) 5401 { BZ_SETERR(BZ_IO_ERROR); return; }; 5402 } 5403 5404 if (nbytes_in_lo32 != NULL) 5405 *nbytes_in_lo32 = bzf->strm.total_in_lo32; 5406 if (nbytes_in_hi32 != NULL) 5407 *nbytes_in_hi32 = bzf->strm.total_in_hi32; 5408 if (nbytes_out_lo32 != NULL) 5409 *nbytes_out_lo32 = bzf->strm.total_out_lo32; 5410 if (nbytes_out_hi32 != NULL) 5411 *nbytes_out_hi32 = bzf->strm.total_out_hi32; 5412 5413 BZ_SETERR(BZ_OK); 5414 BZ2_bzCompressEnd ( &(bzf->strm) ); 5415 free ( bzf ); 5416} 5417 5418 5419/*---------------------------------------------------*/ 5420BZFILE* BZ_API(BZ2_bzReadOpen) 5421 ( int* bzerror, 5422 FILE* f, 5423 int verbosity, 5424 int small, 5425 void* unused, 5426 int nUnused ) 5427{ 5428 bzFile* bzf = NULL; 5429 int ret; 5430 5431 BZ_SETERR(BZ_OK); 5432 5433 if (f == NULL || 5434 (small != 0 && small != 1) || 5435 (verbosity < 0 || verbosity > 4) || 5436 (unused == NULL && nUnused != 0) || 5437 (unused != NULL && (nUnused < 0 || nUnused > BZ_MAX_UNUSED))) 5438 { BZ_SETERR(BZ_PARAM_ERROR); return NULL; }; 5439 5440 if (ferror(f)) 5441 { BZ_SETERR(BZ_IO_ERROR); return NULL; }; 5442 5443 bzf = malloc ( sizeof(bzFile) ); 5444 if (bzf == NULL) 5445 { BZ_SETERR(BZ_MEM_ERROR); return NULL; }; 5446 5447 BZ_SETERR(BZ_OK); 5448 5449 bzf->initialisedOk = False; 5450 bzf->handle = f; 5451 bzf->bufN = 0; 5452 bzf->writing = False; 5453 bzf->strm.bzalloc = NULL; 5454 bzf->strm.bzfree = NULL; 5455 bzf->strm.opaque = NULL; 5456 5457 while (nUnused > 0) { 5458 bzf->buf[bzf->bufN] = *((UChar*)(unused)); bzf->bufN++; 5459 unused = ((void*)( 1 + ((UChar*)(unused)) )); 5460 nUnused--; 5461 } 5462 5463 ret = BZ2_bzDecompressInit ( &(bzf->strm), verbosity, small ); 5464 if (ret != BZ_OK) 5465 { BZ_SETERR(ret); free(bzf); return NULL; }; 5466 5467 bzf->strm.avail_in = bzf->bufN; 5468 bzf->strm.next_in = bzf->buf; 5469 5470 bzf->initialisedOk = True; 5471 return bzf; 5472} 5473 5474 5475/*---------------------------------------------------*/ 5476void BZ_API(BZ2_bzReadClose) ( int *bzerror, BZFILE *b ) 5477{ 5478 bzFile* bzf = (bzFile*)b; 5479 5480 BZ_SETERR(BZ_OK); 5481 if (bzf == NULL) 5482 { BZ_SETERR(BZ_OK); return; }; 5483 5484 if (bzf->writing) 5485 { BZ_SETERR(BZ_SEQUENCE_ERROR); return; }; 5486 5487 if (bzf->initialisedOk) 5488 (void)BZ2_bzDecompressEnd ( &(bzf->strm) ); 5489 free ( bzf ); 5490} 5491 5492 5493/*---------------------------------------------------*/ 5494int BZ_API(BZ2_bzRead) 5495 ( int* bzerror, 5496 BZFILE* b, 5497 void* buf, 5498 int len ) 5499{ 5500 Int32 n, ret; 5501 bzFile* bzf = (bzFile*)b; 5502 5503 BZ_SETERR(BZ_OK); 5504 5505 if (bzf == NULL || buf == NULL || len < 0) 5506 { BZ_SETERR(BZ_PARAM_ERROR); return 0; }; 5507 5508 if (bzf->writing) 5509 { BZ_SETERR(BZ_SEQUENCE_ERROR); return 0; }; 5510 5511 if (len == 0) 5512 { BZ_SETERR(BZ_OK); return 0; }; 5513 5514 bzf->strm.avail_out = len; 5515 bzf->strm.next_out = buf; 5516 5517 while (True) { 5518 5519 if (ferror(bzf->handle)) 5520 { BZ_SETERR(BZ_IO_ERROR); return 0; }; 5521 5522 if (bzf->strm.avail_in == 0 && !myfeof(bzf->handle)) { 5523 n = fread ( bzf->buf, sizeof(UChar), 5524 BZ_MAX_UNUSED, bzf->handle ); 5525 if (ferror(bzf->handle)) 5526 { BZ_SETERR(BZ_IO_ERROR); return 0; }; 5527 bzf->bufN = n; 5528 bzf->strm.avail_in = bzf->bufN; 5529 bzf->strm.next_in = bzf->buf; 5530 } 5531 5532 ret = BZ2_bzDecompress ( &(bzf->strm) ); 5533 5534 if (ret != BZ_OK && ret != BZ_STREAM_END) 5535 { BZ_SETERR(ret); return 0; }; 5536 5537 if (ret == BZ_OK && myfeof(bzf->handle) && 5538 bzf->strm.avail_in == 0 && bzf->strm.avail_out > 0) 5539 { BZ_SETERR(BZ_UNEXPECTED_EOF); return 0; }; 5540 5541 if (ret == BZ_STREAM_END) 5542 { BZ_SETERR(BZ_STREAM_END); 5543 return len - bzf->strm.avail_out; }; 5544 if (bzf->strm.avail_out == 0) 5545 { BZ_SETERR(BZ_OK); return len; }; 5546 5547 } 5548 5549 return 0; /*not reached*/ 5550} 5551 5552 5553/*---------------------------------------------------*/ 5554void BZ_API(BZ2_bzReadGetUnused) 5555 ( int* bzerror, 5556 BZFILE* b, 5557 void** unused, 5558 int* nUnused ) 5559{ 5560 bzFile* bzf = (bzFile*)b; 5561 if (bzf == NULL) 5562 { BZ_SETERR(BZ_PARAM_ERROR); return; }; 5563 if (bzf->lastErr != BZ_STREAM_END) 5564 { BZ_SETERR(BZ_SEQUENCE_ERROR); return; }; 5565 if (unused == NULL || nUnused == NULL) 5566 { BZ_SETERR(BZ_PARAM_ERROR); return; }; 5567 5568 BZ_SETERR(BZ_OK); 5569 *nUnused = bzf->strm.avail_in; 5570 *unused = bzf->strm.next_in; 5571} 5572#endif 5573 5574 5575/*---------------------------------------------------*/ 5576/*--- Misc convenience stuff ---*/ 5577/*---------------------------------------------------*/ 5578 5579/*---------------------------------------------------*/ 5580int BZ_API(BZ2_bzBuffToBuffCompress) 5581 ( char* dest, 5582 unsigned int* destLen, 5583 char* source, 5584 unsigned int sourceLen, 5585 int blockSize100k, 5586 int verbosity, 5587 int workFactor ) 5588{ 5589 bz_stream strm; 5590 int ret; 5591 5592 if (dest == NULL || destLen == NULL || 5593 source == NULL || 5594 blockSize100k < 1 || blockSize100k > 9 || 5595 verbosity < 0 || verbosity > 4 || 5596 workFactor < 0 || workFactor > 250) 5597 return BZ_PARAM_ERROR; 5598 5599 if (workFactor == 0) workFactor = 30; 5600 strm.bzalloc = NULL; 5601 strm.bzfree = NULL; 5602 strm.opaque = NULL; 5603 ret = BZ2_bzCompressInit ( &strm, blockSize100k, 5604 verbosity, workFactor ); 5605 if (ret != BZ_OK) return ret; 5606 5607 strm.next_in = source; 5608 strm.next_out = dest; 5609 strm.avail_in = sourceLen; 5610 strm.avail_out = *destLen; 5611 5612 ret = BZ2_bzCompress ( &strm, BZ_FINISH ); 5613 if (ret == BZ_FINISH_OK) goto output_overflow; 5614 if (ret != BZ_STREAM_END) goto errhandler; 5615 5616 /* normal termination */ 5617 *destLen -= strm.avail_out; 5618 BZ2_bzCompressEnd ( &strm ); 5619 return BZ_OK; 5620 5621 output_overflow: 5622 BZ2_bzCompressEnd ( &strm ); 5623 return BZ_OUTBUFF_FULL; 5624 5625 errhandler: 5626 BZ2_bzCompressEnd ( &strm ); 5627 return ret; 5628} 5629 5630 5631/*---------------------------------------------------*/ 5632int BZ_API(BZ2_bzBuffToBuffDecompress) 5633 ( char* dest, 5634 unsigned int* destLen, 5635 char* source, 5636 unsigned int sourceLen, 5637 int small, 5638 int verbosity ) 5639{ 5640 bz_stream strm; 5641 int ret; 5642 5643 if (dest == NULL || destLen == NULL || 5644 source == NULL || 5645 (small != 0 && small != 1) || 5646 verbosity < 0 || verbosity > 4) 5647 return BZ_PARAM_ERROR; 5648 5649 strm.bzalloc = NULL; 5650 strm.bzfree = NULL; 5651 strm.opaque = NULL; 5652 ret = BZ2_bzDecompressInit ( &strm, verbosity, small ); 5653 if (ret != BZ_OK) return ret; 5654 5655 strm.next_in = source; 5656 strm.next_out = dest; 5657 strm.avail_in = sourceLen; 5658 strm.avail_out = *destLen; 5659 5660 ret = BZ2_bzDecompress ( &strm ); 5661 if (ret == BZ_OK) goto output_overflow_or_eof; 5662 if (ret != BZ_STREAM_END) goto errhandler; 5663 5664 /* normal termination */ 5665 *destLen -= strm.avail_out; 5666 BZ2_bzDecompressEnd ( &strm ); 5667 return BZ_OK; 5668 5669 output_overflow_or_eof: 5670 if (strm.avail_out > 0) { 5671 BZ2_bzDecompressEnd ( &strm ); 5672 return BZ_UNEXPECTED_EOF; 5673 } else { 5674 BZ2_bzDecompressEnd ( &strm ); 5675 return BZ_OUTBUFF_FULL; 5676 }; 5677 5678 errhandler: 5679 BZ2_bzDecompressEnd ( &strm ); 5680 return ret; 5681} 5682 5683 5684/*---------------------------------------------------*/ 5685/*-- 5686 Code contributed by Yoshioka Tsuneo 5687 (QWF00133@niftyserve.or.jp/tsuneo-y@is.aist-nara.ac.jp), 5688 to support better zlib compatibility. 5689 This code is not _officially_ part of libbzip2 (yet); 5690 I haven't tested it, documented it, or considered the 5691 threading-safeness of it. 5692 If this code breaks, please contact both Yoshioka and me. 5693--*/ 5694/*---------------------------------------------------*/ 5695 5696/*---------------------------------------------------*/ 5697/*-- 5698 return version like "0.9.0c". 5699--*/ 5700const char * BZ_API(BZ2_bzlibVersion)(void) 5701{ 5702 return BZ_VERSION; 5703} 5704 5705 5706#ifndef BZ_NO_STDIO 5707/*---------------------------------------------------*/ 5708 5709#if defined(_WIN32) || defined(OS2) || defined(MSDOS) 5710# include <fcntl.h> 5711# include <io.h> 5712# define SET_BINARY_MODE(file) setmode(fileno(file),O_BINARY) 5713#else 5714# define SET_BINARY_MODE(file) 5715#endif 5716static 5717BZFILE * bzopen_or_bzdopen 5718 ( const char *path, /* no use when bzdopen */ 5719 int fd, /* no use when bzdopen */ 5720 const char *mode, 5721 int open_mode) /* bzopen: 0, bzdopen:1 */ 5722{ 5723 int bzerr; 5724 char unused[BZ_MAX_UNUSED]; 5725 int blockSize100k = 9; 5726 int writing = 0; 5727 char mode2[10] = ""; 5728 FILE *fp = NULL; 5729 BZFILE *bzfp = NULL; 5730 int verbosity = 0; 5731 int workFactor = 30; 5732 int smallMode = 0; 5733 int nUnused = 0; 5734 5735 if (mode == NULL) return NULL; 5736 while (*mode) { 5737 switch (*mode) { 5738 case 'r': 5739 writing = 0; break; 5740 case 'w': 5741 writing = 1; break; 5742 case 's': 5743 smallMode = 1; break; 5744 default: 5745 if (isdigit((int)(*mode))) { 5746 blockSize100k = *mode-BZ_HDR_0; 5747 } 5748 } 5749 mode++; 5750 } 5751 strcat(mode2, writing ? "w" : "r" ); 5752 strcat(mode2,"b"); /* binary mode */ 5753 5754 if (open_mode==0) { 5755 if (path==NULL || strcmp(path,"")==0) { 5756 fp = (writing ? stdout : stdin); 5757 SET_BINARY_MODE(fp); 5758 } else { 5759 fp = fopen(path,mode2); 5760 } 5761 } else { 5762#ifdef BZ_STRICT_ANSI 5763 fp = NULL; 5764#else 5765 fp = fdopen(fd,mode2); 5766#endif 5767 } 5768 if (fp == NULL) return NULL; 5769 5770 if (writing) { 5771 /* Guard against total chaos and anarchy -- JRS */ 5772 if (blockSize100k < 1) blockSize100k = 1; 5773 if (blockSize100k > 9) blockSize100k = 9; 5774 bzfp = BZ2_bzWriteOpen(&bzerr,fp,blockSize100k, 5775 verbosity,workFactor); 5776 } else { 5777 bzfp = BZ2_bzReadOpen(&bzerr,fp,verbosity,smallMode, 5778 unused,nUnused); 5779 } 5780 if (bzfp == NULL) { 5781 if (fp != stdin && fp != stdout) fclose(fp); 5782 return NULL; 5783 } 5784 return bzfp; 5785} 5786 5787 5788/*---------------------------------------------------*/ 5789/*-- 5790 open file for read or write. 5791 ex) bzopen("file","w9") 5792 case path="" or NULL => use stdin or stdout. 5793--*/ 5794BZFILE * BZ_API(BZ2_bzopen) 5795 ( const char *path, 5796 const char *mode ) 5797{ 5798 return bzopen_or_bzdopen(path,-1,mode,/*bzopen*/0); 5799} 5800 5801 5802/*---------------------------------------------------*/ 5803BZFILE * BZ_API(BZ2_bzdopen) 5804 ( int fd, 5805 const char *mode ) 5806{ 5807 return bzopen_or_bzdopen(NULL,fd,mode,/*bzdopen*/1); 5808} 5809 5810 5811/*---------------------------------------------------*/ 5812int BZ_API(BZ2_bzread) (BZFILE* b, void* buf, int len ) 5813{ 5814 int bzerr, nread; 5815 if (((bzFile*)b)->lastErr == BZ_STREAM_END) return 0; 5816 nread = BZ2_bzRead(&bzerr,b,buf,len); 5817 if (bzerr == BZ_OK || bzerr == BZ_STREAM_END) { 5818 return nread; 5819 } else { 5820 return -1; 5821 } 5822} 5823 5824 5825/*---------------------------------------------------*/ 5826int BZ_API(BZ2_bzwrite) (BZFILE* b, void* buf, int len ) 5827{ 5828 int bzerr; 5829 5830 BZ2_bzWrite(&bzerr,b,buf,len); 5831 if(bzerr == BZ_OK){ 5832 return len; 5833 }else{ 5834 return -1; 5835 } 5836} 5837 5838 5839/*---------------------------------------------------*/ 5840int BZ_API(BZ2_bzflush) (BZFILE *b) 5841{ 5842 /* do nothing now... */ 5843 return 0; 5844} 5845 5846 5847/*---------------------------------------------------*/ 5848void BZ_API(BZ2_bzclose) (BZFILE* b) 5849{ 5850 int bzerr; 5851 FILE *fp = ((bzFile *)b)->handle; 5852 5853 if (b==NULL) {return;} 5854 if(((bzFile*)b)->writing){ 5855 BZ2_bzWriteClose(&bzerr,b,0,NULL,NULL); 5856 if(bzerr != BZ_OK){ 5857 BZ2_bzWriteClose(NULL,b,1,NULL,NULL); 5858 } 5859 }else{ 5860 BZ2_bzReadClose(&bzerr,b); 5861 } 5862 if(fp!=stdin && fp!=stdout){ 5863 fclose(fp); 5864 } 5865} 5866 5867 5868/*---------------------------------------------------*/ 5869/*-- 5870 return last error code 5871--*/ 5872static char *bzerrorstrings[] = { 5873 "OK" 5874 ,"SEQUENCE_ERROR" 5875 ,"PARAM_ERROR" 5876 ,"MEM_ERROR" 5877 ,"DATA_ERROR" 5878 ,"DATA_ERROR_MAGIC" 5879 ,"IO_ERROR" 5880 ,"UNEXPECTED_EOF" 5881 ,"OUTBUFF_FULL" 5882 ,"CONFIG_ERROR" 5883 ,"???" /* for future */ 5884 ,"???" /* for future */ 5885 ,"???" /* for future */ 5886 ,"???" /* for future */ 5887 ,"???" /* for future */ 5888 ,"???" /* for future */ 5889}; 5890 5891 5892const char * BZ_API(BZ2_bzerror) (BZFILE *b, int *errnum) 5893{ 5894 int err = ((bzFile *)b)->lastErr; 5895 5896 if(err>0) err = 0; 5897 *errnum = err; 5898 return bzerrorstrings[err*-1]; 5899} 5900#endif 5901 5902 5903/*-------------------------------------------------------------*/ 5904/*--- end bzlib.c ---*/ 5905/*-------------------------------------------------------------*/ 5906 5907 5908///////////////////////////////////////////////////////////////////// 5909///////////////////////////////////////////////////////////////////// 5910 5911 5912/* A test program written to test robustness to decompression of 5913 corrupted data. Usage is 5914 unzcrash filename 5915 and the program will read the specified file, compress it (in memory), 5916 and then repeatedly decompress it, each time with a different bit of 5917 the compressed data inverted, so as to test all possible one-bit errors. 5918 This should not cause any invalid memory accesses. If it does, 5919 I want to know about it! 5920 5921 p.s. As you can see from the above description, the process is 5922 incredibly slow. A file of size eg 5KB will cause it to run for 5923 many hours. 5924*/ 5925 5926//#include <stdio.h> 5927//#include <assert.h> 5928//#include "bzlib.h" 5929 5930#define M_BLOCK 1000000 5931 5932typedef unsigned char uchar; 5933 5934#define M_BLOCK_OUT (M_BLOCK + 1000000) 5935uchar inbuf[M_BLOCK]; 5936uchar outbuf[M_BLOCK_OUT]; 5937uchar zbuf[M_BLOCK + 600 + (M_BLOCK / 100)]; 5938 5939int nIn, nOut, nZ; 5940 5941static char *bzerrorstrings[] = { 5942 "OK" 5943 ,"SEQUENCE_ERROR" 5944 ,"PARAM_ERROR" 5945 ,"MEM_ERROR" 5946 ,"DATA_ERROR" 5947 ,"DATA_ERROR_MAGIC" 5948 ,"IO_ERROR" 5949 ,"UNEXPECTED_EOF" 5950 ,"OUTBUFF_FULL" 5951 ,"???" /* for future */ 5952 ,"???" /* for future */ 5953 ,"???" /* for future */ 5954 ,"???" /* for future */ 5955 ,"???" /* for future */ 5956 ,"???" /* for future */ 5957}; 5958 5959void flip_bit ( int bit ) 5960{ 5961 int byteno = bit / 8; 5962 int bitno = bit % 8; 5963 uchar mask = 1 << bitno; 5964 //fprintf ( stderr, "(byte %d bit %d mask %d)", 5965 // byteno, bitno, (int)mask ); 5966 zbuf[byteno] ^= mask; 5967} 5968 5969void set_inbuf ( void ) 5970{ 5971 inbuf[0] = 0; 5972 my_strcat(inbuf, "At her sixtieth birthday party, Margaret Thatcher "); 5973 my_strcat(inbuf, "blew on the cake to light the candles.\n"); 5974 my_strcat(inbuf, "This program, bzip2, the associated library libbzip2, and all\n"); 5975 my_strcat(inbuf, "documentation, are copyright (C) 1996-2004 Julian R Seward. All\n"); 5976 my_strcat(inbuf, "rights reserved.\n"); 5977 my_strcat(inbuf, "\n"); 5978 my_strcat(inbuf, "Redistribution and use in source and binary forms, with or without\n"); 5979 my_strcat(inbuf, "modification, are permitted provided that the following conditions\n"); 5980 my_strcat(inbuf, "are met:\n"); 5981 my_strcat(inbuf, "\n"); 5982 my_strcat(inbuf, "1. Redistributions of source code must retain the above copyright\n"); 5983 my_strcat(inbuf, " notice, this list of conditions and the following disclaimer.\n"); 5984 my_strcat(inbuf, "\n"); 5985 my_strcat(inbuf, "2. The origin of this software must not be misrepresented; you must\n"); 5986 my_strcat(inbuf, " not claim that you wrote the original software. If you use this\n"); 5987 my_strcat(inbuf, " software in a product, an acknowledgment in the product\n"); 5988 my_strcat(inbuf, " documentation would be appreciated but is not required.\n"); 5989 my_strcat(inbuf, "\n"); 5990 my_strcat(inbuf, "3. Altered source versions must be plainly marked as such, and must\n"); 5991 my_strcat(inbuf, " not be misrepresented as being the original software.\n"); 5992 my_strcat(inbuf, "\n"); 5993 my_strcat(inbuf, "4. The name of the author may not be used to endorse or promote\n"); 5994 my_strcat(inbuf, " products derived from this software without specific prior written\n"); 5995 my_strcat(inbuf, " permission.\n"); 5996 my_strcat(inbuf, "\n"); 5997 my_strcat(inbuf, "THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS\n"); 5998 my_strcat(inbuf, "OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED\n"); 5999 my_strcat(inbuf, "WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\n"); 6000 my_strcat(inbuf, "ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY\n"); 6001 my_strcat(inbuf, "DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\n"); 6002 my_strcat(inbuf, "DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE\n"); 6003 my_strcat(inbuf, "GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS\n"); 6004 my_strcat(inbuf, "INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,\n"); 6005 my_strcat(inbuf, "WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\n"); 6006 my_strcat(inbuf, "NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\n"); 6007 my_strcat(inbuf, "SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n"); 6008 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6009 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6010 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6011 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6012 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6013 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6014 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6015 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6016 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6017 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6018 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6019 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6020 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6021 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6022 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6023 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6024 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6025 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6026 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6027 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6028 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6029 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6030 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6031 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6032 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6033 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6034 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6035 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6036 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6037 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6038 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6039 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6040 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6041 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6042 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6043 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6044 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6045 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6046 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6047 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6048 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6049 my_strcat(inbuf, "ababababababababababababababababababababababababababababababab"); 6050 my_strcat(inbuf, "\n"); 6051} 6052 6053 6054void entry ( HWord(*service)(HWord,HWord) ) 6055{ 6056 int r; 6057 int bit; 6058 int i; 6059 6060 serviceFn = service; 6061 6062 set_inbuf(); 6063 nIn = vexxx_strlen(inbuf)+1; 6064 vexxx_printf( "%d bytes read\n", nIn ); 6065 6066 nZ = M_BLOCK; 6067 r = BZ2_bzBuffToBuffCompress ( 6068 zbuf, &nZ, inbuf, nIn, 9, 4/*verb*/, 30 ); 6069 6070 if (r != BZ_OK) { 6071 vexxx_printf("initial compress failed!\n"); 6072 (*serviceFn)(0,0); 6073 } 6074 vexxx_printf( "%d after compression\n", nZ ); 6075 6076 for (bit = 0; bit < nZ*8; bit += (bit < 35 ? 1 : 377)) { 6077 vexxx_printf( "bit %d ", bit ); 6078 flip_bit ( bit ); 6079 nOut = M_BLOCK_OUT; 6080 r = BZ2_bzBuffToBuffDecompress ( 6081 outbuf, &nOut, zbuf, nZ, 1/*small*/, 0 ); 6082 vexxx_printf( " %d %s ", r, bzerrorstrings[-r] ); 6083 6084 if (r != BZ_OK) { 6085 vexxx_printf( "\n" ); 6086 } else { 6087 if (nOut != nIn) { 6088 vexxx_printf( "nIn/nOut mismatch %d %d\n", nIn, nOut ); 6089 (*serviceFn)(0,0); 6090 } else { 6091 for (i = 0; i < nOut; i++) 6092 if (inbuf[i] != outbuf[i]) { 6093 vexxx_printf( "mismatch at %d\n", i ); 6094 (*serviceFn)(0,0); 6095 } 6096 if (i == nOut) vexxx_printf( "really ok!\n" ); 6097 } 6098 } 6099 6100 flip_bit ( bit ); 6101 } 6102 6103#if 0 6104 assert (nOut == nIn); 6105 for (i = 0; i < nOut; i++) { 6106 if (inbuf[i] != outbuf[i]) { 6107 vexxx_printf( "difference at %d !\n", i ); 6108 return 1; 6109 } 6110 } 6111#endif 6112 6113 vexxx_printf( "all ok\n" ); 6114 (*serviceFn)(0,0); 6115} 6116