1/* $Id: tif_lzw.c,v 1.45 2011-04-02 20:54:09 bfriesen Exp $ */ 2 3/* 4 * Copyright (c) 1988-1997 Sam Leffler 5 * Copyright (c) 1991-1997 Silicon Graphics, Inc. 6 * 7 * Permission to use, copy, modify, distribute, and sell this software and 8 * its documentation for any purpose is hereby granted without fee, provided 9 * that (i) the above copyright notices and this permission notice appear in 10 * all copies of the software and related documentation, and (ii) the names of 11 * Sam Leffler and Silicon Graphics may not be used in any advertising or 12 * publicity relating to the software without the specific, prior written 13 * permission of Sam Leffler and Silicon Graphics. 14 * 15 * THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, 16 * EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY 17 * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. 18 * 19 * IN NO EVENT SHALL SAM LEFFLER OR SILICON GRAPHICS BE LIABLE FOR 20 * ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND, 21 * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, 22 * WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF 23 * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 24 * OF THIS SOFTWARE. 25 */ 26 27#include "tiffiop.h" 28#ifdef LZW_SUPPORT 29/* 30 * TIFF Library. 31 * Rev 5.0 Lempel-Ziv & Welch Compression Support 32 * 33 * This code is derived from the compress program whose code is 34 * derived from software contributed to Berkeley by James A. Woods, 35 * derived from original work by Spencer Thomas and Joseph Orost. 36 * 37 * The original Berkeley copyright notice appears below in its entirety. 38 */ 39#include "tif_predict.h" 40 41#include <stdio.h> 42 43/* 44 * NB: The 5.0 spec describes a different algorithm than Aldus 45 * implements. Specifically, Aldus does code length transitions 46 * one code earlier than should be done (for real LZW). 47 * Earlier versions of this library implemented the correct 48 * LZW algorithm, but emitted codes in a bit order opposite 49 * to the TIFF spec. Thus, to maintain compatibility w/ Aldus 50 * we interpret MSB-LSB ordered codes to be images written w/ 51 * old versions of this library, but otherwise adhere to the 52 * Aldus "off by one" algorithm. 53 * 54 * Future revisions to the TIFF spec are expected to "clarify this issue". 55 */ 56#define LZW_COMPAT /* include backwards compatibility code */ 57/* 58 * Each strip of data is supposed to be terminated by a CODE_EOI. 59 * If the following #define is included, the decoder will also 60 * check for end-of-strip w/o seeing this code. This makes the 61 * library more robust, but also slower. 62 */ 63#define LZW_CHECKEOS /* include checks for strips w/o EOI code */ 64 65#define MAXCODE(n) ((1L<<(n))-1) 66/* 67 * The TIFF spec specifies that encoded bit 68 * strings range from 9 to 12 bits. 69 */ 70#define BITS_MIN 9 /* start with 9 bits */ 71#define BITS_MAX 12 /* max of 12 bit strings */ 72/* predefined codes */ 73#define CODE_CLEAR 256 /* code to clear string table */ 74#define CODE_EOI 257 /* end-of-information code */ 75#define CODE_FIRST 258 /* first free code entry */ 76#define CODE_MAX MAXCODE(BITS_MAX) 77#define HSIZE 9001L /* 91% occupancy */ 78#define HSHIFT (13-8) 79#ifdef LZW_COMPAT 80/* NB: +1024 is for compatibility with old files */ 81#define CSIZE (MAXCODE(BITS_MAX)+1024L) 82#else 83#define CSIZE (MAXCODE(BITS_MAX)+1L) 84#endif 85 86/* 87 * State block for each open TIFF file using LZW 88 * compression/decompression. Note that the predictor 89 * state block must be first in this data structure. 90 */ 91typedef struct { 92 TIFFPredictorState predict; /* predictor super class */ 93 94 unsigned short nbits; /* # of bits/code */ 95 unsigned short maxcode; /* maximum code for lzw_nbits */ 96 unsigned short free_ent; /* next free entry in hash table */ 97 long nextdata; /* next bits of i/o */ 98 long nextbits; /* # of valid bits in lzw_nextdata */ 99 100 int rw_mode; /* preserve rw_mode from init */ 101} LZWBaseState; 102 103#define lzw_nbits base.nbits 104#define lzw_maxcode base.maxcode 105#define lzw_free_ent base.free_ent 106#define lzw_nextdata base.nextdata 107#define lzw_nextbits base.nextbits 108 109/* 110 * Encoding-specific state. 111 */ 112typedef uint16 hcode_t; /* codes fit in 16 bits */ 113typedef struct { 114 long hash; 115 hcode_t code; 116} hash_t; 117 118/* 119 * Decoding-specific state. 120 */ 121typedef struct code_ent { 122 struct code_ent *next; 123 unsigned short length; /* string len, including this token */ 124 unsigned char value; /* data value */ 125 unsigned char firstchar; /* first token of string */ 126} code_t; 127 128typedef int (*decodeFunc)(TIFF*, uint8*, tmsize_t, uint16); 129 130typedef struct { 131 LZWBaseState base; 132 133 /* Decoding specific data */ 134 long dec_nbitsmask; /* lzw_nbits 1 bits, right adjusted */ 135 long dec_restart; /* restart count */ 136#ifdef LZW_CHECKEOS 137 uint64 dec_bitsleft; /* available bits in raw data */ 138#endif 139 decodeFunc dec_decode; /* regular or backwards compatible */ 140 code_t* dec_codep; /* current recognized code */ 141 code_t* dec_oldcodep; /* previously recognized code */ 142 code_t* dec_free_entp; /* next free entry */ 143 code_t* dec_maxcodep; /* max available entry */ 144 code_t* dec_codetab; /* kept separate for small machines */ 145 146 /* Encoding specific data */ 147 int enc_oldcode; /* last code encountered */ 148 long enc_checkpoint; /* point at which to clear table */ 149#define CHECK_GAP 10000 /* enc_ratio check interval */ 150 long enc_ratio; /* current compression ratio */ 151 long enc_incount; /* (input) data bytes encoded */ 152 long enc_outcount; /* encoded (output) bytes */ 153 uint8* enc_rawlimit; /* bound on tif_rawdata buffer */ 154 hash_t* enc_hashtab; /* kept separate for small machines */ 155} LZWCodecState; 156 157#define LZWState(tif) ((LZWBaseState*) (tif)->tif_data) 158#define DecoderState(tif) ((LZWCodecState*) LZWState(tif)) 159#define EncoderState(tif) ((LZWCodecState*) LZWState(tif)) 160 161static int LZWDecode(TIFF* tif, uint8* op0, tmsize_t occ0, uint16 s); 162#ifdef LZW_COMPAT 163static int LZWDecodeCompat(TIFF* tif, uint8* op0, tmsize_t occ0, uint16 s); 164#endif 165static void cl_hash(LZWCodecState*); 166 167/* 168 * LZW Decoder. 169 */ 170 171#ifdef LZW_CHECKEOS 172/* 173 * This check shouldn't be necessary because each 174 * strip is suppose to be terminated with CODE_EOI. 175 */ 176#define NextCode(_tif, _sp, _bp, _code, _get) { \ 177 if ((_sp)->dec_bitsleft < (uint64)nbits) { \ 178 TIFFWarningExt(_tif->tif_clientdata, module, \ 179 "LZWDecode: Strip %d not terminated with EOI code", \ 180 _tif->tif_curstrip); \ 181 _code = CODE_EOI; \ 182 } else { \ 183 _get(_sp,_bp,_code); \ 184 (_sp)->dec_bitsleft -= nbits; \ 185 } \ 186} 187#else 188#define NextCode(tif, sp, bp, code, get) get(sp, bp, code) 189#endif 190 191static int 192LZWFixupTags(TIFF* tif) 193{ 194 (void) tif; 195 return (1); 196} 197 198static int 199LZWSetupDecode(TIFF* tif) 200{ 201 static const char module[] = "LZWSetupDecode"; 202 LZWCodecState* sp = DecoderState(tif); 203 int code; 204 205 if( sp == NULL ) 206 { 207 /* 208 * Allocate state block so tag methods have storage to record 209 * values. 210 */ 211 tif->tif_data = (uint8*) _TIFFmalloc(sizeof(LZWCodecState)); 212 if (tif->tif_data == NULL) 213 { 214 TIFFErrorExt(tif->tif_clientdata, module, "No space for LZW state block"); 215 return (0); 216 } 217 218 DecoderState(tif)->dec_codetab = NULL; 219 DecoderState(tif)->dec_decode = NULL; 220 221 /* 222 * Setup predictor setup. 223 */ 224 (void) TIFFPredictorInit(tif); 225 226 sp = DecoderState(tif); 227 } 228 229 assert(sp != NULL); 230 231 if (sp->dec_codetab == NULL) { 232 sp->dec_codetab = (code_t*)_TIFFmalloc(CSIZE*sizeof (code_t)); 233 if (sp->dec_codetab == NULL) { 234 TIFFErrorExt(tif->tif_clientdata, module, 235 "No space for LZW code table"); 236 return (0); 237 } 238 /* 239 * Pre-load the table. 240 */ 241 code = 255; 242 do { 243 sp->dec_codetab[code].value = code; 244 sp->dec_codetab[code].firstchar = code; 245 sp->dec_codetab[code].length = 1; 246 sp->dec_codetab[code].next = NULL; 247 } while (code--); 248 /* 249 * Zero-out the unused entries 250 */ 251 _TIFFmemset(&sp->dec_codetab[CODE_CLEAR], 0, 252 (CODE_FIRST - CODE_CLEAR) * sizeof (code_t)); 253 } 254 return (1); 255} 256 257/* 258 * Setup state for decoding a strip. 259 */ 260static int 261LZWPreDecode(TIFF* tif, uint16 s) 262{ 263 static const char module[] = "LZWPreDecode"; 264 LZWCodecState *sp = DecoderState(tif); 265 266 (void) s; 267 assert(sp != NULL); 268 if( sp->dec_codetab == NULL ) 269 { 270 tif->tif_setupdecode( tif ); 271 } 272 273 /* 274 * Check for old bit-reversed codes. 275 */ 276 if (tif->tif_rawdata[0] == 0 && (tif->tif_rawdata[1] & 0x1)) { 277#ifdef LZW_COMPAT 278 if (!sp->dec_decode) { 279 TIFFWarningExt(tif->tif_clientdata, module, 280 "Old-style LZW codes, convert file"); 281 /* 282 * Override default decoding methods with 283 * ones that deal with the old coding. 284 * Otherwise the predictor versions set 285 * above will call the compatibility routines 286 * through the dec_decode method. 287 */ 288 tif->tif_decoderow = LZWDecodeCompat; 289 tif->tif_decodestrip = LZWDecodeCompat; 290 tif->tif_decodetile = LZWDecodeCompat; 291 /* 292 * If doing horizontal differencing, must 293 * re-setup the predictor logic since we 294 * switched the basic decoder methods... 295 */ 296 (*tif->tif_setupdecode)(tif); 297 sp->dec_decode = LZWDecodeCompat; 298 } 299 sp->lzw_maxcode = MAXCODE(BITS_MIN); 300#else /* !LZW_COMPAT */ 301 if (!sp->dec_decode) { 302 TIFFErrorExt(tif->tif_clientdata, module, 303 "Old-style LZW codes not supported"); 304 sp->dec_decode = LZWDecode; 305 } 306 return (0); 307#endif/* !LZW_COMPAT */ 308 } else { 309 sp->lzw_maxcode = MAXCODE(BITS_MIN)-1; 310 sp->dec_decode = LZWDecode; 311 } 312 sp->lzw_nbits = BITS_MIN; 313 sp->lzw_nextbits = 0; 314 sp->lzw_nextdata = 0; 315 316 sp->dec_restart = 0; 317 sp->dec_nbitsmask = MAXCODE(BITS_MIN); 318#ifdef LZW_CHECKEOS 319 sp->dec_bitsleft = ((uint64)tif->tif_rawcc) << 3; 320#endif 321 sp->dec_free_entp = sp->dec_codetab + CODE_FIRST; 322 /* 323 * Zero entries that are not yet filled in. We do 324 * this to guard against bogus input data that causes 325 * us to index into undefined entries. If you can 326 * come up with a way to safely bounds-check input codes 327 * while decoding then you can remove this operation. 328 */ 329 _TIFFmemset(sp->dec_free_entp, 0, (CSIZE-CODE_FIRST)*sizeof (code_t)); 330 sp->dec_oldcodep = &sp->dec_codetab[-1]; 331 sp->dec_maxcodep = &sp->dec_codetab[sp->dec_nbitsmask-1]; 332 return (1); 333} 334 335/* 336 * Decode a "hunk of data". 337 */ 338#define GetNextCode(sp, bp, code) { \ 339 nextdata = (nextdata<<8) | *(bp)++; \ 340 nextbits += 8; \ 341 if (nextbits < nbits) { \ 342 nextdata = (nextdata<<8) | *(bp)++; \ 343 nextbits += 8; \ 344 } \ 345 code = (hcode_t)((nextdata >> (nextbits-nbits)) & nbitsmask); \ 346 nextbits -= nbits; \ 347} 348 349static void 350codeLoop(TIFF* tif, const char* module) 351{ 352 TIFFErrorExt(tif->tif_clientdata, module, 353 "Bogus encoding, loop in the code table; scanline %d", 354 tif->tif_row); 355} 356 357static int 358LZWDecode(TIFF* tif, uint8* op0, tmsize_t occ0, uint16 s) 359{ 360 static const char module[] = "LZWDecode"; 361 LZWCodecState *sp = DecoderState(tif); 362 char *op = (char*) op0; 363 long occ = (long) occ0; 364 char *tp; 365 unsigned char *bp; 366 hcode_t code; 367 int len; 368 long nbits, nextbits, nextdata, nbitsmask; 369 code_t *codep, *free_entp, *maxcodep, *oldcodep; 370 371 (void) s; 372 assert(sp != NULL); 373 assert(sp->dec_codetab != NULL); 374 375 /* 376 Fail if value does not fit in long. 377 */ 378 if ((tmsize_t) occ != occ0) 379 return (0); 380 /* 381 * Restart interrupted output operation. 382 */ 383 if (sp->dec_restart) { 384 long residue; 385 386 codep = sp->dec_codep; 387 residue = codep->length - sp->dec_restart; 388 if (residue > occ) { 389 /* 390 * Residue from previous decode is sufficient 391 * to satisfy decode request. Skip to the 392 * start of the decoded string, place decoded 393 * values in the output buffer, and return. 394 */ 395 sp->dec_restart += occ; 396 do { 397 codep = codep->next; 398 } while (--residue > occ && codep); 399 if (codep) { 400 tp = op + occ; 401 do { 402 *--tp = codep->value; 403 codep = codep->next; 404 } while (--occ && codep); 405 } 406 return (1); 407 } 408 /* 409 * Residue satisfies only part of the decode request. 410 */ 411 op += residue, occ -= residue; 412 tp = op; 413 do { 414 int t; 415 --tp; 416 t = codep->value; 417 codep = codep->next; 418 *tp = t; 419 } while (--residue && codep); 420 sp->dec_restart = 0; 421 } 422 423 bp = (unsigned char *)tif->tif_rawcp; 424 nbits = sp->lzw_nbits; 425 nextdata = sp->lzw_nextdata; 426 nextbits = sp->lzw_nextbits; 427 nbitsmask = sp->dec_nbitsmask; 428 oldcodep = sp->dec_oldcodep; 429 free_entp = sp->dec_free_entp; 430 maxcodep = sp->dec_maxcodep; 431 432 while (occ > 0) { 433 NextCode(tif, sp, bp, code, GetNextCode); 434 if (code == CODE_EOI) 435 break; 436 if (code == CODE_CLEAR) { 437 free_entp = sp->dec_codetab + CODE_FIRST; 438 _TIFFmemset(free_entp, 0, 439 (CSIZE - CODE_FIRST) * sizeof (code_t)); 440 nbits = BITS_MIN; 441 nbitsmask = MAXCODE(BITS_MIN); 442 maxcodep = sp->dec_codetab + nbitsmask-1; 443 NextCode(tif, sp, bp, code, GetNextCode); 444 if (code == CODE_EOI) 445 break; 446 if (code >= CODE_CLEAR) { 447 TIFFErrorExt(tif->tif_clientdata, tif->tif_name, 448 "LZWDecode: Corrupted LZW table at scanline %d", 449 tif->tif_row); 450 return (0); 451 } 452 *op++ = (char)code, occ--; 453 oldcodep = sp->dec_codetab + code; 454 continue; 455 } 456 codep = sp->dec_codetab + code; 457 458 /* 459 * Add the new entry to the code table. 460 */ 461 if (free_entp < &sp->dec_codetab[0] || 462 free_entp >= &sp->dec_codetab[CSIZE]) { 463 TIFFErrorExt(tif->tif_clientdata, module, 464 "Corrupted LZW table at scanline %d", 465 tif->tif_row); 466 return (0); 467 } 468 469 free_entp->next = oldcodep; 470 if (free_entp->next < &sp->dec_codetab[0] || 471 free_entp->next >= &sp->dec_codetab[CSIZE]) { 472 TIFFErrorExt(tif->tif_clientdata, module, 473 "Corrupted LZW table at scanline %d", 474 tif->tif_row); 475 return (0); 476 } 477 free_entp->firstchar = free_entp->next->firstchar; 478 free_entp->length = free_entp->next->length+1; 479 free_entp->value = (codep < free_entp) ? 480 codep->firstchar : free_entp->firstchar; 481 if (++free_entp > maxcodep) { 482 if (++nbits > BITS_MAX) /* should not happen */ 483 nbits = BITS_MAX; 484 nbitsmask = MAXCODE(nbits); 485 maxcodep = sp->dec_codetab + nbitsmask-1; 486 } 487 oldcodep = codep; 488 if (code >= 256) { 489 /* 490 * Code maps to a string, copy string 491 * value to output (written in reverse). 492 */ 493 if(codep->length == 0) { 494 TIFFErrorExt(tif->tif_clientdata, module, 495 "Wrong length of decoded string: " 496 "data probably corrupted at scanline %d", 497 tif->tif_row); 498 return (0); 499 } 500 if (codep->length > occ) { 501 /* 502 * String is too long for decode buffer, 503 * locate portion that will fit, copy to 504 * the decode buffer, and setup restart 505 * logic for the next decoding call. 506 */ 507 sp->dec_codep = codep; 508 do { 509 codep = codep->next; 510 } while (codep && codep->length > occ); 511 if (codep) { 512 sp->dec_restart = (long)occ; 513 tp = op + occ; 514 do { 515 *--tp = codep->value; 516 codep = codep->next; 517 } while (--occ && codep); 518 if (codep) 519 codeLoop(tif, module); 520 } 521 break; 522 } 523 len = codep->length; 524 tp = op + len; 525 do { 526 int t; 527 --tp; 528 t = codep->value; 529 codep = codep->next; 530 *tp = t; 531 } while (codep && tp > op); 532 if (codep) { 533 codeLoop(tif, module); 534 break; 535 } 536 assert(occ >= len); 537 op += len, occ -= len; 538 } else 539 *op++ = (char)code, occ--; 540 } 541 542 tif->tif_rawcp = (uint8*) bp; 543 sp->lzw_nbits = (unsigned short) nbits; 544 sp->lzw_nextdata = nextdata; 545 sp->lzw_nextbits = nextbits; 546 sp->dec_nbitsmask = nbitsmask; 547 sp->dec_oldcodep = oldcodep; 548 sp->dec_free_entp = free_entp; 549 sp->dec_maxcodep = maxcodep; 550 551 if (occ > 0) { 552#if defined(__WIN32__) && (defined(_MSC_VER) || defined(__MINGW32__)) 553 TIFFErrorExt(tif->tif_clientdata, module, 554 "Not enough data at scanline %d (short %I64d bytes)", 555 tif->tif_row, (unsigned __int64) occ); 556#else 557 TIFFErrorExt(tif->tif_clientdata, module, 558 "Not enough data at scanline %d (short %llu bytes)", 559 tif->tif_row, (unsigned long long) occ); 560#endif 561 return (0); 562 } 563 return (1); 564} 565 566#ifdef LZW_COMPAT 567/* 568 * Decode a "hunk of data" for old images. 569 */ 570#define GetNextCodeCompat(sp, bp, code) { \ 571 nextdata |= (unsigned long) *(bp)++ << nextbits; \ 572 nextbits += 8; \ 573 if (nextbits < nbits) { \ 574 nextdata |= (unsigned long) *(bp)++ << nextbits;\ 575 nextbits += 8; \ 576 } \ 577 code = (hcode_t)(nextdata & nbitsmask); \ 578 nextdata >>= nbits; \ 579 nextbits -= nbits; \ 580} 581 582static int 583LZWDecodeCompat(TIFF* tif, uint8* op0, tmsize_t occ0, uint16 s) 584{ 585 static const char module[] = "LZWDecodeCompat"; 586 LZWCodecState *sp = DecoderState(tif); 587 char *op = (char*) op0; 588 long occ = (long) occ0; 589 char *tp; 590 unsigned char *bp; 591 int code, nbits; 592 long nextbits, nextdata, nbitsmask; 593 code_t *codep, *free_entp, *maxcodep, *oldcodep; 594 595 (void) s; 596 assert(sp != NULL); 597 598 /* 599 Fail if value does not fit in long. 600 */ 601 if ((tmsize_t) occ != occ0) 602 return (0); 603 604 /* 605 * Restart interrupted output operation. 606 */ 607 if (sp->dec_restart) { 608 long residue; 609 610 codep = sp->dec_codep; 611 residue = codep->length - sp->dec_restart; 612 if (residue > occ) { 613 /* 614 * Residue from previous decode is sufficient 615 * to satisfy decode request. Skip to the 616 * start of the decoded string, place decoded 617 * values in the output buffer, and return. 618 */ 619 sp->dec_restart += occ; 620 do { 621 codep = codep->next; 622 } while (--residue > occ); 623 tp = op + occ; 624 do { 625 *--tp = codep->value; 626 codep = codep->next; 627 } while (--occ); 628 return (1); 629 } 630 /* 631 * Residue satisfies only part of the decode request. 632 */ 633 op += residue, occ -= residue; 634 tp = op; 635 do { 636 *--tp = codep->value; 637 codep = codep->next; 638 } while (--residue); 639 sp->dec_restart = 0; 640 } 641 642 bp = (unsigned char *)tif->tif_rawcp; 643 nbits = sp->lzw_nbits; 644 nextdata = sp->lzw_nextdata; 645 nextbits = sp->lzw_nextbits; 646 nbitsmask = sp->dec_nbitsmask; 647 oldcodep = sp->dec_oldcodep; 648 free_entp = sp->dec_free_entp; 649 maxcodep = sp->dec_maxcodep; 650 651 while (occ > 0) { 652 NextCode(tif, sp, bp, code, GetNextCodeCompat); 653 if (code == CODE_EOI) 654 break; 655 if (code == CODE_CLEAR) { 656 free_entp = sp->dec_codetab + CODE_FIRST; 657 _TIFFmemset(free_entp, 0, 658 (CSIZE - CODE_FIRST) * sizeof (code_t)); 659 nbits = BITS_MIN; 660 nbitsmask = MAXCODE(BITS_MIN); 661 maxcodep = sp->dec_codetab + nbitsmask; 662 NextCode(tif, sp, bp, code, GetNextCodeCompat); 663 if (code == CODE_EOI) 664 break; 665 if (code >= CODE_CLEAR) { 666 TIFFErrorExt(tif->tif_clientdata, tif->tif_name, 667 "LZWDecode: Corrupted LZW table at scanline %d", 668 tif->tif_row); 669 return (0); 670 } 671 *op++ = code, occ--; 672 oldcodep = sp->dec_codetab + code; 673 continue; 674 } 675 codep = sp->dec_codetab + code; 676 677 /* 678 * Add the new entry to the code table. 679 */ 680 if (free_entp < &sp->dec_codetab[0] || 681 free_entp >= &sp->dec_codetab[CSIZE]) { 682 TIFFErrorExt(tif->tif_clientdata, module, 683 "Corrupted LZW table at scanline %d", tif->tif_row); 684 return (0); 685 } 686 687 free_entp->next = oldcodep; 688 if (free_entp->next < &sp->dec_codetab[0] || 689 free_entp->next >= &sp->dec_codetab[CSIZE]) { 690 TIFFErrorExt(tif->tif_clientdata, module, 691 "Corrupted LZW table at scanline %d", tif->tif_row); 692 return (0); 693 } 694 free_entp->firstchar = free_entp->next->firstchar; 695 free_entp->length = free_entp->next->length+1; 696 free_entp->value = (codep < free_entp) ? 697 codep->firstchar : free_entp->firstchar; 698 if (++free_entp > maxcodep) { 699 if (++nbits > BITS_MAX) /* should not happen */ 700 nbits = BITS_MAX; 701 nbitsmask = MAXCODE(nbits); 702 maxcodep = sp->dec_codetab + nbitsmask; 703 } 704 oldcodep = codep; 705 if (code >= 256) { 706 /* 707 * Code maps to a string, copy string 708 * value to output (written in reverse). 709 */ 710 if(codep->length == 0) { 711 TIFFErrorExt(tif->tif_clientdata, module, 712 "Wrong length of decoded " 713 "string: data probably corrupted at scanline %d", 714 tif->tif_row); 715 return (0); 716 } 717 if (codep->length > occ) { 718 /* 719 * String is too long for decode buffer, 720 * locate portion that will fit, copy to 721 * the decode buffer, and setup restart 722 * logic for the next decoding call. 723 */ 724 sp->dec_codep = codep; 725 do { 726 codep = codep->next; 727 } while (codep->length > occ); 728 sp->dec_restart = occ; 729 tp = op + occ; 730 do { 731 *--tp = codep->value; 732 codep = codep->next; 733 } while (--occ); 734 break; 735 } 736 assert(occ >= codep->length); 737 op += codep->length, occ -= codep->length; 738 tp = op; 739 do { 740 *--tp = codep->value; 741 } while( (codep = codep->next) != NULL ); 742 } else 743 *op++ = code, occ--; 744 } 745 746 tif->tif_rawcp = (uint8*) bp; 747 sp->lzw_nbits = nbits; 748 sp->lzw_nextdata = nextdata; 749 sp->lzw_nextbits = nextbits; 750 sp->dec_nbitsmask = nbitsmask; 751 sp->dec_oldcodep = oldcodep; 752 sp->dec_free_entp = free_entp; 753 sp->dec_maxcodep = maxcodep; 754 755 if (occ > 0) { 756#if defined(__WIN32__) && (defined(_MSC_VER) || defined(__MINGW32__)) 757 TIFFErrorExt(tif->tif_clientdata, module, 758 "Not enough data at scanline %d (short %I64d bytes)", 759 tif->tif_row, (unsigned __int64) occ); 760#else 761 TIFFErrorExt(tif->tif_clientdata, module, 762 "Not enough data at scanline %d (short %llu bytes)", 763 tif->tif_row, (unsigned long long) occ); 764#endif 765 return (0); 766 } 767 return (1); 768} 769#endif /* LZW_COMPAT */ 770 771/* 772 * LZW Encoding. 773 */ 774 775static int 776LZWSetupEncode(TIFF* tif) 777{ 778 static const char module[] = "LZWSetupEncode"; 779 LZWCodecState* sp = EncoderState(tif); 780 781 assert(sp != NULL); 782 sp->enc_hashtab = (hash_t*) _TIFFmalloc(HSIZE*sizeof (hash_t)); 783 if (sp->enc_hashtab == NULL) { 784 TIFFErrorExt(tif->tif_clientdata, module, 785 "No space for LZW hash table"); 786 return (0); 787 } 788 return (1); 789} 790 791/* 792 * Reset encoding state at the start of a strip. 793 */ 794static int 795LZWPreEncode(TIFF* tif, uint16 s) 796{ 797 LZWCodecState *sp = EncoderState(tif); 798 799 (void) s; 800 assert(sp != NULL); 801 802 if( sp->enc_hashtab == NULL ) 803 { 804 tif->tif_setupencode( tif ); 805 } 806 807 sp->lzw_nbits = BITS_MIN; 808 sp->lzw_maxcode = MAXCODE(BITS_MIN); 809 sp->lzw_free_ent = CODE_FIRST; 810 sp->lzw_nextbits = 0; 811 sp->lzw_nextdata = 0; 812 sp->enc_checkpoint = CHECK_GAP; 813 sp->enc_ratio = 0; 814 sp->enc_incount = 0; 815 sp->enc_outcount = 0; 816 /* 817 * The 4 here insures there is space for 2 max-sized 818 * codes in LZWEncode and LZWPostDecode. 819 */ 820 sp->enc_rawlimit = tif->tif_rawdata + tif->tif_rawdatasize-1 - 4; 821 cl_hash(sp); /* clear hash table */ 822 sp->enc_oldcode = (hcode_t) -1; /* generates CODE_CLEAR in LZWEncode */ 823 return (1); 824} 825 826#define CALCRATIO(sp, rat) { \ 827 if (incount > 0x007fffff) { /* NB: shift will overflow */\ 828 rat = outcount >> 8; \ 829 rat = (rat == 0 ? 0x7fffffff : incount/rat); \ 830 } else \ 831 rat = (incount<<8) / outcount; \ 832} 833#define PutNextCode(op, c) { \ 834 nextdata = (nextdata << nbits) | c; \ 835 nextbits += nbits; \ 836 *op++ = (unsigned char)(nextdata >> (nextbits-8)); \ 837 nextbits -= 8; \ 838 if (nextbits >= 8) { \ 839 *op++ = (unsigned char)(nextdata >> (nextbits-8)); \ 840 nextbits -= 8; \ 841 } \ 842 outcount += nbits; \ 843} 844 845/* 846 * Encode a chunk of pixels. 847 * 848 * Uses an open addressing double hashing (no chaining) on the 849 * prefix code/next character combination. We do a variant of 850 * Knuth's algorithm D (vol. 3, sec. 6.4) along with G. Knott's 851 * relatively-prime secondary probe. Here, the modular division 852 * first probe is gives way to a faster exclusive-or manipulation. 853 * Also do block compression with an adaptive reset, whereby the 854 * code table is cleared when the compression ratio decreases, 855 * but after the table fills. The variable-length output codes 856 * are re-sized at this point, and a CODE_CLEAR is generated 857 * for the decoder. 858 */ 859static int 860LZWEncode(TIFF* tif, uint8* bp, tmsize_t cc, uint16 s) 861{ 862 register LZWCodecState *sp = EncoderState(tif); 863 register long fcode; 864 register hash_t *hp; 865 register int h, c; 866 hcode_t ent; 867 long disp; 868 long incount, outcount, checkpoint; 869 long nextdata, nextbits; 870 int free_ent, maxcode, nbits; 871 uint8* op; 872 uint8* limit; 873 874 (void) s; 875 if (sp == NULL) 876 return (0); 877 878 assert(sp->enc_hashtab != NULL); 879 880 /* 881 * Load local state. 882 */ 883 incount = sp->enc_incount; 884 outcount = sp->enc_outcount; 885 checkpoint = sp->enc_checkpoint; 886 nextdata = sp->lzw_nextdata; 887 nextbits = sp->lzw_nextbits; 888 free_ent = sp->lzw_free_ent; 889 maxcode = sp->lzw_maxcode; 890 nbits = sp->lzw_nbits; 891 op = tif->tif_rawcp; 892 limit = sp->enc_rawlimit; 893 ent = sp->enc_oldcode; 894 895 if (ent == (hcode_t) -1 && cc > 0) { 896 /* 897 * NB: This is safe because it can only happen 898 * at the start of a strip where we know there 899 * is space in the data buffer. 900 */ 901 PutNextCode(op, CODE_CLEAR); 902 ent = *bp++; cc--; incount++; 903 } 904 while (cc > 0) { 905 c = *bp++; cc--; incount++; 906 fcode = ((long)c << BITS_MAX) + ent; 907 h = (c << HSHIFT) ^ ent; /* xor hashing */ 908#ifdef _WINDOWS 909 /* 910 * Check hash index for an overflow. 911 */ 912 if (h >= HSIZE) 913 h -= HSIZE; 914#endif 915 hp = &sp->enc_hashtab[h]; 916 if (hp->hash == fcode) { 917 ent = hp->code; 918 continue; 919 } 920 if (hp->hash >= 0) { 921 /* 922 * Primary hash failed, check secondary hash. 923 */ 924 disp = HSIZE - h; 925 if (h == 0) 926 disp = 1; 927 do { 928 /* 929 * Avoid pointer arithmetic 'cuz of 930 * wraparound problems with segments. 931 */ 932 if ((h -= disp) < 0) 933 h += HSIZE; 934 hp = &sp->enc_hashtab[h]; 935 if (hp->hash == fcode) { 936 ent = hp->code; 937 goto hit; 938 } 939 } while (hp->hash >= 0); 940 } 941 /* 942 * New entry, emit code and add to table. 943 */ 944 /* 945 * Verify there is space in the buffer for the code 946 * and any potential Clear code that might be emitted 947 * below. The value of limit is setup so that there 948 * are at least 4 bytes free--room for 2 codes. 949 */ 950 if (op > limit) { 951 tif->tif_rawcc = (tmsize_t)(op - tif->tif_rawdata); 952 TIFFFlushData1(tif); 953 op = tif->tif_rawdata; 954 } 955 PutNextCode(op, ent); 956 ent = c; 957 hp->code = free_ent++; 958 hp->hash = fcode; 959 if (free_ent == CODE_MAX-1) { 960 /* table is full, emit clear code and reset */ 961 cl_hash(sp); 962 sp->enc_ratio = 0; 963 incount = 0; 964 outcount = 0; 965 free_ent = CODE_FIRST; 966 PutNextCode(op, CODE_CLEAR); 967 nbits = BITS_MIN; 968 maxcode = MAXCODE(BITS_MIN); 969 } else { 970 /* 971 * If the next entry is going to be too big for 972 * the code size, then increase it, if possible. 973 */ 974 if (free_ent > maxcode) { 975 nbits++; 976 assert(nbits <= BITS_MAX); 977 maxcode = (int) MAXCODE(nbits); 978 } else if (incount >= checkpoint) { 979 long rat; 980 /* 981 * Check compression ratio and, if things seem 982 * to be slipping, clear the hash table and 983 * reset state. The compression ratio is a 984 * 24+8-bit fractional number. 985 */ 986 checkpoint = incount+CHECK_GAP; 987 CALCRATIO(sp, rat); 988 if (rat <= sp->enc_ratio) { 989 cl_hash(sp); 990 sp->enc_ratio = 0; 991 incount = 0; 992 outcount = 0; 993 free_ent = CODE_FIRST; 994 PutNextCode(op, CODE_CLEAR); 995 nbits = BITS_MIN; 996 maxcode = MAXCODE(BITS_MIN); 997 } else 998 sp->enc_ratio = rat; 999 } 1000 } 1001 hit: 1002 ; 1003 } 1004 1005 /* 1006 * Restore global state. 1007 */ 1008 sp->enc_incount = incount; 1009 sp->enc_outcount = outcount; 1010 sp->enc_checkpoint = checkpoint; 1011 sp->enc_oldcode = ent; 1012 sp->lzw_nextdata = nextdata; 1013 sp->lzw_nextbits = nextbits; 1014 sp->lzw_free_ent = free_ent; 1015 sp->lzw_maxcode = maxcode; 1016 sp->lzw_nbits = nbits; 1017 tif->tif_rawcp = op; 1018 return (1); 1019} 1020 1021/* 1022 * Finish off an encoded strip by flushing the last 1023 * string and tacking on an End Of Information code. 1024 */ 1025static int 1026LZWPostEncode(TIFF* tif) 1027{ 1028 register LZWCodecState *sp = EncoderState(tif); 1029 uint8* op = tif->tif_rawcp; 1030 long nextbits = sp->lzw_nextbits; 1031 long nextdata = sp->lzw_nextdata; 1032 long outcount = sp->enc_outcount; 1033 int nbits = sp->lzw_nbits; 1034 1035 if (op > sp->enc_rawlimit) { 1036 tif->tif_rawcc = (tmsize_t)(op - tif->tif_rawdata); 1037 TIFFFlushData1(tif); 1038 op = tif->tif_rawdata; 1039 } 1040 if (sp->enc_oldcode != (hcode_t) -1) { 1041 PutNextCode(op, sp->enc_oldcode); 1042 sp->enc_oldcode = (hcode_t) -1; 1043 } 1044 PutNextCode(op, CODE_EOI); 1045 if (nextbits > 0) 1046 *op++ = (unsigned char)(nextdata << (8-nextbits)); 1047 tif->tif_rawcc = (tmsize_t)(op - tif->tif_rawdata); 1048 return (1); 1049} 1050 1051/* 1052 * Reset encoding hash table. 1053 */ 1054static void 1055cl_hash(LZWCodecState* sp) 1056{ 1057 register hash_t *hp = &sp->enc_hashtab[HSIZE-1]; 1058 register long i = HSIZE-8; 1059 1060 do { 1061 i -= 8; 1062 hp[-7].hash = -1; 1063 hp[-6].hash = -1; 1064 hp[-5].hash = -1; 1065 hp[-4].hash = -1; 1066 hp[-3].hash = -1; 1067 hp[-2].hash = -1; 1068 hp[-1].hash = -1; 1069 hp[ 0].hash = -1; 1070 hp -= 8; 1071 } while (i >= 0); 1072 for (i += 8; i > 0; i--, hp--) 1073 hp->hash = -1; 1074} 1075 1076static void 1077LZWCleanup(TIFF* tif) 1078{ 1079 (void)TIFFPredictorCleanup(tif); 1080 1081 assert(tif->tif_data != 0); 1082 1083 if (DecoderState(tif)->dec_codetab) 1084 _TIFFfree(DecoderState(tif)->dec_codetab); 1085 1086 if (EncoderState(tif)->enc_hashtab) 1087 _TIFFfree(EncoderState(tif)->enc_hashtab); 1088 1089 _TIFFfree(tif->tif_data); 1090 tif->tif_data = NULL; 1091 1092 _TIFFSetDefaultCompressionState(tif); 1093} 1094 1095int 1096TIFFInitLZW(TIFF* tif, int scheme) 1097{ 1098 static const char module[] = "TIFFInitLZW"; 1099 assert(scheme == COMPRESSION_LZW); 1100 /* 1101 * Allocate state block so tag methods have storage to record values. 1102 */ 1103 tif->tif_data = (uint8*) _TIFFmalloc(sizeof (LZWCodecState)); 1104 if (tif->tif_data == NULL) 1105 goto bad; 1106 DecoderState(tif)->dec_codetab = NULL; 1107 DecoderState(tif)->dec_decode = NULL; 1108 EncoderState(tif)->enc_hashtab = NULL; 1109 LZWState(tif)->rw_mode = tif->tif_mode; 1110 1111 /* 1112 * Install codec methods. 1113 */ 1114 tif->tif_fixuptags = LZWFixupTags; 1115 tif->tif_setupdecode = LZWSetupDecode; 1116 tif->tif_predecode = LZWPreDecode; 1117 tif->tif_decoderow = LZWDecode; 1118 tif->tif_decodestrip = LZWDecode; 1119 tif->tif_decodetile = LZWDecode; 1120 tif->tif_setupencode = LZWSetupEncode; 1121 tif->tif_preencode = LZWPreEncode; 1122 tif->tif_postencode = LZWPostEncode; 1123 tif->tif_encoderow = LZWEncode; 1124 tif->tif_encodestrip = LZWEncode; 1125 tif->tif_encodetile = LZWEncode; 1126 tif->tif_cleanup = LZWCleanup; 1127 /* 1128 * Setup predictor setup. 1129 */ 1130 (void) TIFFPredictorInit(tif); 1131 return (1); 1132bad: 1133 TIFFErrorExt(tif->tif_clientdata, module, 1134 "No space for LZW state block"); 1135 return (0); 1136} 1137 1138/* 1139 * Copyright (c) 1985, 1986 The Regents of the University of California. 1140 * All rights reserved. 1141 * 1142 * This code is derived from software contributed to Berkeley by 1143 * James A. Woods, derived from original work by Spencer Thomas 1144 * and Joseph Orost. 1145 * 1146 * Redistribution and use in source and binary forms are permitted 1147 * provided that the above copyright notice and this paragraph are 1148 * duplicated in all such forms and that any documentation, 1149 * advertising materials, and other materials related to such 1150 * distribution and use acknowledge that the software was developed 1151 * by the University of California, Berkeley. The name of the 1152 * University may not be used to endorse or promote products derived 1153 * from this software without specific prior written permission. 1154 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR 1155 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED 1156 * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. 1157 */ 1158#endif /* LZW_SUPPORT */ 1159 1160/* vim: set ts=8 sts=8 sw=8 noet: */ 1161/* 1162 * Local Variables: 1163 * mode: c 1164 * c-basic-offset: 8 1165 * fill-column: 78 1166 * End: 1167 */ 1168