1/* $Id: tif_lzw.c,v 1.45 2011-04-02 20:54:09 bfriesen Exp $ */
2
3/*
4 * Copyright (c) 1988-1997 Sam Leffler
5 * Copyright (c) 1991-1997 Silicon Graphics, Inc.
6 *
7 * Permission to use, copy, modify, distribute, and sell this software and
8 * its documentation for any purpose is hereby granted without fee, provided
9 * that (i) the above copyright notices and this permission notice appear in
10 * all copies of the software and related documentation, and (ii) the names of
11 * Sam Leffler and Silicon Graphics may not be used in any advertising or
12 * publicity relating to the software without the specific, prior written
13 * permission of Sam Leffler and Silicon Graphics.
14 *
15 * THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY
17 * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
18 *
19 * IN NO EVENT SHALL SAM LEFFLER OR SILICON GRAPHICS BE LIABLE FOR
20 * ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND,
21 * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
22 * WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF
23 * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
24 * OF THIS SOFTWARE.
25 */
26
27#include "tiffiop.h"
28#ifdef LZW_SUPPORT
29/*
30 * TIFF Library.
31 * Rev 5.0 Lempel-Ziv & Welch Compression Support
32 *
33 * This code is derived from the compress program whose code is
34 * derived from software contributed to Berkeley by James A. Woods,
35 * derived from original work by Spencer Thomas and Joseph Orost.
36 *
37 * The original Berkeley copyright notice appears below in its entirety.
38 */
39#include "tif_predict.h"
40
41#include <stdio.h>
42
43/*
44 * NB: The 5.0 spec describes a different algorithm than Aldus
45 *     implements.  Specifically, Aldus does code length transitions
46 *     one code earlier than should be done (for real LZW).
47 *     Earlier versions of this library implemented the correct
48 *     LZW algorithm, but emitted codes in a bit order opposite
49 *     to the TIFF spec.  Thus, to maintain compatibility w/ Aldus
50 *     we interpret MSB-LSB ordered codes to be images written w/
51 *     old versions of this library, but otherwise adhere to the
52 *     Aldus "off by one" algorithm.
53 *
54 * Future revisions to the TIFF spec are expected to "clarify this issue".
55 */
56#define LZW_COMPAT              /* include backwards compatibility code */
57/*
58 * Each strip of data is supposed to be terminated by a CODE_EOI.
59 * If the following #define is included, the decoder will also
60 * check for end-of-strip w/o seeing this code.  This makes the
61 * library more robust, but also slower.
62 */
63#define LZW_CHECKEOS            /* include checks for strips w/o EOI code */
64
65#define MAXCODE(n)	((1L<<(n))-1)
66/*
67 * The TIFF spec specifies that encoded bit
68 * strings range from 9 to 12 bits.
69 */
70#define BITS_MIN        9               /* start with 9 bits */
71#define BITS_MAX        12              /* max of 12 bit strings */
72/* predefined codes */
73#define CODE_CLEAR      256             /* code to clear string table */
74#define CODE_EOI        257             /* end-of-information code */
75#define CODE_FIRST      258             /* first free code entry */
76#define CODE_MAX        MAXCODE(BITS_MAX)
77#define HSIZE           9001L           /* 91% occupancy */
78#define HSHIFT          (13-8)
79#ifdef LZW_COMPAT
80/* NB: +1024 is for compatibility with old files */
81#define CSIZE           (MAXCODE(BITS_MAX)+1024L)
82#else
83#define CSIZE           (MAXCODE(BITS_MAX)+1L)
84#endif
85
86/*
87 * State block for each open TIFF file using LZW
88 * compression/decompression.  Note that the predictor
89 * state block must be first in this data structure.
90 */
91typedef struct {
92    TIFFPredictorState predict;     /* predictor super class */
93
94    unsigned short  nbits;          /* # of bits/code */
95    unsigned short  maxcode;        /* maximum code for lzw_nbits */
96    unsigned short  free_ent;       /* next free entry in hash table */
97    long            nextdata;       /* next bits of i/o */
98    long            nextbits;       /* # of valid bits in lzw_nextdata */
99
100    int             rw_mode;        /* preserve rw_mode from init */
101} LZWBaseState;
102
103#define lzw_nbits       base.nbits
104#define lzw_maxcode     base.maxcode
105#define lzw_free_ent    base.free_ent
106#define lzw_nextdata    base.nextdata
107#define lzw_nextbits    base.nextbits
108
109/*
110 * Encoding-specific state.
111 */
112typedef uint16 hcode_t;			/* codes fit in 16 bits */
113typedef struct {
114    long	hash;
115    hcode_t	code;
116} hash_t;
117
118/*
119 * Decoding-specific state.
120 */
121typedef struct code_ent {
122    struct code_ent *next;
123    unsigned short	length;		/* string len, including this token */
124    unsigned char	value;		/* data value */
125    unsigned char	firstchar;	/* first token of string */
126} code_t;
127
128typedef int (*decodeFunc)(TIFF*, uint8*, tmsize_t, uint16);
129
130typedef struct {
131    LZWBaseState base;
132
133    /* Decoding specific data */
134    long    dec_nbitsmask;		/* lzw_nbits 1 bits, right adjusted */
135    long    dec_restart;		/* restart count */
136#ifdef LZW_CHECKEOS
137    uint64  dec_bitsleft;		/* available bits in raw data */
138#endif
139    decodeFunc dec_decode;		/* regular or backwards compatible */
140    code_t* dec_codep;		/* current recognized code */
141    code_t* dec_oldcodep;		/* previously recognized code */
142    code_t* dec_free_entp;		/* next free entry */
143    code_t* dec_maxcodep;		/* max available entry */
144    code_t* dec_codetab;		/* kept separate for small machines */
145
146    /* Encoding specific data */
147    int     enc_oldcode;		/* last code encountered */
148    long    enc_checkpoint;		/* point at which to clear table */
149#define CHECK_GAP	10000		/* enc_ratio check interval */
150    long    enc_ratio;		/* current compression ratio */
151    long    enc_incount;		/* (input) data bytes encoded */
152    long    enc_outcount;		/* encoded (output) bytes */
153    uint8*  enc_rawlimit;		/* bound on tif_rawdata buffer */
154    hash_t* enc_hashtab;		/* kept separate for small machines */
155} LZWCodecState;
156
157#define LZWState(tif)		((LZWBaseState*) (tif)->tif_data)
158#define DecoderState(tif)	((LZWCodecState*) LZWState(tif))
159#define EncoderState(tif)	((LZWCodecState*) LZWState(tif))
160
161static int LZWDecode(TIFF* tif, uint8* op0, tmsize_t occ0, uint16 s);
162#ifdef LZW_COMPAT
163static int LZWDecodeCompat(TIFF* tif, uint8* op0, tmsize_t occ0, uint16 s);
164#endif
165static void cl_hash(LZWCodecState*);
166
167/*
168 * LZW Decoder.
169 */
170
171#ifdef LZW_CHECKEOS
172/*
173 * This check shouldn't be necessary because each
174 * strip is suppose to be terminated with CODE_EOI.
175 */
176#define	NextCode(_tif, _sp, _bp, _code, _get) {				\
177    if ((_sp)->dec_bitsleft < (uint64)nbits) {			\
178        TIFFWarningExt(_tif->tif_clientdata, module,		\
179            "LZWDecode: Strip %d not terminated with EOI code", \
180            _tif->tif_curstrip);				\
181        _code = CODE_EOI;					\
182    } else {							\
183        _get(_sp,_bp,_code);					\
184        (_sp)->dec_bitsleft -= nbits;				\
185    }								\
186}
187#else
188#define	NextCode(tif, sp, bp, code, get) get(sp, bp, code)
189#endif
190
191static int
192LZWFixupTags(TIFF* tif)
193{
194    (void) tif;
195    return (1);
196}
197
198static int
199LZWSetupDecode(TIFF* tif)
200{
201    static const char module[] = "LZWSetupDecode";
202    LZWCodecState* sp = DecoderState(tif);
203    int code;
204
205    if( sp == NULL )
206    {
207        /*
208         * Allocate state block so tag methods have storage to record
209         * values.
210        */
211        tif->tif_data = (uint8*) _TIFFmalloc(sizeof(LZWCodecState));
212        if (tif->tif_data == NULL)
213        {
214            TIFFErrorExt(tif->tif_clientdata, module, "No space for LZW state block");
215            return (0);
216        }
217
218        DecoderState(tif)->dec_codetab = NULL;
219        DecoderState(tif)->dec_decode = NULL;
220
221        /*
222         * Setup predictor setup.
223         */
224        (void) TIFFPredictorInit(tif);
225
226        sp = DecoderState(tif);
227    }
228
229    assert(sp != NULL);
230
231    if (sp->dec_codetab == NULL) {
232        sp->dec_codetab = (code_t*)_TIFFmalloc(CSIZE*sizeof (code_t));
233        if (sp->dec_codetab == NULL) {
234            TIFFErrorExt(tif->tif_clientdata, module,
235                     "No space for LZW code table");
236            return (0);
237        }
238        /*
239         * Pre-load the table.
240         */
241        code = 255;
242        do {
243            sp->dec_codetab[code].value = code;
244            sp->dec_codetab[code].firstchar = code;
245            sp->dec_codetab[code].length = 1;
246            sp->dec_codetab[code].next = NULL;
247        } while (code--);
248        /*
249         * Zero-out the unused entries
250                 */
251                 _TIFFmemset(&sp->dec_codetab[CODE_CLEAR], 0,
252                 (CODE_FIRST - CODE_CLEAR) * sizeof (code_t));
253    }
254    return (1);
255}
256
257/*
258 * Setup state for decoding a strip.
259 */
260static int
261LZWPreDecode(TIFF* tif, uint16 s)
262{
263    static const char module[] = "LZWPreDecode";
264    LZWCodecState *sp = DecoderState(tif);
265
266    (void) s;
267    assert(sp != NULL);
268    if( sp->dec_codetab == NULL )
269        {
270            tif->tif_setupdecode( tif );
271        }
272
273    /*
274     * Check for old bit-reversed codes.
275     */
276    if (tif->tif_rawdata[0] == 0 && (tif->tif_rawdata[1] & 0x1)) {
277#ifdef LZW_COMPAT
278        if (!sp->dec_decode) {
279            TIFFWarningExt(tif->tif_clientdata, module,
280                "Old-style LZW codes, convert file");
281            /*
282             * Override default decoding methods with
283             * ones that deal with the old coding.
284             * Otherwise the predictor versions set
285             * above will call the compatibility routines
286             * through the dec_decode method.
287             */
288            tif->tif_decoderow = LZWDecodeCompat;
289            tif->tif_decodestrip = LZWDecodeCompat;
290            tif->tif_decodetile = LZWDecodeCompat;
291            /*
292             * If doing horizontal differencing, must
293             * re-setup the predictor logic since we
294             * switched the basic decoder methods...
295             */
296            (*tif->tif_setupdecode)(tif);
297            sp->dec_decode = LZWDecodeCompat;
298        }
299        sp->lzw_maxcode = MAXCODE(BITS_MIN);
300#else /* !LZW_COMPAT */
301        if (!sp->dec_decode) {
302            TIFFErrorExt(tif->tif_clientdata, module,
303                "Old-style LZW codes not supported");
304            sp->dec_decode = LZWDecode;
305        }
306        return (0);
307#endif/* !LZW_COMPAT */
308    } else {
309        sp->lzw_maxcode = MAXCODE(BITS_MIN)-1;
310        sp->dec_decode = LZWDecode;
311    }
312    sp->lzw_nbits = BITS_MIN;
313    sp->lzw_nextbits = 0;
314    sp->lzw_nextdata = 0;
315
316    sp->dec_restart = 0;
317    sp->dec_nbitsmask = MAXCODE(BITS_MIN);
318#ifdef LZW_CHECKEOS
319    sp->dec_bitsleft = ((uint64)tif->tif_rawcc) << 3;
320#endif
321    sp->dec_free_entp = sp->dec_codetab + CODE_FIRST;
322    /*
323     * Zero entries that are not yet filled in.  We do
324     * this to guard against bogus input data that causes
325     * us to index into undefined entries.  If you can
326     * come up with a way to safely bounds-check input codes
327     * while decoding then you can remove this operation.
328     */
329    _TIFFmemset(sp->dec_free_entp, 0, (CSIZE-CODE_FIRST)*sizeof (code_t));
330    sp->dec_oldcodep = &sp->dec_codetab[-1];
331    sp->dec_maxcodep = &sp->dec_codetab[sp->dec_nbitsmask-1];
332    return (1);
333}
334
335/*
336 * Decode a "hunk of data".
337 */
338#define	GetNextCode(sp, bp, code) {				\
339    nextdata = (nextdata<<8) | *(bp)++;			\
340    nextbits += 8;						\
341    if (nextbits < nbits) {					\
342        nextdata = (nextdata<<8) | *(bp)++;		\
343        nextbits += 8;					\
344    }							\
345    code = (hcode_t)((nextdata >> (nextbits-nbits)) & nbitsmask);	\
346    nextbits -= nbits;					\
347}
348
349static void
350codeLoop(TIFF* tif, const char* module)
351{
352    TIFFErrorExt(tif->tif_clientdata, module,
353        "Bogus encoding, loop in the code table; scanline %d",
354        tif->tif_row);
355}
356
357static int
358LZWDecode(TIFF* tif, uint8* op0, tmsize_t occ0, uint16 s)
359{
360    static const char module[] = "LZWDecode";
361    LZWCodecState *sp = DecoderState(tif);
362    char *op = (char*) op0;
363    long occ = (long) occ0;
364    char *tp;
365    unsigned char *bp;
366    hcode_t code;
367    int len;
368    long nbits, nextbits, nextdata, nbitsmask;
369    code_t *codep, *free_entp, *maxcodep, *oldcodep;
370
371    (void) s;
372    assert(sp != NULL);
373        assert(sp->dec_codetab != NULL);
374
375    /*
376      Fail if value does not fit in long.
377    */
378    if ((tmsize_t) occ != occ0)
379            return (0);
380    /*
381     * Restart interrupted output operation.
382     */
383    if (sp->dec_restart) {
384        long residue;
385
386        codep = sp->dec_codep;
387        residue = codep->length - sp->dec_restart;
388        if (residue > occ) {
389            /*
390             * Residue from previous decode is sufficient
391             * to satisfy decode request.  Skip to the
392             * start of the decoded string, place decoded
393             * values in the output buffer, and return.
394             */
395            sp->dec_restart += occ;
396            do {
397                codep = codep->next;
398            } while (--residue > occ && codep);
399            if (codep) {
400                tp = op + occ;
401                do {
402                    *--tp = codep->value;
403                    codep = codep->next;
404                } while (--occ && codep);
405            }
406            return (1);
407        }
408        /*
409         * Residue satisfies only part of the decode request.
410         */
411        op += residue, occ -= residue;
412        tp = op;
413        do {
414            int t;
415            --tp;
416            t = codep->value;
417            codep = codep->next;
418            *tp = t;
419        } while (--residue && codep);
420        sp->dec_restart = 0;
421    }
422
423    bp = (unsigned char *)tif->tif_rawcp;
424    nbits = sp->lzw_nbits;
425    nextdata = sp->lzw_nextdata;
426    nextbits = sp->lzw_nextbits;
427    nbitsmask = sp->dec_nbitsmask;
428    oldcodep = sp->dec_oldcodep;
429    free_entp = sp->dec_free_entp;
430    maxcodep = sp->dec_maxcodep;
431
432    while (occ > 0) {
433        NextCode(tif, sp, bp, code, GetNextCode);
434        if (code == CODE_EOI)
435            break;
436        if (code == CODE_CLEAR) {
437            free_entp = sp->dec_codetab + CODE_FIRST;
438            _TIFFmemset(free_entp, 0,
439                    (CSIZE - CODE_FIRST) * sizeof (code_t));
440            nbits = BITS_MIN;
441            nbitsmask = MAXCODE(BITS_MIN);
442            maxcodep = sp->dec_codetab + nbitsmask-1;
443            NextCode(tif, sp, bp, code, GetNextCode);
444            if (code == CODE_EOI)
445                break;
446            if (code >= CODE_CLEAR) {
447                TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
448                "LZWDecode: Corrupted LZW table at scanline %d",
449                         tif->tif_row);
450                return (0);
451            }
452            *op++ = (char)code, occ--;
453            oldcodep = sp->dec_codetab + code;
454            continue;
455        }
456        codep = sp->dec_codetab + code;
457
458        /*
459         * Add the new entry to the code table.
460         */
461        if (free_entp < &sp->dec_codetab[0] ||
462            free_entp >= &sp->dec_codetab[CSIZE]) {
463            TIFFErrorExt(tif->tif_clientdata, module,
464                "Corrupted LZW table at scanline %d",
465                tif->tif_row);
466            return (0);
467        }
468
469        free_entp->next = oldcodep;
470        if (free_entp->next < &sp->dec_codetab[0] ||
471            free_entp->next >= &sp->dec_codetab[CSIZE]) {
472            TIFFErrorExt(tif->tif_clientdata, module,
473                "Corrupted LZW table at scanline %d",
474                tif->tif_row);
475            return (0);
476        }
477        free_entp->firstchar = free_entp->next->firstchar;
478        free_entp->length = free_entp->next->length+1;
479        free_entp->value = (codep < free_entp) ?
480            codep->firstchar : free_entp->firstchar;
481        if (++free_entp > maxcodep) {
482            if (++nbits > BITS_MAX)		/* should not happen */
483                nbits = BITS_MAX;
484            nbitsmask = MAXCODE(nbits);
485            maxcodep = sp->dec_codetab + nbitsmask-1;
486        }
487        oldcodep = codep;
488        if (code >= 256) {
489            /*
490             * Code maps to a string, copy string
491             * value to output (written in reverse).
492             */
493            if(codep->length == 0) {
494                TIFFErrorExt(tif->tif_clientdata, module,
495                    "Wrong length of decoded string: "
496                    "data probably corrupted at scanline %d",
497                    tif->tif_row);
498                return (0);
499            }
500            if (codep->length > occ) {
501                /*
502                 * String is too long for decode buffer,
503                 * locate portion that will fit, copy to
504                 * the decode buffer, and setup restart
505                 * logic for the next decoding call.
506                 */
507                sp->dec_codep = codep;
508                do {
509                    codep = codep->next;
510                } while (codep && codep->length > occ);
511                if (codep) {
512                    sp->dec_restart = (long)occ;
513                    tp = op + occ;
514                    do  {
515                        *--tp = codep->value;
516                        codep = codep->next;
517                    }  while (--occ && codep);
518                    if (codep)
519                        codeLoop(tif, module);
520                }
521                break;
522            }
523            len = codep->length;
524            tp = op + len;
525            do {
526                int t;
527                --tp;
528                t = codep->value;
529                codep = codep->next;
530                *tp = t;
531            } while (codep && tp > op);
532            if (codep) {
533                codeLoop(tif, module);
534                break;
535            }
536            assert(occ >= len);
537            op += len, occ -= len;
538        } else
539            *op++ = (char)code, occ--;
540    }
541
542    tif->tif_rawcp = (uint8*) bp;
543    sp->lzw_nbits = (unsigned short) nbits;
544    sp->lzw_nextdata = nextdata;
545    sp->lzw_nextbits = nextbits;
546    sp->dec_nbitsmask = nbitsmask;
547    sp->dec_oldcodep = oldcodep;
548    sp->dec_free_entp = free_entp;
549    sp->dec_maxcodep = maxcodep;
550
551    if (occ > 0) {
552#if defined(__WIN32__) && (defined(_MSC_VER) || defined(__MINGW32__))
553        TIFFErrorExt(tif->tif_clientdata, module,
554            "Not enough data at scanline %d (short %I64d bytes)",
555                 tif->tif_row, (unsigned __int64) occ);
556#else
557        TIFFErrorExt(tif->tif_clientdata, module,
558            "Not enough data at scanline %d (short %llu bytes)",
559                 tif->tif_row, (unsigned long long) occ);
560#endif
561        return (0);
562    }
563    return (1);
564}
565
566#ifdef LZW_COMPAT
567/*
568 * Decode a "hunk of data" for old images.
569 */
570#define	GetNextCodeCompat(sp, bp, code) {			\
571    nextdata |= (unsigned long) *(bp)++ << nextbits;	\
572    nextbits += 8;						\
573    if (nextbits < nbits) {					\
574        nextdata |= (unsigned long) *(bp)++ << nextbits;\
575        nextbits += 8;					\
576    }							\
577    code = (hcode_t)(nextdata & nbitsmask);			\
578    nextdata >>= nbits;					\
579    nextbits -= nbits;					\
580}
581
582static int
583LZWDecodeCompat(TIFF* tif, uint8* op0, tmsize_t occ0, uint16 s)
584{
585    static const char module[] = "LZWDecodeCompat";
586    LZWCodecState *sp = DecoderState(tif);
587    char *op = (char*) op0;
588    long occ = (long) occ0;
589    char *tp;
590    unsigned char *bp;
591    int code, nbits;
592    long nextbits, nextdata, nbitsmask;
593    code_t *codep, *free_entp, *maxcodep, *oldcodep;
594
595    (void) s;
596    assert(sp != NULL);
597
598    /*
599      Fail if value does not fit in long.
600    */
601    if ((tmsize_t) occ != occ0)
602            return (0);
603
604    /*
605     * Restart interrupted output operation.
606     */
607    if (sp->dec_restart) {
608        long residue;
609
610        codep = sp->dec_codep;
611        residue = codep->length - sp->dec_restart;
612        if (residue > occ) {
613            /*
614             * Residue from previous decode is sufficient
615             * to satisfy decode request.  Skip to the
616             * start of the decoded string, place decoded
617             * values in the output buffer, and return.
618             */
619            sp->dec_restart += occ;
620            do {
621                codep = codep->next;
622            } while (--residue > occ);
623            tp = op + occ;
624            do {
625                *--tp = codep->value;
626                codep = codep->next;
627            } while (--occ);
628            return (1);
629        }
630        /*
631         * Residue satisfies only part of the decode request.
632         */
633        op += residue, occ -= residue;
634        tp = op;
635        do {
636            *--tp = codep->value;
637            codep = codep->next;
638        } while (--residue);
639        sp->dec_restart = 0;
640    }
641
642    bp = (unsigned char *)tif->tif_rawcp;
643    nbits = sp->lzw_nbits;
644    nextdata = sp->lzw_nextdata;
645    nextbits = sp->lzw_nextbits;
646    nbitsmask = sp->dec_nbitsmask;
647    oldcodep = sp->dec_oldcodep;
648    free_entp = sp->dec_free_entp;
649    maxcodep = sp->dec_maxcodep;
650
651    while (occ > 0) {
652        NextCode(tif, sp, bp, code, GetNextCodeCompat);
653        if (code == CODE_EOI)
654            break;
655        if (code == CODE_CLEAR) {
656            free_entp = sp->dec_codetab + CODE_FIRST;
657            _TIFFmemset(free_entp, 0,
658                    (CSIZE - CODE_FIRST) * sizeof (code_t));
659            nbits = BITS_MIN;
660            nbitsmask = MAXCODE(BITS_MIN);
661            maxcodep = sp->dec_codetab + nbitsmask;
662            NextCode(tif, sp, bp, code, GetNextCodeCompat);
663            if (code == CODE_EOI)
664                break;
665            if (code >= CODE_CLEAR) {
666                TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
667                "LZWDecode: Corrupted LZW table at scanline %d",
668                         tif->tif_row);
669                return (0);
670            }
671            *op++ = code, occ--;
672            oldcodep = sp->dec_codetab + code;
673            continue;
674        }
675        codep = sp->dec_codetab + code;
676
677        /*
678         * Add the new entry to the code table.
679         */
680        if (free_entp < &sp->dec_codetab[0] ||
681            free_entp >= &sp->dec_codetab[CSIZE]) {
682            TIFFErrorExt(tif->tif_clientdata, module,
683                "Corrupted LZW table at scanline %d", tif->tif_row);
684            return (0);
685        }
686
687        free_entp->next = oldcodep;
688        if (free_entp->next < &sp->dec_codetab[0] ||
689            free_entp->next >= &sp->dec_codetab[CSIZE]) {
690            TIFFErrorExt(tif->tif_clientdata, module,
691                "Corrupted LZW table at scanline %d", tif->tif_row);
692            return (0);
693        }
694        free_entp->firstchar = free_entp->next->firstchar;
695        free_entp->length = free_entp->next->length+1;
696        free_entp->value = (codep < free_entp) ?
697            codep->firstchar : free_entp->firstchar;
698        if (++free_entp > maxcodep) {
699            if (++nbits > BITS_MAX)		/* should not happen */
700                nbits = BITS_MAX;
701            nbitsmask = MAXCODE(nbits);
702            maxcodep = sp->dec_codetab + nbitsmask;
703        }
704        oldcodep = codep;
705        if (code >= 256) {
706            /*
707             * Code maps to a string, copy string
708             * value to output (written in reverse).
709             */
710            if(codep->length == 0) {
711                TIFFErrorExt(tif->tif_clientdata, module,
712                    "Wrong length of decoded "
713                    "string: data probably corrupted at scanline %d",
714                    tif->tif_row);
715                return (0);
716            }
717            if (codep->length > occ) {
718                /*
719                 * String is too long for decode buffer,
720                 * locate portion that will fit, copy to
721                 * the decode buffer, and setup restart
722                 * logic for the next decoding call.
723                 */
724                sp->dec_codep = codep;
725                do {
726                    codep = codep->next;
727                } while (codep->length > occ);
728                sp->dec_restart = occ;
729                tp = op + occ;
730                do  {
731                    *--tp = codep->value;
732                    codep = codep->next;
733                }  while (--occ);
734                break;
735            }
736            assert(occ >= codep->length);
737            op += codep->length, occ -= codep->length;
738            tp = op;
739            do {
740                *--tp = codep->value;
741            } while( (codep = codep->next) != NULL );
742        } else
743            *op++ = code, occ--;
744    }
745
746    tif->tif_rawcp = (uint8*) bp;
747    sp->lzw_nbits = nbits;
748    sp->lzw_nextdata = nextdata;
749    sp->lzw_nextbits = nextbits;
750    sp->dec_nbitsmask = nbitsmask;
751    sp->dec_oldcodep = oldcodep;
752    sp->dec_free_entp = free_entp;
753    sp->dec_maxcodep = maxcodep;
754
755    if (occ > 0) {
756#if defined(__WIN32__) && (defined(_MSC_VER) || defined(__MINGW32__))
757        TIFFErrorExt(tif->tif_clientdata, module,
758            "Not enough data at scanline %d (short %I64d bytes)",
759                 tif->tif_row, (unsigned __int64) occ);
760#else
761        TIFFErrorExt(tif->tif_clientdata, module,
762            "Not enough data at scanline %d (short %llu bytes)",
763                 tif->tif_row, (unsigned long long) occ);
764#endif
765        return (0);
766    }
767    return (1);
768}
769#endif /* LZW_COMPAT */
770
771/*
772 * LZW Encoding.
773 */
774
775static int
776LZWSetupEncode(TIFF* tif)
777{
778    static const char module[] = "LZWSetupEncode";
779    LZWCodecState* sp = EncoderState(tif);
780
781    assert(sp != NULL);
782    sp->enc_hashtab = (hash_t*) _TIFFmalloc(HSIZE*sizeof (hash_t));
783    if (sp->enc_hashtab == NULL) {
784        TIFFErrorExt(tif->tif_clientdata, module,
785                 "No space for LZW hash table");
786        return (0);
787    }
788    return (1);
789}
790
791/*
792 * Reset encoding state at the start of a strip.
793 */
794static int
795LZWPreEncode(TIFF* tif, uint16 s)
796{
797    LZWCodecState *sp = EncoderState(tif);
798
799    (void) s;
800    assert(sp != NULL);
801
802    if( sp->enc_hashtab == NULL )
803        {
804            tif->tif_setupencode( tif );
805        }
806
807    sp->lzw_nbits = BITS_MIN;
808    sp->lzw_maxcode = MAXCODE(BITS_MIN);
809    sp->lzw_free_ent = CODE_FIRST;
810    sp->lzw_nextbits = 0;
811    sp->lzw_nextdata = 0;
812    sp->enc_checkpoint = CHECK_GAP;
813    sp->enc_ratio = 0;
814    sp->enc_incount = 0;
815    sp->enc_outcount = 0;
816    /*
817     * The 4 here insures there is space for 2 max-sized
818     * codes in LZWEncode and LZWPostDecode.
819     */
820    sp->enc_rawlimit = tif->tif_rawdata + tif->tif_rawdatasize-1 - 4;
821    cl_hash(sp);		/* clear hash table */
822    sp->enc_oldcode = (hcode_t) -1;	/* generates CODE_CLEAR in LZWEncode */
823    return (1);
824}
825
826#define	CALCRATIO(sp, rat) {					\
827    if (incount > 0x007fffff) { /* NB: shift will overflow */\
828        rat = outcount >> 8;				\
829        rat = (rat == 0 ? 0x7fffffff : incount/rat);	\
830    } else							\
831        rat = (incount<<8) / outcount;			\
832}
833#define	PutNextCode(op, c) {					\
834    nextdata = (nextdata << nbits) | c;			\
835    nextbits += nbits;					\
836    *op++ = (unsigned char)(nextdata >> (nextbits-8));		\
837    nextbits -= 8;						\
838    if (nextbits >= 8) {					\
839        *op++ = (unsigned char)(nextdata >> (nextbits-8));	\
840        nextbits -= 8;					\
841    }							\
842    outcount += nbits;					\
843}
844
845/*
846 * Encode a chunk of pixels.
847 *
848 * Uses an open addressing double hashing (no chaining) on the
849 * prefix code/next character combination.  We do a variant of
850 * Knuth's algorithm D (vol. 3, sec. 6.4) along with G. Knott's
851 * relatively-prime secondary probe.  Here, the modular division
852 * first probe is gives way to a faster exclusive-or manipulation.
853 * Also do block compression with an adaptive reset, whereby the
854 * code table is cleared when the compression ratio decreases,
855 * but after the table fills.  The variable-length output codes
856 * are re-sized at this point, and a CODE_CLEAR is generated
857 * for the decoder.
858 */
859static int
860LZWEncode(TIFF* tif, uint8* bp, tmsize_t cc, uint16 s)
861{
862    register LZWCodecState *sp = EncoderState(tif);
863    register long fcode;
864    register hash_t *hp;
865    register int h, c;
866    hcode_t ent;
867    long disp;
868    long incount, outcount, checkpoint;
869    long nextdata, nextbits;
870    int free_ent, maxcode, nbits;
871    uint8* op;
872    uint8* limit;
873
874    (void) s;
875    if (sp == NULL)
876        return (0);
877
878        assert(sp->enc_hashtab != NULL);
879
880    /*
881     * Load local state.
882     */
883    incount = sp->enc_incount;
884    outcount = sp->enc_outcount;
885    checkpoint = sp->enc_checkpoint;
886    nextdata = sp->lzw_nextdata;
887    nextbits = sp->lzw_nextbits;
888    free_ent = sp->lzw_free_ent;
889    maxcode = sp->lzw_maxcode;
890    nbits = sp->lzw_nbits;
891    op = tif->tif_rawcp;
892    limit = sp->enc_rawlimit;
893    ent = sp->enc_oldcode;
894
895    if (ent == (hcode_t) -1 && cc > 0) {
896        /*
897         * NB: This is safe because it can only happen
898         *     at the start of a strip where we know there
899         *     is space in the data buffer.
900         */
901        PutNextCode(op, CODE_CLEAR);
902        ent = *bp++; cc--; incount++;
903    }
904    while (cc > 0) {
905        c = *bp++; cc--; incount++;
906        fcode = ((long)c << BITS_MAX) + ent;
907        h = (c << HSHIFT) ^ ent;	/* xor hashing */
908#ifdef _WINDOWS
909        /*
910         * Check hash index for an overflow.
911         */
912        if (h >= HSIZE)
913            h -= HSIZE;
914#endif
915        hp = &sp->enc_hashtab[h];
916        if (hp->hash == fcode) {
917            ent = hp->code;
918            continue;
919        }
920        if (hp->hash >= 0) {
921            /*
922             * Primary hash failed, check secondary hash.
923             */
924            disp = HSIZE - h;
925            if (h == 0)
926                disp = 1;
927            do {
928                /*
929                 * Avoid pointer arithmetic 'cuz of
930                 * wraparound problems with segments.
931                 */
932                if ((h -= disp) < 0)
933                    h += HSIZE;
934                hp = &sp->enc_hashtab[h];
935                if (hp->hash == fcode) {
936                    ent = hp->code;
937                    goto hit;
938                }
939            } while (hp->hash >= 0);
940        }
941        /*
942         * New entry, emit code and add to table.
943         */
944        /*
945         * Verify there is space in the buffer for the code
946         * and any potential Clear code that might be emitted
947         * below.  The value of limit is setup so that there
948         * are at least 4 bytes free--room for 2 codes.
949         */
950        if (op > limit) {
951            tif->tif_rawcc = (tmsize_t)(op - tif->tif_rawdata);
952            TIFFFlushData1(tif);
953            op = tif->tif_rawdata;
954        }
955        PutNextCode(op, ent);
956        ent = c;
957        hp->code = free_ent++;
958        hp->hash = fcode;
959        if (free_ent == CODE_MAX-1) {
960            /* table is full, emit clear code and reset */
961            cl_hash(sp);
962            sp->enc_ratio = 0;
963            incount = 0;
964            outcount = 0;
965            free_ent = CODE_FIRST;
966            PutNextCode(op, CODE_CLEAR);
967            nbits = BITS_MIN;
968            maxcode = MAXCODE(BITS_MIN);
969        } else {
970            /*
971             * If the next entry is going to be too big for
972             * the code size, then increase it, if possible.
973             */
974            if (free_ent > maxcode) {
975                nbits++;
976                assert(nbits <= BITS_MAX);
977                maxcode = (int) MAXCODE(nbits);
978            } else if (incount >= checkpoint) {
979                long rat;
980                /*
981                 * Check compression ratio and, if things seem
982                 * to be slipping, clear the hash table and
983                 * reset state.  The compression ratio is a
984                 * 24+8-bit fractional number.
985                 */
986                checkpoint = incount+CHECK_GAP;
987                CALCRATIO(sp, rat);
988                if (rat <= sp->enc_ratio) {
989                    cl_hash(sp);
990                    sp->enc_ratio = 0;
991                    incount = 0;
992                    outcount = 0;
993                    free_ent = CODE_FIRST;
994                    PutNextCode(op, CODE_CLEAR);
995                    nbits = BITS_MIN;
996                    maxcode = MAXCODE(BITS_MIN);
997                } else
998                    sp->enc_ratio = rat;
999            }
1000        }
1001    hit:
1002        ;
1003    }
1004
1005    /*
1006     * Restore global state.
1007     */
1008    sp->enc_incount = incount;
1009    sp->enc_outcount = outcount;
1010    sp->enc_checkpoint = checkpoint;
1011    sp->enc_oldcode = ent;
1012    sp->lzw_nextdata = nextdata;
1013    sp->lzw_nextbits = nextbits;
1014    sp->lzw_free_ent = free_ent;
1015    sp->lzw_maxcode = maxcode;
1016    sp->lzw_nbits = nbits;
1017    tif->tif_rawcp = op;
1018    return (1);
1019}
1020
1021/*
1022 * Finish off an encoded strip by flushing the last
1023 * string and tacking on an End Of Information code.
1024 */
1025static int
1026LZWPostEncode(TIFF* tif)
1027{
1028    register LZWCodecState *sp = EncoderState(tif);
1029    uint8* op = tif->tif_rawcp;
1030    long nextbits = sp->lzw_nextbits;
1031    long nextdata = sp->lzw_nextdata;
1032    long outcount = sp->enc_outcount;
1033    int nbits = sp->lzw_nbits;
1034
1035    if (op > sp->enc_rawlimit) {
1036        tif->tif_rawcc = (tmsize_t)(op - tif->tif_rawdata);
1037        TIFFFlushData1(tif);
1038        op = tif->tif_rawdata;
1039    }
1040    if (sp->enc_oldcode != (hcode_t) -1) {
1041        PutNextCode(op, sp->enc_oldcode);
1042        sp->enc_oldcode = (hcode_t) -1;
1043    }
1044    PutNextCode(op, CODE_EOI);
1045    if (nextbits > 0)
1046        *op++ = (unsigned char)(nextdata << (8-nextbits));
1047    tif->tif_rawcc = (tmsize_t)(op - tif->tif_rawdata);
1048    return (1);
1049}
1050
1051/*
1052 * Reset encoding hash table.
1053 */
1054static void
1055cl_hash(LZWCodecState* sp)
1056{
1057    register hash_t *hp = &sp->enc_hashtab[HSIZE-1];
1058    register long i = HSIZE-8;
1059
1060    do {
1061        i -= 8;
1062        hp[-7].hash = -1;
1063        hp[-6].hash = -1;
1064        hp[-5].hash = -1;
1065        hp[-4].hash = -1;
1066        hp[-3].hash = -1;
1067        hp[-2].hash = -1;
1068        hp[-1].hash = -1;
1069        hp[ 0].hash = -1;
1070        hp -= 8;
1071    } while (i >= 0);
1072    for (i += 8; i > 0; i--, hp--)
1073        hp->hash = -1;
1074}
1075
1076static void
1077LZWCleanup(TIFF* tif)
1078{
1079    (void)TIFFPredictorCleanup(tif);
1080
1081    assert(tif->tif_data != 0);
1082
1083    if (DecoderState(tif)->dec_codetab)
1084        _TIFFfree(DecoderState(tif)->dec_codetab);
1085
1086    if (EncoderState(tif)->enc_hashtab)
1087        _TIFFfree(EncoderState(tif)->enc_hashtab);
1088
1089    _TIFFfree(tif->tif_data);
1090    tif->tif_data = NULL;
1091
1092    _TIFFSetDefaultCompressionState(tif);
1093}
1094
1095int
1096TIFFInitLZW(TIFF* tif, int scheme)
1097{
1098    static const char module[] = "TIFFInitLZW";
1099    assert(scheme == COMPRESSION_LZW);
1100    /*
1101     * Allocate state block so tag methods have storage to record values.
1102     */
1103    tif->tif_data = (uint8*) _TIFFmalloc(sizeof (LZWCodecState));
1104    if (tif->tif_data == NULL)
1105        goto bad;
1106    DecoderState(tif)->dec_codetab = NULL;
1107    DecoderState(tif)->dec_decode = NULL;
1108    EncoderState(tif)->enc_hashtab = NULL;
1109        LZWState(tif)->rw_mode = tif->tif_mode;
1110
1111    /*
1112     * Install codec methods.
1113     */
1114    tif->tif_fixuptags = LZWFixupTags;
1115    tif->tif_setupdecode = LZWSetupDecode;
1116    tif->tif_predecode = LZWPreDecode;
1117    tif->tif_decoderow = LZWDecode;
1118    tif->tif_decodestrip = LZWDecode;
1119    tif->tif_decodetile = LZWDecode;
1120    tif->tif_setupencode = LZWSetupEncode;
1121    tif->tif_preencode = LZWPreEncode;
1122    tif->tif_postencode = LZWPostEncode;
1123    tif->tif_encoderow = LZWEncode;
1124    tif->tif_encodestrip = LZWEncode;
1125    tif->tif_encodetile = LZWEncode;
1126    tif->tif_cleanup = LZWCleanup;
1127    /*
1128     * Setup predictor setup.
1129     */
1130    (void) TIFFPredictorInit(tif);
1131    return (1);
1132bad:
1133    TIFFErrorExt(tif->tif_clientdata, module,
1134             "No space for LZW state block");
1135    return (0);
1136}
1137
1138/*
1139 * Copyright (c) 1985, 1986 The Regents of the University of California.
1140 * All rights reserved.
1141 *
1142 * This code is derived from software contributed to Berkeley by
1143 * James A. Woods, derived from original work by Spencer Thomas
1144 * and Joseph Orost.
1145 *
1146 * Redistribution and use in source and binary forms are permitted
1147 * provided that the above copyright notice and this paragraph are
1148 * duplicated in all such forms and that any documentation,
1149 * advertising materials, and other materials related to such
1150 * distribution and use acknowledge that the software was developed
1151 * by the University of California, Berkeley.  The name of the
1152 * University may not be used to endorse or promote products derived
1153 * from this software without specific prior written permission.
1154 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
1155 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
1156 * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
1157 */
1158#endif /* LZW_SUPPORT */
1159
1160/* vim: set ts=8 sts=8 sw=8 noet: */
1161/*
1162 * Local Variables:
1163 * mode: c
1164 * c-basic-offset: 8
1165 * fill-column: 78
1166 * End:
1167 */
1168