lz4hc.c revision 61289dea1d94510a4d6386bc44c08d3d15083123
1/*
2LZ4 HC - High Compression Mode of LZ4
3Copyright (C) 2011-2014, Yann Collet.
4BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
5
6Redistribution and use in source and binary forms, with or without
7modification, are permitted provided that the following conditions are
8met:
9
10* Redistributions of source code must retain the above copyright
11notice, this list of conditions and the following disclaimer.
12* Redistributions in binary form must reproduce the above
13copyright notice, this list of conditions and the following disclaimer
14in the documentation and/or other materials provided with the
15distribution.
16
17THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29You can contact the author at :
30- LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
31- LZ4 source repository : http://code.google.com/p/lz4/
32*/
33
34
35
/**************************************
Tuning Parameter
**************************************/
/* Level substituted when callers pass compressionLevel == 0
   (see LZ4HC_compress_generic). */
#define LZ4HC_DEFAULT_COMPRESSIONLEVEL 8


/**************************************
Memory routines
**************************************/
#include <stdlib.h>   /* calloc, free */
#define ALLOCATOR(s)  calloc(1,s)   /* zero-initializing allocator */
#define FREEMEM       free
#include <string.h>   /* memset, memcpy */
#define MEM_INIT      memset
50
51
/**************************************
CPU Feature Detection
**************************************/
/* 32 or 64 bits ? Drives STEPSIZE / copy-width selection below. */
#if (defined(__x86_64__) || defined(_M_X64) || defined(_WIN64) \
    || defined(__64BIT__)  || defined(__mips64) \
    || defined(__powerpc64__) || defined(__powerpc64le__) \
    || defined(__ppc64__) || defined(__ppc64le__) \
    || defined(__PPC64__) || defined(__PPC64LE__) \
    || defined(__ia64) || defined(__itanium__) || defined(_M_IA64) \
    || defined(__s390x__) )   /* Detects 64 bits mode */
#  define LZ4_ARCH64 1
#else
#  define LZ4_ARCH64 0
#endif

/*
* Little Endian or Big Endian ?
* Overwrite the #define below if you know your architecture endianess
*/
#include <stdlib.h>   /* Apparently required to detect endianess */
#if defined (__GLIBC__)
#  include <endian.h>
#  if (__BYTE_ORDER == __BIG_ENDIAN)
#     define LZ4_BIG_ENDIAN 1
#  endif
#elif (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || defined(_BIG_ENDIAN)) && !(defined(__LITTLE_ENDIAN__) || defined(__LITTLE_ENDIAN) || defined(_LITTLE_ENDIAN))
#  define LZ4_BIG_ENDIAN 1
#elif defined(__sparc) || defined(__sparc__) \
    || defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) \
    || defined(__hpux)  || defined(__hppa) \
    || defined(_MIPSEB) || defined(__s390__)
#  define LZ4_BIG_ENDIAN 1
#else
/* Little Endian assumed. PDP Endian and other very rare endian format are unsupported. */
#endif

/*
* Unaligned memory access is automatically enabled for "common" CPU, such as x86.
* For others CPU, the compiler will be more cautious, and insert extra code to ensure aligned access is respected
* If you know your target CPU supports unaligned memory access, you want to force this option manually to improve performance
*/
#if defined(__ARM_FEATURE_UNALIGNED)
#  define LZ4_FORCE_UNALIGNED_ACCESS 1
#endif

/* Define this parameter if your target system or compiler does not support hardware bit count */
#if defined(_MSC_VER) && defined(_WIN32_WCE)            /* Visual Studio for Windows CE does not support Hardware bit count */
#  define LZ4_FORCE_SW_BITCOUNT
#endif
102
103
/**************************************
Compiler Options
**************************************/
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)   /* C99 */
/* "restrict" is a known keyword */
#else
#  define restrict /* Disable restrict */
#endif

#ifdef _MSC_VER    /* Visual Studio */
#  define FORCE_INLINE static __forceinline
#  include <intrin.h>                    /* For Visual 2005 */
#  if LZ4_ARCH64   /* 64-bits */
#    pragma intrinsic(_BitScanForward64) /* For Visual 2005 */
#    pragma intrinsic(_BitScanReverse64) /* For Visual 2005 */
#  else            /* 32-bits */
#    pragma intrinsic(_BitScanForward)   /* For Visual 2005 */
#    pragma intrinsic(_BitScanReverse)   /* For Visual 2005 */
#  endif
#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
#  pragma warning(disable : 4701)        /* disable: C4701: potentially uninitialized local variable used */
#else
#  ifdef __GNUC__
#    define FORCE_INLINE static inline __attribute__((always_inline))
#  else
#    define FORCE_INLINE static inline
#  endif
#endif

/* 16-bit byte swap, used by the little-endian offset read/write macros
   on big-endian hosts. */
#ifdef _MSC_VER  /* Visual Studio */
#  define lz4_bswap16(x) _byteswap_ushort(x)
#else
#  define lz4_bswap16(x)  ((unsigned short int) ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8)))
#endif
138
139
140/**************************************
141Includes
142**************************************/
143#include "lz4hc.h"
144#include "lz4.h"
145
146
/**************************************
Basic Types
**************************************/
#if defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)   /* C99 */
# include <stdint.h>
typedef uint8_t  BYTE;
typedef uint16_t U16;
typedef uint32_t U32;
typedef  int32_t S32;
typedef uint64_t U64;
#else
typedef unsigned char       BYTE;
typedef unsigned short      U16;
typedef unsigned int        U32;
typedef   signed int        S32;
typedef unsigned long long  U64;
#endif

/* Packed single-field structs: on GCC without forced unaligned access, the
   packed attribute makes the A16/A32/A64 loads below safe on any alignment. */
#if defined(__GNUC__)  && !defined(LZ4_FORCE_UNALIGNED_ACCESS)
#  define _PACKED __attribute__ ((packed))
#else
#  define _PACKED
#endif

#if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__)
#  ifdef __IBMC__
#    pragma pack(1)
#  else
#    pragma pack(push, 1)
#  endif
#endif

typedef struct _U16_S { U16 v; } _PACKED U16_S;
typedef struct _U32_S { U32 v; } _PACKED U32_S;
typedef struct _U64_S { U64 v; } _PACKED U64_S;

#if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__)
#  pragma pack(pop)
#endif

/* Unaligned memory accessors.
   NOTE(review): these pointer casts technically sidestep strict aliasing;
   the packed wrapper structs are the historical mitigation — verify with
   the target compiler before changing. */
#define A64(x) (((U64_S *)(x))->v)
#define A32(x) (((U32_S *)(x))->v)
#define A16(x) (((U16_S *)(x))->v)


/**************************************
Constants
**************************************/
#define MINMATCH 4                 /* minimum match length */

#define DICTIONARY_LOGSIZE 16      /* log2 of the 64 KB search window */
#define MAXD (1<<DICTIONARY_LOGSIZE)
#define MAXD_MASK ((U32)(MAXD - 1))
#define MAX_DISTANCE (MAXD - 1)

#define HASH_LOG (DICTIONARY_LOGSIZE-1)
#define HASHTABLESIZE (1 << HASH_LOG)
#define HASH_MASK (HASHTABLESIZE - 1)

/* LZ4 token layout: high 4 bits = literal run length, low 4 bits = match length */
#define ML_BITS  4
#define ML_MASK  (size_t)((1U<<ML_BITS)-1)
#define RUN_BITS (8-ML_BITS)
#define RUN_MASK ((1U<<RUN_BITS)-1)

#define COPYLENGTH 8
#define LASTLITERALS 5             /* matches must stop at least this many bytes before input end */
#define MFLIMIT (COPYLENGTH+MINMATCH)   /* no match may start within MFLIMIT bytes of input end */
#define MINLENGTH (MFLIMIT+1)
#define OPTIMAL_ML (int)((ML_MASK-1)+MINMATCH)

/* Size suffixes: written postfix, e.g. "64 KB" expands to 64*(1<<10) */
#define KB *(1<<10)
#define MB *(1<<20)
#define GB *(1U<<30)
220
221
/**************************************
Architecture-specific macros
**************************************/
#if LZ4_ARCH64   /* 64-bit */
#  define STEPSIZE 8
#  define LZ4_COPYSTEP(s,d)     A64(d) = A64(s); d+=8; s+=8;
#  define LZ4_COPYPACKET(s,d)   LZ4_COPYSTEP(s,d)
#  define AARCH                 A64
#else            /* 32-bit */
#  define STEPSIZE 4
#  define LZ4_COPYSTEP(s,d)     A32(d) = A32(s); d+=4; s+=4;
#  define LZ4_COPYPACKET(s,d)   LZ4_COPYSTEP(s,d); LZ4_COPYSTEP(s,d);
#  define AARCH                 A32
#endif

/* Match offsets are stored little-endian in the compressed stream,
   regardless of host byte order. */
#if defined(LZ4_BIG_ENDIAN)
#  define LZ4_READ_LITTLEENDIAN_16(d,s,p) { U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
#  define LZ4_WRITE_LITTLEENDIAN_16(p,i)  { U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p+=2; }
#else      /* Little Endian */
#  define LZ4_READ_LITTLEENDIAN_16(d,s,p) { d = (s) - A16(p); }
#  define LZ4_WRITE_LITTLEENDIAN_16(p,v)  { A16(p) = v; p+=2; }
#endif


/**************************************
 Local Types
**************************************/
/* HC compression context. Positions are stored as 32-bit indices relative
   to `base`; the hash table maps a 4-byte prefix hash to the most recent
   index, and the chain table links each index to the previous occurrence
   via a 16-bit backward delta. */
typedef struct
{
    U32   hashTable[HASHTABLESIZE];
    U16   chainTable[MAXD];
    const BYTE* end;        /* next block here to keep current prefix as prefix */
    const BYTE* base;       /* All index relative to this position */
    const BYTE* dictBase;   /* alternate base for extDict */
    U32   dictLimit;        /* below that point, need extDict */
    U32   lowLimit;         /* below that point, no more dict */
    U32   nextToUpdate;     /* first index not yet inserted in the tables */
    U32   compressionLevel;
    const BYTE* inputBuffer;   /* deprecated */
} LZ4HC_Data_Structure;


/**************************************
 Macros
**************************************/
#define LZ4_STATIC_ASSERT(c)   { enum { LZ4_static_assert = 1/(!!(c)) }; }   /* Visual : use only *after* variable declarations */
#define LZ4_WILDCOPY(s,d,e)    do { LZ4_COPYPACKET(s,d) } while (d<e);   /* copies in word-sized chunks; may write a few bytes past e */
#define LZ4_BLINDCOPY(s,d,l)   { BYTE* e=d+l; LZ4_WILDCOPY(s,d,e); d=e; }
#define HASH_FUNCTION(i)       (((i) * 2654435761U) >> ((MINMATCH*8)-HASH_LOG))   /* Fibonacci/multiplicative hash of a 4-byte prefix */
#define DELTANEXT(p)           chainTable[(size_t)(p) & MAXD_MASK]
#define GETNEXT(p)             ((p) - (size_t)DELTANEXT(p))

/* Hash the first MINMATCH bytes at ptr. */
static U32 LZ4HC_hashPtr(const void* ptr) { return HASH_FUNCTION(A32(ptr)); }
275
276/**************************************
277Private functions
278**************************************/
#if LZ4_ARCH64

/* Given val = XOR of two 64-bit words (val != 0), return the number of
   leading bytes, in memory order, that the two words have in common.
   On little-endian hosts the first differing byte is the lowest-order
   nonzero byte (trailing-zero count); on big-endian hosts it is the
   highest-order one (leading-zero count). Uses hardware bit-scan where
   available, otherwise a branchy / De Bruijn software fallback. */
FORCE_INLINE int LZ4_NbCommonBytes (register U64 val)
{
#if defined(LZ4_BIG_ENDIAN)
#  if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
    unsigned long r = 0;
    _BitScanReverse64( &r, val );
    return (int)(r>>3);
#  elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
    return (__builtin_clzll(val) >> 3);
#  else
    /* software fallback: locate the top nonzero byte 32/16/8 bits at a time */
    int r;
    if (!(val>>32)) { r=4; } else { r=0; val>>=32; }
    if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
    r += (!val);
    return r;
#  endif
#else
#  if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
    unsigned long r = 0;
    _BitScanForward64( &r, val );
    return (int)(r>>3);
#  elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
    return (__builtin_ctzll(val) >> 3);
#  else
    /* De Bruijn multiplication maps the isolated lowest set bit to a byte index */
    static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
    return DeBruijnBytePos[((U64)((val & -val) * 0x0218A392CDABBD3F)) >> 58];
#  endif
#endif
}

#else

/* 32-bit variant of the routine above; same contract for U32 input. */
FORCE_INLINE int LZ4_NbCommonBytes (register U32 val)
{
#if defined(LZ4_BIG_ENDIAN)
#  if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
    unsigned long r;
    _BitScanReverse( &r, val );
    return (int)(r>>3);
#  elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
    return (__builtin_clz(val) >> 3);
#  else
    int r;
    if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
    r += (!val);
    return r;
#  endif
#else
#  if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
    unsigned long r;
    _BitScanForward( &r, val );
    return (int)(r>>3);
#  elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
    return (__builtin_ctz(val) >> 3);
#  else
    static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
    return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
#  endif
#endif
}

#endif
343
344
345static void LZ4HC_init (LZ4HC_Data_Structure* hc4, const BYTE* base)
346{
347    MEM_INIT((void*)hc4->hashTable, 0, sizeof(hc4->hashTable));
348    MEM_INIT(hc4->chainTable, 0xFF, sizeof(hc4->chainTable));
349    hc4->nextToUpdate = 64 KB;
350    hc4->base = base - 64 KB;
351    hc4->inputBuffer = base;
352    hc4->end = base;
353    hc4->dictBase = base - 64 KB;
354    hc4->dictLimit = 64 KB;
355    hc4->lowLimit = 64 KB;
356}
357
358
359/* Update chains up to ip (excluded) */
360FORCE_INLINE void LZ4HC_Insert (LZ4HC_Data_Structure* hc4, const BYTE* ip)
361{
362    U16* chainTable = hc4->chainTable;
363    U32* HashTable  = hc4->hashTable;
364    const BYTE* const base = hc4->base;
365    const U32 target = (U32)(ip - base);
366    U32 idx = hc4->nextToUpdate;
367
368    while(idx < target)
369    {
370        U32 h = LZ4HC_hashPtr(base+idx);
371        size_t delta = idx - HashTable[h];
372        if (delta>MAX_DISTANCE) delta = MAX_DISTANCE;
373        chainTable[idx & 0xFFFF] = (U16)delta;
374        HashTable[h] = idx;
375        idx++;
376    }
377
378    hc4->nextToUpdate = target;
379}
380
381
/* Switch the context to a new, non-contiguous input block: the block just
 * finished becomes the external dictionary segment. Assignment order below
 * matters (dictLimit is read after lowLimit is updated, and base is rebased
 * from the new dictLimit). */
static void LZ4HC_setExternalDict(LZ4HC_Data_Structure* ctxPtr, const BYTE* newBlock)
{
    if (ctxPtr->end >= ctxPtr->base + 4)
        LZ4HC_Insert (ctxPtr, ctxPtr->end-3);   /* finish referencing dictionary content */
    /* Note : need to handle risk of index overflow */
    /* Use only one memory segment for dict, so any previous External Dict is lost at this stage */
    ctxPtr->lowLimit  = ctxPtr->dictLimit;
    ctxPtr->dictLimit = (U32)(ctxPtr->end - ctxPtr->base);
    ctxPtr->dictBase  = ctxPtr->base;
    /* rebase so that indices remain continuous across blocks */
    ctxPtr->base = newBlock - ctxPtr->dictLimit;
    ctxPtr->end  = newBlock;
    ctxPtr->nextToUpdate = ctxPtr->dictLimit;   /* reference table must skip to from beginning of block */
}
395
396
/* Count how many bytes match between p1 and p2, scanning forward without
 * letting p1 pass p1Limit. Compares one machine word at a time, then
 * finishes with a 4/2/1-byte tail. */
static size_t LZ4HC_CommonLength (const BYTE* p1, const BYTE* p2, const BYTE* const p1Limit)
{
    const BYTE* const p1Start = p1;

    while (p1 <= p1Limit - STEPSIZE)
    {
        size_t diff = AARCH(p2) ^ AARCH(p1);
        if (!diff) { p1+=STEPSIZE; p2+=STEPSIZE; continue; }
        p1 += LZ4_NbCommonBytes(diff);   /* locate the first differing byte inside the word */
        return (p1 - p1Start);
    }
    /* tail: fewer than STEPSIZE bytes remain before p1Limit */
    if (LZ4_ARCH64) if ((p1<(p1Limit-3)) && (A32(p2) == A32(p1))) { p1+=4; p2+=4; }
    if ((p1<(p1Limit-1)) && (A16(p2) == A16(p1))) { p1+=2; p2+=2; }
    if ((p1<p1Limit) && (*p2 == *p1)) p1++;
    return (p1 - p1Start);
}
413
414
/* Insert ip into the tables, then walk its hash chain (at most
 * maxNbAttempts candidates) looking for the longest match starting exactly
 * at ip. Writes the match position to *matchpos and returns the match
 * length, or 0 if none was found. Candidates below dictLimit live in the
 * external-dictionary segment and are compared through dictBase. */
FORCE_INLINE int LZ4HC_InsertAndFindBestMatch (LZ4HC_Data_Structure* hc4,   /* Index table will be updated */
                                               const BYTE* ip, const BYTE* const iLimit,
                                               const BYTE** matchpos,
                                               const int maxNbAttempts)
{
    U16* const chainTable = hc4->chainTable;
    U32* const HashTable = hc4->hashTable;
    const BYTE* const base = hc4->base;
    const BYTE* const dictBase = hc4->dictBase;
    const U32 dictLimit = hc4->dictLimit;
    /* oldest index still inside the 64 KB window ending at ip */
    const U32 lowLimit = (hc4->lowLimit + 64 KB > (U32)(ip-base)) ? hc4->lowLimit : (U32)(ip - base) - (64 KB - 1);
    U32 matchIndex;
    const BYTE* match;
    int nbAttempts=maxNbAttempts;
    size_t ml=0;

    /* HC4 match finder */
    LZ4HC_Insert(hc4, ip);
    matchIndex = HashTable[LZ4HC_hashPtr(ip)];

    while ((matchIndex>=lowLimit) && (nbAttempts))
    {
        nbAttempts--;
        if (matchIndex >= dictLimit)
        {
            /* candidate lies in the current prefix */
            match = base + matchIndex;
            if (*(match+ml) == *(ip+ml)   /* cheap filter: byte beyond current best must match */
                && (A32(match) == A32(ip)))
            {
                size_t mlt = LZ4HC_CommonLength(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH;
                if (mlt > ml) { ml = mlt; *matchpos = match; }
            }
        }
        else
        {
            /* candidate lies in the external dictionary segment */
            match = dictBase + matchIndex;
            if (A32(match) == A32(ip))
            {
                size_t mlt;
                const BYTE* vLimit = ip + (dictLimit - matchIndex);
                if (vLimit > iLimit) vLimit = iLimit;
                mlt = LZ4HC_CommonLength(ip+MINMATCH, match+MINMATCH, vLimit) + MINMATCH;
                if ((ip+mlt == vLimit) && (vLimit < iLimit))
                    mlt += LZ4HC_CommonLength(ip+mlt, base+dictLimit, iLimit);   /* match continues into current prefix */
                if (mlt > ml) { ml = mlt; *matchpos = base + matchIndex; }   /* virtual matchpos */
            }
        }
        matchIndex -= chainTable[matchIndex & 0xFFFF];   /* follow chain to an older occurrence */
    }

    return (int)ml;
}
467
468
/* Like LZ4HC_InsertAndFindBestMatch, but also allows the match start to
 * slide backward as far as iLowLimit. When a candidate beats `longest`,
 * writes the match position to *matchpos and the (possibly moved-back)
 * start to *startpos. Returns the best length found (or the input
 * `longest` unchanged). */
FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch (
    LZ4HC_Data_Structure* hc4,
    const BYTE* ip,
    const BYTE* iLowLimit,
    const BYTE* iHighLimit,
    int longest,
    const BYTE** matchpos,
    const BYTE** startpos,
    const int maxNbAttempts)
{
    U16* const chainTable = hc4->chainTable;
    U32* const HashTable = hc4->hashTable;
    const BYTE* const base = hc4->base;
    const U32 dictLimit = hc4->dictLimit;
    /* oldest index still inside the 64 KB window ending at ip */
    const U32 lowLimit = (hc4->lowLimit + 64 KB > (U32)(ip-base)) ? hc4->lowLimit : (U32)(ip - base) - (64 KB - 1);
    const BYTE* const dictBase = hc4->dictBase;
    const BYTE* match;
    U32   matchIndex;
    int nbAttempts = maxNbAttempts;
    int delta = (int)(ip-iLowLimit);


    /* First Match */
    LZ4HC_Insert(hc4, ip);
    matchIndex = HashTable[LZ4HC_hashPtr(ip)];

    while ((matchIndex>=lowLimit) && (nbAttempts))
    {
        nbAttempts--;
        if (matchIndex >= dictLimit)
        {
            /* candidate lies in the current prefix */
            match = base + matchIndex;
            if (*(iLowLimit + longest) == *(match - delta + longest))   /* cheap filter before full compare */
                if (A32(match) == A32(ip))
                {
                    const BYTE* startt = ip;
                    const BYTE* tmpMatch = match;
                    const BYTE* const matchEnd = ip + MINMATCH + LZ4HC_CommonLength(ip+MINMATCH, match+MINMATCH, iHighLimit);

                    /* extend backward while previous bytes agree */
                    while ((startt>iLowLimit) && (tmpMatch > iLowLimit) && (startt[-1] == tmpMatch[-1])) {startt--; tmpMatch--;}

                    if ((matchEnd-startt) > longest)
                    {
                        longest = (int)(matchEnd-startt);
                        *matchpos = tmpMatch;
                        *startpos = startt;
                    }
                }
        }
        else
        {
            /* candidate lies in the external dictionary segment */
            match = dictBase + matchIndex;
            if (A32(match) == A32(ip))
            {
                size_t mlt;
                int back=0;   /* backward extension, stored as a non-positive offset */
                const BYTE* vLimit = ip + (dictLimit - matchIndex);
                if (vLimit > iHighLimit) vLimit = iHighLimit;
                mlt = LZ4HC_CommonLength(ip+MINMATCH, match+MINMATCH, vLimit) + MINMATCH;
                if ((ip+mlt == vLimit) && (vLimit < iHighLimit))
                    mlt += LZ4HC_CommonLength(ip+mlt, base+dictLimit, iHighLimit);   /* match continues into current prefix */
                while ((ip+back > iLowLimit) && (matchIndex+back > lowLimit) && (ip[back-1] == match[back-1])) back--;
                mlt -= back;
                if ((int)mlt > longest) { longest = (int)mlt; *matchpos = base + matchIndex + back; *startpos = ip+back; }
            }
        }
        matchIndex -= chainTable[matchIndex & 0xFFFF];   /* follow chain to an older occurrence */
    }

    return longest;
}
540
541
typedef enum { noLimit = 0, limitedOutput = 1 } limitedOutput_directive;

/* Emit one LZ4 sequence (literal run, then match) at *op.
 * The token byte packs the literal length in its high nibble and the match
 * length in its low nibble; lengths >= 15 spill into extra 255-bytes.
 * Advances *ip past the match and updates *anchor.
 * Returns 0 on success, 1 if the output limit would be exceeded (checked
 * only when limitedOutputBuffer != noLimit). */
FORCE_INLINE int LZ4HC_encodeSequence (
    const BYTE** ip,
    BYTE** op,
    const BYTE** anchor,
    int matchLength,
    const BYTE* const match,
    limitedOutput_directive limitedOutputBuffer,
    BYTE* oend)
{
    int length;
    BYTE* token;

    /* Encode Literal length */
    length = (int)(*ip - *anchor);
    token = (*op)++;
    if ((limitedOutputBuffer) && ((*op + (length>>8) + length + (2 + 1 + LASTLITERALS)) > oend)) return 1;   /* Check output limit */
    if (length>=(int)RUN_MASK) { int len; *token=(RUN_MASK<<ML_BITS); len = length-RUN_MASK; for(; len > 254 ; len-=255) *(*op)++ = 255;  *(*op)++ = (BYTE)len; }
    else *token = (BYTE)(length<<ML_BITS);

    /* Copy Literals */
    LZ4_BLINDCOPY(*anchor, *op, length);

    /* Encode Offset (2 bytes, little-endian) */
    LZ4_WRITE_LITTLEENDIAN_16(*op,(U16)(*ip-match));

    /* Encode MatchLength (stored minus MINMATCH) */
    length = (int)(matchLength-MINMATCH);
    if ((limitedOutputBuffer) && (*op + (length>>8) + (1 + LASTLITERALS) > oend)) return 1;   /* Check output limit */
    if (length>=(int)ML_MASK) { *token+=ML_MASK; length-=ML_MASK; for(; length > 509 ; length-=510) { *(*op)++ = 255; *(*op)++ = 255; } if (length > 254) { length-=255; *(*op)++ = 255; } *(*op)++ = (BYTE)length; }
    else *token += (BYTE)(length);

    /* Prepare next loop */
    *ip += matchLength;
    *anchor = *ip;

    return 0;
}
585
586
#define MAX_COMPRESSION_LEVEL 16   /* levels above this are clamped; search effort = 1 << level attempts */

/* Core HC compressor. For each position it finds a best match, then probes
 * for up to two overlapping, longer matches further ahead (_Search2 /
 * _Search3) and arbitrates which sequences to emit. Returns the compressed
 * size, or 0 on failure (output too small when `limit` == limitedOutput). */
static int LZ4HC_compress_generic (
    void* ctxvoid,
    const char* source,
    char* dest,
    int inputSize,
    int maxOutputSize,
    int compressionLevel,
    limitedOutput_directive limit
    )
{
    LZ4HC_Data_Structure* ctx = (LZ4HC_Data_Structure*) ctxvoid;
    const BYTE* ip = (const BYTE*) source;
    const BYTE* anchor = ip;                            /* start of pending literals */
    const BYTE* const iend = ip + inputSize;
    const BYTE* const mflimit = iend - MFLIMIT;         /* no match may start past this */
    const BYTE* const matchlimit = (iend - LASTLITERALS);

    BYTE* op = (BYTE*) dest;
    BYTE* const oend = op + maxOutputSize;

    unsigned maxNbAttempts;
    int   ml, ml2, ml3, ml0;
    const BYTE* ref=NULL;
    const BYTE* start2=NULL;
    const BYTE* ref2=NULL;
    const BYTE* start3=NULL;
    const BYTE* ref3=NULL;
    const BYTE* start0;
    const BYTE* ref0;


    /* init */
    if (compressionLevel > MAX_COMPRESSION_LEVEL) compressionLevel = MAX_COMPRESSION_LEVEL;
    if (compressionLevel == 0) compressionLevel = LZ4HC_DEFAULT_COMPRESSIONLEVEL;
    maxNbAttempts = 1 << compressionLevel;
    ctx->end += inputSize;

    ip++;   /* first byte is always emitted as a literal */

    /* Main Loop */
    while (ip < mflimit)
    {
        ml = LZ4HC_InsertAndFindBestMatch (ctx, ip, matchlimit, (&ref), maxNbAttempts);
        if (!ml) { ip++; continue; }

        /* saved, in case we would skip too much */
        start0 = ip;
        ref0 = ref;
        ml0 = ml;

_Search2:
        /* look for a wider match overlapping the end of match 1 */
        if (ip+ml < mflimit)
            ml2 = LZ4HC_InsertAndGetWiderMatch(ctx, ip + ml - 2, ip + 1, matchlimit, ml, &ref2, &start2, maxNbAttempts);
        else ml2 = ml;

        if (ml2 == ml)  /* No better match */
        {
            if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0;
            continue;
        }

        if (start0 < ip)
        {
            if (start2 < ip + ml0)   /* empirical */
            {
                ip = start0;
                ref = ref0;
                ml = ml0;
            }
        }

        /* Here, start0==ip */
        if ((start2 - ip) < 3)   /* First Match too small : removed */
        {
            ml = ml2;
            ip = start2;
            ref =ref2;
            goto _Search2;
        }

_Search3:
        /*
        * Currently we have :
        * ml2 > ml1, and
        * ip1+3 <= ip2 (usually < ip1+ml1)
        */
        if ((start2 - ip) < OPTIMAL_ML)
        {
            /* shrink match 1 so match 2 starts right after it, when profitable */
            int correction;
            int new_ml = ml;
            if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML;
            if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = (int)(start2 - ip) + ml2 - MINMATCH;
            correction = new_ml - (int)(start2 - ip);
            if (correction > 0)
            {
                start2 += correction;
                ref2 += correction;
                ml2 -= correction;
            }
        }
        /* Now, we have start2 = ip+new_ml, with new_ml = min(ml, OPTIMAL_ML=18) */

        if (start2 + ml2 < mflimit)
            ml3 = LZ4HC_InsertAndGetWiderMatch(ctx, start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3, maxNbAttempts);
        else ml3 = ml2;

        if (ml3 == ml2) /* No better match : 2 sequences to encode */
        {
            /* ip & ref are known; Now for ml */
            if (start2 < ip+ml)  ml = (int)(start2 - ip);
            /* Now, encode 2 sequences */
            if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0;
            ip = start2;
            if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml2, ref2, limit, oend)) return 0;
            continue;
        }

        if (start3 < ip+ml+3) /* Not enough space for match 2 : remove it */
        {
            if (start3 >= (ip+ml)) /* can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1 */
            {
                if (start2 < ip+ml)
                {
                    int correction = (int)(ip+ml - start2);
                    start2 += correction;
                    ref2 += correction;
                    ml2 -= correction;
                    if (ml2 < MINMATCH)
                    {
                        start2 = start3;
                        ref2 = ref3;
                        ml2 = ml3;
                    }
                }

                if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0;
                ip  = start3;
                ref = ref3;
                ml  = ml3;

                start0 = start2;
                ref0 = ref2;
                ml0 = ml2;
                goto _Search2;
            }

            start2 = start3;
            ref2 = ref3;
            ml2 = ml3;
            goto _Search3;
        }

        /*
        * OK, now we have 3 ascending matches; let's write at least the first one
        * ip & ref are known; Now for ml
        */
        if (start2 < ip+ml)
        {
            if ((start2 - ip) < (int)ML_MASK)
            {
                int correction;
                if (ml > OPTIMAL_ML) ml = OPTIMAL_ML;
                if (ip + ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH;
                correction = ml - (int)(start2 - ip);
                if (correction > 0)
                {
                    start2 += correction;
                    ref2 += correction;
                    ml2 -= correction;
                }
            }
            else
            {
                ml = (int)(start2 - ip);
            }
        }
        if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0;

        /* match 2 becomes match 1, match 3 becomes match 2; search again */
        ip = start2;
        ref = ref2;
        ml = ml2;

        start2 = start3;
        ref2 = ref3;
        ml2 = ml3;

        goto _Search3;
    }

    /* Encode Last Literals */
    {
        int lastRun = (int)(iend - anchor);
        if ((limit) && (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize)) return 0;  /* Check output limit */
        if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; }
        else *op++ = (BYTE)(lastRun<<ML_BITS);
        memcpy(op, anchor, iend - anchor);
        op += iend-anchor;
    }

    /* End */
    return (int) (((char*)op)-dest);
}
790
791
792int LZ4_compressHC2(const char* source, char* dest, int inputSize, int compressionLevel)
793{
794    LZ4HC_Data_Structure ctx;
795    LZ4HC_init(&ctx, (const BYTE*)source);
796    return LZ4HC_compress_generic (&ctx, source, dest, inputSize, 0, compressionLevel, noLimit);
797}
798
/* One-shot HC compression at the default compression level. */
int LZ4_compressHC(const char* source, char* dest, int inputSize)
{
    return LZ4_compressHC2(source, dest, inputSize, 0);
}
800
801int LZ4_compressHC2_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel)
802{
803    LZ4HC_Data_Structure ctx;
804    LZ4HC_init(&ctx, (const BYTE*)source);
805    return LZ4HC_compress_generic (&ctx, source, dest, inputSize, maxOutputSize, compressionLevel, limitedOutput);
806}
807
/* Bounded-output HC compression at the default compression level. */
int LZ4_compressHC_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize)
{
    return LZ4_compressHC2_limitedOutput(source, dest, inputSize, maxOutputSize, 0);
}
812
813
814/*****************************
815 Using external allocation
816*****************************/
817int LZ4_sizeofStateHC(void) { return sizeof(LZ4HC_Data_Structure); }
818
819
820int LZ4_compressHC2_withStateHC (void* state, const char* source, char* dest, int inputSize, int compressionLevel)
821{
822    if (((size_t)(state)&(sizeof(void*)-1)) != 0) return 0;   /* Error : state is not aligned for pointers (32 or 64 bits) */
823    LZ4HC_init ((LZ4HC_Data_Structure*)state, (const BYTE*)source);
824    return LZ4HC_compress_generic (state, source, dest, inputSize, 0, compressionLevel, noLimit);
825}
826
/* Caller-provided-state HC compression at the default compression level. */
int LZ4_compressHC_withStateHC (void* state, const char* source, char* dest, int inputSize)
{
    return LZ4_compressHC2_withStateHC(state, source, dest, inputSize, 0);
}
829
830
831int LZ4_compressHC2_limitedOutput_withStateHC (void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel)
832{
833    if (((size_t)(state)&(sizeof(void*)-1)) != 0) return 0;   /* Error : state is not aligned for pointers (32 or 64 bits) */
834    LZ4HC_init ((LZ4HC_Data_Structure*)state, (const BYTE*)source);
835    return LZ4HC_compress_generic (state, source, dest, inputSize, maxOutputSize, compressionLevel, limitedOutput);
836}
837
/* Bounded-output, caller-provided-state HC compression at the default level. */
int LZ4_compressHC_limitedOutput_withStateHC (void* state, const char* source, char* dest, int inputSize, int maxOutputSize)
{
    return LZ4_compressHC2_limitedOutput_withStateHC(state, source, dest, inputSize, maxOutputSize, 0);
}
840
841
842/**************************************
843 Experimental Streaming Functions
844**************************************/
845/* allocation */
846LZ4_streamHC_t* LZ4_createStreamHC(void) { return (LZ4_streamHC_t*)malloc(sizeof(LZ4_streamHC_t)); }
847int LZ4_freeStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr) { free(LZ4_streamHCPtr); return 0; };
848
849
850/* initialization */
851void LZ4_resetStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel)
852{
853    LZ4_STATIC_ASSERT(sizeof(LZ4HC_Data_Structure) <= LZ4_STREAMHCSIZE);   /* if compilation fails here, LZ4_STREAMHCSIZE must be increased */
854    ((LZ4HC_Data_Structure*)LZ4_streamHCPtr)->base = NULL;
855    ((LZ4HC_Data_Structure*)LZ4_streamHCPtr)->compressionLevel = (unsigned)compressionLevel;
856}
857
858int LZ4_loadDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, const char* dictionary, int dictSize)
859{
860    LZ4HC_Data_Structure* streamPtr = (LZ4HC_Data_Structure*) LZ4_streamHCPtr;
861    if (dictSize > 64 KB)
862    {
863        dictionary += dictSize - 64 KB;
864        dictSize = 64 KB;
865    }
866    LZ4HC_init (streamPtr, (const BYTE*)dictionary);
867    if (dictSize >= 4) LZ4HC_Insert (streamPtr, (const BYTE*)dictionary +(dictSize-3));
868    streamPtr->end = (const BYTE*)dictionary + dictSize;
869    return dictSize;
870}
871
872
873/* compression */
874
/* Shared implementation behind the streaming compress entry points.
 * Maintains cross-block streaming state, then defers to LZ4HC_compress_generic().
 * `limit` selects bounded (limitedOutput) vs unbounded (noLimit) output. */
static int LZ4_compressHC_continue_generic (LZ4HC_Data_Structure* dsPtr,
                                            const char* source, char* dest,
                                            int inputSize, int maxOutputSize, limitedOutput_directive limit)
{
    /* auto-init if forgotten (base is NULL after LZ4_resetStreamHC) */
    if (dsPtr->base == NULL)
        LZ4HC_init (dsPtr, (const BYTE*) source);

    /* Check overflow : positions are stored as 32-bit offsets from `base`;
       once the stream grows past 2 GB, rebase by reloading the last <=64 KB
       of history as a fresh dictionary. */
    if ((size_t)(dsPtr->end - dsPtr->base) > 2 GB)
    {
        size_t dictSize = (size_t)(dsPtr->end - dsPtr->base) - dsPtr->dictLimit;
        if (dictSize > 64 KB) dictSize = 64 KB;

        LZ4_loadDictHC((LZ4_streamHC_t*)dsPtr, (const char*)(dsPtr->end) - dictSize, (int)dictSize);
    }

    /* Check if blocks follow each other : if not contiguous, previous data
       becomes an external dictionary */
    if ((const BYTE*)source != dsPtr->end) LZ4HC_setExternalDict(dsPtr, (const BYTE*)source);

    /* Check overlapping input/dictionary space : if the new input overwrites
       part of the external dictionary, shrink the dictionary window so match
       search never reads bytes the caller is about to clobber. */
    {
        const BYTE* sourceEnd = (const BYTE*) source + inputSize;
        const BYTE* dictBegin = dsPtr->dictBase + dsPtr->lowLimit;
        const BYTE* dictEnd   = dsPtr->dictBase + dsPtr->dictLimit;
        if ((sourceEnd > dictBegin) && ((BYTE*)source < dictEnd))
        {
            if (sourceEnd > dictEnd) sourceEnd = dictEnd;
            dsPtr->lowLimit = (U32)(sourceEnd - dsPtr->dictBase);
            /* a remaining dictionary of fewer than 4 bytes cannot produce a match : drop it */
            if (dsPtr->dictLimit - dsPtr->lowLimit < 4) dsPtr->lowLimit = dsPtr->dictLimit;
        }
    }

    return LZ4HC_compress_generic (dsPtr, source, dest, inputSize, maxOutputSize, dsPtr->compressionLevel, limit);
}
910
911int LZ4_compressHC_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize)
912{
913    return LZ4_compressHC_continue_generic ((LZ4HC_Data_Structure*)LZ4_streamHCPtr, source, dest, inputSize, 0, noLimit);
914}
915
916int LZ4_compressHC_limitedOutput_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize, int maxOutputSize)
917{
918    return LZ4_compressHC_continue_generic ((LZ4HC_Data_Structure*)LZ4_streamHCPtr, source, dest, inputSize, maxOutputSize, limitedOutput);
919}
920
921
922/* dictionary saving */
923
/* Copies the last `dictSize` bytes of the stream's prefix into `safeBuffer`,
 * then re-points the stream at that copy so the caller may reuse its input
 * buffer. Returns the number of bytes actually saved (0..64 KB).
 * NOTE(review): `safeBuffer` is assumed non-NULL and large enough — TODO confirm
 * against the public header's contract. */
int LZ4_saveDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, char* safeBuffer, int dictSize)
{
    LZ4HC_Data_Structure* streamPtr = (LZ4HC_Data_Structure*)LZ4_streamHCPtr;
    /* bytes of current-segment history available (end - start of prefix) */
    int prefixSize = (int)(streamPtr->end - (streamPtr->base + streamPtr->dictLimit));
    if (dictSize > 64 KB) dictSize = 64 KB;   /* the format cannot reference further back */
    if (dictSize < 4) dictSize = 0;           /* too short to ever seed a match */
    if (dictSize > prefixSize) dictSize = prefixSize;
    memcpy(safeBuffer, streamPtr->end - dictSize, dictSize);
    //LZ4_loadDictHC(LZ4_streamHCPtr, safeBuffer, dictSize);
    {
        /* Rebase the stream onto the saved copy, keeping absolute indexes
           (position - base) identical so existing hash entries stay valid. */
        U32 endIndex = (U32)(streamPtr->end - streamPtr->base);
        streamPtr->end = (const BYTE*)safeBuffer + dictSize;
        streamPtr->base = streamPtr->end - endIndex;
        streamPtr->dictLimit = endIndex - dictSize;
        streamPtr->lowLimit = endIndex - dictSize;
    }
    return dictSize;
}
942
943
944/***********************************
945 * Deprecated Functions
946 ***********************************/
947int LZ4_sizeofStreamStateHC(void) { return LZ4_STREAMHCSIZE; }
948
949int LZ4_resetStreamStateHC(void* state, const char* inputBuffer)
950{
951    if ((((size_t)state) & (sizeof(void*)-1)) != 0) return 1;   /* Error : pointer is not aligned for pointer (32 or 64 bits) */
952    LZ4HC_init((LZ4HC_Data_Structure*)state, (const BYTE*)inputBuffer);
953    return 0;
954}
955
956void* LZ4_createHC (const char* inputBuffer)
957{
958    void* hc4 = ALLOCATOR(sizeof(LZ4HC_Data_Structure));
959    LZ4HC_init ((LZ4HC_Data_Structure*)hc4, (const BYTE*)inputBuffer);
960    return hc4;
961}
962
/* Deprecated : releases a state allocated by LZ4_createHC(). Always returns 0. */
int LZ4_freeHC (void* LZ4HC_Data)
{
    FREEMEM(LZ4HC_Data);   /* free(NULL) is a safe no-op */
    return 0;
}
968
969/*
970int LZ4_compressHC_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize)
971{
972return LZ4HC_compress_generic (LZ4HC_Data, source, dest, inputSize, 0, 0, noLimit);
973}
974int LZ4_compressHC_limitedOutput_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int maxOutputSize)
975{
976return LZ4HC_compress_generic (LZ4HC_Data, source, dest, inputSize, maxOutputSize, 0, limitedOutput);
977}
978*/
979
980int LZ4_compressHC2_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int compressionLevel)
981{
982    return LZ4HC_compress_generic (LZ4HC_Data, source, dest, inputSize, 0, compressionLevel, noLimit);
983}
984
985int LZ4_compressHC2_limitedOutput_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel)
986{
987    return LZ4HC_compress_generic (LZ4HC_Data, source, dest, inputSize, maxOutputSize, compressionLevel, limitedOutput);
988}
989
/* Deprecated : for the old double-buffer scheme. Moves the last 64 KB of
 * history to the front of the input buffer and rebases the stream so
 * indexes stay valid. Returns the position for the next input block. */
char* LZ4_slideInputBufferHC(void* LZ4HC_Data)
{
    LZ4HC_Data_Structure* hc4 = (LZ4HC_Data_Structure*)LZ4HC_Data;
    size_t distance = (hc4->end - 64 KB) - hc4->inputBuffer;

    if (hc4->end <= hc4->inputBuffer + 64 KB) return (char*)(hc4->end);   /* no update : less than 64KB within buffer */

    distance = (distance >> 16) << 16;   /* Must be a multiple of 64 KB */
    /* NOTE(review): if under 128 KB has been written, `distance` rounds to 0 and
       the memcpy below copies a region onto itself — confirm callers cannot hit this. */
    LZ4HC_Insert(hc4, hc4->end - MINMATCH);   /* index the tail before it is moved */
    memcpy((void*)(hc4->end - 64 KB - distance), (const void*)(hc4->end - 64 KB), 64 KB);
    hc4->base -= distance;   /* keep absolute indexes (pos - base) unchanged after the slide */
    if ((U32)(hc4->inputBuffer - hc4->base) > 1 GB + 64 KB)   /* Avoid overflow */
    {
        int i;
        hc4->base += 1 GB;
        /* shift every hash-table entry to match the rebased `base` */
        for (i=0; i<HASHTABLESIZE; i++) hc4->hashTable[i] -= 1 GB;
    }
    hc4->end -= distance;
    return (char*)(hc4->end);
}
1010