1/*
2 * LZ4 HC - High Compression Mode of LZ4
3 * Copyright (C) 2011-2012, Yann Collet.
4 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are
8 * met:
9 *
10 *     * Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *     * Redistributions in binary form must reproduce the above
13 * copyright notice, this list of conditions and the following disclaimer
14 * in the documentation and/or other materials provided with the
15 * distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 *
29 * You can contact the author at :
30 * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
31 * - LZ4 source repository : http://code.google.com/p/lz4/
32 *
33 *  Changed for kernel use by:
34 *  Chanho Min <chanho.min@lge.com>
35 */
36
37#include <linux/module.h>
38#include <linux/kernel.h>
39#include <linux/lz4.h>
40#include <asm/unaligned.h>
41#include "lz4defs.h"
42
/*
 * Working state of the HC match finder. lz4hc_compress() casts the
 * caller-supplied wrkmem buffer to this layout.
 */
struct lz4hc_data {
	/* start of the input buffer, as passed to lz4hc_init() */
	const u8 *base;
	/* most recent input position recorded for each hash value */
	HTYPE hashtable[HASHTABLESIZE];
	/*
	 * For each recorded position (indexed by address & MAXD_MASK), the
	 * backward distance to the position previously recorded for the
	 * same hash value, capped at MAX_DISTANCE.
	 */
	u16 chaintable[MAXD];
	/* first input position not yet inserted into the tables */
	const u8 *nexttoupdate;
} __attribute__((__packed__));
49
50static inline int lz4hc_init(struct lz4hc_data *hc4, const u8 *base)
51{
52	memset((void *)hc4->hashtable, 0, sizeof(hc4->hashtable));
53	memset(hc4->chaintable, 0xFF, sizeof(hc4->chaintable));
54
55#if LZ4_ARCH64
56	hc4->nexttoupdate = base + 1;
57#else
58	hc4->nexttoupdate = base;
59#endif
60	hc4->base = base;
61	return 1;
62}
63
64/* Update chains up to ip (excluded) */
65static inline void lz4hc_insert(struct lz4hc_data *hc4, const u8 *ip)
66{
67	u16 *chaintable = hc4->chaintable;
68	HTYPE *hashtable  = hc4->hashtable;
69#if LZ4_ARCH64
70	const BYTE * const base = hc4->base;
71#else
72	const int base = 0;
73#endif
74
75	while (hc4->nexttoupdate < ip) {
76		const u8 *p = hc4->nexttoupdate;
77		size_t delta = p - (hashtable[HASH_VALUE(p)] + base);
78		if (delta > MAX_DISTANCE)
79			delta = MAX_DISTANCE;
80		chaintable[(size_t)(p) & MAXD_MASK] = (u16)delta;
81		hashtable[HASH_VALUE(p)] = (p) - base;
82		hc4->nexttoupdate++;
83	}
84}
85
86static inline size_t lz4hc_commonlength(const u8 *p1, const u8 *p2,
87		const u8 *const matchlimit)
88{
89	const u8 *p1t = p1;
90
91	while (p1t < matchlimit - (STEPSIZE - 1)) {
92#if LZ4_ARCH64
93		u64 diff = A64(p2) ^ A64(p1t);
94#else
95		u32 diff = A32(p2) ^ A32(p1t);
96#endif
97		if (!diff) {
98			p1t += STEPSIZE;
99			p2 += STEPSIZE;
100			continue;
101		}
102		p1t += LZ4_NBCOMMONBYTES(diff);
103		return p1t - p1;
104	}
105#if LZ4_ARCH64
106	if ((p1t < (matchlimit-3)) && (A32(p2) == A32(p1t))) {
107		p1t += 4;
108		p2 += 4;
109	}
110#endif
111
112	if ((p1t < (matchlimit - 1)) && (A16(p2) == A16(p1t))) {
113		p1t += 2;
114		p2 += 2;
115	}
116	if ((p1t < matchlimit) && (*p2 == *p1t))
117		p1t++;
118	return p1t - p1;
119}
120
121static inline int lz4hc_insertandfindbestmatch(struct lz4hc_data *hc4,
122		const u8 *ip, const u8 *const matchlimit, const u8 **matchpos)
123{
124	u16 *const chaintable = hc4->chaintable;
125	HTYPE *const hashtable = hc4->hashtable;
126	const u8 *ref;
127#if LZ4_ARCH64
128	const BYTE * const base = hc4->base;
129#else
130	const int base = 0;
131#endif
132	int nbattempts = MAX_NB_ATTEMPTS;
133	size_t repl = 0, ml = 0;
134	u16 delta;
135
136	/* HC4 match finder */
137	lz4hc_insert(hc4, ip);
138	ref = hashtable[HASH_VALUE(ip)] + base;
139
140	/* potential repetition */
141	if (ref >= ip-4) {
142		/* confirmed */
143		if (A32(ref) == A32(ip)) {
144			delta = (u16)(ip-ref);
145			repl = ml  = lz4hc_commonlength(ip + MINMATCH,
146					ref + MINMATCH, matchlimit) + MINMATCH;
147			*matchpos = ref;
148		}
149		ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK];
150	}
151
152	while ((ref >= ip - MAX_DISTANCE) && nbattempts) {
153		nbattempts--;
154		if (*(ref + ml) == *(ip + ml)) {
155			if (A32(ref) == A32(ip)) {
156				size_t mlt =
157					lz4hc_commonlength(ip + MINMATCH,
158					ref + MINMATCH, matchlimit) + MINMATCH;
159				if (mlt > ml) {
160					ml = mlt;
161					*matchpos = ref;
162				}
163			}
164		}
165		ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK];
166	}
167
168	/* Complete table */
169	if (repl) {
170		const BYTE *ptr = ip;
171		const BYTE *end;
172		end = ip + repl - (MINMATCH-1);
173		/* Pre-Load */
174		while (ptr < end - delta) {
175			chaintable[(size_t)(ptr) & MAXD_MASK] = delta;
176			ptr++;
177		}
178		do {
179			chaintable[(size_t)(ptr) & MAXD_MASK] = delta;
180			/* Head of chain */
181			hashtable[HASH_VALUE(ptr)] = (ptr) - base;
182			ptr++;
183		} while (ptr < end);
184		hc4->nexttoupdate = end;
185	}
186
187	return (int)ml;
188}
189
190static inline int lz4hc_insertandgetwidermatch(struct lz4hc_data *hc4,
191	const u8 *ip, const u8 *startlimit, const u8 *matchlimit, int longest,
192	const u8 **matchpos, const u8 **startpos)
193{
194	u16 *const chaintable = hc4->chaintable;
195	HTYPE *const hashtable = hc4->hashtable;
196#if LZ4_ARCH64
197	const BYTE * const base = hc4->base;
198#else
199	const int base = 0;
200#endif
201	const u8 *ref;
202	int nbattempts = MAX_NB_ATTEMPTS;
203	int delta = (int)(ip - startlimit);
204
205	/* First Match */
206	lz4hc_insert(hc4, ip);
207	ref = hashtable[HASH_VALUE(ip)] + base;
208
209	while ((ref >= ip - MAX_DISTANCE) && (ref >= hc4->base)
210		&& (nbattempts)) {
211		nbattempts--;
212		if (*(startlimit + longest) == *(ref - delta + longest)) {
213			if (A32(ref) == A32(ip)) {
214				const u8 *reft = ref + MINMATCH;
215				const u8 *ipt = ip + MINMATCH;
216				const u8 *startt = ip;
217
218				while (ipt < matchlimit-(STEPSIZE - 1)) {
219					#if LZ4_ARCH64
220					u64 diff = A64(reft) ^ A64(ipt);
221					#else
222					u32 diff = A32(reft) ^ A32(ipt);
223					#endif
224
225					if (!diff) {
226						ipt += STEPSIZE;
227						reft += STEPSIZE;
228						continue;
229					}
230					ipt += LZ4_NBCOMMONBYTES(diff);
231					goto _endcount;
232				}
233				#if LZ4_ARCH64
234				if ((ipt < (matchlimit - 3))
235					&& (A32(reft) == A32(ipt))) {
236					ipt += 4;
237					reft += 4;
238				}
239				ipt += 2;
240				#endif
241				if ((ipt < (matchlimit - 1))
242					&& (A16(reft) == A16(ipt))) {
243					reft += 2;
244				}
245				if ((ipt < matchlimit) && (*reft == *ipt))
246					ipt++;
247_endcount:
248				reft = ref;
249
250				while ((startt > startlimit)
251					&& (reft > hc4->base)
252					&& (startt[-1] == reft[-1])) {
253					startt--;
254					reft--;
255				}
256
257				if ((ipt - startt) > longest) {
258					longest = (int)(ipt - startt);
259					*matchpos = reft;
260					*startpos = startt;
261				}
262			}
263		}
264		ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK];
265	}
266	return longest;
267}
268
269static inline int lz4_encodesequence(const u8 **ip, u8 **op, const u8 **anchor,
270		int ml, const u8 *ref)
271{
272	int length, len;
273	u8 *token;
274
275	/* Encode Literal length */
276	length = (int)(*ip - *anchor);
277	token = (*op)++;
278	if (length >= (int)RUN_MASK) {
279		*token = (RUN_MASK << ML_BITS);
280		len = length - RUN_MASK;
281		for (; len > 254 ; len -= 255)
282			*(*op)++ = 255;
283		*(*op)++ = (u8)len;
284	} else
285		*token = (length << ML_BITS);
286
287	/* Copy Literals */
288	LZ4_BLINDCOPY(*anchor, *op, length);
289
290	/* Encode Offset */
291	LZ4_WRITE_LITTLEENDIAN_16(*op, (u16)(*ip - ref));
292
293	/* Encode MatchLength */
294	len = (int)(ml - MINMATCH);
295	if (len >= (int)ML_MASK) {
296		*token += ML_MASK;
297		len -= ML_MASK;
298		for (; len > 509 ; len -= 510) {
299			*(*op)++ = 255;
300			*(*op)++ = 255;
301		}
302		if (len > 254) {
303			len -= 255;
304			*(*op)++ = 255;
305		}
306		*(*op)++ = (u8)len;
307	} else
308		*token += len;
309
310	/* Prepare next loop */
311	*ip += ml;
312	*anchor = *ip;
313
314	return 0;
315}
316
/*
 * Compress isize bytes from source into dest using the match finder state
 * in ctx, and return the number of bytes written to dest.
 *
 * The main loop keeps up to three overlapping match candidates, in
 * ascending start order: (ip, ref, ml), (start2, ref2, ml2) and
 * (start3, ref3, ml3). A sequence is only emitted once the search for a
 * wider overlapping match has been resolved. Input bytes after the last
 * emitted match are written as one final run of literals.
 *
 * NOTE(review): nothing in this function bounds the writes to dest; the
 * caller presumably supplies a worst-case sized buffer - confirm.
 */
static int lz4_compresshcctx(struct lz4hc_data *ctx,
		const char *source,
		char *dest,
		int isize)
{
	const u8 *ip = (const u8 *)source;
	/* first input byte not yet written to the output */
	const u8 *anchor = ip;
	const u8 *const iend = ip + isize;
	/* the main loop only looks for matches at positions below mflimit */
	const u8 *const mflimit = iend - MFLIMIT;
	/* upper bound handed to the match finders for match extension */
	const u8 *const matchlimit = (iend - LASTLITERALS);

	u8 *op = (u8 *)dest;

	int ml, ml2, ml3, ml0;
	const u8 *ref = NULL;
	const u8 *start2 = NULL;
	const u8 *ref2 = NULL;
	const u8 *start3 = NULL;
	const u8 *ref3 = NULL;
	/* saved copy of an earlier candidate that _search2 may restore */
	const u8 *start0;
	const u8 *ref0;
	int lastrun;

	/*
	 * Match search starts at the second input byte; anchor still points
	 * at the first one, so it goes out as a literal.
	 */
	ip++;

	/* Main Loop */
	while (ip < mflimit) {
		ml = lz4hc_insertandfindbestmatch(ctx, ip, matchlimit, (&ref));
		if (!ml) {
			/* no match here: the byte becomes part of the literals */
			ip++;
			continue;
		}

		/* saved, in case we would skip too much */
		start0 = ip;
		ref0 = ref;
		ml0 = ml;
_search2:
		/*
		 * Look for a wider match overlapping the current one; the
		 * search is skipped when the current match already reaches
		 * mflimit.
		 */
		if (ip+ml < mflimit)
			ml2 = lz4hc_insertandgetwidermatch(ctx, ip + ml - 2,
				ip + 1, matchlimit, ml, &ref2, &start2);
		else
			ml2 = ml;
		/* No better match */
		if (ml2 == ml) {
			lz4_encodesequence(&ip, &op, &anchor, ml, ref);
			continue;
		}

		if (start0 < ip) {
			/* empirical */
			if (start2 < ip + ml0) {
				ip = start0;
				ref = ref0;
				ml = ml0;
			}
		}
		/*
		 * Here, start0==ip
		 * First Match too small : removed
		 */
		if ((start2 - ip) < 3) {
			/* the second match replaces the first one */
			ml = ml2;
			ip = start2;
			ref = ref2;
			goto _search2;
		}

_search3:
		/*
		 * Currently we have :
		 * ml2 > ml1, and
		 * ip1+3 <= ip2 (usually < ip1+ml1)
		 */
		if ((start2 - ip) < OPTIMAL_ML) {
			int correction;
			int new_ml = ml;
			if (new_ml > OPTIMAL_ML)
				new_ml = OPTIMAL_ML;
			/* keep at least MINMATCH bytes for the second match */
			if (ip + new_ml > start2 + ml2 - MINMATCH)
				new_ml = (int)(start2 - ip) + ml2 - MINMATCH;
			correction = new_ml - (int)(start2 - ip);
			if (correction > 0) {
				/* shift the start of match 2 forward */
				start2 += correction;
				ref2 += correction;
				ml2 -= correction;
			}
		}
		/*
		 * Now, we have start2 = ip+new_ml,
		 * with new_ml=min(ml, OPTIMAL_ML=18)
		 */
		if (start2 + ml2 < mflimit)
			ml3 = lz4hc_insertandgetwidermatch(ctx,
				start2 + ml2 - 3, start2, matchlimit,
				ml2, &ref3, &start3);
		else
			ml3 = ml2;

		/* No better match : 2 sequences to encode */
		if (ml3 == ml2) {
			/* ip & ref are known; Now for ml */
			if (start2 < ip+ml)
				ml = (int)(start2 - ip);

			/* Now, encode 2 sequences */
			lz4_encodesequence(&ip, &op, &anchor, ml, ref);
			ip = start2;
			lz4_encodesequence(&ip, &op, &anchor, ml2, ref2);
			continue;
		}

		/* Not enough space for match 2 : remove it */
		if (start3 < ip + ml + 3) {
			/*
			 * can write Seq1 immediately ==> Seq2 is removed,
			 * so Seq3 becomes Seq1
			 */
			if (start3 >= (ip + ml)) {
				if (start2 < ip + ml) {
					/* trim the part of match 2 covered by match 1 */
					int correction =
						(int)(ip + ml - start2);
					start2 += correction;
					ref2 += correction;
					ml2 -= correction;
					if (ml2 < MINMATCH) {
						start2 = start3;
						ref2 = ref3;
						ml2 = ml3;
					}
				}

				lz4_encodesequence(&ip, &op, &anchor, ml, ref);
				ip  = start3;
				ref = ref3;
				ml  = ml3;

				/* what is left of match 2 becomes the saved candidate */
				start0 = start2;
				ref0 = ref2;
				ml0 = ml2;
				goto _search2;
			}

			/* match 3 overlaps match 1: it takes the place of match 2 */
			start2 = start3;
			ref2 = ref3;
			ml2 = ml3;
			goto _search3;
		}

		/*
		 * OK, now we have 3 ascending matches; let's write at least
		 * the first one ip & ref are known; Now for ml
		 */
		if (start2 < ip + ml) {
			if ((start2 - ip) < (int)ML_MASK) {
				int correction;
				if (ml > OPTIMAL_ML)
					ml = OPTIMAL_ML;
				if (ip + ml > start2 + ml2 - MINMATCH)
					ml = (int)(start2 - ip) + ml2
						- MINMATCH;
				correction = ml - (int)(start2 - ip);
				if (correction > 0) {
					start2 += correction;
					ref2 += correction;
					ml2 -= correction;
				}
			} else
				ml = (int)(start2 - ip);
		}
		lz4_encodesequence(&ip, &op, &anchor, ml, ref);

		/* shift the candidates down: 2 becomes 1, 3 becomes 2 */
		ip = start2;
		ref = ref2;
		ml = ml2;

		start2 = start3;
		ref2 = ref3;
		ml2 = ml3;

		goto _search3;
	}

	/* Encode Last Literals */
	lastrun = (int)(iend - anchor);
	if (lastrun >= (int)RUN_MASK) {
		/* length does not fit in the token: add extra length bytes */
		*op++ = (RUN_MASK << ML_BITS);
		lastrun -= RUN_MASK;
		for (; lastrun > 254 ; lastrun -= 255)
			*op++ = 255;
		*op++ = (u8) lastrun;
	} else
		*op++ = (lastrun << ML_BITS);
	memcpy(op, anchor, iend - anchor);
	op += iend - anchor;
	/* End */
	return (int) (((char *)op) - dest);
}
515
516int lz4hc_compress(const unsigned char *src, size_t src_len,
517			unsigned char *dst, size_t *dst_len, void *wrkmem)
518{
519	int ret = -1;
520	int out_len = 0;
521
522	struct lz4hc_data *hc4 = (struct lz4hc_data *)wrkmem;
523	lz4hc_init(hc4, (const u8 *)src);
524	out_len = lz4_compresshcctx((struct lz4hc_data *)hc4, (const u8 *)src,
525		(char *)dst, (int)src_len);
526
527	if (out_len < 0)
528		goto exit;
529
530	*dst_len = out_len;
531	return 0;
532
533exit:
534	return ret;
535}
536EXPORT_SYMBOL(lz4hc_compress);
537
/* Module metadata: license and one-line description of this module. */
MODULE_LICENSE("Dual BSD/GPL");
MODULE_DESCRIPTION("LZ4HC compressor");
540