/*
    datagen.c - compressible data generator test tool
    Copyright (C) Yann Collet 2012-2016

    GPL v2 License

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

    You can contact the author at :
    - LZ4 source repository : https://github.com/lz4/lz4
    - Public forum : https://groups.google.com/forum/#!forum/lz4c
*/

/**************************************
*  Includes
**************************************/
#include "platform.h"  /* Compiler options, SET_BINARY_MODE */
#include "util.h"      /* U32 */
#include <stdlib.h>    /* malloc */
#include <stdio.h>     /* FILE, fwrite */
#include <string.h>    /* memcpy */


/**************************************
*  Constants
**************************************/
#define KB *(1 <<10)

#define PRIME1   2654435761U
#define PRIME2   2246822519U


456fe48b91832b9f8b5869a8a6d2a86b2b0d99988aYann Collet/************************************** 466fe48b91832b9f8b5869a8a6d2a86b2b0d99988aYann Collet* Local types 476fe48b91832b9f8b5869a8a6d2a86b2b0d99988aYann Collet**************************************/ 486fe48b91832b9f8b5869a8a6d2a86b2b0d99988aYann Collet#define LTLOG 13 496fe48b91832b9f8b5869a8a6d2a86b2b0d99988aYann Collet#define LTSIZE (1<<LTLOG) 506fe48b91832b9f8b5869a8a6d2a86b2b0d99988aYann Collet#define LTMASK (LTSIZE-1) 516fe48b91832b9f8b5869a8a6d2a86b2b0d99988aYann Collettypedef BYTE litDistribTable[LTSIZE]; 526fe48b91832b9f8b5869a8a6d2a86b2b0d99988aYann Collet 536fe48b91832b9f8b5869a8a6d2a86b2b0d99988aYann Collet 546fe48b91832b9f8b5869a8a6d2a86b2b0d99988aYann Collet 551e053a290ab55af5d03eae209e9ee64f555670b7Yann Collet/********************************************************* 566b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet* Local Functions 571e053a290ab55af5d03eae209e9ee64f555670b7Yann Collet*********************************************************/ 58d2be69b144d6c5fd9a3dcbc4133e93e710cda998Yann Collet#define MIN(a,b) ( (a) < (b) ? 
(a) :(b) ) 596b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet#define RDG_rotl32(x,r) ((x << r) | (x >> (32 - r))) 606b0c39b839b8343da195252a8c46e6d93138f3b8Yann Colletstatic unsigned int RDG_rand(U32* src) 611e053a290ab55af5d03eae209e9ee64f555670b7Yann Collet{ 621e053a290ab55af5d03eae209e9ee64f555670b7Yann Collet U32 rand32 = *src; 631e053a290ab55af5d03eae209e9ee64f555670b7Yann Collet rand32 *= PRIME1; 646b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet rand32 ^= PRIME2; 656b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet rand32 = RDG_rotl32(rand32, 13); 661e053a290ab55af5d03eae209e9ee64f555670b7Yann Collet *src = rand32; 671e053a290ab55af5d03eae209e9ee64f555670b7Yann Collet return rand32; 681e053a290ab55af5d03eae209e9ee64f555670b7Yann Collet} 691e053a290ab55af5d03eae209e9ee64f555670b7Yann Collet 701e053a290ab55af5d03eae209e9ee64f555670b7Yann Collet 716fe48b91832b9f8b5869a8a6d2a86b2b0d99988aYann Colletstatic void RDG_fillLiteralDistrib(litDistribTable lt, double ld) 721e053a290ab55af5d03eae209e9ee64f555670b7Yann Collet{ 73d2be69b144d6c5fd9a3dcbc4133e93e710cda998Yann Collet BYTE const firstChar = ld <= 0.0 ? 0 : '('; 74d2be69b144d6c5fd9a3dcbc4133e93e710cda998Yann Collet BYTE const lastChar = ld <= 0.0 ? 255 : '}'; 75d2be69b144d6c5fd9a3dcbc4133e93e710cda998Yann Collet BYTE character = ld <= 0.0 ? 
0 : '0'; 76d2be69b144d6c5fd9a3dcbc4133e93e710cda998Yann Collet U32 u = 0; 77d2be69b144d6c5fd9a3dcbc4133e93e710cda998Yann Collet 78d2be69b144d6c5fd9a3dcbc4133e93e710cda998Yann Collet while (u<LTSIZE) { 79d2be69b144d6c5fd9a3dcbc4133e93e710cda998Yann Collet U32 const weight = (U32)((double)(LTSIZE - u) * ld) + 1; 80d2be69b144d6c5fd9a3dcbc4133e93e710cda998Yann Collet U32 const end = MIN(u+weight, LTSIZE); 81d2be69b144d6c5fd9a3dcbc4133e93e710cda998Yann Collet while (u < end) lt[u++] = character; 826b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet character++; 836b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet if (character > lastChar) character = firstChar; 846b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet } 856b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet} 866b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet 876fe48b91832b9f8b5869a8a6d2a86b2b0d99988aYann Collet 886fe48b91832b9f8b5869a8a6d2a86b2b0d99988aYann Colletstatic BYTE RDG_genChar(U32* seed, const litDistribTable lt) 896b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet{ 906b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet U32 id = RDG_rand(seed) & LTMASK; 916fe48b91832b9f8b5869a8a6d2a86b2b0d99988aYann Collet return (lt[id]); 926b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet} 936b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet 946fe48b91832b9f8b5869a8a6d2a86b2b0d99988aYann Collet 956b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet#define RDG_DICTSIZE (32 KB) 966b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet#define RDG_RAND15BITS ((RDG_rand(seed) >> 3) & 32767) 976b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet#define RDG_RANDLENGTH ( ((RDG_rand(seed) >> 7) & 7) ? 
(RDG_rand(seed) & 15) : (RDG_rand(seed) & 511) + 15) 986fe48b91832b9f8b5869a8a6d2a86b2b0d99988aYann Colletvoid RDG_genBlock(void* buffer, size_t buffSize, size_t prefixSize, double matchProba, litDistribTable lt, unsigned* seedPtr) 996b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet{ 1006b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet BYTE* buffPtr = (BYTE*)buffer; 1016b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet const U32 matchProba32 = (U32)(32768 * matchProba); 1026b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet size_t pos = prefixSize; 1036b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet U32* seed = seedPtr; 1046b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet 1056b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet /* special case */ 1066b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet while (matchProba >= 1.0) 1076b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet { 1086b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet size_t size0 = RDG_rand(seed) & 3; 10945a357fd1704e9c6d2d8037277bda62e8c86308eYann Collet size0 = (size_t)1 << (16 + size0 * 2); 1106b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet size0 += RDG_rand(seed) & (size0-1); /* because size0 is power of 2*/ 1116b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet if (buffSize < pos + size0) 1126b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet { 1136b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet memset(buffPtr+pos, 0, buffSize-pos); 1146b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet return; 1156b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet } 1166b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet memset(buffPtr+pos, 0, size0); 1176b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet pos += size0; 1186fe48b91832b9f8b5869a8a6d2a86b2b0d99988aYann Collet buffPtr[pos-1] = RDG_genChar(seed, lt); 1196b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet } 1206b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet 1216b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet /* init */ 
1226fe48b91832b9f8b5869a8a6d2a86b2b0d99988aYann Collet if (pos==0) buffPtr[0] = RDG_genChar(seed, lt), pos=1; 1236b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet 1246b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet /* Generate compressible data */ 1256b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet while (pos < buffSize) 1266b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet { 1276b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet /* Select : Literal (char) or Match (within 32K) */ 1286b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet if (RDG_RAND15BITS < matchProba32) 129661e4ddb78ce89d5de3ad0824e6abb161044aa06Yann Collet { 1306b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet /* Copy (within 32K) */ 13145a357fd1704e9c6d2d8037277bda62e8c86308eYann Collet size_t match; 13245a357fd1704e9c6d2d8037277bda62e8c86308eYann Collet size_t d; 1336b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet int length = RDG_RANDLENGTH + 4; 1346b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet U32 offset = RDG_RAND15BITS + 1; 13545a357fd1704e9c6d2d8037277bda62e8c86308eYann Collet if (offset > pos) offset = (U32)pos; 1366b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet match = pos - offset; 137661e4ddb78ce89d5de3ad0824e6abb161044aa06Yann Collet d = pos + length; 1386b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet if (d > buffSize) d = buffSize; 1396b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet while (pos < d) buffPtr[pos++] = buffPtr[match++]; 140661e4ddb78ce89d5de3ad0824e6abb161044aa06Yann Collet } 141661e4ddb78ce89d5de3ad0824e6abb161044aa06Yann Collet else 142661e4ddb78ce89d5de3ad0824e6abb161044aa06Yann Collet { 1436b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet /* Literal (noise) */ 1446b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet size_t d; 1456b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet size_t length = RDG_RANDLENGTH; 1466b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet d = pos + length; 1476b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet if 
(d > buffSize) d = buffSize; 1486fe48b91832b9f8b5869a8a6d2a86b2b0d99988aYann Collet while (pos < d) buffPtr[pos++] = RDG_genChar(seed, lt); 1491e053a290ab55af5d03eae209e9ee64f555670b7Yann Collet } 1501e053a290ab55af5d03eae209e9ee64f555670b7Yann Collet } 1511e053a290ab55af5d03eae209e9ee64f555670b7Yann Collet} 1521e053a290ab55af5d03eae209e9ee64f555670b7Yann Collet 1531e053a290ab55af5d03eae209e9ee64f555670b7Yann Collet 1546b0c39b839b8343da195252a8c46e6d93138f3b8Yann Colletvoid RDG_genBuffer(void* buffer, size_t size, double matchProba, double litProba, unsigned seed) 1551e053a290ab55af5d03eae209e9ee64f555670b7Yann Collet{ 1566fe48b91832b9f8b5869a8a6d2a86b2b0d99988aYann Collet litDistribTable lt; 1576b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet if (litProba==0.0) litProba = matchProba / 4.5; 1586fe48b91832b9f8b5869a8a6d2a86b2b0d99988aYann Collet RDG_fillLiteralDistrib(lt, litProba); 1596fe48b91832b9f8b5869a8a6d2a86b2b0d99988aYann Collet RDG_genBlock(buffer, size, 0, matchProba, lt, &seed); 1601e053a290ab55af5d03eae209e9ee64f555670b7Yann Collet} 1611e053a290ab55af5d03eae209e9ee64f555670b7Yann Collet 1621e053a290ab55af5d03eae209e9ee64f555670b7Yann Collet 1636b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet#define RDG_BLOCKSIZE (128 KB) 1646b0c39b839b8343da195252a8c46e6d93138f3b8Yann Colletvoid RDG_genOut(unsigned long long size, double matchProba, double litProba, unsigned seed) 1651e053a290ab55af5d03eae209e9ee64f555670b7Yann Collet{ 1666b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet BYTE buff[RDG_DICTSIZE + RDG_BLOCKSIZE]; 1676b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet U64 total = 0; 1686b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet size_t genBlockSize = RDG_BLOCKSIZE; 1696fe48b91832b9f8b5869a8a6d2a86b2b0d99988aYann Collet litDistribTable lt; 1701e053a290ab55af5d03eae209e9ee64f555670b7Yann Collet 1716b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet /* init */ 1726b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet if (litProba==0.0) litProba = 
matchProba / 4.5; 1736fe48b91832b9f8b5869a8a6d2a86b2b0d99988aYann Collet RDG_fillLiteralDistrib(lt, litProba); 1746b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet SET_BINARY_MODE(stdout); 1751e053a290ab55af5d03eae209e9ee64f555670b7Yann Collet 1766b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet /* Generate dict */ 1776fe48b91832b9f8b5869a8a6d2a86b2b0d99988aYann Collet RDG_genBlock(buff, RDG_DICTSIZE, 0, matchProba, lt, &seed); 1781e053a290ab55af5d03eae209e9ee64f555670b7Yann Collet 1796b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet /* Generate compressible data */ 1806b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet while (total < size) 1816b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet { 1826fe48b91832b9f8b5869a8a6d2a86b2b0d99988aYann Collet RDG_genBlock(buff, RDG_DICTSIZE+RDG_BLOCKSIZE, RDG_DICTSIZE, matchProba, lt, &seed); 1836b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet if (size-total < RDG_BLOCKSIZE) genBlockSize = (size_t)(size-total); 1846b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet total += genBlockSize; 1856b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet fwrite(buff, 1, genBlockSize, stdout); 1866b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet /* update dict */ 1876b0c39b839b8343da195252a8c46e6d93138f3b8Yann Collet memcpy(buff, buff + RDG_BLOCKSIZE, RDG_DICTSIZE); 1881e053a290ab55af5d03eae209e9ee64f555670b7Yann Collet } 1891e053a290ab55af5d03eae209e9ee64f555670b7Yann Collet} 190