/* datagen.c revision 6b0c39b839b8343da195252a8c46e6d93138f3b8 */
/*
    datagen.c - compressible data generator test tool
    Copyright (C) Yann Collet 2012-2015

    GPL v2 License

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

    You can contact the author at :
   - ZSTD source repository : https://github.com/Cyan4973/zstd
   - Public forum : https://groups.google.com/forum/#!forum/lz4c
*/

/**************************************
*  Includes
**************************************/
#include <stdlib.h>    /* size_t */
#include <stdio.h>     /* FILE, fwrite */
#include <string.h>    /* memcpy, memset */


/**************************************
*  Basic Types
**************************************/
#if defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)   /* C99 */
# include <stdint.h>
  typedef  uint8_t BYTE;
  typedef uint16_t U16;
  typedef uint32_t U32;
  typedef  int32_t S32;
  typedef uint64_t U64;
#else
  typedef unsigned char       BYTE;
  typedef unsigned short      U16;
  typedef unsigned int        U32;
  typedef   signed int        S32;
  typedef unsigned long long  U64;
#endif


/**************************************
*  OS-specific Includes
**************************************/
#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32) || defined(__CYGWIN__)
#  include <fcntl.h>   /* _O_BINARY */
#  include <io.h>      /* _setmode, _isatty */
#  define SET_BINARY_MODE(file) _setmode(_fileno(file), _O_BINARY)
#else
#  define SET_BINARY_MODE(file)
#endif


/**************************************
*  Constants
**************************************/
#define KB *(1 <<10)

#define PRIME1   2654435761U
#define PRIME2   2246822519U


/*********************************************************
*  Local Functions
*********************************************************/
/* Arguments are fully parenthesized so that expression arguments expand safely. */
#define RDG_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))

/* RDG_rand() :
   Advances the 32-bit generator state stored at *src (multiply, xor, rotate)
   and returns the new state. */
static unsigned int RDG_rand(U32* src)
{
    U32 rand32 = *src;
    rand32 *= PRIME1;
    rand32 ^= PRIME2;
    rand32  = RDG_rotl32(rand32, 13);
    *src = rand32;
    return rand32;
}


#define LTSIZE 8192
#define LTMASK (LTSIZE-1)

/* RDG_fillLiteralDistrib() :
   Fills the LTSIZE-byte table `lt` with runs of successive characters.
   Each run covers about `ld` times the space still free in the table
   (at least 1 entry), so a larger `ld` gives a more skewed distribution.
   ld == 0.0 selects the full 0..255 byte range, one entry per run;
   otherwise characters start at '0' and cycle within '(' .. '}'.
   The table is owned by the caller, so no allocation can fail here. */
static void RDG_fillLiteralDistrib(BYTE* lt, double ld)
{
    U32 i = 0;
    BYTE character = '0';
    BYTE firstChar = '(';
    BYTE lastChar  = '}';

    if (ld==0.0)
    {
        character = 0;
        firstChar = 0;
        lastChar  = 255;
    }
    while (i<LTSIZE)
    {
        U32 const remaining = LTSIZE - i;
        double const share = (double)remaining * ld;
        U32 weight;
        U32 end;
        /* Range-check in floating point : converting an out-of-range double
           to an integer is undefined. In-range values give the same result
           as (U32)share + 1 clamped to the remaining space. */
        if (share >= (double)remaining) weight = remaining;
        else if (share >= 1.0)          weight = (U32)share + 1;
        else                            weight = 1;
        end = i + weight;
        while (i < end) lt[i++] = character;
        character++;
        if (character > lastChar) character = firstChar;
    }
}

/* RDG_genChar() :
   Draws one byte from the literal table `ltctx`, consuming one random value. */
static BYTE RDG_genChar(U32* seed, const void* ltctx)
{
    const BYTE* lt = (const BYTE*)ltctx;
    U32 id = RDG_rand(seed) & LTMASK;
    return lt[id];
}

#define RDG_DICTSIZE  (32 KB)

/* RDG_rand15Bits() :
   Returns a 15-bit random value (0..32767), consuming one random value. */
static U32 RDG_rand15Bits(U32* seedPtr)
{
    return (RDG_rand(seedPtr) >> 3) & 32767;
}

/* RDG_randLength() :
   Returns a random run length, consuming two random values :
   0..15 when the selector bits are non-zero, 15..526 otherwise. */
static U32 RDG_randLength(U32* seedPtr)
{
    if ((RDG_rand(seedPtr) >> 7) & 7)
        return RDG_rand(seedPtr) & 15;
    return (RDG_rand(seedPtr) & 511) + 15;
}

/* RDG_genBlock() :
   Fills buffer[prefixSize .. buffSize) with generated data.
   buffer[0 .. prefixSize) is left untouched and serves as history for matches.
   @buffer     : destination, at least buffSize bytes
   @buffSize   : total size of buffer, prefix included
   @prefixSize : number of leading bytes already valid ; must be <= buffSize
   @matchProba : probability of emitting a match rather than literals ;
                 >= 1.0 selects the special "long runs of zeroes" mode
   @litTable   : LTSIZE-byte literal distribution table
   @seedPtr    : generator state, updated in place */
void RDG_genBlock(void* buffer, size_t buffSize, size_t prefixSize, double matchProba, void* litTable, unsigned* seedPtr)
{
    BYTE* buffPtr = (BYTE*)buffer;
    const U32 matchProba32 = (U32)(32768 * matchProba);
    size_t pos = prefixSize;
    const void* ldctx = litTable;
    U32* seed = seedPtr;

    /* special case : runs of zeroes, each terminated by a single literal */
    while (matchProba >= 1.0)
    {
        size_t size0 = RDG_rand(seed) & 3;
        size0  = (size_t)1 << (16 + size0 * 2);
        size0 += RDG_rand(seed) & (size0-1);   /* because size0 is power of 2*/
        if (buffSize < pos + size0)
        {
            memset(buffPtr+pos, 0, buffSize-pos);
            return;
        }
        memset(buffPtr+pos, 0, size0);
        pos += size0;
        buffPtr[pos-1] = RDG_genChar(seed, ldctx);
    }

    /* init : a match needs at least one byte of history */
    if (pos==0)
    {
        if (buffSize==0) return;   /* empty buffer : nothing may be written */
        buffPtr[0] = RDG_genChar(seed, ldctx);
        pos = 1;
    }

    /* Generate compressible data */
    while (pos < buffSize)
    {
        /* Select : Literal (char) or Match (within 32K) */
        if (RDG_rand15Bits(seed) < matchProba32)
        {
            /* Copy (within 32K) */
            /* positions are kept in size_t so they cannot truncate on large buffers */
            size_t const length = (size_t)RDG_randLength(seed) + 4;
            U32 offset = RDG_rand15Bits(seed) + 1;
            size_t match;
            size_t d;
            if (offset > pos) offset = (U32)pos;
            match = pos - offset;
            d = pos + length;
            if (d > buffSize) d = buffSize;
            /* byte-by-byte on purpose : source and destination may overlap */
            while (pos < d) buffPtr[pos++] = buffPtr[match++];
        }
        else
        {
            /* Literal (noise) */
            size_t const length = RDG_randLength(seed);
            size_t d = pos + length;
            if (d > buffSize) d = buffSize;
            while (pos < d) buffPtr[pos++] = RDG_genChar(seed, ldctx);
        }
    }
}


/* RDG_genBuffer() :
   Fills `buffer` with `size` bytes of generated data.
   litProba == 0.0 selects the default literal skew, matchProba / 4.5.
   The same (size, matchProba, litProba, seed) always gives the same content. */
void RDG_genBuffer(void* buffer, size_t size, double matchProba, double litProba, unsigned seed)
{
    BYTE ldt[LTSIZE];
    if (litProba==0.0) litProba = matchProba / 4.5;
    RDG_fillLiteralDistrib(ldt, litProba);
    RDG_genBlock(buffer, size, 0, matchProba, ldt, &seed);
}


#define RDG_BLOCKSIZE (128 KB)

/* RDG_genOut() :
   Writes `size` bytes of generated data to stdout (switched to binary mode).
   Data is produced block by block ; the last RDG_DICTSIZE bytes of the working
   buffer are carried over as history for the next block.
   Generation stops early if stdout reports a short write. */
void RDG_genOut(unsigned long long size, double matchProba, double litProba, unsigned seed)
{
    BYTE buff[RDG_DICTSIZE + RDG_BLOCKSIZE];
    BYTE ldt[LTSIZE];
    U64 total = 0;
    size_t genBlockSize = RDG_BLOCKSIZE;

    /* init */
    if (litProba==0.0) litProba = matchProba / 4.5;
    RDG_fillLiteralDistrib(ldt, litProba);
    SET_BINARY_MODE(stdout);

    /* Generate dict */
    RDG_genBlock(buff, RDG_DICTSIZE, 0, matchProba, ldt, &seed);

    /* Generate compressible data */
    while (total < size)
    {
        RDG_genBlock(buff, RDG_DICTSIZE+RDG_BLOCKSIZE, RDG_DICTSIZE, matchProba, ldt, &seed);
        if (size-total < RDG_BLOCKSIZE) genBlockSize = (size_t)(size-total);
        total += genBlockSize;
        /* a short write means stdout is unusable : stop generating */
        if (fwrite(buff, 1, genBlockSize, stdout) != genBlockSize) break;
        /* update dict */
        memcpy(buff, buff + RDG_BLOCKSIZE, RDG_DICTSIZE);
    }
}