1/*
2    datagen.c - compressible data generator test tool
3    Copyright (C) Yann Collet 2012-2015
4
5    GPL v2 License
6
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 2 of the License, or
10    (at your option) any later version.
11
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16
17    You should have received a copy of the GNU General Public License along
18    with this program; if not, write to the Free Software Foundation, Inc.,
19    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20
21    You can contact the author at :
22   - LZ4 source repository : http://code.google.com/p/lz4
23   - LZ4 source mirror : https://github.com/Cyan4973/lz4
24   - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
25*/
26
27/**************************************
28 Remove Visual warning messages
29**************************************/
30#define _CRT_SECURE_NO_WARNINGS   // fgets
31
32
33/**************************************
34 Includes
35**************************************/
36#include <stdio.h>      // fgets, sscanf
37#include <string.h>     // strcmp
38
39
40/**************************************
41   Basic Types
42**************************************/
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
   /* C99 or later : exact-width integer types are taken from <stdint.h> */
#  include <stdint.h>
   typedef uint8_t   BYTE;
   typedef uint16_t  U16;
   typedef uint32_t  U32;
   typedef int32_t   S32;
   typedef uint64_t  U64;
#else
   /* Older compilers : fall back on native types
      (assumes char/short/int/long long are 8/16/32/64 bits wide) */
   typedef unsigned char        BYTE;
   typedef unsigned short       U16;
   typedef unsigned int         U32;
   typedef signed int           S32;
   typedef unsigned long long   U64;
#endif
57
58
59/**************************************
60 Constants
61**************************************/
/* Version string shown in verbose mode; can be overridden from the build command line */
#ifndef LZ4_VERSION
#  define LZ4_VERSION "r125"
#endif

/* Postfix size multipliers : written after a number, e.g. `64 KB` expands to `64 *(1 << 10)` */
#define KB  *(1  << 10)
#define MB  *(1  << 20)
#define GB  *(1U << 30)

/* Default generation parameters (compressibility is expressed in percent) */
#define CDG_COMPRESSIBILITY_DEFAULT 50
#define CDG_SEED_DEFAULT 0
#define CDG_SIZE_DEFAULT (64 KB)

/* Multiplier (PRIME1) and increment (PRIME2) applied by CDG_rand() */
#define PRIME1   2654435761U
#define PRIME2   2246822519U
75
76
77/**************************************
78  Macros
79**************************************/
/* DISPLAY : printf-style message sent to stderr */
#define DISPLAY(...)         fprintf(stderr, __VA_ARGS__)
/* DISPLAYLEVEL : same as DISPLAY, but only when `displayLevel` (looked up at the point of use) is >= l.
   Wrapped in do { } while (0) so that the macro behaves as one statement :
   it can be followed by ';' and used as the body of an if / else without capturing the else. */
#define DISPLAYLEVEL(l, ...) do { if (displayLevel >= (l)) { DISPLAY(__VA_ARGS__); } } while (0)
82
83
84/**************************************
85  Local Parameters
86**************************************/
/* Program name, taken from argv[0] by main() and printed by CDG_usage() */
static char*        programName;
/* Verbosity threshold tested by DISPLAYLEVEL (default 2; -v raises it to 4) */
static unsigned int displayLevel = 2;
/* Set to 1 when --no-prompt is present on the command line */
static unsigned int no_prompt = 0;
90
91
92/*********************************************************
93  functions
94*********************************************************/
95
/* CDG_rotl32 : rotate x left by r bits; x is expected to be a 32-bit unsigned value and 0 < r < 32.
   Both arguments are parenthesized so that compound expressions (a | b, n + 1, ...) expand correctly.
   Note : each argument is evaluated twice, so avoid arguments with side effects. */
#define CDG_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
97static unsigned int CDG_rand(U32* src)
98{
99    U32 rand32 = *src;
100    rand32 *= PRIME1;
101    rand32 += PRIME2;
102    rand32  = CDG_rotl32(rand32, 13);
103    *src = rand32;
104    return rand32;
105}
106
107
108#define CDG_RAND15BITS  ((CDG_rand(seed) >> 3) & 32767)
109#define CDG_RANDLENGTH  ( ((CDG_rand(seed) >> 7) & 3) ? (CDG_rand(seed) % 14) : (CDG_rand(seed) & 511) + 15)
110#define CDG_RANDCHAR    (((CDG_rand(seed) >> 9) & 63) + '0')
111static void CDG_generate(U64 size, U32* seed, double proba)
112{
113    BYTE fullbuff[32 KB + 128 KB + 1];
114    BYTE* buff = fullbuff + 32 KB;
115    U64 total=0;
116    U32 P32 = (U32)(32768 * proba);
117    U32 pos=1;
118    U32 genBlockSize = 128 KB;
119
120    // Build initial prefix
121    fullbuff[0] = CDG_RANDCHAR;
122    while (pos<32 KB)
123    {
124        // Select : Literal (char) or Match (within 32K)
125        if (CDG_RAND15BITS < P32)
126        {
127            // Copy (within 64K)
128            U32 d;
129            int ref;
130            int length = CDG_RANDLENGTH + 4;
131            U32 offset = CDG_RAND15BITS + 1;
132            if (offset > pos) offset = pos;
133            ref = pos - offset;
134            d = pos + length;
135            while (pos < d) fullbuff[pos++] = fullbuff[ref++];
136        }
137        else
138        {
139            // Literal (noise)
140            U32 d = pos + CDG_RANDLENGTH;
141            while (pos < d) fullbuff[pos++] = CDG_RANDCHAR;
142        }
143    }
144
145    // Generate compressible data
146    pos = 0;
147    while (total < size)
148    {
149        if (size-total < 128 KB) genBlockSize = (U32)(size-total);
150        total += genBlockSize;
151        buff[genBlockSize] = 0;
152        pos = 0;
153        while (pos<genBlockSize)
154        {
155            // Select : Literal (char) or Match (within 32K)
156            if (CDG_RAND15BITS < P32)
157            {
158                // Copy (within 64K)
159                int ref;
160                U32 d;
161                int length = CDG_RANDLENGTH + 4;
162                U32 offset = CDG_RAND15BITS + 1;
163                if (pos + length > genBlockSize ) length = genBlockSize - pos;
164                ref = pos - offset;
165                d = pos + length;
166                while (pos < d) buff[pos++] = buff[ref++];
167            }
168            else
169            {
170                // Literal (noise)
171                U32 d;
172                int length = CDG_RANDLENGTH;
173                if (pos + length > genBlockSize) length = genBlockSize - pos;
174                d = pos + length;
175                while (pos < d) buff[pos++] = CDG_RANDCHAR;
176            }
177        }
178        // output datagen
179        pos=0;
180        for (;pos+512<=genBlockSize;pos+=512)
181            printf("%512.512s", buff+pos);
182        for (;pos<genBlockSize;pos++) printf("%c", buff[pos]);
183        // Regenerate prefix
184        memcpy(fullbuff, buff + 96 KB, 32 KB);
185    }
186}
187
188
189int CDG_usage(void)
190{
191    DISPLAY( "Compressible data generator\n");
192    DISPLAY( "Usage :\n");
193    DISPLAY( "      %s [size] [args]\n", programName);
194    DISPLAY( "\n");
195    DISPLAY( "Arguments :\n");
196    DISPLAY( " -g#    : generate # data (default:%i)\n", CDG_SIZE_DEFAULT);
197    DISPLAY( " -s#    : Select seed (default:%i)\n", CDG_SEED_DEFAULT);
198    DISPLAY( " -p#    : Select compressibility in %% (default:%i%%)\n", CDG_COMPRESSIBILITY_DEFAULT);
199    DISPLAY( " -h     : display help and exit\n");
200    return 0;
201}
202
203
204int main(int argc, char** argv)
205{
206    int argNb;
207    int proba = CDG_COMPRESSIBILITY_DEFAULT;
208    U64 size = CDG_SIZE_DEFAULT;
209    U32 seed = CDG_SEED_DEFAULT;
210
211    // Check command line
212    programName = argv[0];
213    for(argNb=1; argNb<argc; argNb++)
214    {
215        char* argument = argv[argNb];
216
217        if(!argument) continue;   // Protection if argument empty
218
219        // Decode command (note : aggregated commands are allowed)
220        if (*argument=='-')
221        {
222            if (!strcmp(argument, "--no-prompt")) { no_prompt=1; continue; }
223
224            argument++;
225            while (*argument!=0)
226            {
227                switch(*argument)
228                {
229                case 'h':
230                    return CDG_usage();
231                case 'g':
232                    argument++;
233                    size=0;
234                    while ((*argument>='0') && (*argument<='9'))
235                    {
236                        size *= 10;
237                        size += *argument - '0';
238                        argument++;
239                    }
240                    if (*argument=='K') { size <<= 10; argument++; }
241                    if (*argument=='M') { size <<= 20; argument++; }
242                    if (*argument=='G') { size <<= 30; argument++; }
243                    if (*argument=='B') { argument++; }
244                    break;
245                case 's':
246                    argument++;
247                    seed=0;
248                    while ((*argument>='0') && (*argument<='9'))
249                    {
250                        seed *= 10;
251                        seed += *argument - '0';
252                        argument++;
253                    }
254                    break;
255                case 'p':
256                    argument++;
257                    proba=0;
258                    while ((*argument>='0') && (*argument<='9'))
259                    {
260                        proba *= 10;
261                        proba += *argument - '0';
262                        argument++;
263                    }
264                    if (proba<0) proba=0;
265                    if (proba>100) proba=100;
266                    break;
267                case 'v':
268                    displayLevel = 4;
269                    argument++;
270                    break;
271                default: ;
272                }
273            }
274
275        }
276    }
277
278    // Get Seed
279    DISPLAYLEVEL(4, "Data Generator %s \n", LZ4_VERSION);
280    DISPLAYLEVEL(3, "Seed = %u \n", seed);
281    if (proba!=CDG_COMPRESSIBILITY_DEFAULT) DISPLAYLEVEL(3, "Compressibility : %i%%\n", proba);
282
283    CDG_generate(size, &seed, ((double)proba) / 100);
284
285    return 0;
286}
287