1/*
2    bench.c - Demo program to benchmark open-source compression algorithms
3    Copyright (C) Yann Collet 2012-2016
4
5    GPL v2 License
6
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 2 of the License, or
10    (at your option) any later version.
11
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16
17    You should have received a copy of the GNU General Public License along
18    with this program; if not, write to the Free Software Foundation, Inc.,
19    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20
21    You can contact the author at :
22    - LZ4 homepage : http://www.lz4.org
23    - LZ4 source repository : https://github.com/lz4/lz4
24*/
25
26
27/*-************************************
28*  Compiler options
29**************************************/
30#ifdef _MSC_VER    /* Visual Studio */
31#  pragma warning(disable : 4127)    /* disable: C4127: conditional expression is constant */
32#endif
33
34
35/* *************************************
36*  Includes
37***************************************/
38#include "platform.h"    /* Compiler options */
39#include "util.h"        /* UTIL_GetFileSize, UTIL_sleep */
40#include <stdlib.h>      /* malloc, free */
41#include <string.h>      /* memset */
42#include <stdio.h>       /* fprintf, fopen, ftello */
43#include <time.h>        /* clock_t, clock, CLOCKS_PER_SEC */
44
45#include "datagen.h"     /* RDG_genBuffer */
46#include "xxhash.h"
47
48
49#include "lz4.h"
50#define COMPRESSOR0 LZ4_compress_local
51static int LZ4_compress_local(const char* src, char* dst, int srcSize, int dstSize, int clevel) { (void)clevel; return LZ4_compress_default(src, dst, srcSize, dstSize); }
52#include "lz4hc.h"
53#define COMPRESSOR1 LZ4_compress_HC
54#define DEFAULTCOMPRESSOR COMPRESSOR0
55#define LZ4_isError(errcode) (errcode==0)
56
57
58/* *************************************
59*  Constants
60***************************************/
61#ifndef LZ4_GIT_COMMIT_STRING
62#  define LZ4_GIT_COMMIT_STRING ""
63#else
64#  define LZ4_GIT_COMMIT_STRING LZ4_EXPAND_AND_QUOTE(LZ4_GIT_COMMIT)
65#endif
66
67#define NBSECONDS             3
68#define TIMELOOP_MICROSEC     1*1000000ULL /* 1 second */
69#define ACTIVEPERIOD_MICROSEC 70*1000000ULL /* 70 seconds */
70#define COOLPERIOD_SEC        10
71#define DECOMP_MULT           2 /* test decompression DECOMP_MULT times longer than compression */
72
73#define KB *(1 <<10)
74#define MB *(1 <<20)
75#define GB *(1U<<30)
76
77static const size_t maxMemory = (sizeof(size_t)==4)  ?  (2 GB - 64 MB) : (size_t)(1ULL << ((sizeof(size_t)*8)-31));
78
79static U32 g_compressibilityDefault = 50;
80
81
82/* *************************************
83*  console display
84***************************************/
85#define DISPLAY(...)         fprintf(stderr, __VA_ARGS__)
86#define DISPLAYLEVEL(l, ...) if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); }
87static U32 g_displayLevel = 2;   /* 0 : no display;   1: errors;   2 : + result + interaction + warnings;   3 : + progression;   4 : + information */
88
89#define DISPLAYUPDATE(l, ...) if (g_displayLevel>=l) { \
90            if ((clock() - g_time > refreshRate) || (g_displayLevel>=4)) \
91            { g_time = clock(); DISPLAY(__VA_ARGS__); \
92            if (g_displayLevel>=4) fflush(stdout); } }
93static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
94static clock_t g_time = 0;
95
96
97/* *************************************
98*  Exceptions
99***************************************/
100#ifndef DEBUG
101#  define DEBUG 0
102#endif
103#define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__);
104#define EXM_THROW(error, ...)                                             \
105{                                                                         \
106    DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \
107    DISPLAYLEVEL(1, "Error %i : ", error);                                \
108    DISPLAYLEVEL(1, __VA_ARGS__);                                         \
109    DISPLAYLEVEL(1, "\n");                                                \
110    exit(error);                                                          \
111}
112
113
114/* *************************************
115*  Benchmark Parameters
116***************************************/
117static U32 g_nbSeconds = NBSECONDS;
118static size_t g_blockSize = 0;
119int g_additionalParam = 0;
120
121void BMK_setNotificationLevel(unsigned level) { g_displayLevel=level; }
122
123void BMK_setAdditionalParam(int additionalParam) { g_additionalParam=additionalParam; }
124
125void BMK_SetNbSeconds(unsigned nbSeconds)
126{
127    g_nbSeconds = nbSeconds;
128    DISPLAYLEVEL(3, "- test >= %u seconds per compression / decompression -\n", g_nbSeconds);
129}
130
131void BMK_SetBlockSize(size_t blockSize)
132{
133    g_blockSize = blockSize;
134}
135
136
137/* ********************************************************
138*  Bench functions
139**********************************************************/
140typedef struct {
141    const char* srcPtr;
142    size_t srcSize;
143    char*  cPtr;
144    size_t cRoom;
145    size_t cSize;
146    char*  resPtr;
147    size_t resSize;
148} blockParam_t;
149
150struct compressionParameters
151{
152    int (*compressionFunction)(const char* src, char* dst, int srcSize, int dstSize, int cLevel);
153};
154
155#define MIN(a,b) ((a)<(b) ? (a) : (b))
156#define MAX(a,b) ((a)>(b) ? (a) : (b))
157
158static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
159                        const char* displayName, int cLevel,
160                        const size_t* fileSizes, U32 nbFiles)
161{
162    size_t const blockSize = (g_blockSize>=32 ? g_blockSize : srcSize) + (!srcSize) /* avoid div by 0 */ ;
163    U32 const maxNbBlocks = (U32) ((srcSize + (blockSize-1)) / blockSize) + nbFiles;
164    blockParam_t* const blockTable = (blockParam_t*) malloc(maxNbBlocks * sizeof(blockParam_t));
165    size_t const maxCompressedSize = LZ4_compressBound((int)srcSize) + (maxNbBlocks * 1024);   /* add some room for safety */
166    void* const compressedBuffer = malloc(maxCompressedSize);
167    void* const resultBuffer = malloc(srcSize);
168    U32 nbBlocks;
169    UTIL_time_t ticksPerSecond;
170    struct compressionParameters compP;
171    int cfunctionId;
172
173    /* checks */
174    if (!compressedBuffer || !resultBuffer || !blockTable)
175        EXM_THROW(31, "allocation error : not enough memory");
176
177    /* init */
178    if (strlen(displayName)>17) displayName += strlen(displayName)-17;   /* can only display 17 characters */
179    UTIL_initTimer(&ticksPerSecond);
180
181    /* Init */
182    if (cLevel < LZ4HC_CLEVEL_MIN) cfunctionId = 0; else cfunctionId = 1;
183    switch (cfunctionId)
184    {
185#ifdef COMPRESSOR0
186    case 0 : compP.compressionFunction = COMPRESSOR0; break;
187#endif
188#ifdef COMPRESSOR1
189    case 1 : compP.compressionFunction = COMPRESSOR1; break;
190#endif
191    default : compP.compressionFunction = DEFAULTCOMPRESSOR;
192    }
193
194    /* Init blockTable data */
195    {   const char* srcPtr = (const char*)srcBuffer;
196        char* cPtr = (char*)compressedBuffer;
197        char* resPtr = (char*)resultBuffer;
198        U32 fileNb;
199        for (nbBlocks=0, fileNb=0; fileNb<nbFiles; fileNb++) {
200            size_t remaining = fileSizes[fileNb];
201            U32 const nbBlocksforThisFile = (U32)((remaining + (blockSize-1)) / blockSize);
202            U32 const blockEnd = nbBlocks + nbBlocksforThisFile;
203            for ( ; nbBlocks<blockEnd; nbBlocks++) {
204                size_t const thisBlockSize = MIN(remaining, blockSize);
205                blockTable[nbBlocks].srcPtr = srcPtr;
206                blockTable[nbBlocks].cPtr = cPtr;
207                blockTable[nbBlocks].resPtr = resPtr;
208                blockTable[nbBlocks].srcSize = thisBlockSize;
209                blockTable[nbBlocks].cRoom = LZ4_compressBound((int)thisBlockSize);
210                srcPtr += thisBlockSize;
211                cPtr += blockTable[nbBlocks].cRoom;
212                resPtr += thisBlockSize;
213                remaining -= thisBlockSize;
214    }   }   }
215
216    /* warmimg up memory */
217    RDG_genBuffer(compressedBuffer, maxCompressedSize, 0.10, 0.50, 1);
218
219    /* Bench */
220    {   U64 fastestC = (U64)(-1LL), fastestD = (U64)(-1LL);
221        U64 const crcOrig = XXH64(srcBuffer, srcSize, 0);
222        UTIL_time_t coolTime;
223        U64 const maxTime = (g_nbSeconds * TIMELOOP_MICROSEC) + 100;
224        U64 totalCTime=0, totalDTime=0;
225        U32 cCompleted=0, dCompleted=0;
226#       define NB_MARKS 4
227        const char* const marks[NB_MARKS] = { " |", " /", " =",  "\\" };
228        U32 markNb = 0;
229        size_t cSize = 0;
230        double ratio = 0.;
231
232        UTIL_getTime(&coolTime);
233        DISPLAYLEVEL(2, "\r%79s\r", "");
234        while (!cCompleted | !dCompleted) {
235            UTIL_time_t clockStart;
236            U64 clockLoop = g_nbSeconds ? TIMELOOP_MICROSEC : 1;
237
238            /* overheat protection */
239            if (UTIL_clockSpanMicro(coolTime, ticksPerSecond) > ACTIVEPERIOD_MICROSEC) {
240                DISPLAYLEVEL(2, "\rcooling down ...    \r");
241                UTIL_sleep(COOLPERIOD_SEC);
242                UTIL_getTime(&coolTime);
243            }
244
245            /* Compression */
246            DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->\r", marks[markNb], displayName, (U32)srcSize);
247            if (!cCompleted) memset(compressedBuffer, 0xE5, maxCompressedSize);  /* warm up and erase result buffer */
248
249            UTIL_sleepMilli(1);  /* give processor time to other processes */
250            UTIL_waitForNextTick(ticksPerSecond);
251            UTIL_getTime(&clockStart);
252
253            if (!cCompleted) {   /* still some time to do compression tests */
254                U32 nbLoops = 0;
255                do {
256                    U32 blockNb;
257                    for (blockNb=0; blockNb<nbBlocks; blockNb++) {
258                        size_t const rSize = compP.compressionFunction(blockTable[blockNb].srcPtr, blockTable[blockNb].cPtr, (int)blockTable[blockNb].srcSize, (int)blockTable[blockNb].cRoom, cLevel);
259                        if (LZ4_isError(rSize)) EXM_THROW(1, "LZ4_compress() failed");
260                        blockTable[blockNb].cSize = rSize;
261                    }
262                    nbLoops++;
263                } while (UTIL_clockSpanMicro(clockStart, ticksPerSecond) < clockLoop);
264                {   U64 const clockSpan = UTIL_clockSpanMicro(clockStart, ticksPerSecond);
265                    if (clockSpan < fastestC*nbLoops) fastestC = clockSpan / nbLoops;
266                    totalCTime += clockSpan;
267                    cCompleted = totalCTime>maxTime;
268            }   }
269
270            cSize = 0;
271            { U32 blockNb; for (blockNb=0; blockNb<nbBlocks; blockNb++) cSize += blockTable[blockNb].cSize; }
272            cSize += !cSize;  /* avoid div by 0 */
273            ratio = (double)srcSize / (double)cSize;
274            markNb = (markNb+1) % NB_MARKS;
275            DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.3f),%6.1f MB/s\r",
276                    marks[markNb], displayName, (U32)srcSize, (U32)cSize, ratio,
277                    (double)srcSize / fastestC );
278
279            (void)fastestD; (void)crcOrig;   /*  unused when decompression disabled */
280#if 1
281            /* Decompression */
282            if (!dCompleted) memset(resultBuffer, 0xD6, srcSize);  /* warm result buffer */
283
284            UTIL_sleepMilli(1); /* give processor time to other processes */
285            UTIL_waitForNextTick(ticksPerSecond);
286            UTIL_getTime(&clockStart);
287
288            if (!dCompleted) {
289                U32 nbLoops = 0;
290                do {
291                    U32 blockNb;
292                    for (blockNb=0; blockNb<nbBlocks; blockNb++) {
293                        size_t const regenSize = LZ4_decompress_safe(blockTable[blockNb].cPtr, blockTable[blockNb].resPtr, (int)blockTable[blockNb].cSize, (int)blockTable[blockNb].srcSize);
294                        if (LZ4_isError(regenSize)) {
295                            DISPLAY("LZ4_decompress_safe() failed on block %u  \n", blockNb);
296                            clockLoop = 0;   /* force immediate test end */
297                            break;
298                        }
299
300                        blockTable[blockNb].resSize = regenSize;
301                    }
302                    nbLoops++;
303                } while (UTIL_clockSpanMicro(clockStart, ticksPerSecond) < DECOMP_MULT*clockLoop);
304                {   U64 const clockSpan = UTIL_clockSpanMicro(clockStart, ticksPerSecond);
305                    if (clockSpan < fastestD*nbLoops) fastestD = clockSpan / nbLoops;
306                    totalDTime += clockSpan;
307                    dCompleted = totalDTime>(DECOMP_MULT*maxTime);
308            }   }
309
310            markNb = (markNb+1) % NB_MARKS;
311            DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.3f),%6.1f MB/s ,%6.1f MB/s\r",
312                    marks[markNb], displayName, (U32)srcSize, (U32)cSize, ratio,
313                    (double)srcSize / fastestC,
314                    (double)srcSize / fastestD );
315
316            /* CRC Checking */
317            {   U64 const crcCheck = XXH64(resultBuffer, srcSize, 0);
318                if (crcOrig!=crcCheck) {
319                    size_t u;
320                    DISPLAY("!!! WARNING !!! %14s : Invalid Checksum : %x != %x   \n", displayName, (unsigned)crcOrig, (unsigned)crcCheck);
321                    for (u=0; u<srcSize; u++) {
322                        if (((const BYTE*)srcBuffer)[u] != ((const BYTE*)resultBuffer)[u]) {
323                            U32 segNb, bNb, pos;
324                            size_t bacc = 0;
325                            DISPLAY("Decoding error at pos %u ", (U32)u);
326                            for (segNb = 0; segNb < nbBlocks; segNb++) {
327                                if (bacc + blockTable[segNb].srcSize > u) break;
328                                bacc += blockTable[segNb].srcSize;
329                            }
330                            pos = (U32)(u - bacc);
331                            bNb = pos / (128 KB);
332                            DISPLAY("(block %u, sub %u, pos %u) \n", segNb, bNb, pos);
333                            break;
334                        }
335                        if (u==srcSize-1) {  /* should never happen */
336                            DISPLAY("no difference detected\n");
337                    }   }
338                    break;
339            }   }   /* CRC Checking */
340#endif
341        }   /* for (testNb = 1; testNb <= (g_nbSeconds + !g_nbSeconds); testNb++) */
342
343        if (g_displayLevel == 1) {
344            double cSpeed = (double)srcSize / fastestC;
345            double dSpeed = (double)srcSize / fastestD;
346            if (g_additionalParam)
347                DISPLAY("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s  %s (param=%d)\n", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName, g_additionalParam);
348            else
349                DISPLAY("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s  %s\n", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName);
350        }
351        DISPLAYLEVEL(2, "%2i#\n", cLevel);
352    }   /* Bench */
353
354    /* clean up */
355    free(blockTable);
356    free(compressedBuffer);
357    free(resultBuffer);
358    return 0;
359}
360
361
362static size_t BMK_findMaxMem(U64 requiredMem)
363{
364    size_t step = 64 MB;
365    BYTE* testmem=NULL;
366
367    requiredMem = (((requiredMem >> 26) + 1) << 26);
368    requiredMem += 2*step;
369    if (requiredMem > maxMemory) requiredMem = maxMemory;
370
371    while (!testmem) {
372        if (requiredMem > step) requiredMem -= step;
373        else requiredMem >>= 1;
374        testmem = (BYTE*) malloc ((size_t)requiredMem);
375    }
376    free (testmem);
377
378    /* keep some space available */
379    if (requiredMem > step) requiredMem -= step;
380    else requiredMem >>= 1;
381
382    return (size_t)requiredMem;
383}
384
385
386static void BMK_benchCLevel(void* srcBuffer, size_t benchedSize,
387                            const char* displayName, int cLevel, int cLevelLast,
388                            const size_t* fileSizes, unsigned nbFiles)
389{
390    int l;
391
392    const char* pch = strrchr(displayName, '\\'); /* Windows */
393    if (!pch) pch = strrchr(displayName, '/'); /* Linux */
394    if (pch) displayName = pch+1;
395
396    SET_HIGH_PRIORITY;
397
398    if (g_displayLevel == 1 && !g_additionalParam)
399        DISPLAY("bench %s %s: input %u bytes, %u seconds, %u KB blocks\n", LZ4_VERSION_STRING, LZ4_GIT_COMMIT_STRING, (U32)benchedSize, g_nbSeconds, (U32)(g_blockSize>>10));
400
401    if (cLevelLast < cLevel) cLevelLast = cLevel;
402
403    for (l=cLevel; l <= cLevelLast; l++) {
404        BMK_benchMem(srcBuffer, benchedSize,
405                     displayName, l,
406                     fileSizes, nbFiles);
407    }
408}
409
410
411/*! BMK_loadFiles() :
412    Loads `buffer` with content of files listed within `fileNamesTable`.
413    At most, fills `buffer` entirely */
414static void BMK_loadFiles(void* buffer, size_t bufferSize,
415                          size_t* fileSizes,
416                          const char** fileNamesTable, unsigned nbFiles)
417{
418    size_t pos = 0, totalSize = 0;
419    unsigned n;
420    for (n=0; n<nbFiles; n++) {
421        FILE* f;
422        U64 fileSize = UTIL_getFileSize(fileNamesTable[n]);
423        if (UTIL_isDirectory(fileNamesTable[n])) {
424            DISPLAYLEVEL(2, "Ignoring %s directory...       \n", fileNamesTable[n]);
425            fileSizes[n] = 0;
426            continue;
427        }
428        f = fopen(fileNamesTable[n], "rb");
429        if (f==NULL) EXM_THROW(10, "impossible to open file %s", fileNamesTable[n]);
430        DISPLAYUPDATE(2, "Loading %s...       \r", fileNamesTable[n]);
431        if (fileSize > bufferSize-pos) fileSize = bufferSize-pos, nbFiles=n;   /* buffer too small - stop after this file */
432        { size_t const readSize = fread(((char*)buffer)+pos, 1, (size_t)fileSize, f);
433          if (readSize != (size_t)fileSize) EXM_THROW(11, "could not read %s", fileNamesTable[n]);
434          pos += readSize; }
435        fileSizes[n] = (size_t)fileSize;
436        totalSize += (size_t)fileSize;
437        fclose(f);
438    }
439
440    if (totalSize == 0) EXM_THROW(12, "no data to bench");
441}
442
443static void BMK_benchFileTable(const char** fileNamesTable, unsigned nbFiles,
444                               int cLevel, int cLevelLast)
445{
446    void* srcBuffer;
447    size_t benchedSize;
448    size_t* fileSizes = (size_t*)malloc(nbFiles * sizeof(size_t));
449    U64 const totalSizeToLoad = UTIL_getTotalFileSize(fileNamesTable, nbFiles);
450    char mfName[20] = {0};
451
452    if (!fileSizes) EXM_THROW(12, "not enough memory for fileSizes");
453
454    /* Memory allocation & restrictions */
455    benchedSize = BMK_findMaxMem(totalSizeToLoad * 3) / 3;
456    if (benchedSize==0) EXM_THROW(12, "not enough memory");
457    if ((U64)benchedSize > totalSizeToLoad) benchedSize = (size_t)totalSizeToLoad;
458    if (benchedSize < totalSizeToLoad)
459        DISPLAY("Not enough memory; testing %u MB only...\n", (U32)(benchedSize >> 20));
460    srcBuffer = malloc(benchedSize + !benchedSize);   /* avoid alloc of zero */
461    if (!srcBuffer) EXM_THROW(12, "not enough memory");
462
463    /* Load input buffer */
464    BMK_loadFiles(srcBuffer, benchedSize, fileSizes, fileNamesTable, nbFiles);
465
466    /* Bench */
467    snprintf (mfName, sizeof(mfName), " %u files", nbFiles);
468    {   const char* displayName = (nbFiles > 1) ? mfName : fileNamesTable[0];
469        BMK_benchCLevel(srcBuffer, benchedSize,
470                        displayName, cLevel, cLevelLast,
471                        fileSizes, nbFiles);
472    }
473
474    /* clean up */
475    free(srcBuffer);
476    free(fileSizes);
477}
478
479
480static void BMK_syntheticTest(int cLevel, int cLevelLast, double compressibility)
481{
482    char name[20] = {0};
483    size_t benchedSize = 10000000;
484    void* const srcBuffer = malloc(benchedSize);
485
486    /* Memory allocation */
487    if (!srcBuffer) EXM_THROW(21, "not enough memory");
488
489    /* Fill input buffer */
490    RDG_genBuffer(srcBuffer, benchedSize, compressibility, 0.0, 0);
491
492    /* Bench */
493    snprintf (name, sizeof(name), "Synthetic %2u%%", (unsigned)(compressibility*100));
494    BMK_benchCLevel(srcBuffer, benchedSize, name, cLevel, cLevelLast, &benchedSize, 1);
495
496    /* clean up */
497    free(srcBuffer);
498}
499
500
501int BMK_benchFiles(const char** fileNamesTable, unsigned nbFiles,
502                   int cLevel, int cLevelLast)
503{
504    double const compressibility = (double)g_compressibilityDefault / 100;
505
506    if (cLevel > LZ4HC_CLEVEL_MAX) cLevel = LZ4HC_CLEVEL_MAX;
507    if (cLevelLast > LZ4HC_CLEVEL_MAX) cLevelLast = LZ4HC_CLEVEL_MAX;
508    if (cLevelLast < cLevel) cLevelLast = cLevel;
509    if (cLevelLast > cLevel) DISPLAYLEVEL(2, "Benchmarking levels from %d to %d\n", cLevel, cLevelLast);
510
511    if (nbFiles == 0)
512        BMK_syntheticTest(cLevel, cLevelLast, compressibility);
513    else
514        BMK_benchFileTable(fileNamesTable, nbFiles, cLevel, cLevelLast);
515    return 0;
516}
517