bench.c revision e8a6067e8a2d033049e5eafa0617c29b0e51146f
1/*
2    bench.c - Demo program to benchmark open-source compression algorithms
3    Copyright (C) Yann Collet 2012-2016
4
5    GPL v2 License
6
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 2 of the License, or
10    (at your option) any later version.
11
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16
17    You should have received a copy of the GNU General Public License along
18    with this program; if not, write to the Free Software Foundation, Inc.,
19    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20
21    You can contact the author at :
22    - LZ4 homepage : http://www.lz4.org
23    - LZ4 source repository : https://github.com/lz4/lz4
24*/
25
26
27/* *************************************
28*  Includes
29***************************************/
30#include "util.h"        /* Compiler options, UTIL_GetFileSize, UTIL_sleep */
31#include <stdlib.h>      /* malloc, free */
32#include <string.h>      /* memset */
33#include <stdio.h>       /* fprintf, fopen, ftello64 */
34#include <time.h>        /* clock_t, clock, CLOCKS_PER_SEC */
35
36#include "datagen.h"     /* RDG_genBuffer */
37#include "xxhash.h"
38
39
40#include "lz4.h"
#define COMPRESSOR0 LZ4_compress_local
/* LZ4_compress_local() :
 * Adapter exposing LZ4_compress_default() through the 5-argument signature
 * stored in struct compressionParameters. `clevel` is accepted but ignored. */
static int LZ4_compress_local(const char* src, char* dst, int srcSize, int dstSize, int clevel)
{
    int const compressedSize = LZ4_compress_default(src, dst, srcSize, dstSize);
    (void)clevel;
    return compressedSize;
}
43#include "lz4hc.h"
#define COMPRESSOR1 LZ4_compress_HC
#define DEFAULTCOMPRESSOR COMPRESSOR0
/* A result of 0 is treated as a failure.
 * The argument is parenthesized so that compound expressions are tested as a whole. */
#define LZ4_isError(errcode) ((errcode)==0)
47
48
49/* *************************************
50*  Constants
51***************************************/
/* LZ4_GIT_COMMIT_STRING : identifier printed in the benchmark banner, next to LZ4_VERSION_STRING.
 * Expands to an empty string when it has not been defined before this point.
 * NOTE(review): the #else branch redefines LZ4_GIT_COMMIT_STRING although it is
 * already defined on that path, and relies on LZ4_GIT_COMMIT and LZ4_EXPAND_AND_QUOTE
 * being provided elsewhere - confirm this is the intended build-time contract. */
#ifndef LZ4_GIT_COMMIT_STRING
#  define LZ4_GIT_COMMIT_STRING ""
#else
#  define LZ4_GIT_COMMIT_STRING LZ4_EXPAND_AND_QUOTE(LZ4_GIT_COMMIT)
#endif
57
/* Timing constants. Products are parenthesized so that they behave as a single
 * value whatever operator surrounds them (e.g. division or modulo). */
#define NBSECONDS             3
#define TIMELOOP_MICROSEC     (1*1000000ULL)   /* 1 second */
#define ACTIVEPERIOD_MICROSEC (70*1000000ULL)  /* 70 seconds */
#define COOLPERIOD_SEC        10
#define DECOMP_MULT           2 /* test decompression DECOMP_MULT times longer than compression */

/* Size suffixes, meant to be written after a number : `64 KB`, `2 GB`.
 * They cannot be fully parenthesized since they start with the multiplication operator. */
#define KB *(1 <<10)
#define MB *(1 <<20)
#define GB *(1U<<30)
67
/* Upper bound on the amount of memory BMK_findMaxMem() will try to allocate :
 * (2 GB - 64 MB) when size_t is 4 bytes wide, otherwise 1 << (bits in size_t - 31),
 * i.e. 8 GB for an 8-byte size_t. */
static const size_t maxMemory = (sizeof(size_t)==4)  ?  (2 GB - 64 MB) : (size_t)(1ULL << ((sizeof(size_t)*8)-31));

/* Compressibility, in percent, of the synthetic sample benchmarked when no file is provided */
static U32 g_compressibilityDefault = 50;
71
72
73/* *************************************
74*  console display
75***************************************/
76#define DISPLAY(...)         fprintf(stderr, __VA_ARGS__)
77#define DISPLAYLEVEL(l, ...) if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); }
78static U32 g_displayLevel = 2;   /* 0 : no display;   1: errors;   2 : + result + interaction + warnings;   3 : + progression;   4 : + information */
79
80#define DISPLAYUPDATE(l, ...) if (g_displayLevel>=l) { \
81            if ((clock() - g_time > refreshRate) || (g_displayLevel>=4)) \
82            { g_time = clock(); DISPLAY(__VA_ARGS__); \
83            if (g_displayLevel>=4) fflush(stdout); } }
84static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
85static clock_t g_time = 0;
86
87
88/* *************************************
89*  Exceptions
90***************************************/
#ifndef DEBUG
#  define DEBUG 0
#endif
/* DEBUGOUTPUT() : message displayed only when DEBUG is non-zero */
#define DEBUGOUTPUT(...) do { if (DEBUG) DISPLAY(__VA_ARGS__); } while (0)
/* EXM_THROW() : reports `error` and a printf-style message (notification level >= 1),
 * then terminates the program with exit(error).
 * Wrapped in do { } while (0) so that `if (cond) EXM_THROW(...); else ...` is valid. */
#define EXM_THROW(error, ...)                                             \
do {                                                                      \
    DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \
    DISPLAYLEVEL(1, "Error %i : ", error);                                \
    DISPLAYLEVEL(1, __VA_ARGS__);                                         \
    DISPLAYLEVEL(1, "\n");                                                \
    exit(error);                                                          \
} while (0)
103
104
105/* *************************************
106*  Benchmark Parameters
107***************************************/
static U32 g_nbSeconds = NBSECONDS;   /* time budget, in seconds, used to size the compression test; set by BMK_SetNbSeconds() */
static size_t g_blockSize = 0;        /* block size in bytes; set by BMK_SetBlockSize(). Below 32, BMK_benchMem() uses the whole input size instead */
int g_additionalParam = 0;            /* when non-zero, printed as "param=" in level-1 result lines; set by BMK_setAdditionalParam() */
111
112void BMK_setNotificationLevel(unsigned level) { g_displayLevel=level; }
113
114void BMK_setAdditionalParam(int additionalParam) { g_additionalParam=additionalParam; }
115
116void BMK_SetNbSeconds(unsigned nbSeconds)
117{
118    g_nbSeconds = nbSeconds;
119    DISPLAYLEVEL(3, "- test >= %u seconds per compression / decompression -\n", g_nbSeconds);
120}
121
122void BMK_SetBlockSize(size_t blockSize)
123{
124    g_blockSize = blockSize;
125    DISPLAYLEVEL(2, "using blocks of size %u KB \n", (U32)(blockSize>>10));
126}
127
128
129/* ********************************************************
130*  Bench functions
131**********************************************************/
/* blockParam_t :
 * Describes one independently compressed block : where its input lives,
 * where its compressed form is written, and where it is decompressed back. */
typedef struct
{
    const char* srcPtr;    /* start of the block within the source buffer */
    size_t srcSize;        /* number of source bytes in the block */
    char*  cPtr;           /* destination of the compressed block */
    size_t cRoom;          /* capacity available at cPtr (LZ4_compressBound() of srcSize) */
    size_t cSize;          /* compressed size, as returned by the compression function */
    char*  resPtr;         /* destination of the decompressed block */
    size_t resSize;        /* decompressed size, as returned by the decompression function */
} blockParam_t;
142
/* compressionParameters :
 * Holds the compression entry point selected for a benchmark run. */
struct compressionParameters
{
    /* common signature of benchmarked compressors; BMK_benchMem() treats a result of 0 as a failure */
    int (*compressionFunction)(const char* src, char* dst, int srcSize, int dstSize, int cLevel);
};
147
/* Minimum / maximum of two values.
 * Being macros, they evaluate one of their arguments twice :
 * do not pass expressions with side effects. */
#define MIN(a,b) ((a)<(b) ? (a) : (b))
#define MAX(a,b) ((a)>(b) ? (a) : (b))
150
151static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
152                        const char* displayName, int cLevel,
153                        const size_t* fileSizes, U32 nbFiles)
154{
155    size_t const blockSize = (g_blockSize>=32 ? g_blockSize : srcSize) + (!srcSize) /* avoid div by 0 */ ;
156    U32 const maxNbBlocks = (U32) ((srcSize + (blockSize-1)) / blockSize) + nbFiles;
157    blockParam_t* const blockTable = (blockParam_t*) malloc(maxNbBlocks * sizeof(blockParam_t));
158    size_t const maxCompressedSize = LZ4_compressBound((int)srcSize) + (maxNbBlocks * 1024);   /* add some room for safety */
159    void* const compressedBuffer = malloc(maxCompressedSize);
160    void* const resultBuffer = malloc(srcSize);
161    U32 nbBlocks;
162    UTIL_time_t ticksPerSecond;
163    struct compressionParameters compP;
164    int cfunctionId;
165
166    /* checks */
167    if (!compressedBuffer || !resultBuffer || !blockTable)
168        EXM_THROW(31, "allocation error : not enough memory");
169
170    /* init */
171    if (strlen(displayName)>17) displayName += strlen(displayName)-17;   /* can only display 17 characters */
172    UTIL_initTimer(&ticksPerSecond);
173
174    /* Init */
175    if (cLevel < LZ4HC_MIN_CLEVEL) cfunctionId = 0; else cfunctionId = 1;
176    switch (cfunctionId)
177    {
178#ifdef COMPRESSOR0
179    case 0 : compP.compressionFunction = COMPRESSOR0; break;
180#endif
181#ifdef COMPRESSOR1
182    case 1 : compP.compressionFunction = COMPRESSOR1; break;
183#endif
184    default : compP.compressionFunction = DEFAULTCOMPRESSOR;
185    }
186
187    /* Init blockTable data */
188    {   const char* srcPtr = (const char*)srcBuffer;
189        char* cPtr = (char*)compressedBuffer;
190        char* resPtr = (char*)resultBuffer;
191        U32 fileNb;
192        for (nbBlocks=0, fileNb=0; fileNb<nbFiles; fileNb++) {
193            size_t remaining = fileSizes[fileNb];
194            U32 const nbBlocksforThisFile = (U32)((remaining + (blockSize-1)) / blockSize);
195            U32 const blockEnd = nbBlocks + nbBlocksforThisFile;
196            for ( ; nbBlocks<blockEnd; nbBlocks++) {
197                size_t const thisBlockSize = MIN(remaining, blockSize);
198                blockTable[nbBlocks].srcPtr = srcPtr;
199                blockTable[nbBlocks].cPtr = cPtr;
200                blockTable[nbBlocks].resPtr = resPtr;
201                blockTable[nbBlocks].srcSize = thisBlockSize;
202                blockTable[nbBlocks].cRoom = LZ4_compressBound((int)thisBlockSize);
203                srcPtr += thisBlockSize;
204                cPtr += blockTable[nbBlocks].cRoom;
205                resPtr += thisBlockSize;
206                remaining -= thisBlockSize;
207    }   }   }
208
209    /* warmimg up memory */
210    RDG_genBuffer(compressedBuffer, maxCompressedSize, 0.10, 0.50, 1);
211
212    /* Bench */
213    {   U64 fastestC = (U64)(-1LL), fastestD = (U64)(-1LL);
214        U64 const crcOrig = XXH64(srcBuffer, srcSize, 0);
215        UTIL_time_t coolTime;
216        U64 const maxTime = (g_nbSeconds * TIMELOOP_MICROSEC) + 100;
217        U64 totalCTime=0, totalDTime=0;
218        U32 cCompleted=0, dCompleted=0;
219#       define NB_MARKS 4
220        const char* const marks[NB_MARKS] = { " |", " /", " =",  "\\" };
221        U32 markNb = 0;
222        size_t cSize = 0;
223        double ratio = 0.;
224
225        UTIL_getTime(&coolTime);
226        DISPLAYLEVEL(2, "\r%79s\r", "");
227        while (!cCompleted | !dCompleted) {
228            UTIL_time_t clockStart;
229            U64 clockLoop = g_nbSeconds ? TIMELOOP_MICROSEC : 1;
230
231            /* overheat protection */
232            if (UTIL_clockSpanMicro(coolTime, ticksPerSecond) > ACTIVEPERIOD_MICROSEC) {
233                DISPLAYLEVEL(2, "\rcooling down ...    \r");
234                UTIL_sleep(COOLPERIOD_SEC);
235                UTIL_getTime(&coolTime);
236            }
237
238            /* Compression */
239            DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->\r", marks[markNb], displayName, (U32)srcSize);
240            if (!cCompleted) memset(compressedBuffer, 0xE5, maxCompressedSize);  /* warm up and erase result buffer */
241
242            UTIL_sleepMilli(1);  /* give processor time to other processes */
243            UTIL_waitForNextTick(ticksPerSecond);
244            UTIL_getTime(&clockStart);
245
246            if (!cCompleted) {   /* still some time to do compression tests */
247                U32 nbLoops = 0;
248                do {
249                    U32 blockNb;
250                    for (blockNb=0; blockNb<nbBlocks; blockNb++) {
251                        size_t const rSize = compP.compressionFunction(blockTable[blockNb].srcPtr, blockTable[blockNb].cPtr, (int)blockTable[blockNb].srcSize, (int)blockTable[blockNb].cRoom, cLevel);
252                        if (LZ4_isError(rSize)) EXM_THROW(1, "LZ4_compress() failed");
253                        blockTable[blockNb].cSize = rSize;
254                    }
255                    nbLoops++;
256                } while (UTIL_clockSpanMicro(clockStart, ticksPerSecond) < clockLoop);
257                {   U64 const clockSpan = UTIL_clockSpanMicro(clockStart, ticksPerSecond);
258                    if (clockSpan < fastestC*nbLoops) fastestC = clockSpan / nbLoops;
259                    totalCTime += clockSpan;
260                    cCompleted = totalCTime>maxTime;
261            }   }
262
263            cSize = 0;
264            { U32 blockNb; for (blockNb=0; blockNb<nbBlocks; blockNb++) cSize += blockTable[blockNb].cSize; }
265            ratio = (double)srcSize / (double)cSize;
266            markNb = (markNb+1) % NB_MARKS;
267            DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.3f),%6.1f MB/s\r",
268                    marks[markNb], displayName, (U32)srcSize, (U32)cSize, ratio,
269                    (double)srcSize / fastestC );
270
271            (void)fastestD; (void)crcOrig;   /*  unused when decompression disabled */
272#if 1
273            /* Decompression */
274            if (!dCompleted) memset(resultBuffer, 0xD6, srcSize);  /* warm result buffer */
275
276            UTIL_sleepMilli(1); /* give processor time to other processes */
277            UTIL_waitForNextTick(ticksPerSecond);
278            UTIL_getTime(&clockStart);
279
280            if (!dCompleted) {
281                U32 nbLoops = 0;
282                do {
283                    U32 blockNb;
284                    for (blockNb=0; blockNb<nbBlocks; blockNb++) {
285                        size_t const regenSize = LZ4_decompress_safe(blockTable[blockNb].cPtr, blockTable[blockNb].resPtr, (int)blockTable[blockNb].cSize, (int)blockTable[blockNb].srcSize);
286                        if (LZ4_isError(regenSize)) {
287                            DISPLAY("LZ4_decompress_safe() failed on block %u  \n", blockNb);
288                            clockLoop = 0;   /* force immediate test end */
289                            break;
290                        }
291
292                        blockTable[blockNb].resSize = regenSize;
293                    }
294                    nbLoops++;
295                } while (UTIL_clockSpanMicro(clockStart, ticksPerSecond) < DECOMP_MULT*clockLoop);
296                {   U64 const clockSpan = UTIL_clockSpanMicro(clockStart, ticksPerSecond);
297                    if (clockSpan < fastestD*nbLoops) fastestD = clockSpan / nbLoops;
298                    totalDTime += clockSpan;
299                    dCompleted = totalDTime>(DECOMP_MULT*maxTime);
300            }   }
301
302            markNb = (markNb+1) % NB_MARKS;
303            DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.3f),%6.1f MB/s ,%6.1f MB/s\r",
304                    marks[markNb], displayName, (U32)srcSize, (U32)cSize, ratio,
305                    (double)srcSize / fastestC,
306                    (double)srcSize / fastestD );
307
308            /* CRC Checking */
309            {   U64 const crcCheck = XXH64(resultBuffer, srcSize, 0);
310                if (crcOrig!=crcCheck) {
311                    size_t u;
312                    DISPLAY("!!! WARNING !!! %14s : Invalid Checksum : %x != %x   \n", displayName, (unsigned)crcOrig, (unsigned)crcCheck);
313                    for (u=0; u<srcSize; u++) {
314                        if (((const BYTE*)srcBuffer)[u] != ((const BYTE*)resultBuffer)[u]) {
315                            U32 segNb, bNb, pos;
316                            size_t bacc = 0;
317                            DISPLAY("Decoding error at pos %u ", (U32)u);
318                            for (segNb = 0; segNb < nbBlocks; segNb++) {
319                                if (bacc + blockTable[segNb].srcSize > u) break;
320                                bacc += blockTable[segNb].srcSize;
321                            }
322                            pos = (U32)(u - bacc);
323                            bNb = pos / (128 KB);
324                            DISPLAY("(block %u, sub %u, pos %u) \n", segNb, bNb, pos);
325                            break;
326                        }
327                        if (u==srcSize-1) {  /* should never happen */
328                            DISPLAY("no difference detected\n");
329                    }   }
330                    break;
331            }   }   /* CRC Checking */
332#endif
333        }   /* for (testNb = 1; testNb <= (g_nbSeconds + !g_nbSeconds); testNb++) */
334
335        if (g_displayLevel == 1) {
336            double cSpeed = (double)srcSize / fastestC;
337            double dSpeed = (double)srcSize / fastestD;
338            if (g_additionalParam)
339                DISPLAY("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s  %s (param=%d)\n", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName, g_additionalParam);
340            else
341                DISPLAY("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s  %s\n", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName);
342        }
343        DISPLAYLEVEL(2, "%2i#\n", cLevel);
344    }   /* Bench */
345
346    /* clean up */
347    free(blockTable);
348    free(compressedBuffer);
349    free(resultBuffer);
350    return 0;
351}
352
353
354static size_t BMK_findMaxMem(U64 requiredMem)
355{
356    size_t const step = 64 MB;
357    BYTE* testmem = NULL;
358
359    requiredMem = (((requiredMem >> 26) + 1) << 26);
360    requiredMem += step;
361    if (requiredMem > maxMemory) requiredMem = maxMemory;
362
363    do {
364        testmem = (BYTE*)malloc((size_t)requiredMem);
365        requiredMem -= step;
366    } while (!testmem);
367
368    free(testmem);
369    return (size_t)(requiredMem);
370}
371
372static void BMK_benchCLevel(void* srcBuffer, size_t benchedSize,
373                            const char* displayName, int cLevel, int cLevelLast,
374                            const size_t* fileSizes, unsigned nbFiles)
375{
376    int l;
377
378    const char* pch = strrchr(displayName, '\\'); /* Windows */
379    if (!pch) pch = strrchr(displayName, '/'); /* Linux */
380    if (pch) displayName = pch+1;
381
382    SET_HIGH_PRIORITY;
383
384    if (g_displayLevel == 1 && !g_additionalParam)
385        DISPLAY("bench %s %s: input %u bytes, %u seconds, %u KB blocks\n", LZ4_VERSION_STRING, LZ4_GIT_COMMIT_STRING, (U32)benchedSize, g_nbSeconds, (U32)(g_blockSize>>10));
386
387    if (cLevelLast < cLevel) cLevelLast = cLevel;
388
389    for (l=cLevel; l <= cLevelLast; l++) {
390        BMK_benchMem(srcBuffer, benchedSize,
391                     displayName, l,
392                     fileSizes, nbFiles);
393    }
394}
395
396
397/*! BMK_loadFiles() :
398    Loads `buffer` with content of files listed within `fileNamesTable`.
399    At most, fills `buffer` entirely */
400static void BMK_loadFiles(void* buffer, size_t bufferSize,
401                          size_t* fileSizes,
402                          const char** fileNamesTable, unsigned nbFiles)
403{
404    size_t pos = 0, totalSize = 0;
405    unsigned n;
406    for (n=0; n<nbFiles; n++) {
407        FILE* f;
408        U64 fileSize = UTIL_getFileSize(fileNamesTable[n]);
409        if (UTIL_isDirectory(fileNamesTable[n])) {
410            DISPLAYLEVEL(2, "Ignoring %s directory...       \n", fileNamesTable[n]);
411            fileSizes[n] = 0;
412            continue;
413        }
414        f = fopen(fileNamesTable[n], "rb");
415        if (f==NULL) EXM_THROW(10, "impossible to open file %s", fileNamesTable[n]);
416        DISPLAYUPDATE(2, "Loading %s...       \r", fileNamesTable[n]);
417        if (fileSize > bufferSize-pos) fileSize = bufferSize-pos, nbFiles=n;   /* buffer too small - stop after this file */
418        { size_t const readSize = fread(((char*)buffer)+pos, 1, (size_t)fileSize, f);
419          if (readSize != (size_t)fileSize) EXM_THROW(11, "could not read %s", fileNamesTable[n]);
420          pos += readSize; }
421        fileSizes[n] = (size_t)fileSize;
422        totalSize += (size_t)fileSize;
423        fclose(f);
424    }
425
426    if (totalSize == 0) EXM_THROW(12, "no data to bench");
427}
428
429static void BMK_benchFileTable(const char** fileNamesTable, unsigned nbFiles,
430                               int cLevel, int cLevelLast)
431{
432    void* srcBuffer;
433    size_t benchedSize;
434    size_t* fileSizes = (size_t*)malloc(nbFiles * sizeof(size_t));
435    U64 const totalSizeToLoad = UTIL_getTotalFileSize(fileNamesTable, nbFiles);
436    char mfName[20] = {0};
437
438    if (!fileSizes) EXM_THROW(12, "not enough memory for fileSizes");
439
440    /* Memory allocation & restrictions */
441    benchedSize = BMK_findMaxMem(totalSizeToLoad * 3) / 3;
442    if (benchedSize==0) EXM_THROW(12, "not enough memory");
443    if ((U64)benchedSize > totalSizeToLoad) benchedSize = (size_t)totalSizeToLoad;
444    if (benchedSize < totalSizeToLoad)
445        DISPLAY("Not enough memory; testing %u MB only...\n", (U32)(benchedSize >> 20));
446    srcBuffer = malloc(benchedSize);
447    if (!srcBuffer) EXM_THROW(12, "not enough memory");
448
449    /* Load input buffer */
450    BMK_loadFiles(srcBuffer, benchedSize, fileSizes, fileNamesTable, nbFiles);
451
452    /* Bench */
453    snprintf (mfName, sizeof(mfName), " %u files", nbFiles);
454    {   const char* displayName = (nbFiles > 1) ? mfName : fileNamesTable[0];
455        BMK_benchCLevel(srcBuffer, benchedSize,
456                        displayName, cLevel, cLevelLast,
457                        fileSizes, nbFiles);
458    }
459
460    /* clean up */
461    free(srcBuffer);
462    free(fileSizes);
463}
464
465
/*! BMK_syntheticTest() :
 *  Benchmarks a 10,000,000-byte sample produced by RDG_genBuffer() with the
 *  requested `compressibility`, for each level in [cLevel, cLevelLast].
 *  The sample is presented to the benchmark as a single file. */
static void BMK_syntheticTest(int cLevel, int cLevelLast, double compressibility)
{
    char label[20] = {0};
    size_t sampleSize = 10000000;
    void* const sample = malloc(sampleSize);
    unsigned const percent = (unsigned)(compressibility*100);

    /* Memory allocation */
    if (sample == NULL) EXM_THROW(21, "not enough memory");

    /* Fill input buffer */
    RDG_genBuffer(sample, sampleSize, compressibility, 0.0, 0);

    /* Bench */
    snprintf (label, sizeof(label), "Synthetic %2u%%", percent);
    BMK_benchCLevel(sample, sampleSize, label, cLevel, cLevelLast, &sampleSize, 1);

    /* clean up */
    free(sample);
}
485
486
487int BMK_benchFiles(const char** fileNamesTable, unsigned nbFiles,
488                   int cLevel, int cLevelLast)
489{
490    double const compressibility = (double)g_compressibilityDefault / 100;
491
492    if (nbFiles == 0)
493        BMK_syntheticTest(cLevel, cLevelLast, compressibility);
494    else
495        BMK_benchFileTable(fileNamesTable, nbFiles, cLevel, cLevelLast);
496    return 0;
497}
498