bench.c revision 52cac9a97342641315c76cfb861206d6acd631a8
1/* 2 bench.c - Demo program to benchmark open-source compression algorithms 3 Copyright (C) Yann Collet 2012-2016 4 5 GPL v2 License 6 7 This program is free software; you can redistribute it and/or modify 8 it under the terms of the GNU General Public License as published by 9 the Free Software Foundation; either version 2 of the License, or 10 (at your option) any later version. 11 12 This program is distributed in the hope that it will be useful, 13 but WITHOUT ANY WARRANTY; without even the implied warranty of 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 GNU General Public License for more details. 16 17 You should have received a copy of the GNU General Public License along 18 with this program; if not, write to the Free Software Foundation, Inc., 19 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 20 21 You can contact the author at : 22 - LZ4 homepage : http://www.lz4.org 23 - LZ4 source repository : https://github.com/lz4/lz4 24*/ 25 26 27/* ************************************* 28* Includes 29***************************************/ 30#include "util.h" /* Compiler options, UTIL_GetFileSize, UTIL_sleep */ 31#include <stdlib.h> /* malloc, free */ 32#include <string.h> /* memset */ 33#include <stdio.h> /* fprintf, fopen, ftello64 */ 34#include <time.h> /* clock_t, clock, CLOCKS_PER_SEC */ 35 36#include "datagen.h" /* RDG_genBuffer */ 37#include "xxhash.h" 38 39 40#include "lz4.h" 41#define COMPRESSOR0 LZ4_compress_local 42static int LZ4_compress_local(const char* src, char* dst, int srcSize, int dstSize, int clevel) { (void)clevel; return LZ4_compress_default(src, dst, srcSize, dstSize); } 43#include "lz4hc.h" 44#define COMPRESSOR1 LZ4_compress_HC 45#define DEFAULTCOMPRESSOR COMPRESSOR0 46#define LZ4_isError(errcode) (errcode==0) 47 48 49/* ************************************* 50* Constants 51***************************************/ 52#ifndef LZ4_GIT_COMMIT_STRING 53# define LZ4_GIT_COMMIT_STRING "" 54#else 55# define LZ4_GIT_COMMIT_STRING LZ4_EXPAND_AND_QUOTE(LZ4_GIT_COMMIT) 56#endif 57 58#define NBSECONDS 3 59#define TIMELOOP_MICROSEC 1*1000000ULL /* 1 second */ 60#define ACTIVEPERIOD_MICROSEC 70*1000000ULL /* 70 seconds */ 61#define COOLPERIOD_SEC 10 62#define DECOMP_MULT 2 /* test decompression DECOMP_MULT times longer than compression */ 63 64#define KB *(1 <<10) 65#define MB *(1 <<20) 66#define GB *(1U<<30) 67 68static const size_t maxMemory = (sizeof(size_t)==4) ? (2 GB - 64 MB) : (size_t)(1ULL << ((sizeof(size_t)*8)-31)); 69 70static U32 g_compressibilityDefault = 50; 71 72 73/* ************************************* 74* console display 75***************************************/ 76#define DISPLAY(...) fprintf(stderr, __VA_ARGS__) 77#define DISPLAYLEVEL(l, ...) if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); } 78static U32 g_displayLevel = 2; /* 0 : no display; 1: errors; 2 : + result + interaction + warnings; 3 : + progression; 4 : + information */ 79 80#define DISPLAYUPDATE(l, ...) if (g_displayLevel>=l) { \ 81 if ((clock() - g_time > refreshRate) || (g_displayLevel>=4)) \ 82 { g_time = clock(); DISPLAY(__VA_ARGS__); \ 83 if (g_displayLevel>=4) fflush(stdout); } } 84static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100; 85static clock_t g_time = 0; 86 87 88/* ************************************* 89* Exceptions 90***************************************/ 91#ifndef DEBUG 92# define DEBUG 0 93#endif 94#define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__); 95#define EXM_THROW(error, ...) \ 96{ \ 97 DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \ 98 DISPLAYLEVEL(1, "Error %i : ", error); \ 99 DISPLAYLEVEL(1, __VA_ARGS__); \ 100 DISPLAYLEVEL(1, "\n"); \ 101 exit(error); \ 102} 103 104 105/* ************************************* 106* Benchmark Parameters 107***************************************/ 108static U32 g_nbSeconds = NBSECONDS; 109static size_t g_blockSize = 0; 110int g_additionalParam = 0; 111 112void BMK_setNotificationLevel(unsigned level) { g_displayLevel=level; } 113 114void BMK_setAdditionalParam(int additionalParam) { g_additionalParam=additionalParam; } 115 116void BMK_SetNbSeconds(unsigned nbSeconds) 117{ 118 g_nbSeconds = nbSeconds; 119 DISPLAYLEVEL(3, "- test >= %u seconds per compression / decompression -\n", g_nbSeconds); 120} 121 122void BMK_SetBlockSize(size_t blockSize) 123{ 124 g_blockSize = blockSize; 125} 126 127 128/* ******************************************************** 129* Bench functions 130**********************************************************/ 131typedef struct { 132 const char* srcPtr; 133 size_t srcSize; 134 char* cPtr; 135 size_t cRoom; 136 size_t cSize; 137 char* resPtr; 138 size_t resSize; 139} blockParam_t; 140 141struct compressionParameters 142{ 143 int (*compressionFunction)(const char* src, char* dst, int srcSize, int dstSize, int cLevel); 144}; 145 146#define MIN(a,b) ((a)<(b) ? (a) : (b)) 147#define MAX(a,b) ((a)>(b) ? (a) : (b)) 148 149static int BMK_benchMem(const void* srcBuffer, size_t srcSize, 150 const char* displayName, int cLevel, 151 const size_t* fileSizes, U32 nbFiles) 152{ 153 size_t const blockSize = (g_blockSize>=32 ? g_blockSize : srcSize) + (!srcSize) /* avoid div by 0 */ ; 154 U32 const maxNbBlocks = (U32) ((srcSize + (blockSize-1)) / blockSize) + nbFiles; 155 blockParam_t* const blockTable = (blockParam_t*) malloc(maxNbBlocks * sizeof(blockParam_t)); 156 size_t const maxCompressedSize = LZ4_compressBound((int)srcSize) + (maxNbBlocks * 1024); /* add some room for safety */ 157 void* const compressedBuffer = malloc(maxCompressedSize); 158 void* const resultBuffer = malloc(srcSize); 159 U32 nbBlocks; 160 UTIL_time_t ticksPerSecond; 161 struct compressionParameters compP; 162 int cfunctionId; 163 164 /* checks */ 165 if (!compressedBuffer || !resultBuffer || !blockTable) 166 EXM_THROW(31, "allocation error : not enough memory"); 167 168 /* init */ 169 if (strlen(displayName)>17) displayName += strlen(displayName)-17; /* can only display 17 characters */ 170 UTIL_initTimer(&ticksPerSecond); 171 172 /* Init */ 173 if (cLevel < LZ4HC_CLEVEL_MIN) cfunctionId = 0; else cfunctionId = 1; 174 switch (cfunctionId) 175 { 176#ifdef COMPRESSOR0 177 case 0 : compP.compressionFunction = COMPRESSOR0; break; 178#endif 179#ifdef COMPRESSOR1 180 case 1 : compP.compressionFunction = COMPRESSOR1; break; 181#endif 182 default : compP.compressionFunction = DEFAULTCOMPRESSOR; 183 } 184 185 /* Init blockTable data */ 186 { const char* srcPtr = (const char*)srcBuffer; 187 char* cPtr = (char*)compressedBuffer; 188 char* resPtr = (char*)resultBuffer; 189 U32 fileNb; 190 for (nbBlocks=0, fileNb=0; fileNb<nbFiles; fileNb++) { 191 size_t remaining = fileSizes[fileNb]; 192 U32 const nbBlocksforThisFile = (U32)((remaining + (blockSize-1)) / blockSize); 193 U32 const blockEnd = nbBlocks + nbBlocksforThisFile; 194 for ( ; nbBlocks<blockEnd; nbBlocks++) { 195 size_t const thisBlockSize = MIN(remaining, blockSize); 196 blockTable[nbBlocks].srcPtr = srcPtr; 197 blockTable[nbBlocks].cPtr = cPtr; 198 blockTable[nbBlocks].resPtr = resPtr; 199 blockTable[nbBlocks].srcSize = thisBlockSize; 200 blockTable[nbBlocks].cRoom = LZ4_compressBound((int)thisBlockSize); 201 srcPtr += thisBlockSize; 202 cPtr += blockTable[nbBlocks].cRoom; 203 resPtr += thisBlockSize; 204 remaining -= thisBlockSize; 205 } } } 206 207 /* warmimg up memory */ 208 RDG_genBuffer(compressedBuffer, maxCompressedSize, 0.10, 0.50, 1); 209 210 /* Bench */ 211 { U64 fastestC = (U64)(-1LL), fastestD = (U64)(-1LL); 212 U64 const crcOrig = XXH64(srcBuffer, srcSize, 0); 213 UTIL_time_t coolTime; 214 U64 const maxTime = (g_nbSeconds * TIMELOOP_MICROSEC) + 100; 215 U64 totalCTime=0, totalDTime=0; 216 U32 cCompleted=0, dCompleted=0; 217# define NB_MARKS 4 218 const char* const marks[NB_MARKS] = { " |", " /", " =", "\\" }; 219 U32 markNb = 0; 220 size_t cSize = 0; 221 double ratio = 0.; 222 223 UTIL_getTime(&coolTime); 224 DISPLAYLEVEL(2, "\r%79s\r", ""); 225 while (!cCompleted | !dCompleted) { 226 UTIL_time_t clockStart; 227 U64 clockLoop = g_nbSeconds ? TIMELOOP_MICROSEC : 1; 228 229 /* overheat protection */ 230 if (UTIL_clockSpanMicro(coolTime, ticksPerSecond) > ACTIVEPERIOD_MICROSEC) { 231 DISPLAYLEVEL(2, "\rcooling down ... \r"); 232 UTIL_sleep(COOLPERIOD_SEC); 233 UTIL_getTime(&coolTime); 234 } 235 236 /* Compression */ 237 DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->\r", marks[markNb], displayName, (U32)srcSize); 238 if (!cCompleted) memset(compressedBuffer, 0xE5, maxCompressedSize); /* warm up and erase result buffer */ 239 240 UTIL_sleepMilli(1); /* give processor time to other processes */ 241 UTIL_waitForNextTick(ticksPerSecond); 242 UTIL_getTime(&clockStart); 243 244 if (!cCompleted) { /* still some time to do compression tests */ 245 U32 nbLoops = 0; 246 do { 247 U32 blockNb; 248 for (blockNb=0; blockNb<nbBlocks; blockNb++) { 249 size_t const rSize = compP.compressionFunction(blockTable[blockNb].srcPtr, blockTable[blockNb].cPtr, (int)blockTable[blockNb].srcSize, (int)blockTable[blockNb].cRoom, cLevel); 250 if (LZ4_isError(rSize)) EXM_THROW(1, "LZ4_compress() failed"); 251 blockTable[blockNb].cSize = rSize; 252 } 253 nbLoops++; 254 } while (UTIL_clockSpanMicro(clockStart, ticksPerSecond) < clockLoop); 255 { U64 const clockSpan = UTIL_clockSpanMicro(clockStart, ticksPerSecond); 256 if (clockSpan < fastestC*nbLoops) fastestC = clockSpan / nbLoops; 257 totalCTime += clockSpan; 258 cCompleted = totalCTime>maxTime; 259 } } 260 261 cSize = 0; 262 { U32 blockNb; for (blockNb=0; blockNb<nbBlocks; blockNb++) cSize += blockTable[blockNb].cSize; } 263 cSize += !cSize; /* avoid div by 0 */ 264 ratio = (double)srcSize / (double)cSize; 265 markNb = (markNb+1) % NB_MARKS; 266 DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.3f),%6.1f MB/s\r", 267 marks[markNb], displayName, (U32)srcSize, (U32)cSize, ratio, 268 (double)srcSize / fastestC ); 269 270 (void)fastestD; (void)crcOrig; /* unused when decompression disabled */ 271#if 1 272 /* Decompression */ 273 if (!dCompleted) memset(resultBuffer, 0xD6, srcSize); /* warm result buffer */ 274 275 UTIL_sleepMilli(1); /* give processor time to other processes */ 276 UTIL_waitForNextTick(ticksPerSecond); 277 UTIL_getTime(&clockStart); 278 279 if (!dCompleted) { 280 U32 nbLoops = 0; 281 do { 282 U32 blockNb; 283 for (blockNb=0; blockNb<nbBlocks; blockNb++) { 284 size_t const regenSize = LZ4_decompress_safe(blockTable[blockNb].cPtr, blockTable[blockNb].resPtr, (int)blockTable[blockNb].cSize, (int)blockTable[blockNb].srcSize); 285 if (LZ4_isError(regenSize)) { 286 DISPLAY("LZ4_decompress_safe() failed on block %u \n", blockNb); 287 clockLoop = 0; /* force immediate test end */ 288 break; 289 } 290 291 blockTable[blockNb].resSize = regenSize; 292 } 293 nbLoops++; 294 } while (UTIL_clockSpanMicro(clockStart, ticksPerSecond) < DECOMP_MULT*clockLoop); 295 { U64 const clockSpan = UTIL_clockSpanMicro(clockStart, ticksPerSecond); 296 if (clockSpan < fastestD*nbLoops) fastestD = clockSpan / nbLoops; 297 totalDTime += clockSpan; 298 dCompleted = totalDTime>(DECOMP_MULT*maxTime); 299 } } 300 301 markNb = (markNb+1) % NB_MARKS; 302 DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.3f),%6.1f MB/s ,%6.1f MB/s\r", 303 marks[markNb], displayName, (U32)srcSize, (U32)cSize, ratio, 304 (double)srcSize / fastestC, 305 (double)srcSize / fastestD ); 306 307 /* CRC Checking */ 308 { U64 const crcCheck = XXH64(resultBuffer, srcSize, 0); 309 if (crcOrig!=crcCheck) { 310 size_t u; 311 DISPLAY("!!! WARNING !!! %14s : Invalid Checksum : %x != %x \n", displayName, (unsigned)crcOrig, (unsigned)crcCheck); 312 for (u=0; u<srcSize; u++) { 313 if (((const BYTE*)srcBuffer)[u] != ((const BYTE*)resultBuffer)[u]) { 314 U32 segNb, bNb, pos; 315 size_t bacc = 0; 316 DISPLAY("Decoding error at pos %u ", (U32)u); 317 for (segNb = 0; segNb < nbBlocks; segNb++) { 318 if (bacc + blockTable[segNb].srcSize > u) break; 319 bacc += blockTable[segNb].srcSize; 320 } 321 pos = (U32)(u - bacc); 322 bNb = pos / (128 KB); 323 DISPLAY("(block %u, sub %u, pos %u) \n", segNb, bNb, pos); 324 break; 325 } 326 if (u==srcSize-1) { /* should never happen */ 327 DISPLAY("no difference detected\n"); 328 } } 329 break; 330 } } /* CRC Checking */ 331#endif 332 } /* for (testNb = 1; testNb <= (g_nbSeconds + !g_nbSeconds); testNb++) */ 333 334 if (g_displayLevel == 1) { 335 double cSpeed = (double)srcSize / fastestC; 336 double dSpeed = (double)srcSize / fastestD; 337 if (g_additionalParam) 338 DISPLAY("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s (param=%d)\n", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName, g_additionalParam); 339 else 340 DISPLAY("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s\n", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName); 341 } 342 DISPLAYLEVEL(2, "%2i#\n", cLevel); 343 } /* Bench */ 344 345 /* clean up */ 346 free(blockTable); 347 free(compressedBuffer); 348 free(resultBuffer); 349 return 0; 350} 351 352 353static size_t BMK_findMaxMem(U64 requiredMem) 354{ 355 size_t step = 64 MB; 356 BYTE* testmem=NULL; 357 358 requiredMem = (((requiredMem >> 26) + 1) << 26); 359 requiredMem += 2*step; 360 if (requiredMem > maxMemory) requiredMem = maxMemory; 361 362 while (!testmem) { 363 if (requiredMem > step) requiredMem -= step; 364 else requiredMem >>= 1; 365 testmem = (BYTE*) malloc ((size_t)requiredMem); 366 } 367 free (testmem); 368 369 /* keep some space available */ 370 if (requiredMem > step) requiredMem -= step; 371 else requiredMem >>= 1; 372 373 return (size_t)requiredMem; 374} 375 376 377static void BMK_benchCLevel(void* srcBuffer, size_t benchedSize, 378 const char* displayName, int cLevel, int cLevelLast, 379 const size_t* fileSizes, unsigned nbFiles) 380{ 381 int l; 382 383 const char* pch = strrchr(displayName, '\\'); /* Windows */ 384 if (!pch) pch = strrchr(displayName, '/'); /* Linux */ 385 if (pch) displayName = pch+1; 386 387 SET_HIGH_PRIORITY; 388 389 if (g_displayLevel == 1 && !g_additionalParam) 390 DISPLAY("bench %s %s: input %u bytes, %u seconds, %u KB blocks\n", LZ4_VERSION_STRING, LZ4_GIT_COMMIT_STRING, (U32)benchedSize, g_nbSeconds, (U32)(g_blockSize>>10)); 391 392 if (cLevelLast < cLevel) cLevelLast = cLevel; 393 394 for (l=cLevel; l <= cLevelLast; l++) { 395 BMK_benchMem(srcBuffer, benchedSize, 396 displayName, l, 397 fileSizes, nbFiles); 398 } 399} 400 401 402/*! BMK_loadFiles() : 403 Loads `buffer` with content of files listed within `fileNamesTable`. 404 At most, fills `buffer` entirely */ 405static void BMK_loadFiles(void* buffer, size_t bufferSize, 406 size_t* fileSizes, 407 const char** fileNamesTable, unsigned nbFiles) 408{ 409 size_t pos = 0, totalSize = 0; 410 unsigned n; 411 for (n=0; n<nbFiles; n++) { 412 FILE* f; 413 U64 fileSize = UTIL_getFileSize(fileNamesTable[n]); 414 if (UTIL_isDirectory(fileNamesTable[n])) { 415 DISPLAYLEVEL(2, "Ignoring %s directory... \n", fileNamesTable[n]); 416 fileSizes[n] = 0; 417 continue; 418 } 419 f = fopen(fileNamesTable[n], "rb"); 420 if (f==NULL) EXM_THROW(10, "impossible to open file %s", fileNamesTable[n]); 421 DISPLAYUPDATE(2, "Loading %s... \r", fileNamesTable[n]); 422 if (fileSize > bufferSize-pos) fileSize = bufferSize-pos, nbFiles=n; /* buffer too small - stop after this file */ 423 { size_t const readSize = fread(((char*)buffer)+pos, 1, (size_t)fileSize, f); 424 if (readSize != (size_t)fileSize) EXM_THROW(11, "could not read %s", fileNamesTable[n]); 425 pos += readSize; } 426 fileSizes[n] = (size_t)fileSize; 427 totalSize += (size_t)fileSize; 428 fclose(f); 429 } 430 431 if (totalSize == 0) EXM_THROW(12, "no data to bench"); 432} 433 434static void BMK_benchFileTable(const char** fileNamesTable, unsigned nbFiles, 435 int cLevel, int cLevelLast) 436{ 437 void* srcBuffer; 438 size_t benchedSize; 439 size_t* fileSizes = (size_t*)malloc(nbFiles * sizeof(size_t)); 440 U64 const totalSizeToLoad = UTIL_getTotalFileSize(fileNamesTable, nbFiles); 441 char mfName[20] = {0}; 442 443 if (!fileSizes) EXM_THROW(12, "not enough memory for fileSizes"); 444 445 /* Memory allocation & restrictions */ 446 benchedSize = BMK_findMaxMem(totalSizeToLoad * 3) / 3; 447 if (benchedSize==0) EXM_THROW(12, "not enough memory"); 448 if ((U64)benchedSize > totalSizeToLoad) benchedSize = (size_t)totalSizeToLoad; 449 if (benchedSize < totalSizeToLoad) 450 DISPLAY("Not enough memory; testing %u MB only...\n", (U32)(benchedSize >> 20)); 451 srcBuffer = malloc(benchedSize + !benchedSize); /* avoid alloc of zero */ 452 if (!srcBuffer) EXM_THROW(12, "not enough memory"); 453 454 /* Load input buffer */ 455 BMK_loadFiles(srcBuffer, benchedSize, fileSizes, fileNamesTable, nbFiles); 456 457 /* Bench */ 458 snprintf (mfName, sizeof(mfName), " %u files", nbFiles); 459 { const char* displayName = (nbFiles > 1) ? mfName : fileNamesTable[0]; 460 BMK_benchCLevel(srcBuffer, benchedSize, 461 displayName, cLevel, cLevelLast, 462 fileSizes, nbFiles); 463 } 464 465 /* clean up */ 466 free(srcBuffer); 467 free(fileSizes); 468} 469 470 471static void BMK_syntheticTest(int cLevel, int cLevelLast, double compressibility) 472{ 473 char name[20] = {0}; 474 size_t benchedSize = 10000000; 475 void* const srcBuffer = malloc(benchedSize); 476 477 /* Memory allocation */ 478 if (!srcBuffer) EXM_THROW(21, "not enough memory"); 479 480 /* Fill input buffer */ 481 RDG_genBuffer(srcBuffer, benchedSize, compressibility, 0.0, 0); 482 483 /* Bench */ 484 snprintf (name, sizeof(name), "Synthetic %2u%%", (unsigned)(compressibility*100)); 485 BMK_benchCLevel(srcBuffer, benchedSize, name, cLevel, cLevelLast, &benchedSize, 1); 486 487 /* clean up */ 488 free(srcBuffer); 489} 490 491 492int BMK_benchFiles(const char** fileNamesTable, unsigned nbFiles, 493 int cLevel, int cLevelLast) 494{ 495 double const compressibility = (double)g_compressibilityDefault / 100; 496 497 if (cLevel > LZ4HC_CLEVEL_MAX) cLevel = LZ4HC_CLEVEL_MAX; 498 if (cLevelLast > LZ4HC_CLEVEL_MAX) cLevelLast = LZ4HC_CLEVEL_MAX; 499 if (cLevelLast < cLevel) cLevelLast = cLevel; 500 if (cLevelLast > cLevel) DISPLAYLEVEL(2, "Benchmarking levels from %d to %d\n", cLevel, cLevelLast); 501 502 if (nbFiles == 0) 503 BMK_syntheticTest(cLevel, cLevelLast, compressibility); 504 else 505 BMK_benchFileTable(fileNamesTable, nbFiles, cLevel, cLevelLast); 506 return 0; 507} 508