1// Bench.cpp
2
3#include "StdAfx.h"
4
5#ifndef _WIN32
6#define USE_POSIX_TIME
7#define USE_POSIX_TIME2
8#endif
9
10#ifdef USE_POSIX_TIME
11#include <time.h>
12#ifdef USE_POSIX_TIME2
13#include <sys/time.h>
14#endif
15#endif
16
17#ifdef _WIN32
18#define USE_ALLOCA
19#endif
20
21#ifdef USE_ALLOCA
22#ifdef _WIN32
23#include <malloc.h>
24#else
25#include <stdlib.h>
26#endif
27#endif
28
29#include "../../../../C/7zCrc.h"
30#include "../../../../C/Alloc.h"
31#include "../../../../C/CpuArch.h"
32
33#if !defined(_7ZIP_ST) || defined(_WIN32)
34#include "../../../Windows/System.h"
35#endif
36
37#ifndef _7ZIP_ST
38#include "../../../Windows/Synchronization.h"
39#include "../../../Windows/Thread.h"
40#endif
41
42#include "../../../Common/IntToString.h"
43#include "../../../Common/StringConvert.h"
44#include "../../../Common/StringToInt.h"
45
46#include "../../Common/MethodProps.h"
47#include "../../Common/StreamUtils.h"
48
49#include "Bench.h"
50
51using namespace NWindows;
52
53static const UInt64 kComplexInCommands = (UInt64)1 <<
54  #ifdef UNDER_CE
55    31;
56  #else
57    34;
58  #endif
59
60static const UInt64 kComplexInSeconds = 4;
61
62static void SetComplexCommands(UInt32 complexInSeconds, UInt64 cpuFreq, UInt64 &complexInCommands)
63{
64  complexInCommands = kComplexInCommands;
65  const UInt64 kMinFreq = (UInt64)1000000 * 30;
66  const UInt64 kMaxFreq = (UInt64)1000000 * 20000;
67  if (cpuFreq < kMinFreq) cpuFreq = kMinFreq;
68  if (cpuFreq < kMaxFreq)
69  {
70    if (complexInSeconds != 0)
71      complexInCommands = complexInSeconds * cpuFreq;
72    else
73      complexInCommands = cpuFreq >> 2;
74  }
75}
76
77static const unsigned kNumHashDictBits = 17;
78static const UInt32 kFilterUnpackSize = (48 << 10);
79
80static const unsigned kOldLzmaDictBits = 30;
81
82static const UInt32 kAdditionalSize = (1 << 16);
83static const UInt32 kCompressedAdditionalSize = (1 << 10);
84static const UInt32 kMaxLzmaPropSize = 5;
85
86class CBaseRandomGenerator
87{
88  UInt32 A1;
89  UInt32 A2;
90public:
91  CBaseRandomGenerator() { Init(); }
92  void Init() { A1 = 362436069; A2 = 521288629;}
93  UInt32 GetRnd()
94  {
95    return
96      ((A1 = 36969 * (A1 & 0xffff) + (A1 >> 16)) << 16) +
97      ((A2 = 18000 * (A2 & 0xffff) + (A2 >> 16)) );
98  }
99};
100
101class CBenchBuffer
102{
103public:
104  size_t BufferSize;
105  Byte *Buffer;
106
107  CBenchBuffer(): Buffer(0) {}
108  virtual ~CBenchBuffer() { Free(); }
109  void Free()
110  {
111    ::MidFree(Buffer);
112    Buffer = 0;
113  }
114  bool Alloc(size_t bufferSize)
115  {
116    if (Buffer != 0 && BufferSize == bufferSize)
117      return true;
118    Free();
119    Buffer = (Byte *)::MidAlloc(bufferSize);
120    BufferSize = bufferSize;
121    return (Buffer != 0 || bufferSize == 0);
122  }
123};
124
125class CBenchRandomGenerator: public CBenchBuffer
126{
127  CBaseRandomGenerator *RG;
128public:
129  void Set(CBaseRandomGenerator *rg) { RG = rg; }
130  UInt32 GetVal(UInt32 &res, unsigned numBits)
131  {
132    UInt32 val = res & (((UInt32)1 << numBits) - 1);
133    res >>= numBits;
134    return val;
135  }
136  UInt32 GetLen(UInt32 &res)
137  {
138    UInt32 len = GetVal(res, 2);
139    return GetVal(res, 1 + len);
140  }
141
142  void GenerateSimpleRandom()
143  {
144    for (UInt32 i = 0; i < BufferSize; i++)
145      Buffer[i] = (Byte)RG->GetRnd();
146  }
147
148  void Generate(unsigned dictBits)
149  {
150    UInt32 pos = 0;
151    UInt32 rep0 = 1;
152    while (pos < BufferSize)
153    {
154      UInt32 res = RG->GetRnd();
155      res >>= 1;
156      if (GetVal(res, 1) == 0 || pos < 1024)
157        Buffer[pos++] = (Byte)(res & 0xFF);
158      else
159      {
160        UInt32 len;
161        len = 1 + GetLen(res);
162        if (GetVal(res, 3) != 0)
163        {
164          len += GetLen(res);
165          do
166          {
167            UInt32 ppp = GetVal(res, 5) + 6;
168            res = RG->GetRnd();
169            if (ppp > dictBits)
170              continue;
171            rep0 = /* (1 << ppp) +*/  GetVal(res, ppp);
172            res = RG->GetRnd();
173          }
174          while (rep0 >= pos);
175          rep0++;
176        }
177
178        for (UInt32 i = 0; i < len && pos < BufferSize; i++, pos++)
179          Buffer[pos] = Buffer[pos - rep0];
180      }
181    }
182  }
183};
184
185
186class CBenchmarkInStream:
187  public ISequentialInStream,
188  public CMyUnknownImp
189{
190  const Byte *Data;
191  size_t Pos;
192  size_t Size;
193public:
194  MY_UNKNOWN_IMP
195  void Init(const Byte *data, size_t size)
196  {
197    Data = data;
198    Size = size;
199    Pos = 0;
200  }
201  STDMETHOD(Read)(void *data, UInt32 size, UInt32 *processedSize);
202};
203
204STDMETHODIMP CBenchmarkInStream::Read(void *data, UInt32 size, UInt32 *processedSize)
205{
206  size_t remain = Size - Pos;
207  UInt32 kMaxBlockSize = (1 << 20);
208  if (size > kMaxBlockSize)
209    size = kMaxBlockSize;
210  if (size > remain)
211    size = (UInt32)remain;
212  for (UInt32 i = 0; i < size; i++)
213    ((Byte *)data)[i] = Data[Pos + i];
214  Pos += size;
215  if(processedSize != NULL)
216    *processedSize = size;
217  return S_OK;
218}
219
220class CBenchmarkOutStream:
221  public ISequentialOutStream,
222  public CBenchBuffer,
223  public CMyUnknownImp
224{
225  // bool _overflow;
226public:
227  UInt32 Pos;
228  bool RealCopy;
229  bool CalcCrc;
230  UInt32 Crc;
231
232  // CBenchmarkOutStream(): _overflow(false) {}
233  void Init(bool realCopy, bool calcCrc)
234  {
235    Crc = CRC_INIT_VAL;
236    RealCopy = realCopy;
237    CalcCrc = calcCrc;
238    // _overflow = false;
239    Pos = 0;
240  }
241  MY_UNKNOWN_IMP
242  STDMETHOD(Write)(const void *data, UInt32 size, UInt32 *processedSize);
243};
244
245STDMETHODIMP CBenchmarkOutStream::Write(const void *data, UInt32 size, UInt32 *processedSize)
246{
247  size_t curSize = BufferSize - Pos;
248  if (curSize > size)
249    curSize = size;
250  if (RealCopy)
251    memcpy(Buffer + Pos, data, curSize);
252  if (CalcCrc)
253    Crc = CrcUpdate(Crc, data, curSize);
254  Pos += (UInt32)curSize;
255  if(processedSize != NULL)
256    *processedSize = (UInt32)curSize;
257  if (curSize != size)
258  {
259    // _overflow = true;
260    return E_FAIL;
261  }
262  return S_OK;
263}
264
265class CCrcOutStream:
266  public ISequentialOutStream,
267  public CMyUnknownImp
268{
269public:
270  bool CalcCrc;
271  UInt32 Crc;
272  MY_UNKNOWN_IMP
273
274  CCrcOutStream(): CalcCrc(true) {};
275  void Init() { Crc = CRC_INIT_VAL; }
276  STDMETHOD(Write)(const void *data, UInt32 size, UInt32 *processedSize);
277};
278
279STDMETHODIMP CCrcOutStream::Write(const void *data, UInt32 size, UInt32 *processedSize)
280{
281  if (CalcCrc)
282    Crc = CrcUpdate(Crc, data, size);
283  if (processedSize != NULL)
284    *processedSize = size;
285  return S_OK;
286}
287
288static UInt64 GetTimeCount()
289{
290  #ifdef USE_POSIX_TIME
291  #ifdef USE_POSIX_TIME2
292  timeval v;
293  if (gettimeofday(&v, 0) == 0)
294    return (UInt64)(v.tv_sec) * 1000000 + v.tv_usec;
295  return (UInt64)time(NULL) * 1000000;
296  #else
297  return time(NULL);
298  #endif
299  #else
300  /*
301  LARGE_INTEGER value;
302  if (::QueryPerformanceCounter(&value))
303    return value.QuadPart;
304  */
305  return GetTickCount();
306  #endif
307}
308
309static UInt64 GetFreq()
310{
311  #ifdef USE_POSIX_TIME
312  #ifdef USE_POSIX_TIME2
313  return 1000000;
314  #else
315  return 1;
316  #endif
317  #else
318  /*
319  LARGE_INTEGER value;
320  if (::QueryPerformanceFrequency(&value))
321    return value.QuadPart;
322  */
323  return 1000;
324  #endif
325}
326
327#ifdef USE_POSIX_TIME
328
329struct CUserTime
330{
331  UInt64 Sum;
332  clock_t Prev;
333
334  void Init()
335  {
336    Prev = clock();
337    Sum = 0;
338  }
339
340  UInt64 GetUserTime()
341  {
342    clock_t v = clock();
343    Sum += v - Prev;
344    Prev = v;
345    return Sum;
346  }
347};
348
349#else
350
351static inline UInt64 GetTime64(const FILETIME &t) { return ((UInt64)t.dwHighDateTime << 32) | t.dwLowDateTime; }
352UInt64 GetWinUserTime()
353{
354  FILETIME creationTime, exitTime, kernelTime, userTime;
355  if (
356  #ifdef UNDER_CE
357    ::GetThreadTimes(::GetCurrentThread()
358  #else
359    ::GetProcessTimes(::GetCurrentProcess()
360  #endif
361    , &creationTime, &exitTime, &kernelTime, &userTime) != 0)
362    return GetTime64(userTime) + GetTime64(kernelTime);
363  return (UInt64)GetTickCount() * 10000;
364}
365
366struct CUserTime
367{
368  UInt64 StartTime;
369
370  void Init() { StartTime = GetWinUserTime(); }
371  UInt64 GetUserTime() { return GetWinUserTime() - StartTime; }
372};
373
374#endif
375
376static UInt64 GetUserFreq()
377{
378  #ifdef USE_POSIX_TIME
379  return CLOCKS_PER_SEC;
380  #else
381  return 10000000;
382  #endif
383}
384
385class CBenchProgressStatus
386{
387  #ifndef _7ZIP_ST
388  NSynchronization::CCriticalSection CS;
389  #endif
390public:
391  HRESULT Res;
392  bool EncodeMode;
393  void SetResult(HRESULT res)
394  {
395    #ifndef _7ZIP_ST
396    NSynchronization::CCriticalSectionLock lock(CS);
397    #endif
398    Res = res;
399  }
400  HRESULT GetResult()
401  {
402    #ifndef _7ZIP_ST
403    NSynchronization::CCriticalSectionLock lock(CS);
404    #endif
405    return Res;
406  }
407};
408
409struct CBenchInfoCalc
410{
411  CBenchInfo BenchInfo;
412  CUserTime UserTime;
413
414  void SetStartTime();
415  void SetFinishTime(CBenchInfo &dest);
416};
417
418void CBenchInfoCalc::SetStartTime()
419{
420  BenchInfo.GlobalFreq = GetFreq();
421  BenchInfo.UserFreq = GetUserFreq();
422  BenchInfo.GlobalTime = ::GetTimeCount();
423  BenchInfo.UserTime = 0;
424  UserTime.Init();
425}
426
427void CBenchInfoCalc::SetFinishTime(CBenchInfo &dest)
428{
429  dest = BenchInfo;
430  dest.GlobalTime = ::GetTimeCount() - BenchInfo.GlobalTime;
431  dest.UserTime = UserTime.GetUserTime();
432}
433
434class CBenchProgressInfo:
435  public ICompressProgressInfo,
436  public CMyUnknownImp,
437  public CBenchInfoCalc
438{
439public:
440  CBenchProgressStatus *Status;
441  HRESULT Res;
442  IBenchCallback *Callback;
443
444  CBenchProgressInfo(): Callback(0) {}
445  MY_UNKNOWN_IMP
446  STDMETHOD(SetRatioInfo)(const UInt64 *inSize, const UInt64 *outSize);
447};
448
449STDMETHODIMP CBenchProgressInfo::SetRatioInfo(const UInt64 *inSize, const UInt64 *outSize)
450{
451  HRESULT res = Status->GetResult();
452  if (res != S_OK)
453    return res;
454  if (!Callback)
455    return res;
456  CBenchInfo info;
457  SetFinishTime(info);
458  if (Status->EncodeMode)
459  {
460    info.UnpackSize = BenchInfo.UnpackSize + *inSize;
461    info.PackSize = BenchInfo.PackSize + *outSize;
462    res = Callback->SetEncodeResult(info, false);
463  }
464  else
465  {
466    info.PackSize = BenchInfo.PackSize + *inSize;
467    info.UnpackSize = BenchInfo.UnpackSize + *outSize;
468    res = Callback->SetDecodeResult(info, false);
469  }
470  if (res != S_OK)
471    Status->SetResult(res);
472  return res;
473}
474
475static const int kSubBits = 8;
476
477static UInt32 GetLogSize(UInt32 size)
478{
479  for (int i = kSubBits; i < 32; i++)
480    for (UInt32 j = 0; j < (1 << kSubBits); j++)
481      if (size <= (((UInt32)1) << i) + (j << (i - kSubBits)))
482        return (i << kSubBits) + j;
483  return (32 << kSubBits);
484}
485
486static void NormalizeVals(UInt64 &v1, UInt64 &v2)
487{
488  while (v1 > 1000000)
489  {
490    v1 >>= 1;
491    v2 >>= 1;
492  }
493}
494
495UInt64 CBenchInfo::GetUsage() const
496{
497  UInt64 userTime = UserTime;
498  UInt64 userFreq = UserFreq;
499  UInt64 globalTime = GlobalTime;
500  UInt64 globalFreq = GlobalFreq;
501  NormalizeVals(userTime, userFreq);
502  NormalizeVals(globalFreq, globalTime);
503  if (userFreq == 0)
504    userFreq = 1;
505  if (globalTime == 0)
506    globalTime = 1;
507  return userTime * globalFreq * 1000000 / userFreq / globalTime;
508}
509
510UInt64 CBenchInfo::GetRatingPerUsage(UInt64 rating) const
511{
512  UInt64 userTime = UserTime;
513  UInt64 userFreq = UserFreq;
514  UInt64 globalTime = GlobalTime;
515  UInt64 globalFreq = GlobalFreq;
516  NormalizeVals(userFreq, userTime);
517  NormalizeVals(globalTime, globalFreq);
518  if (globalFreq == 0)
519    globalFreq = 1;
520  if (userTime == 0)
521    userTime = 1;
522  return userFreq * globalTime / globalFreq * rating / userTime;
523}
524
525static UInt64 MyMultDiv64(UInt64 value, UInt64 elapsedTime, UInt64 freq)
526{
527  UInt64 elTime = elapsedTime;
528  NormalizeVals(freq, elTime);
529  if (elTime == 0)
530    elTime = 1;
531  return value * freq / elTime;
532}
533
534UInt64 CBenchInfo::GetSpeed(UInt64 numCommands) const
535{
536  return MyMultDiv64(numCommands, GlobalTime, GlobalFreq);
537}
538
539struct CBenchProps
540{
541  bool LzmaRatingMode;
542
543  UInt32 EncComplex;
544  UInt32 DecComplexCompr;
545  UInt32 DecComplexUnc;
546
547  CBenchProps(): LzmaRatingMode(false) {}
548  void SetLzmaCompexity();
549
550  UInt64 GeComprCommands(UInt64 unpackSize)
551  {
552    return unpackSize * EncComplex;
553  }
554
555  UInt64 GeDecomprCommands(UInt64 packSize, UInt64 unpackSize)
556  {
557    return (packSize * DecComplexCompr + unpackSize * DecComplexUnc);
558  }
559
560  UInt64 GetCompressRating(UInt32 dictSize, UInt64 elapsedTime, UInt64 freq, UInt64 size);
561  UInt64 GetDecompressRating(UInt64 elapsedTime, UInt64 freq, UInt64 outSize, UInt64 inSize, UInt64 numIterations);
562};
563
564void CBenchProps::SetLzmaCompexity()
565{
566  EncComplex = 1200;
567  DecComplexUnc = 4;
568  DecComplexCompr = 190;
569  LzmaRatingMode = true;
570}
571
572UInt64 CBenchProps::GetCompressRating(UInt32 dictSize, UInt64 elapsedTime, UInt64 freq, UInt64 size)
573{
574  if (dictSize < (1 << kBenchMinDicLogSize))
575    dictSize = (1 << kBenchMinDicLogSize);
576  UInt64 encComplex = EncComplex;
577  if (LzmaRatingMode)
578  {
579    UInt64 t = GetLogSize(dictSize) - (kBenchMinDicLogSize << kSubBits);
580    encComplex = 870 + ((t * t * 5) >> (2 * kSubBits));
581  }
582  UInt64 numCommands = (UInt64)size * encComplex;
583  return MyMultDiv64(numCommands, elapsedTime, freq);
584}
585
586UInt64 CBenchProps::GetDecompressRating(UInt64 elapsedTime, UInt64 freq, UInt64 outSize, UInt64 inSize, UInt64 numIterations)
587{
588  UInt64 numCommands = (inSize * DecComplexCompr + outSize * DecComplexUnc) * numIterations;
589  return MyMultDiv64(numCommands, elapsedTime, freq);
590}
591
592UInt64 GetCompressRating(UInt32 dictSize, UInt64 elapsedTime, UInt64 freq, UInt64 size)
593{
594  CBenchProps props;
595  props.SetLzmaCompexity();
596  return props.GetCompressRating(dictSize, elapsedTime, freq, size);
597}
598
599UInt64 GetDecompressRating(UInt64 elapsedTime, UInt64 freq, UInt64 outSize, UInt64 inSize, UInt64 numIterations)
600{
601  CBenchProps props;
602  props.SetLzmaCompexity();
603  return props.GetDecompressRating(elapsedTime, freq, outSize, inSize, numIterations);
604}
605
606struct CEncoderInfo;
607
608struct CEncoderInfo
609{
610  #ifndef _7ZIP_ST
611  NWindows::CThread thread[2];
612  UInt32 NumDecoderSubThreads;
613  #endif
614  CMyComPtr<ICompressCoder> _encoder;
615  CMyComPtr<ICompressFilter> _encoderFilter;
616  CBenchProgressInfo *progressInfoSpec[2];
617  CMyComPtr<ICompressProgressInfo> progressInfo[2];
618  UInt64 NumIterations;
619  #ifdef USE_ALLOCA
620  size_t AllocaSize;
621  #endif
622
623  Byte _key[32];
624  Byte _iv[16];
625  Byte _psw[16];
626  bool CheckCrc_Enc;
627  bool CheckCrc_Dec;
628
629  struct CDecoderInfo
630  {
631    CEncoderInfo *Encoder;
632    UInt32 DecoderIndex;
633    #ifdef USE_ALLOCA
634    size_t AllocaSize;
635    #endif
636    bool CallbackMode;
637  };
638  CDecoderInfo decodersInfo[2];
639
640  CMyComPtr<ICompressCoder> _decoders[2];
641  CMyComPtr<ICompressFilter> _decoderFilter;
642
643  HRESULT Results[2];
644  CBenchmarkOutStream *outStreamSpec;
645  CMyComPtr<ISequentialOutStream> outStream;
646  IBenchCallback *callback;
647  IBenchPrintCallback *printCallback;
648  UInt32 crc;
649  UInt32 kBufferSize;
650  UInt32 compressedSize;
651  CBenchRandomGenerator rg;
652  CBenchBuffer rgCopy; // it must be 16-byte aligned !!!
653  CBenchmarkOutStream *propStreamSpec;
654  CMyComPtr<ISequentialOutStream> propStream;
655
656  // for decode
657  COneMethodInfo _method;
658  UInt32 _uncompressedDataSize;
659
660  HRESULT Init(
661      const COneMethodInfo &method,
662      UInt32 uncompressedDataSize,
663      unsigned generateDictBits,
664      CBaseRandomGenerator *rg);
665  HRESULT Encode();
666  HRESULT Decode(UInt32 decoderIndex);
667
668  CEncoderInfo():
669    CheckCrc_Enc(true),
670    CheckCrc_Dec(true),
671    outStreamSpec(0), callback(0), printCallback(0), propStreamSpec(0) {}
672
673  #ifndef _7ZIP_ST
674  static THREAD_FUNC_DECL EncodeThreadFunction(void *param)
675  {
676    HRESULT res;
677    CEncoderInfo *encoder = (CEncoderInfo *)param;
678    try
679    {
680      #ifdef USE_ALLOCA
681      alloca(encoder->AllocaSize);
682      #endif
683      res = encoder->Encode();
684      encoder->Results[0] = res;
685    }
686    catch(...)
687    {
688      res = E_FAIL;
689    }
690    if (res != S_OK)
691      encoder->progressInfoSpec[0]->Status->SetResult(res);
692    return 0;
693  }
694  static THREAD_FUNC_DECL DecodeThreadFunction(void *param)
695  {
696    CDecoderInfo *decoder = (CDecoderInfo *)param;
697    #ifdef USE_ALLOCA
698    alloca(decoder->AllocaSize);
699    #endif
700    CEncoderInfo *encoder = decoder->Encoder;
701    encoder->Results[decoder->DecoderIndex] = encoder->Decode(decoder->DecoderIndex);
702    return 0;
703  }
704
705  HRESULT CreateEncoderThread()
706  {
707    return thread[0].Create(EncodeThreadFunction, this);
708  }
709
710  HRESULT CreateDecoderThread(int index, bool callbackMode
711      #ifdef USE_ALLOCA
712      , size_t allocaSize
713      #endif
714      )
715  {
716    CDecoderInfo &decoder = decodersInfo[index];
717    decoder.DecoderIndex = index;
718    decoder.Encoder = this;
719    #ifdef USE_ALLOCA
720    decoder.AllocaSize = allocaSize;
721    #endif
722    decoder.CallbackMode = callbackMode;
723    return thread[index].Create(DecodeThreadFunction, &decoder);
724  }
725  #endif
726};
727
728static const UInt32 k_LZMA  = 0x030101;
729
730HRESULT CEncoderInfo::Init(
731    const COneMethodInfo &method,
732    UInt32 uncompressedDataSize,
733    unsigned generateDictBits,
734    CBaseRandomGenerator *rgLoc)
735{
736  rg.Set(rgLoc);
737  kBufferSize = uncompressedDataSize;
738  UInt32 kCompressedBufferSize =
739      kBufferSize + kCompressedAdditionalSize;
740      // (kBufferSize - kBufferSize / 4) + kCompressedAdditionalSize;
741  if (!rg.Alloc(kBufferSize))
742    return E_OUTOFMEMORY;
743  if (generateDictBits == 0)
744    rg.GenerateSimpleRandom();
745  else
746    rg.Generate(generateDictBits);
747  crc = CrcCalc(rg.Buffer, rg.BufferSize);
748
749  if (_encoderFilter)
750  {
751    if (!rgCopy.Alloc(rg.BufferSize))
752      return E_OUTOFMEMORY;
753  }
754
755
756  outStreamSpec = new CBenchmarkOutStream;
757  if (!outStreamSpec->Alloc(kCompressedBufferSize))
758    return E_OUTOFMEMORY;
759
760  outStream = outStreamSpec;
761
762  propStreamSpec = 0;
763  if (!propStream)
764  {
765    propStreamSpec = new CBenchmarkOutStream;
766    propStream = propStreamSpec;
767  }
768  if (!propStreamSpec->Alloc(kMaxLzmaPropSize))
769    return E_OUTOFMEMORY;
770  propStreamSpec->Init(true, false);
771
772
773  CMyComPtr<IUnknown> coder;
774  if (_encoderFilter)
775    coder = _encoderFilter;
776  else
777    coder = _encoder;
778  {
779    CMyComPtr<ICompressSetCoderProperties> scp;
780    coder.QueryInterface(IID_ICompressSetCoderProperties, &scp);
781    if (scp)
782    {
783      UInt64 reduceSize = uncompressedDataSize;
784      RINOK(method.SetCoderProps(scp, &reduceSize));
785    }
786    else
787    {
788      if (method.AreThereNonOptionalProps())
789        return E_INVALIDARG;
790    }
791
792    CMyComPtr<ICompressWriteCoderProperties> writeCoderProps;
793    coder.QueryInterface(IID_ICompressWriteCoderProperties, &writeCoderProps);
794    if (writeCoderProps)
795    {
796      RINOK(writeCoderProps->WriteCoderProperties(propStream));
797    }
798
799    {
800      CMyComPtr<ICryptoSetPassword> sp;
801      coder.QueryInterface(IID_ICryptoSetPassword, &sp);
802      if (sp)
803      {
804        RINOK(sp->CryptoSetPassword(_psw, sizeof(_psw)));
805
806        // we must call encoding one time to calculate password key for key cache.
807        // it must be after WriteCoderProperties!
808        CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream;
809        CMyComPtr<ISequentialInStream> inStream = inStreamSpec;
810        Byte temp[16];
811        memset(temp, 0, sizeof(temp));
812        inStreamSpec->Init(temp, sizeof(temp));
813
814        CCrcOutStream *outStreamSpec = new CCrcOutStream;
815        CMyComPtr<ISequentialOutStream> outStream = outStreamSpec;
816        outStreamSpec->Init();
817
818        if (_encoderFilter)
819        {
820          _encoderFilter->Init();
821          _encoderFilter->Filter(temp, sizeof(temp));
822        }
823        else
824        {
825          RINOK(_encoder->Code(inStream, outStream, 0, 0, NULL));
826        }
827      }
828    }
829
830  }
831  return S_OK;
832}
833
834HRESULT CEncoderInfo::Encode()
835{
836  CBenchInfo &bi = progressInfoSpec[0]->BenchInfo;
837  bi.UnpackSize = 0;
838  bi.PackSize = 0;
839  CMyComPtr<ICryptoProperties> cp;
840  CMyComPtr<IUnknown> coder;
841  if (_encoderFilter)
842    coder = _encoderFilter;
843  else
844    coder = _encoder;
845  coder.QueryInterface(IID_ICryptoProperties, &cp);
846  CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream;
847  CMyComPtr<ISequentialInStream> inStream = inStreamSpec;
848  UInt64 prev = 0;
849
850  UInt32 crcPrev = 0;
851
852  if (cp)
853  {
854    RINOK(cp->SetKey(_key, sizeof(_key)));
855    RINOK(cp->SetInitVector(_iv, sizeof(_iv)));
856  }
857
858  for (UInt64 i = 0; i < NumIterations; i++)
859  {
860    if (printCallback && bi.UnpackSize - prev > (1 << 20))
861    {
862      RINOK(printCallback->CheckBreak());
863      prev = bi.UnpackSize;
864    }
865
866    bool isLast = (i == NumIterations - 1);
867    bool calcCrc = ((isLast || (i & 0x7F) == 0 || CheckCrc_Enc) && NumIterations != 1);
868    outStreamSpec->Init(isLast, calcCrc);
869
870    if (_encoderFilter)
871    {
872      memcpy(rgCopy.Buffer, rg.Buffer, rg.BufferSize);
873      _encoderFilter->Init();
874      _encoderFilter->Filter(rgCopy.Buffer, (UInt32)rg.BufferSize);
875      RINOK(WriteStream(outStream, rgCopy.Buffer, rg.BufferSize));
876    }
877    else
878    {
879      inStreamSpec->Init(rg.Buffer, rg.BufferSize);
880      RINOK(_encoder->Code(inStream, outStream, 0, 0, progressInfo[0]));
881    }
882
883    UInt32 crcNew = CRC_GET_DIGEST(outStreamSpec->Crc);
884    if (i == 0)
885      crcPrev = crcNew;
886    else if (calcCrc && crcPrev != crcNew)
887      return E_FAIL;
888    compressedSize = outStreamSpec->Pos;
889    bi.UnpackSize += rg.BufferSize;
890    bi.PackSize += compressedSize;
891  }
892  _encoder.Release();
893  _encoderFilter.Release();
894  return S_OK;
895}
896
897HRESULT CEncoderInfo::Decode(UInt32 decoderIndex)
898{
899  CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream;
900  CMyComPtr<ISequentialInStream> inStream = inStreamSpec;
901  CMyComPtr<ICompressCoder> &decoder = _decoders[decoderIndex];
902  CMyComPtr<IUnknown> coder;
903  if (_decoderFilter)
904  {
905    if (decoderIndex != 0)
906      return E_FAIL;
907    coder = _decoderFilter;
908  }
909  else
910    coder = decoder;
911
912  CMyComPtr<ICompressSetDecoderProperties2> setDecProps;
913  coder.QueryInterface(IID_ICompressSetDecoderProperties2, &setDecProps);
914  if (!setDecProps && propStreamSpec->Pos != 0)
915    return E_FAIL;
916
917  CCrcOutStream *crcOutStreamSpec = new CCrcOutStream;
918  CMyComPtr<ISequentialOutStream> crcOutStream = crcOutStreamSpec;
919
920  CBenchProgressInfo *pi = progressInfoSpec[decoderIndex];
921  pi->BenchInfo.UnpackSize = 0;
922  pi->BenchInfo.PackSize = 0;
923
924  #ifndef _7ZIP_ST
925  {
926    CMyComPtr<ICompressSetCoderMt> setCoderMt;
927    coder.QueryInterface(IID_ICompressSetCoderMt, &setCoderMt);
928    if (setCoderMt)
929    {
930      RINOK(setCoderMt->SetNumberOfThreads(NumDecoderSubThreads));
931    }
932  }
933  #endif
934
935  CMyComPtr<ICompressSetCoderProperties> scp;
936  coder.QueryInterface(IID_ICompressSetCoderProperties, &scp);
937  if (scp)
938  {
939    UInt64 reduceSize = _uncompressedDataSize;
940    RINOK(_method.SetCoderProps(scp, &reduceSize));
941  }
942
943  CMyComPtr<ICryptoProperties> cp;
944  coder.QueryInterface(IID_ICryptoProperties, &cp);
945
946  if (setDecProps)
947  {
948    RINOK(setDecProps->SetDecoderProperties2(propStreamSpec->Buffer, propStreamSpec->Pos));
949  }
950
951  {
952    CMyComPtr<ICryptoSetPassword> sp;
953    coder.QueryInterface(IID_ICryptoSetPassword, &sp);
954    if (sp)
955    {
956      RINOK(sp->CryptoSetPassword(_psw, sizeof(_psw)));
957    }
958  }
959
960  UInt64 prev = 0;
961
962  if (cp)
963  {
964    RINOK(cp->SetKey(_key, sizeof(_key)));
965    RINOK(cp->SetInitVector(_iv, sizeof(_iv)));
966  }
967
968  for (UInt64 i = 0; i < NumIterations; i++)
969  {
970    if (printCallback && pi->BenchInfo.UnpackSize - prev > (1 << 20))
971    {
972      RINOK(printCallback->CheckBreak());
973      prev = pi->BenchInfo.UnpackSize;
974    }
975
976    inStreamSpec->Init(outStreamSpec->Buffer, compressedSize);
977    crcOutStreamSpec->Init();
978
979    UInt64 outSize = kBufferSize;
980    crcOutStreamSpec->CalcCrc = ((i & 0x7F) == 0 || CheckCrc_Dec);
981    if (_decoderFilter)
982    {
983      if (compressedSize > rgCopy.BufferSize)
984        return E_FAIL;
985      memcpy(rgCopy.Buffer, outStreamSpec->Buffer, compressedSize);
986      _decoderFilter->Init();
987      _decoderFilter->Filter(rgCopy.Buffer, compressedSize);
988      RINOK(WriteStream(crcOutStream, rgCopy.Buffer, rg.BufferSize));
989    }
990    else
991    {
992      RINOK(decoder->Code(inStream, crcOutStream, 0, &outSize, progressInfo[decoderIndex]));
993    }
994    if (crcOutStreamSpec->CalcCrc && CRC_GET_DIGEST(crcOutStreamSpec->Crc) != crc)
995      return S_FALSE;
996    pi->BenchInfo.UnpackSize += kBufferSize;
997    pi->BenchInfo.PackSize += compressedSize;
998  }
999  decoder.Release();
1000  _decoderFilter.Release();
1001  return S_OK;
1002}
1003
1004static const UInt32 kNumThreadsMax = (1 << 12);
1005
1006struct CBenchEncoders
1007{
1008  CEncoderInfo *encoders;
1009  CBenchEncoders(UInt32 num): encoders(0) { encoders = new CEncoderInfo[num]; }
1010  ~CBenchEncoders() { delete []encoders; }
1011};
1012
1013static UInt64 GetNumIterations(UInt64 numCommands, UInt64 complexInCommands)
1014{
1015  if (numCommands < (1 << 4))
1016    numCommands = (1 << 4);
1017  UInt64 res = complexInCommands / numCommands;
1018  return (res == 0 ? 1 : res);
1019}
1020
1021static HRESULT MethodBench(
1022    DECL_EXTERNAL_CODECS_LOC_VARS
1023    UInt64 complexInCommands,
1024    bool oldLzmaBenchMode,
1025    UInt32 numThreads,
1026    const COneMethodInfo &method2,
1027    UInt32 uncompressedDataSize,
1028    unsigned generateDictBits,
1029    IBenchPrintCallback *printCallback,
1030    IBenchCallback *callback,
1031    CBenchProps *benchProps)
1032{
1033  COneMethodInfo method = method2;
1034  UInt64 methodId;
1035  UInt32 numInStreams, numOutStreams;
1036  if (!FindMethod(
1037      EXTERNAL_CODECS_LOC_VARS
1038      method.MethodName, methodId, numInStreams, numOutStreams))
1039    return E_NOTIMPL;
1040  if (numInStreams != 1 || numOutStreams != 1)
1041    return E_INVALIDARG;
1042
1043  UInt32 numEncoderThreads = 1;
1044  UInt32 numSubDecoderThreads = 1;
1045
1046  #ifndef _7ZIP_ST
1047    numEncoderThreads = numThreads;
1048
1049    if (oldLzmaBenchMode && methodId == k_LZMA)
1050    {
1051      bool fixedNumber;
1052      UInt32 numLzmaThreads = method.Get_Lzma_NumThreads(fixedNumber);
1053      if (!fixedNumber && numThreads == 1)
1054        method.AddNumThreadsProp(1);
1055      if (numThreads > 1 && numLzmaThreads > 1)
1056      {
1057        numEncoderThreads = numThreads / 2;
1058        numSubDecoderThreads = 2;
1059      }
1060    }
1061  #endif
1062
1063  CBenchEncoders encodersSpec(numEncoderThreads);
1064  CEncoderInfo *encoders = encodersSpec.encoders;
1065
1066  UInt32 i;
1067  for (i = 0; i < numEncoderThreads; i++)
1068  {
1069    CEncoderInfo &encoder = encoders[i];
1070    encoder.callback = (i == 0) ? callback : 0;
1071    encoder.printCallback = printCallback;
1072
1073    CMyComPtr<ICompressCoder2> coder2;
1074    RINOK(CreateCoder(EXTERNAL_CODECS_LOC_VARS methodId,
1075        encoder._encoderFilter, encoder._encoder, coder2, true, false));
1076    if (!encoder._encoder && !encoder._encoderFilter)
1077      return E_NOTIMPL;
1078    // encoder._encoderFilter.Release(); // we can disable filter to check the speed of FilterCoder.
1079
1080    encoder.CheckCrc_Enc = (benchProps->EncComplex) > 30 ;
1081    encoder.CheckCrc_Dec = (benchProps->DecComplexCompr + benchProps->DecComplexUnc) > 30 ;
1082
1083    memset(encoder._iv, 0, sizeof(encoder._iv));
1084    memset(encoder._key, 0, sizeof(encoder._key));
1085    memset(encoder._psw, 0, sizeof(encoder._psw));
1086
1087    for (UInt32 j = 0; j < numSubDecoderThreads; j++)
1088    {
1089      CMyComPtr<ICompressCoder2> coder2de;
1090      CMyComPtr<ICompressCoder> &decoder = encoder._decoders[j];
1091      RINOK(CreateCoder(EXTERNAL_CODECS_LOC_VARS methodId,
1092        encoder._decoderFilter, decoder, coder2de, false, false));
1093      if (!encoder._decoderFilter && !decoder)
1094        return E_NOTIMPL;
1095    }
1096  }
1097
1098  CBaseRandomGenerator rg;
1099  rg.Init();
1100  for (i = 0; i < numEncoderThreads; i++)
1101  {
1102    CEncoderInfo &encoder = encoders[i];
1103    encoder._method = method;
1104    encoder._uncompressedDataSize = uncompressedDataSize;
1105    RINOK(encoders[i].Init(method, uncompressedDataSize, generateDictBits, &rg));
1106  }
1107
1108  CBenchProgressStatus status;
1109  status.Res = S_OK;
1110  status.EncodeMode = true;
1111
1112  for (i = 0; i < numEncoderThreads; i++)
1113  {
1114    CEncoderInfo &encoder = encoders[i];
1115    encoder.NumIterations = GetNumIterations(benchProps->GeComprCommands(uncompressedDataSize), complexInCommands);
1116
1117    for (int j = 0; j < 2; j++)
1118    {
1119      CBenchProgressInfo *spec = new CBenchProgressInfo;
1120      encoder.progressInfoSpec[j] = spec;
1121      encoder.progressInfo[j] = spec;
1122      spec->Status = &status;
1123    }
1124    if (i == 0)
1125    {
1126      CBenchProgressInfo *bpi = encoder.progressInfoSpec[0];
1127      bpi->Callback = callback;
1128      bpi->BenchInfo.NumIterations = numEncoderThreads;
1129      bpi->SetStartTime();
1130    }
1131
1132    #ifndef _7ZIP_ST
1133    if (numEncoderThreads > 1)
1134    {
1135      #ifdef USE_ALLOCA
1136      encoder.AllocaSize = (i * 16 * 21) & 0x7FF;
1137      #endif
1138      RINOK(encoder.CreateEncoderThread())
1139    }
1140    else
1141    #endif
1142    {
1143      RINOK(encoder.Encode());
1144    }
1145  }
1146  #ifndef _7ZIP_ST
1147  if (numEncoderThreads > 1)
1148    for (i = 0; i < numEncoderThreads; i++)
1149      encoders[i].thread[0].Wait();
1150  #endif
1151
1152  RINOK(status.Res);
1153
1154  CBenchInfo info;
1155
1156  encoders[0].progressInfoSpec[0]->SetFinishTime(info);
1157  info.UnpackSize = 0;
1158  info.PackSize = 0;
1159  info.NumIterations = encoders[0].NumIterations;
1160  for (i = 0; i < numEncoderThreads; i++)
1161  {
1162    CEncoderInfo &encoder = encoders[i];
1163    info.UnpackSize += encoder.kBufferSize;
1164    info.PackSize += encoder.compressedSize;
1165  }
1166  RINOK(callback->SetEncodeResult(info, true));
1167
1168
1169  status.Res = S_OK;
1170  status.EncodeMode = false;
1171
1172  UInt32 numDecoderThreads = numEncoderThreads * numSubDecoderThreads;
1173  for (i = 0; i < numEncoderThreads; i++)
1174  {
1175    CEncoderInfo &encoder = encoders[i];
1176
1177    if (i == 0)
1178    {
1179      encoder.NumIterations = GetNumIterations(benchProps->GeDecomprCommands(encoder.compressedSize, encoder.kBufferSize), complexInCommands);
1180      CBenchProgressInfo *bpi = encoder.progressInfoSpec[0];
1181      bpi->Callback = callback;
1182      bpi->BenchInfo.NumIterations = numDecoderThreads;
1183      bpi->SetStartTime();
1184    }
1185    else
1186      encoder.NumIterations = encoders[0].NumIterations;
1187
1188    #ifndef _7ZIP_ST
1189    {
1190      int numSubThreads = method.Get_NumThreads();
1191      encoder.NumDecoderSubThreads = (numSubThreads <= 0) ? 1 : numSubThreads;
1192    }
1193    if (numDecoderThreads > 1)
1194    {
1195      for (UInt32 j = 0; j < numSubDecoderThreads; j++)
1196      {
1197        HRESULT res = encoder.CreateDecoderThread(j, (i == 0 && j == 0)
1198            #ifdef USE_ALLOCA
1199            , ((i * numSubDecoderThreads + j) * 16 * 21) & 0x7FF
1200            #endif
1201            );
1202        RINOK(res);
1203      }
1204    }
1205    else
1206    #endif
1207    {
1208      RINOK(encoder.Decode(0));
1209    }
1210  }
1211  #ifndef _7ZIP_ST
1212  HRESULT res = S_OK;
1213  if (numDecoderThreads > 1)
1214    for (i = 0; i < numEncoderThreads; i++)
1215      for (UInt32 j = 0; j < numSubDecoderThreads; j++)
1216      {
1217        CEncoderInfo &encoder = encoders[i];
1218        encoder.thread[j].Wait();
1219        if (encoder.Results[j] != S_OK)
1220          res = encoder.Results[j];
1221      }
1222  RINOK(res);
1223  #endif
1224  RINOK(status.Res);
1225  encoders[0].progressInfoSpec[0]->SetFinishTime(info);
1226  #ifndef _7ZIP_ST
1227  #ifdef UNDER_CE
1228  if (numDecoderThreads > 1)
1229    for (i = 0; i < numEncoderThreads; i++)
1230      for (UInt32 j = 0; j < numSubDecoderThreads; j++)
1231      {
1232        FILETIME creationTime, exitTime, kernelTime, userTime;
1233        if (::GetThreadTimes(encoders[i].thread[j], &creationTime, &exitTime, &kernelTime, &userTime) != 0)
1234          info.UserTime += GetTime64(userTime) + GetTime64(kernelTime);
1235      }
1236  #endif
1237  #endif
1238  info.UnpackSize = 0;
1239  info.PackSize = 0;
1240  info.NumIterations = numSubDecoderThreads * encoders[0].NumIterations;
1241  for (i = 0; i < numEncoderThreads; i++)
1242  {
1243    CEncoderInfo &encoder = encoders[i];
1244    info.UnpackSize += encoder.kBufferSize;
1245    info.PackSize += encoder.compressedSize;
1246  }
1247  RINOK(callback->SetDecodeResult(info, false));
1248  RINOK(callback->SetDecodeResult(info, true));
1249  return S_OK;
1250}
1251
1252
// Returns an estimate of the memory needed by one LZMA encoder for the
// given dictionary size (presumably in bytes - the terms are byte counts
// derived from `dictionary`).
//   multiThread - adds a fixed (6 << 20) term when true.
//   dictionary  - dictionary size.
inline UInt64 GetLZMAUsage(bool multiThread, UInt32 dictionary)
{
  // Derive a hash-table size from the dictionary size: propagate the high
  // bits downwards, halve, force at least 16 low bits, and halve once more
  // if the mask is above 2^24.
  UInt32 hashMask = dictionary - 1;
  hashMask |= (hashMask >> 1);
  hashMask |= (hashMask >> 2);
  hashMask |= (hashMask >> 4);
  hashMask |= (hashMask >> 8);
  hashMask = (hashMask >> 1) | 0xFFFF;
  if (hashMask > ((UInt32)1 << 24))
    hashMask >>= 1;
  const UInt32 hashSize = hashMask + 1;

  const UInt64 dict64 = dictionary;
  // (hash entries + 2^16 + two entries per dictionary byte) * 4 bytes each
  const UInt64 tables = ((UInt64)(hashSize + (1 << 16)) + dict64 * 2) * 4;
  const UInt64 window = dict64 * 3 / 2;
  const UInt64 mtExtra = multiThread ? (6 << 20) : 0;
  return tables + window + (1 << 20) + mtExtra;
}
1268
1269UInt64 GetBenchMemoryUsage(UInt32 numThreads, UInt32 dictionary)
1270{
1271  const UInt32 kBufferSize = dictionary;
1272  const UInt32 kCompressedBufferSize = (kBufferSize / 2);
1273  UInt32 numSubThreads = (numThreads > 1) ? 2 : 1;
1274  UInt32 numBigThreads = numThreads / numSubThreads;
1275  return (kBufferSize + kCompressedBufferSize +
1276    GetLZMAUsage((numThreads > 1), dictionary) + (2 << 20)) * numBigThreads;
1277}
1278
1279static HRESULT CrcBig(const void *data, UInt32 size, UInt64 numIterations,
1280    const UInt32 *checkSum, IHasher *hf,
1281    IBenchPrintCallback *callback)
1282{
1283  Byte hash[64];
1284  UInt64 i;
1285  for (i = 0; i < sizeof(hash); i++)
1286    hash[i] = 0;
1287  for (i = 0; i < numIterations; i++)
1288  {
1289    if (callback && (i & 0xFF) == 0)
1290    {
1291      RINOK(callback->CheckBreak());
1292    }
1293    hf->Init();
1294    hf->Update(data, size);
1295    hf->Final(hash);
1296    UInt32 hashSize = hf->GetDigestSize();
1297    if (hashSize > sizeof(hash))
1298      return S_FALSE;
1299    UInt32 sum = 0;
1300    for (UInt32 j = 0; j < hashSize; j += 4)
1301      sum ^= GetUi32(hash + j);
1302    if (checkSum && sum != *checkSum)
1303    {
1304      // printf(" %08X ", sum);
1305      return S_FALSE;
1306    }
1307  }
1308  return S_OK;
1309}
1310
// Operand and initial sum for the CountCpuFreq() loops.
// NOTE(review): presumably kept as a mutable global so that the compiler
// can not fold the benchmark loop into a constant - confirm.
UInt32 g_BenchCpuFreqTemp = 1;

// YY1 is one add + one xor on (sum, val). Each next macro repeats the
// previous one 4 times: YY3 = 4, YY5 = 16, YY7 = 64 copies of YY1.
#define YY1 sum += val; sum ^= val;
#define YY3 YY1 YY1 YY1 YY1
#define YY5 YY3 YY3 YY3 YY3
#define YY7 YY5 YY5 YY5 YY5
// Number of statements in one YY7 expansion: 64 * 2.
static const UInt32 kNumFreqCommands = 128;

// Runs the YY7 block `num` times, starting from `sum` and using `val` as
// the operand. Returns the final sum.
static UInt32 CountCpuFreq(UInt32 sum, UInt32 num, UInt32 val)
{
  for (UInt32 i = 0; i < num; i++)
  {
    YY7
  }
  return sum;
}
1327
1328#ifndef _7ZIP_ST
1329
// State of one worker thread of the CPU frequency benchmark
// (see FreqThreadFunction).
struct CFreqInfo
{
  NWindows::CThread Thread;
  IBenchPrintCallback *Callback;  // polled with CheckBreak() by the thread
  HRESULT CallbackRes;            // last result of Callback->CheckBreak()
  UInt32 ValRes;                  // final sum written by the thread
  UInt32 Size;                    // `num` argument for CountCpuFreq()
  UInt64 NumIterations;           // number of CountCpuFreq() calls

  // Waits for the thread and closes its handle.
  void Wait()
  {
    Thread.Wait();
    Thread.Close();
  }
};
1345
1346static THREAD_FUNC_DECL FreqThreadFunction(void *param)
1347{
1348  CFreqInfo *p = (CFreqInfo *)param;
1349
1350  UInt32 sum = g_BenchCpuFreqTemp;
1351  for (UInt64 k = p->NumIterations; k > 0; k--)
1352  {
1353    p->CallbackRes = p->Callback->CheckBreak();
1354    if (p->CallbackRes != S_OK)
1355      return 0;
1356    sum = CountCpuFreq(sum, p->Size, g_BenchCpuFreqTemp);
1357  }
1358  p->ValRes = sum;
1359  return 0;
1360}
1361
1362struct CFreqThreads
1363{
1364  CFreqInfo *Items;
1365  UInt32 NumThreads;
1366
1367  CFreqThreads(): Items(0), NumThreads(0) {}
1368  void WaitAll()
1369  {
1370    for (UInt32 i = 0; i < NumThreads; i++)
1371      Items[i].Wait();
1372    NumThreads = 0;
1373  }
1374  ~CFreqThreads()
1375  {
1376    WaitAll();
1377    delete []Items;
1378  }
1379};
1380
// State of one worker thread of the hash benchmark (see CrcThreadFunction).
struct CCrcInfo
{
  NWindows::CThread Thread;
  IBenchPrintCallback *Callback;  // passed to CrcBig() as break callback
  HRESULT CallbackRes;            // NOTE(review): not written in the visible code

  const Byte *Data;               // buffer to hash
  UInt32 Size;                    // size of Data
  UInt64 NumIterations;           // number of hashing passes
  bool CheckSumDefined;           // true: CrcBig() must verify CheckSum
  UInt32 CheckSum;                // expected folded digest value
  CMyComPtr<IHasher> Hasher;      // hasher used by this thread
  HRESULT Res;                    // result of CrcBig()

  // Waits for the thread and closes its handle.
  void Wait()
  {
    Thread.Wait();
    Thread.Close();
  }
};
1401
1402static THREAD_FUNC_DECL CrcThreadFunction(void *param)
1403{
1404  CCrcInfo *p = (CCrcInfo *)param;
1405  p->Res = CrcBig(p->Data, p->Size, p->NumIterations,
1406      p->CheckSumDefined ? &p->CheckSum : NULL, p->Hasher,
1407      p->Callback);
1408  return 0;
1409}
1410
1411struct CCrcThreads
1412{
1413  CCrcInfo *Items;
1414  UInt32 NumThreads;
1415
1416  CCrcThreads(): Items(0), NumThreads(0) {}
1417  void WaitAll()
1418  {
1419    for (UInt32 i = 0; i < NumThreads; i++)
1420      Items[i].Wait();
1421    NumThreads = 0;
1422  }
1423  ~CCrcThreads()
1424  {
1425    WaitAll();
1426    delete []Items;
1427  }
1428};
1429
1430#endif
1431
1432static UInt32 CrcCalc1(const Byte *buf, UInt32 size)
1433{
1434  UInt32 crc = CRC_INIT_VAL;;
1435  for (UInt32 i = 0; i < size; i++)
1436    crc = CRC_UPDATE_BYTE(crc, buf[i]);
1437  return CRC_GET_DIGEST(crc);
1438}
1439
1440static void RandGen(Byte *buf, UInt32 size, CBaseRandomGenerator &RG)
1441{
1442  for (UInt32 i = 0; i < size; i++)
1443    buf[i] = (Byte)RG.GetRnd();
1444}
1445
1446static UInt32 RandGenCrc(Byte *buf, UInt32 size, CBaseRandomGenerator &RG)
1447{
1448  RandGen(buf, size, RG);
1449  return CrcCalc1(buf, size);
1450}
1451
1452bool CrcInternalTest()
1453{
1454  CBenchBuffer buffer;
1455  const UInt32 kBufferSize0 = (1 << 8);
1456  const UInt32 kBufferSize1 = (1 << 10);
1457  const UInt32 kCheckSize = (1 << 5);
1458  if (!buffer.Alloc(kBufferSize0 + kBufferSize1))
1459    return false;
1460  Byte *buf = buffer.Buffer;
1461  UInt32 i;
1462  for (i = 0; i < kBufferSize0; i++)
1463    buf[i] = (Byte)i;
1464  UInt32 crc1 = CrcCalc1(buf, kBufferSize0);
1465  if (crc1 != 0x29058C73)
1466    return false;
1467  CBaseRandomGenerator RG;
1468  RandGen(buf + kBufferSize0, kBufferSize1, RG);
1469  for (i = 0; i < kBufferSize0 + kBufferSize1 - kCheckSize; i++)
1470    for (UInt32 j = 0; j < kCheckSize; j++)
1471      if (CrcCalc1(buf + i, j) != CrcCalc(buf + i, j))
1472        return false;
1473  return true;
1474}
1475
// One row of the g_Bench table: a method tested by TotalBench().
struct CBenchMethod
{
  unsigned DictBits;        // passed to MethodBench(); 0 makes TotalBench() use kFilterUnpackSize
  UInt32 EncComplex;        // copied to CBenchProps::EncComplex
  UInt32 DecComplexCompr;   // copied to CBenchProps::DecComplexCompr
  UInt32 DecComplexUnc;     // copied to CBenchProps::DecComplexUnc
  const char *Name;         // method string, parsed with ParseMethodFromPROPVARIANT()
};
1484
// Methods tested by TotalBench(), in output order.
// Columns: DictBits, EncComplex, DecComplexCompr, DecComplexUnc, Name.
static const CBenchMethod g_Bench[] =
{
  { 17,  357,  145,   20, "LZMA:x1" },
  { 24, 1220,  145,   20, "LZMA:x5:mt1" },
  { 24, 1220,  145,   20, "LZMA:x5:mt2" },
  { 16,  124,   40,   14, "Deflate:x1" },
  { 16,  376,   40,   14, "Deflate:x5" },
  { 16, 1082,   40,   14, "Deflate:x7" },
  { 17,  422,   40,   14, "Deflate64:x5" },
  { 15,  590,   69,   69, "BZip2:x1" },
  { 19,  815,  122,  122, "BZip2:x5" },
  { 19,  815,  122,  122, "BZip2:x5:mt2" },
  { 19, 2530,  122,  122, "BZip2:x7" },
  { 18, 1010,    0, 1150, "PPMD:x1" },
  { 22, 1655,    0, 1830, "PPMD:x5" },
  // DictBits == 0: TotalBench() uses kFilterUnpackSize for these rows,
  // unless the unpack size is forced.
  {  0,    6,    0,    6, "Delta:4" },
  {  0,    4,    0,    4, "BCJ" },
  {  0,   24,    0,   24, "AES256CBC:1" },
  {  0,    8,    0,    2, "AES256CBC:2" }
};
1505
// One row of the g_Hash table: a hash method tested by TotalBench_Hash().
struct CBenchHash
{
  UInt32 Complex;     // `complexity` for CrcBench(): commands per 256 hashed bytes
  UInt32 CheckSum;    // expected XOR of the 32-bit words of the digest (see CrcBig)
  const char *Name;   // method string, parsed with ParseMethodFromPROPVARIANT()
};
1512
// Hash methods tested by TotalBench_Hash(), in output order.
// Columns: Complex, CheckSum, Name.
static const CBenchHash g_Hash[] =
{
  {   558, 0x8F8FEDAB, "CRC32:4" },
  {   339, 0x8F8FEDAB, "CRC32:8" },
  {   512, 0xDF1C17CC, "CRC64" },
  { 11900, 0x2D79FF2E, "SHA256" },
  {  5230, 0x4C25132B, "SHA1" }
};
1521
// Accumulator of benchmark results. The fields are sums over
// NumIterations results; PrintTotals() divides them by NumIterations.
struct CTotalBenchRes
{
  UInt64 NumIterations;
  UInt64 Rating;
  UInt64 Usage;
  UInt64 RPU;

  // Clears all sums.
  void Init()
  {
    NumIterations = 0;
    Rating = 0;
    Usage = 0;
    RPU = 0;
  }

  // Sets every field to the sum of the same field of r1 and r2.
  // r1 or r2 may be the same object as *this.
  void SetSum(const CTotalBenchRes &r1, const CTotalBenchRes &r2)
  {
    NumIterations = r1.NumIterations + r2.NumIterations;
    Rating = r1.Rating + r2.Rating;
    Usage = r1.Usage + r2.Usage;
    RPU = r1.RPU + r2.RPU;
  }
};
1537
1538static void PrintNumber(IBenchPrintCallback &f, UInt64 value, int size)
1539{
1540  char s[128];
1541  int startPos = (int)sizeof(s) - 32;
1542  memset(s, ' ', startPos);
1543  ConvertUInt64ToString(value, s + startPos);
1544  // if (withSpace)
1545  {
1546    startPos--;
1547    size++;
1548  }
1549  int len = (int)strlen(s + startPos);
1550  if (size > len)
1551  {
1552    startPos -= (size - len);
1553    if (startPos < 0)
1554      startPos = 0;
1555  }
1556  f.Print(s + startPos);
1557}
1558
// Widths (in characters) of the columns of the benchmark report.
static const int kFieldSize_Name = 12;
static const int kFieldSize_SmallName = 4;
static const int kFieldSize_Speed = 9;
static const int kFieldSize_Usage = 5;
static const int kFieldSize_RU = 6;
static const int kFieldSize_Rating = 6;
static const int kFieldSize_EU = 5;
static const int kFieldSize_Effec = 5;

// Combined widths. NOTE(review): the constants 4 and 2 presumably stand for
// the one extra space that PrintNumber() adds before each number - confirm.
static const int kFieldSize_TotalSize = 4 + kFieldSize_Speed + kFieldSize_Usage + kFieldSize_RU + kFieldSize_Rating;
static const int kFieldSize_EUAndEffec = 2 + kFieldSize_EU + kFieldSize_Effec;
1570
1571
1572static void PrintRating(IBenchPrintCallback &f, UInt64 rating, int size)
1573{
1574  PrintNumber(f, (rating + 500000) / 1000000, size);
1575}
1576
1577
1578static void PrintPercents(IBenchPrintCallback &f, UInt64 val, UInt64 divider, int size)
1579{
1580  PrintNumber(f, (val * 100 + divider / 2) / divider, size);
1581}
1582
1583static void PrintChars(IBenchPrintCallback &f, char c, int size)
1584{
1585  char s[256];
1586  memset(s, (Byte)c, size);
1587  s[size] = 0;
1588  f.Print(s);
1589}
1590
1591static void PrintSpaces(IBenchPrintCallback &f, int size)
1592{
1593  PrintChars(f, ' ', size);
1594}
1595
1596static void PrintResults(IBenchPrintCallback &f, UInt64 usage, UInt64 rpu, UInt64 rating, bool showFreq, UInt64 cpuFreq)
1597{
1598  PrintNumber(f, (usage + 5000) / 10000, kFieldSize_Usage);
1599  PrintRating(f, rpu, kFieldSize_RU);
1600  PrintRating(f, rating, kFieldSize_Rating);
1601  if (showFreq)
1602  {
1603    if (cpuFreq == 0)
1604      PrintSpaces(f, kFieldSize_EUAndEffec);
1605    else
1606    {
1607      UInt64 ddd = cpuFreq * usage / 100;
1608      if (ddd == 0)
1609        ddd = 1;
1610      PrintPercents(f, (rating * 10000), ddd, kFieldSize_EU);
1611      PrintPercents(f, rating, cpuFreq, kFieldSize_Effec);
1612    }
1613  }
1614}
1615
1616static void PrintResults(IBenchPrintCallback *f, const CBenchInfo &info, UInt64 rating, bool showFreq, UInt64 cpuFreq, CTotalBenchRes *res)
1617{
1618  UInt64 speed = info.GetSpeed(info.UnpackSize * info.NumIterations);
1619  if (f)
1620  {
1621    if (speed != 0)
1622      PrintNumber(*f, speed / 1024, kFieldSize_Speed);
1623    else
1624      PrintSpaces(*f, 1 + kFieldSize_Speed);
1625  }
1626  UInt64 usage = info.GetUsage();
1627  UInt64 rpu = info.GetRatingPerUsage(rating);
1628  if (f)
1629  {
1630    PrintResults(*f, usage, rpu, rating, showFreq, cpuFreq);
1631  }
1632
1633  if (res)
1634  {
1635    res->NumIterations++;
1636    res->RPU += rpu;
1637    res->Rating += rating;
1638    res->Usage += usage;
1639  }
1640}
1641
1642static void PrintTotals(IBenchPrintCallback &f, bool showFreq, UInt64 cpuFreq, const CTotalBenchRes &res)
1643{
1644  PrintSpaces(f, 1 + kFieldSize_Speed);
1645  UInt64 numIterations = res.NumIterations;
1646  if (numIterations == 0)
1647    numIterations = 1;
1648  PrintResults(f, res.Usage / numIterations, res.RPU / numIterations, res.Rating / numIterations, showFreq, cpuFreq);
1649}
1650
1651static void PrintRequirements(IBenchPrintCallback &f, const char *sizeString, UInt64 size, const char *threadsString, UInt32 numThreads)
1652{
1653  f.Print("RAM ");
1654  f.Print(sizeString);
1655  PrintNumber(f, (size >> 20), 6);
1656  f.Print(" MB,  # ");
1657  f.Print(threadsString);
1658  PrintNumber(f, numThreads, 3);
1659  f.NewLine();
1660}
1661
1662struct CBenchCallbackToPrint: public IBenchCallback
1663{
1664  CBenchProps BenchProps;
1665  CTotalBenchRes EncodeRes;
1666  CTotalBenchRes DecodeRes;
1667  IBenchPrintCallback *_file;
1668  UInt32 DictSize;
1669
1670  bool Use2Columns;
1671  int NameFieldSize;
1672
1673  bool ShowFreq;
1674  UInt64 CpuFreq;
1675
1676  CBenchCallbackToPrint(): Use2Columns(false), NameFieldSize(0), ShowFreq(false), CpuFreq(0) {}
1677
1678  void Init() { EncodeRes.Init(); DecodeRes.Init(); }
1679  void Print(const char *s);
1680  void NewLine();
1681
1682  HRESULT SetFreq(bool showFreq, UInt64 cpuFreq);
1683  HRESULT SetEncodeResult(const CBenchInfo &info, bool final);
1684  HRESULT SetDecodeResult(const CBenchInfo &info, bool final);
1685};
1686
1687HRESULT CBenchCallbackToPrint::SetFreq(bool showFreq, UInt64 cpuFreq)
1688{
1689  ShowFreq = showFreq;
1690  CpuFreq = cpuFreq;
1691  return S_OK;
1692}
1693
1694HRESULT CBenchCallbackToPrint::SetEncodeResult(const CBenchInfo &info, bool final)
1695{
1696  RINOK(_file->CheckBreak());
1697  if (final)
1698  {
1699    UInt64 rating = BenchProps.GetCompressRating(DictSize, info.GlobalTime, info.GlobalFreq, info.UnpackSize * info.NumIterations);
1700    PrintResults(_file, info, rating, ShowFreq, CpuFreq, &EncodeRes);
1701  }
1702  return S_OK;
1703}
1704
// Separator printed before the decode results in the 2-column layout.
static const char *kSep = "  | ";
1706
1707HRESULT CBenchCallbackToPrint::SetDecodeResult(const CBenchInfo &info, bool final)
1708{
1709  RINOK(_file->CheckBreak());
1710  if (final)
1711  {
1712    UInt64 rating = BenchProps.GetDecompressRating(info.GlobalTime, info.GlobalFreq, info.UnpackSize, info.PackSize, info.NumIterations);
1713    if (Use2Columns)
1714      _file->Print(kSep);
1715    else
1716    {
1717      _file->NewLine();
1718      PrintSpaces(*_file, NameFieldSize);
1719    }
1720    CBenchInfo info2 = info;
1721    info2.UnpackSize *= info2.NumIterations;
1722    info2.PackSize *= info2.NumIterations;
1723    info2.NumIterations = 1;
1724    PrintResults(_file, info2, rating, ShowFreq, CpuFreq, &DecodeRes);
1725  }
1726  return S_OK;
1727}
1728
1729void CBenchCallbackToPrint::Print(const char *s)
1730{
1731  _file->Print(s);
1732}
1733
1734void CBenchCallbackToPrint::NewLine()
1735{
1736  _file->NewLine();
1737}
1738
1739void PrintLeft(IBenchPrintCallback &f, const char *s, unsigned size)
1740{
1741  f.Print(s);
1742  int numSpaces = size - MyStringLen(s);
1743  if (numSpaces > 0)
1744    PrintSpaces(f, numSpaces);
1745}
1746
1747void PrintRight(IBenchPrintCallback &f, const char *s, unsigned size)
1748{
1749  int numSpaces = size - MyStringLen(s);
1750  if (numSpaces > 0)
1751    PrintSpaces(f, numSpaces);
1752  f.Print(s);
1753}
1754
1755static HRESULT TotalBench(
1756    DECL_EXTERNAL_CODECS_LOC_VARS
1757    UInt64 complexInCommands,
1758    UInt32 numThreads, bool forceUnpackSize, UInt32 unpackSize, IBenchPrintCallback *printCallback, CBenchCallbackToPrint *callback)
1759{
1760  for (unsigned i = 0; i < ARRAY_SIZE(g_Bench); i++)
1761  {
1762    CBenchMethod bench = g_Bench[i];
1763    PrintLeft(*callback->_file, bench.Name, kFieldSize_Name);
1764    callback->BenchProps.DecComplexUnc = bench.DecComplexUnc;
1765    callback->BenchProps.DecComplexCompr = bench.DecComplexCompr;
1766    callback->BenchProps.EncComplex = bench.EncComplex;
1767    COneMethodInfo method;
1768    NCOM::CPropVariant propVariant;
1769    propVariant = bench.Name;
1770    RINOK(method.ParseMethodFromPROPVARIANT(L"", propVariant));
1771
1772    UInt32 unpackSize2 = unpackSize;
1773    if (!forceUnpackSize && bench.DictBits == 0)
1774      unpackSize2 = kFilterUnpackSize;
1775
1776    HRESULT res = MethodBench(
1777        EXTERNAL_CODECS_LOC_VARS
1778        complexInCommands,
1779        false, numThreads, method, unpackSize2, bench.DictBits,
1780        printCallback, callback, &callback->BenchProps);
1781    if (res == E_NOTIMPL)
1782    {
1783      // callback->Print(" ---");
1784      // we need additional empty line as line for decompression results
1785      if (!callback->Use2Columns)
1786        callback->NewLine();
1787    }
1788    else
1789    {
1790      RINOK(res);
1791    }
1792    callback->NewLine();
1793  }
1794  return S_OK;
1795}
1796
1797
1798static HRESULT FreqBench(
1799    UInt64 complexInCommands,
1800    UInt32 numThreads,
1801    IBenchPrintCallback *_file,
1802    bool showFreq,
1803    UInt64 &cpuFreq,
1804    UInt32 &res)
1805{
1806  res = 0;
1807  cpuFreq = 0;
1808
1809  UInt32 bufferSize = 1 << 20;
1810  UInt32 complexity = kNumFreqCommands;
1811  if (numThreads == 0)
1812    numThreads = 1;
1813
1814  #ifdef _7ZIP_ST
1815  numThreads = 1;
1816  #endif
1817
1818  UInt32 bsize = (bufferSize == 0 ? 1 : bufferSize);
1819  UInt64 numIterations = complexInCommands / complexity / bsize;
1820  if (numIterations == 0)
1821    numIterations = 1;
1822
1823  CBenchInfoCalc progressInfoSpec;
1824
1825  #ifndef _7ZIP_ST
1826  CFreqThreads threads;
1827  if (numThreads > 1)
1828  {
1829    threads.Items = new CFreqInfo[numThreads];
1830    UInt32 i;
1831    for (i = 0; i < numThreads; i++)
1832    {
1833      CFreqInfo &info = threads.Items[i];
1834      info.Callback = _file;
1835      info.CallbackRes = S_OK;
1836      info.NumIterations = numIterations;
1837      info.Size = bufferSize;
1838    }
1839    progressInfoSpec.SetStartTime();
1840    for (i = 0; i < numThreads; i++)
1841    {
1842      CFreqInfo &info = threads.Items[i];
1843      RINOK(info.Thread.Create(FreqThreadFunction, &info));
1844      threads.NumThreads++;
1845    }
1846    threads.WaitAll();
1847    for (i = 0; i < numThreads; i++)
1848    {
1849      RINOK(threads.Items[i].CallbackRes);
1850    }
1851  }
1852  else
1853  #endif
1854  {
1855    progressInfoSpec.SetStartTime();
1856    UInt32 sum = g_BenchCpuFreqTemp;
1857    for (UInt64 k = numIterations; k > 0; k--)
1858    {
1859      RINOK(_file->CheckBreak());
1860      sum = CountCpuFreq(sum, bufferSize, g_BenchCpuFreqTemp);
1861    }
1862    res += sum;
1863  }
1864  CBenchInfo info;
1865  progressInfoSpec.SetFinishTime(info);
1866
1867  info.UnpackSize = 0;
1868  info.PackSize = 0;
1869  info.NumIterations = 1;
1870
1871  if (_file)
1872  {
1873    {
1874      UInt64 numCommands = (UInt64)numIterations * bufferSize * numThreads * complexity;
1875      UInt64 rating = info.GetSpeed(numCommands);
1876      cpuFreq = rating / numThreads;
1877      PrintResults(_file, info, rating, showFreq, showFreq ? cpuFreq : 0, NULL);
1878    }
1879    RINOK(_file->CheckBreak());
1880  }
1881
1882  return S_OK;
1883}
1884
1885
1886
1887static HRESULT CrcBench(
1888    DECL_EXTERNAL_CODECS_LOC_VARS
1889    UInt64 complexInCommands,
1890    UInt32 numThreads, UInt32 bufferSize,
1891    UInt64 &speed,
1892    UInt32 complexity,
1893    const UInt32 *checkSum,
1894    const COneMethodInfo &method,
1895    IBenchPrintCallback *_file,
1896    CTotalBenchRes *encodeRes,
1897    bool showFreq, UInt64 cpuFreq)
1898{
1899  if (numThreads == 0)
1900    numThreads = 1;
1901
1902  #ifdef _7ZIP_ST
1903  numThreads = 1;
1904  #endif
1905
1906  UString methodName = method.MethodName;
1907  // methodName.RemoveChar(L'-');
1908  CMethodId hashID;
1909  if (!FindHashMethod(
1910      EXTERNAL_CODECS_LOC_VARS
1911      methodName, hashID))
1912    return E_NOTIMPL;
1913
1914  CBenchBuffer buffer;
1915  size_t totalSize = (size_t)bufferSize * numThreads;
1916  if (totalSize / numThreads != bufferSize)
1917    return E_OUTOFMEMORY;
1918  if (!buffer.Alloc(totalSize))
1919    return E_OUTOFMEMORY;
1920
1921  Byte *buf = buffer.Buffer;
1922  CBaseRandomGenerator RG;
1923  UInt32 bsize = (bufferSize == 0 ? 1 : bufferSize);
1924  UInt64 numIterations = complexInCommands * 256 / complexity / bsize;
1925  if (numIterations == 0)
1926    numIterations = 1;
1927
1928  CBenchInfoCalc progressInfoSpec;
1929
1930  #ifndef _7ZIP_ST
1931  CCrcThreads threads;
1932  if (numThreads > 1)
1933  {
1934    threads.Items = new CCrcInfo[numThreads];
1935    UInt32 i;
1936    for (i = 0; i < numThreads; i++)
1937    {
1938      CCrcInfo &info = threads.Items[i];
1939      UString name;
1940      RINOK(CreateHasher(EXTERNAL_CODECS_LOC_VARS hashID, name, info.Hasher));
1941      if (!info.Hasher)
1942        return E_NOTIMPL;
1943      CMyComPtr<ICompressSetCoderProperties> scp;
1944      info.Hasher.QueryInterface(IID_ICompressSetCoderProperties, &scp);
1945      if (scp)
1946      {
1947        UInt64 reduceSize = 1;
1948        RINOK(method.SetCoderProps(scp, &reduceSize));
1949      }
1950
1951      Byte *data = buf + (size_t)bufferSize * i;
1952      info.Callback = _file;
1953      info.Data = data;
1954      info.NumIterations = numIterations;
1955      info.Size = bufferSize;
1956      /* info.Crc = */ RandGenCrc(data, bufferSize, RG);
1957      info.CheckSumDefined = false;
1958      if (checkSum)
1959      {
1960        info.CheckSum = *checkSum;
1961        info.CheckSumDefined = (checkSum && (i == 0));
1962      }
1963    }
1964    progressInfoSpec.SetStartTime();
1965    for (i = 0; i < numThreads; i++)
1966    {
1967      CCrcInfo &info = threads.Items[i];
1968      RINOK(info.Thread.Create(CrcThreadFunction, &info));
1969      threads.NumThreads++;
1970    }
1971    threads.WaitAll();
1972    for (i = 0; i < numThreads; i++)
1973    {
1974      RINOK(threads.Items[i].Res);
1975    }
1976  }
1977  else
1978  #endif
1979  {
1980    /* UInt32 crc = */ RandGenCrc(buf, bufferSize, RG);
1981    progressInfoSpec.SetStartTime();
1982    CMyComPtr<IHasher> hasher;
1983    UString name;
1984    RINOK(CreateHasher(EXTERNAL_CODECS_LOC_VARS hashID, name, hasher));
1985    if (!hasher)
1986      return E_NOTIMPL;
1987    CMyComPtr<ICompressSetCoderProperties> scp;
1988    hasher.QueryInterface(IID_ICompressSetCoderProperties, &scp);
1989    if (scp)
1990    {
1991      UInt64 reduceSize = 1;
1992      RINOK(method.SetCoderProps(scp, &reduceSize));
1993    }
1994    RINOK(CrcBig(buf, bufferSize, numIterations, checkSum, hasher, _file));
1995  }
1996  CBenchInfo info;
1997  progressInfoSpec.SetFinishTime(info);
1998
1999  UInt64 unpSize = numIterations * bufferSize;
2000  UInt64 unpSizeThreads = unpSize * numThreads;
2001  info.UnpackSize = unpSizeThreads;
2002  info.PackSize = unpSizeThreads;
2003  info.NumIterations = 1;
2004
2005  if (_file)
2006  {
2007    {
2008      UInt64 numCommands = unpSizeThreads * complexity / 256;
2009      UInt64 rating = info.GetSpeed(numCommands);
2010      PrintResults(_file, info, rating, showFreq, cpuFreq, encodeRes);
2011    }
2012    RINOK(_file->CheckBreak());
2013  }
2014
2015  speed = info.GetSpeed(unpSizeThreads);
2016
2017  return S_OK;
2018}
2019
2020static HRESULT TotalBench_Hash(
2021    DECL_EXTERNAL_CODECS_LOC_VARS
2022    UInt64 complexInCommands,
2023    UInt32 numThreads, UInt32 bufSize,
2024    IBenchPrintCallback *printCallback, CBenchCallbackToPrint *callback,
2025    CTotalBenchRes *encodeRes,
2026    bool showFreq, UInt64 cpuFreq)
2027{
2028  for (unsigned i = 0; i < ARRAY_SIZE(g_Hash); i++)
2029  {
2030    const CBenchHash &bench = g_Hash[i];
2031    PrintLeft(*callback->_file, bench.Name, kFieldSize_Name);
2032    // callback->BenchProps.DecComplexUnc = bench.DecComplexUnc;
2033    // callback->BenchProps.DecComplexCompr = bench.DecComplexCompr;
2034    // callback->BenchProps.EncComplex = bench.EncComplex;
2035
2036    COneMethodInfo method;
2037    NCOM::CPropVariant propVariant;
2038    propVariant = bench.Name;
2039    RINOK(method.ParseMethodFromPROPVARIANT(L"", propVariant));
2040
2041    UInt64 speed;
2042    HRESULT res = CrcBench(
2043        EXTERNAL_CODECS_LOC_VARS
2044        complexInCommands,
2045        numThreads, bufSize,
2046        speed,
2047        bench.Complex, &bench.CheckSum, method,
2048        printCallback, encodeRes, showFreq, cpuFreq);
2049    if (res == E_NOTIMPL)
2050    {
2051      // callback->Print(" ---");
2052    }
2053    else
2054    {
2055      RINOK(res);
2056    }
2057    callback->NewLine();
2058  }
2059  return S_OK;
2060}
2061
// Owner of a heap array of `num` UInt64 values.
// The values are NOT initialized by the constructor.
struct CTempValues
{
  UInt64 *Values;

  explicit CTempValues(UInt32 num) { Values = new UInt64[num]; }
  ~CTempValues() { delete []Values; }

private:
  // Copying is disabled (declared, not defined): a copy would delete the
  // same array a second time.
  CTempValues(const CTempValues &);
  CTempValues &operator=(const CTempValues &);
};
2068
2069static void ParseNumberString(const UString &s, NCOM::CPropVariant &prop)
2070{
2071  const wchar_t *end;
2072  UInt64 result = ConvertStringToUInt64(s, &end);
2073  if (*end != 0 || s.IsEmpty())
2074    prop = s;
2075  else if (result <= (UInt32)0xFFFFFFFF)
2076    prop = (UInt32)result;
2077  else
2078    prop = result;
2079}
2080
// Returns the thread count for step i of the series
//   1, 2, 3, 4, 6, 8, 12, 16, ...
// Starting from step 2 the value is limited by numThreads
// (steps 0 and 1 always return 1 and 2).
static UInt32 GetNumThreadsNext(unsigned i, UInt32 numThreads)
{
  if (i < 2)
    return i + 1;

  // k = 1, 2, 3, ... : odd k selects 3 * 2^n, even k selects 2 * 2^n
  const unsigned k = i - 1;
  const UInt32 base = (UInt32)(2 + (k & 1));
  const UInt32 num = base << (k >> 1);
  return (num > numThreads) ? numThreads : num;
}
2089
2090static bool AreSameMethodNames(const char *fullName, const wchar_t *shortName)
2091{
2092  for (;;)
2093  {
2094    wchar_t c2 = *shortName++;
2095    if (c2 == 0)
2096      return true;
2097    char c1 = *fullName++;
2098    if ((unsigned char)MyCharLower_Ascii(c1) != MyCharLower_Ascii(c2))
2099      return false;
2100  }
2101}
2102
2103HRESULT Bench(
2104    DECL_EXTERNAL_CODECS_LOC_VARS
2105    IBenchPrintCallback *printCallback,
2106    IBenchCallback *benchCallback,
2107    const CObjectVector<CProperty> &props,
2108    UInt32 numIterations,
2109    bool multiDict)
2110{
2111  if (!CrcInternalTest())
2112    return S_FALSE;
2113
2114  UInt32 numCPUs = 1;
2115  UInt64 ramSize = (UInt64)512 << 20;
2116  #ifndef _7ZIP_ST
2117  numCPUs = NSystem::GetNumberOfProcessors();
2118  #endif
2119  #if !defined(_7ZIP_ST) || defined(_WIN32)
2120  ramSize = NSystem::GetRamSize();
2121  #endif
2122  UInt32 numThreads = numCPUs;
2123
2124  UInt32 testTime = kComplexInSeconds;
2125
2126  COneMethodInfo method;
2127  unsigned i;
2128  for (i = 0; i < props.Size(); i++)
2129  {
2130    const CProperty &property = props[i];
2131    NCOM::CPropVariant propVariant;
2132    UString name = property.Name;
2133    name.MakeLower_Ascii();
2134    if (!property.Value.IsEmpty())
2135      ParseNumberString(property.Value, propVariant);
2136    if (name.IsEqualTo("testtime"))
2137    {
2138      RINOK(ParsePropToUInt32(L"", propVariant, testTime));
2139      continue;
2140    }
2141    if (name.IsPrefixedBy(L"mt"))
2142    {
2143      #ifndef _7ZIP_ST
2144      RINOK(ParseMtProp(name.Ptr(2), propVariant, numCPUs, numThreads));
2145      #endif
2146      continue;
2147    }
2148    RINOK(method.ParseMethodFromPROPVARIANT(name, propVariant));
2149  }
2150
2151  if (printCallback)
2152  {
2153    printCallback->Print("CPU Freq:");
2154  }
2155
2156  UInt64 complexInCommands = kComplexInCommands;
2157
2158  if (printCallback)
2159  {
2160    UInt64 numMilCommands = (1 << 6);
2161
2162    for (int jj = 0;; jj++)
2163    {
2164      UInt64 start = ::GetTimeCount();
2165      UInt32 sum = (UInt32)start;
2166      sum = CountCpuFreq(sum, (UInt32)(numMilCommands * 1000000 / kNumFreqCommands), g_BenchCpuFreqTemp);
2167      start = ::GetTimeCount() - start;
2168      if (start == 0)
2169        start = 1;
2170      UInt64 freq = GetFreq();
2171      UInt64 mipsVal = numMilCommands * freq / start;
2172      if (printCallback)
2173        PrintNumber(*printCallback, mipsVal, 5 + ((sum >> 31) & 1));
2174      if (jj >= 3)
2175      {
2176        SetComplexCommands(testTime, mipsVal * 1000000, complexInCommands);
2177        if (jj >= 8 || start >= freq)
2178          break;
2179        // break; // change it
2180        numMilCommands <<= 1;
2181      }
2182    }
2183  }
2184  if (printCallback)
2185  {
2186    printCallback->NewLine();
2187    printCallback->NewLine();
2188    PrintRequirements(*printCallback, "size: ", ramSize, "CPU hardware threads:", numCPUs);
2189  }
2190
2191  if (numThreads < 1 || numThreads > kNumThreadsMax)
2192    return E_INVALIDARG;
2193
2194  UInt32 dict;
2195  bool dictIsDefined = method.Get_DicSize(dict);
2196
2197  if (method.MethodName.IsEmpty())
2198    method.MethodName = L"LZMA";
2199
2200  if (benchCallback)
2201  {
2202    CBenchProps benchProps;
2203    benchProps.SetLzmaCompexity();
2204    UInt32 dictSize = method.Get_Lzma_DicSize();
2205    UInt32 uncompressedDataSize = kAdditionalSize + dictSize;
2206    return MethodBench(
2207        EXTERNAL_CODECS_LOC_VARS
2208        complexInCommands,
2209        true, numThreads,
2210        method, uncompressedDataSize,
2211        kOldLzmaDictBits, printCallback, benchCallback, &benchProps);
2212  }
2213
2214  UString methodName = method.MethodName;
2215  if (methodName.IsEqualToNoCase(L"CRC"))
2216    methodName = L"crc32";
2217  method.MethodName = methodName;
2218  CMethodId hashID;
2219  if (FindHashMethod(EXTERNAL_CODECS_LOC_VARS methodName, hashID))
2220  {
2221    if (!printCallback)
2222      return S_FALSE;
2223    IBenchPrintCallback &f = *printCallback;
2224    if (!dictIsDefined)
2225      dict = (1 << 24);
2226
2227
2228    // methhodName.RemoveChar(L'-');
2229    UInt32 complexity = 10000;
2230    const UInt32 *checkSum = NULL;
2231    {
2232      for (unsigned i = 0; i < ARRAY_SIZE(g_Hash); i++)
2233      {
2234        const CBenchHash &h = g_Hash[i];
2235        if (AreSameMethodNames(h.Name, methodName))
2236        {
2237          complexity = h.Complex;
2238          checkSum = &h.CheckSum;
2239          if (strcmp(h.Name, "CRC32:4") != 0)
2240            break;
2241        }
2242      }
2243    }
2244
2245    f.NewLine();
2246    f.Print("Size");
2247    const int kFieldSize_CrcSpeed = 6;
2248    unsigned numThreadsTests = 0;
2249    for (;;)
2250    {
2251      UInt32 t = GetNumThreadsNext(numThreadsTests, numThreads);
2252      PrintNumber(f, t, kFieldSize_CrcSpeed);
2253      numThreadsTests++;
2254      if (t >= numThreads)
2255        break;
2256    }
2257    f.NewLine();
2258    f.NewLine();
2259    CTempValues speedTotals(numThreadsTests);
2260    {
2261      for (unsigned ti = 0; ti < numThreadsTests; ti++)
2262        speedTotals.Values[ti] = 0;
2263    }
2264
2265    UInt64 numSteps = 0;
2266    for (UInt32 i = 0; i < numIterations; i++)
2267    {
2268      for (unsigned pow = 10; pow < 32; pow++)
2269      {
2270        UInt32 bufSize = (UInt32)1 << pow;
2271        if (bufSize > dict)
2272          break;
2273        char s[16];
2274        ConvertUInt32ToString(pow, s);
2275        int pos = MyStringLen(s);
2276        s[pos++] = ':';
2277        s[pos++] = ' ';
2278        s[pos] = 0;
2279        f.Print(s);
2280
2281        for (unsigned ti = 0; ti < numThreadsTests; ti++)
2282        {
2283          RINOK(f.CheckBreak());
2284          UInt32 t = GetNumThreadsNext(ti, numThreads);
2285          UInt64 speed = 0;
2286          RINOK(CrcBench(EXTERNAL_CODECS_LOC_VARS complexInCommands,
2287              t, bufSize, speed, complexity,
2288              (pow == kNumHashDictBits) ? checkSum : NULL, method, NULL, NULL, false, 0));
2289          PrintNumber(f, (speed >> 20), kFieldSize_CrcSpeed);
2290          speedTotals.Values[ti] += speed;
2291        }
2292        f.NewLine();
2293        numSteps++;
2294      }
2295    }
2296    if (numSteps != 0)
2297    {
2298      f.NewLine();
2299      f.Print("Avg:");
2300      for (unsigned ti = 0; ti < numThreadsTests; ti++)
2301      {
2302        PrintNumber(f, ((speedTotals.Values[ti] / numSteps) >> 20), kFieldSize_CrcSpeed);
2303      }
2304      f.NewLine();
2305    }
2306    return S_OK;
2307  }
2308
2309  bool use2Columns = false;
2310
2311  CBenchCallbackToPrint callback;
2312  callback.Init();
2313  callback._file = printCallback;
2314
2315  if (!dictIsDefined)
2316  {
2317    int dicSizeLog;
2318    for (dicSizeLog = 25; dicSizeLog > kBenchMinDicLogSize; dicSizeLog--)
2319      if (GetBenchMemoryUsage(numThreads, ((UInt32)1 << dicSizeLog)) + (8 << 20) <= ramSize)
2320        break;
2321    dict = (1 << dicSizeLog);
2322  }
2323
2324  IBenchPrintCallback &f = *printCallback;
2325  PrintRequirements(f, "usage:", GetBenchMemoryUsage(numThreads, dict), "Benchmark threads:   ", numThreads);
2326
2327  bool totalBenchMode = (method.MethodName == L"*");
2328  f.NewLine();
2329
2330  if (totalBenchMode)
2331  {
2332    callback.NameFieldSize = kFieldSize_Name;
2333    use2Columns = false;
2334  }
2335  else
2336  {
2337    callback.NameFieldSize = kFieldSize_SmallName;
2338    use2Columns = true;
2339  }
2340  callback.Use2Columns = use2Columns;
2341
2342  bool showFreq = false;
2343  UInt64 cpuFreq = 0;
2344
2345  if (totalBenchMode)
2346  {
2347    showFreq = true;
2348  }
2349
2350  int fileldSize = kFieldSize_TotalSize;
2351  if (showFreq)
2352    fileldSize += kFieldSize_EUAndEffec;
2353
2354  if (use2Columns)
2355  {
2356    PrintSpaces(f, callback.NameFieldSize);
2357    PrintRight(f, "Compressing", fileldSize);
2358    f.Print(kSep);
2359    PrintRight(f, "Decompressing", fileldSize);
2360  }
2361  f.NewLine();
2362  PrintLeft(f, totalBenchMode ? "Method" : "Dict", callback.NameFieldSize);
2363
2364  int j;
2365
2366  for (j = 0; j < 2; j++)
2367  {
2368    PrintRight(f, "Speed", kFieldSize_Speed + 1);
2369    PrintRight(f, "Usage", kFieldSize_Usage + 1);
2370    PrintRight(f, "R/U", kFieldSize_RU + 1);
2371    PrintRight(f, "Rating", kFieldSize_Rating + 1);
2372    if (showFreq)
2373    {
2374      PrintRight(f, "E/U", kFieldSize_EU + 1);
2375      PrintRight(f, "Effec", kFieldSize_Effec + 1);
2376    }
2377    if (!use2Columns)
2378      break;
2379    if (j == 0)
2380      f.Print(kSep);
2381  }
2382
2383  f.NewLine();
2384  PrintSpaces(f, callback.NameFieldSize);
2385
2386  for (j = 0; j < 2; j++)
2387  {
2388    PrintRight(f, "KB/s", kFieldSize_Speed + 1);
2389    PrintRight(f, "%", kFieldSize_Usage + 1);
2390    PrintRight(f, "MIPS", kFieldSize_RU + 1);
2391    PrintRight(f, "MIPS", kFieldSize_Rating + 1);
2392    if (showFreq)
2393    {
2394      PrintRight(f, "%", kFieldSize_EU + 1);
2395      PrintRight(f, "%", kFieldSize_Effec + 1);
2396    }
2397    if (!use2Columns)
2398      break;
2399    if (j == 0)
2400      f.Print(kSep);
2401  }
2402
2403  f.NewLine();
2404  f.NewLine();
2405
2406  if (totalBenchMode)
2407  {
2408    if (!dictIsDefined)
2409      dict =
2410        #ifdef UNDER_CE
2411          (UInt64)1 << 20;
2412        #else
2413          (UInt64)1 << 24;
2414        #endif
2415    for (UInt32 i = 0; i < numIterations; i++)
2416    {
2417      if (i != 0)
2418        printCallback->NewLine();
2419      HRESULT res;
2420
2421      int freqTest;
2422      const int kNumCpuTests = 3;
2423      for (freqTest = 0; freqTest < kNumCpuTests; freqTest++)
2424      {
2425        PrintLeft(f, "CPU", kFieldSize_Name);
2426        UInt32 resVal;
2427        RINOK(FreqBench(complexInCommands, numThreads, printCallback, freqTest == kNumCpuTests - 1, cpuFreq, resVal));
2428        callback.NewLine();
2429
2430        if (freqTest == kNumCpuTests - 1)
2431          SetComplexCommands(testTime, cpuFreq, complexInCommands);
2432      }
2433      callback.NewLine();
2434
2435      callback.SetFreq(true, cpuFreq);
2436      res = TotalBench(EXTERNAL_CODECS_LOC_VARS complexInCommands, numThreads, dictIsDefined, dict, printCallback, &callback);
2437      RINOK(res);
2438
2439      res = TotalBench_Hash(EXTERNAL_CODECS_LOC_VARS complexInCommands, numThreads,
2440          1 << kNumHashDictBits, printCallback, &callback, &callback.EncodeRes, true, cpuFreq);
2441      RINOK(res);
2442
2443      callback.NewLine();
2444      {
2445        PrintLeft(f, "CPU", kFieldSize_Name);
2446        UInt32 resVal;
2447        UInt64 cpuFreqLastTemp = cpuFreq;
2448        RINOK(FreqBench(complexInCommands, numThreads, printCallback, false, cpuFreqLastTemp, resVal));
2449        callback.NewLine();
2450      }
2451    }
2452  }
2453  else
2454  {
2455    bool needSetComplexity = true;
2456    if (!methodName.IsEqualToNoCase(L"LZMA"))
2457    {
2458      for (unsigned i = 0; i < ARRAY_SIZE(g_Bench); i++)
2459      {
2460        const CBenchMethod &h = g_Bench[i];
2461        AString s = h.Name;
2462        if (AreSameMethodNames(h.Name, methodName))
2463        {
2464          callback.BenchProps.EncComplex = h.EncComplex;
2465          callback.BenchProps.DecComplexCompr = h.DecComplexCompr;
2466          callback.BenchProps.DecComplexUnc = h.DecComplexUnc;;
2467          needSetComplexity = false;
2468          break;
2469        }
2470      }
2471    }
2472    if (needSetComplexity)
2473      callback.BenchProps.SetLzmaCompexity();
2474
2475  for (i = 0; i < numIterations; i++)
2476  {
2477    const unsigned kStartDicLog = 22;
2478    unsigned pow = (dict < ((UInt32)1 << kStartDicLog)) ? kBenchMinDicLogSize : kStartDicLog;
2479    if (!multiDict)
2480      pow = 31;
2481    while (((UInt32)1 << pow) > dict && pow > 0)
2482      pow--;
2483    for (; ((UInt32)1 << pow) <= dict; pow++)
2484    {
2485      char s[16];
2486      ConvertUInt32ToString(pow, s);
2487      unsigned pos = MyStringLen(s);
2488      s[pos++] = ':';
2489      s[pos] = 0;
2490      PrintLeft(f, s, kFieldSize_SmallName);
2491      callback.DictSize = (UInt32)1 << pow;
2492
2493      COneMethodInfo method2 = method;
2494
2495      if (StringsAreEqualNoCase_Ascii(method2.MethodName, L"LZMA"))
2496      {
        // Add a dictionary size property for this pass.
        // As a result, method2 can hold two different dictionary size properties;
        // the last property added is the one that takes effect.
2500        NCOM::CPropVariant propVariant = (UInt32)pow;
2501        RINOK(method2.ParseMethodFromPROPVARIANT(L"d", propVariant));
2502      }
2503
2504      UInt32 uncompressedDataSize = callback.DictSize;
2505      if (uncompressedDataSize >= (1 << 18))
2506        uncompressedDataSize += kAdditionalSize;
2507
2508      HRESULT res = MethodBench(
2509          EXTERNAL_CODECS_LOC_VARS
2510          complexInCommands,
2511          true, numThreads,
2512          method2, uncompressedDataSize,
2513          kOldLzmaDictBits, printCallback, &callback, &callback.BenchProps);
2514      f.NewLine();
2515      RINOK(res);
2516      if (!multiDict)
2517        break;
2518    }
2519  }
2520  }
2521
2522  PrintChars(f, '-', callback.NameFieldSize + fileldSize);
2523
2524  if (use2Columns)
2525  {
2526    f.Print(kSep);
2527    PrintChars(f, '-', fileldSize);
2528  }
2529  f.NewLine();
2530  if (use2Columns)
2531  {
2532    PrintLeft(f, "Avr:", callback.NameFieldSize);
2533    PrintTotals(f, showFreq, cpuFreq, callback.EncodeRes);
2534    f.Print(kSep);
2535    PrintTotals(f, showFreq, cpuFreq, callback.DecodeRes);
2536    f.NewLine();
2537  }
2538  PrintLeft(f, "Tot:", callback.NameFieldSize);
2539  CTotalBenchRes midRes;
2540  midRes.SetSum(callback.EncodeRes, callback.DecodeRes);
2541  PrintTotals(f, showFreq, cpuFreq, midRes);
2542  f.NewLine();
2543  return S_OK;
2544}
2545