1/* LzmaSpec.cpp -- LZMA Reference Decoder
22015-06-14 : Igor Pavlov : Public domain */
3
4// This code implements LZMA file decoding according to LZMA specification.
5// This code is not optimized for speed.
6
7#include <stdio.h>
8
9#ifdef _MSC_VER
10  #pragma warning(disable : 4710) // function not inlined
11  #pragma warning(disable : 4996) // This function or variable may be unsafe
12#endif
13
14typedef unsigned char Byte;
15typedef unsigned short UInt16;
16
17#ifdef _LZMA_UINT32_IS_ULONG
18  typedef unsigned long UInt32;
19#else
20  typedef unsigned int UInt32;
21#endif
22
23#if defined(_MSC_VER) || defined(__BORLANDC__)
24  typedef unsigned __int64 UInt64;
25#else
26  typedef unsigned long long int UInt64;
27#endif
28
29
30struct CInputStream
31{
32  FILE *File;
33  UInt64 Processed;
34
35  void Init() { Processed = 0; }
36
37  Byte ReadByte()
38  {
39    int c = getc(File);
40    if (c < 0)
41      throw "Unexpected end of file";
42    Processed++;
43    return (Byte)c;
44  }
45};
46
47
48struct COutStream
49{
50  FILE *File;
51  UInt64 Processed;
52
53  void Init() { Processed = 0; }
54
55  void WriteByte(Byte b)
56  {
57    if (putc(b, File) == EOF)
58      throw "File writing error";
59    Processed++;
60  }
61};
62
63
64class COutWindow
65{
66  Byte *Buf;
67  UInt32 Pos;
68  UInt32 Size;
69  bool IsFull;
70
71public:
72  unsigned TotalPos;
73  COutStream OutStream;
74
75  COutWindow(): Buf(NULL) {}
76  ~COutWindow() { delete []Buf; }
77
78  void Create(UInt32 dictSize)
79  {
80    Buf = new Byte[dictSize];
81    Pos = 0;
82    Size = dictSize;
83    IsFull = false;
84    TotalPos = 0;
85  }
86
87  void PutByte(Byte b)
88  {
89    TotalPos++;
90    Buf[Pos++] = b;
91    if (Pos == Size)
92    {
93      Pos = 0;
94      IsFull = true;
95    }
96    OutStream.WriteByte(b);
97  }
98
99  Byte GetByte(UInt32 dist) const
100  {
101    return Buf[dist <= Pos ? Pos - dist : Size - dist + Pos];
102  }
103
104  void CopyMatch(UInt32 dist, unsigned len)
105  {
106    for (; len > 0; len--)
107      PutByte(GetByte(dist));
108  }
109
110  bool CheckDistance(UInt32 dist) const
111  {
112    return dist <= Pos || IsFull;
113  }
114
115  bool IsEmpty() const
116  {
117    return Pos == 0 && !IsFull;
118  }
119};
120
121
122#define kNumBitModelTotalBits 11
123#define kNumMoveBits 5
124
125typedef UInt16 CProb;
126
127#define PROB_INIT_VAL ((1 << kNumBitModelTotalBits) / 2)
128
129#define INIT_PROBS(p) \
130 { for (unsigned i = 0; i < sizeof(p) / sizeof(p[0]); i++) p[i] = PROB_INIT_VAL; }
131
132class CRangeDecoder
133{
134  UInt32 Range;
135  UInt32 Code;
136
137  void Normalize();
138
139public:
140
141  CInputStream *InStream;
142  bool Corrupted;
143
144  bool Init();
145  bool IsFinishedOK() const { return Code == 0; }
146
147  UInt32 DecodeDirectBits(unsigned numBits);
148  unsigned DecodeBit(CProb *prob);
149};
150
151bool CRangeDecoder::Init()
152{
153  Corrupted = false;
154  Range = 0xFFFFFFFF;
155  Code = 0;
156
157  Byte b = InStream->ReadByte();
158
159  for (int i = 0; i < 4; i++)
160    Code = (Code << 8) | InStream->ReadByte();
161
162  if (b != 0 || Code == Range)
163    Corrupted = true;
164  return b == 0;
165}
166
167#define kTopValue ((UInt32)1 << 24)
168
169void CRangeDecoder::Normalize()
170{
171  if (Range < kTopValue)
172  {
173    Range <<= 8;
174    Code = (Code << 8) | InStream->ReadByte();
175  }
176}
177
178UInt32 CRangeDecoder::DecodeDirectBits(unsigned numBits)
179{
180  UInt32 res = 0;
181  do
182  {
183    Range >>= 1;
184    Code -= Range;
185    UInt32 t = 0 - ((UInt32)Code >> 31);
186    Code += Range & t;
187
188    if (Code == Range)
189      Corrupted = true;
190
191    Normalize();
192    res <<= 1;
193    res += t + 1;
194  }
195  while (--numBits);
196  return res;
197}
198
199unsigned CRangeDecoder::DecodeBit(CProb *prob)
200{
201  unsigned v = *prob;
202  UInt32 bound = (Range >> kNumBitModelTotalBits) * v;
203  unsigned symbol;
204  if (Code < bound)
205  {
206    v += ((1 << kNumBitModelTotalBits) - v) >> kNumMoveBits;
207    Range = bound;
208    symbol = 0;
209  }
210  else
211  {
212    v -= v >> kNumMoveBits;
213    Code -= bound;
214    Range -= bound;
215    symbol = 1;
216  }
217  *prob = (CProb)v;
218  Normalize();
219  return symbol;
220}
221
222
223unsigned BitTreeReverseDecode(CProb *probs, unsigned numBits, CRangeDecoder *rc)
224{
225  unsigned m = 1;
226  unsigned symbol = 0;
227  for (unsigned i = 0; i < numBits; i++)
228  {
229    unsigned bit = rc->DecodeBit(&probs[m]);
230    m <<= 1;
231    m += bit;
232    symbol |= (bit << i);
233  }
234  return symbol;
235}
236
237template <unsigned NumBits>
238class CBitTreeDecoder
239{
240  CProb Probs[(unsigned)1 << NumBits];
241
242public:
243
244  void Init()
245  {
246    INIT_PROBS(Probs);
247  }
248
249  unsigned Decode(CRangeDecoder *rc)
250  {
251    unsigned m = 1;
252    for (unsigned i = 0; i < NumBits; i++)
253      m = (m << 1) + rc->DecodeBit(&Probs[m]);
254    return m - ((unsigned)1 << NumBits);
255  }
256
257  unsigned ReverseDecode(CRangeDecoder *rc)
258  {
259    return BitTreeReverseDecode(Probs, NumBits, rc);
260  }
261};
262
263#define kNumPosBitsMax 4
264
265#define kNumStates 12
266#define kNumLenToPosStates 4
267#define kNumAlignBits 4
268#define kStartPosModelIndex 4
269#define kEndPosModelIndex 14
270#define kNumFullDistances (1 << (kEndPosModelIndex >> 1))
271#define kMatchMinLen 2
272
273class CLenDecoder
274{
275  CProb Choice;
276  CProb Choice2;
277  CBitTreeDecoder<3> LowCoder[1 << kNumPosBitsMax];
278  CBitTreeDecoder<3> MidCoder[1 << kNumPosBitsMax];
279  CBitTreeDecoder<8> HighCoder;
280
281public:
282
283  void Init()
284  {
285    Choice = PROB_INIT_VAL;
286    Choice2 = PROB_INIT_VAL;
287    HighCoder.Init();
288    for (unsigned i = 0; i < (1 << kNumPosBitsMax); i++)
289    {
290      LowCoder[i].Init();
291      MidCoder[i].Init();
292    }
293  }
294
295  unsigned Decode(CRangeDecoder *rc, unsigned posState)
296  {
297    if (rc->DecodeBit(&Choice) == 0)
298      return LowCoder[posState].Decode(rc);
299    if (rc->DecodeBit(&Choice2) == 0)
300      return 8 + MidCoder[posState].Decode(rc);
301    return 16 + HighCoder.Decode(rc);
302  }
303};
304
305unsigned UpdateState_Literal(unsigned state)
306{
307  if (state < 4) return 0;
308  else if (state < 10) return state - 3;
309  else return state - 6;
310}
311unsigned UpdateState_Match   (unsigned state) { return state < 7 ? 7 : 10; }
312unsigned UpdateState_Rep     (unsigned state) { return state < 7 ? 8 : 11; }
313unsigned UpdateState_ShortRep(unsigned state) { return state < 7 ? 9 : 11; }
314
315#define LZMA_DIC_MIN (1 << 12)
316
317class CLzmaDecoder
318{
319public:
320  CRangeDecoder RangeDec;
321  COutWindow OutWindow;
322
323  bool markerIsMandatory;
324  unsigned lc, pb, lp;
325  UInt32 dictSize;
326  UInt32 dictSizeInProperties;
327
328  void DecodeProperties(const Byte *properties)
329  {
330    unsigned d = properties[0];
331    if (d >= (9 * 5 * 5))
332      throw "Incorrect LZMA properties";
333    lc = d % 9;
334    d /= 9;
335    pb = d / 5;
336    lp = d % 5;
337    dictSizeInProperties = 0;
338    for (int i = 0; i < 4; i++)
339      dictSizeInProperties |= (UInt32)properties[i + 1] << (8 * i);
340    dictSize = dictSizeInProperties;
341    if (dictSize < LZMA_DIC_MIN)
342      dictSize = LZMA_DIC_MIN;
343  }
344
345  CLzmaDecoder(): LitProbs(NULL) {}
346  ~CLzmaDecoder() { delete []LitProbs; }
347
348  void Create()
349  {
350    OutWindow.Create(dictSize);
351    CreateLiterals();
352  }
353
354  int Decode(bool unpackSizeDefined, UInt64 unpackSize);
355
356private:
357
358  CProb *LitProbs;
359
360  void CreateLiterals()
361  {
362    LitProbs = new CProb[(UInt32)0x300 << (lc + lp)];
363  }
364
365  void InitLiterals()
366  {
367    UInt32 num = (UInt32)0x300 << (lc + lp);
368    for (UInt32 i = 0; i < num; i++)
369      LitProbs[i] = PROB_INIT_VAL;
370  }
371
372  void DecodeLiteral(unsigned state, UInt32 rep0)
373  {
374    unsigned prevByte = 0;
375    if (!OutWindow.IsEmpty())
376      prevByte = OutWindow.GetByte(1);
377
378    unsigned symbol = 1;
379    unsigned litState = ((OutWindow.TotalPos & ((1 << lp) - 1)) << lc) + (prevByte >> (8 - lc));
380    CProb *probs = &LitProbs[(UInt32)0x300 * litState];
381
382    if (state >= 7)
383    {
384      unsigned matchByte = OutWindow.GetByte(rep0 + 1);
385      do
386      {
387        unsigned matchBit = (matchByte >> 7) & 1;
388        matchByte <<= 1;
389        unsigned bit = RangeDec.DecodeBit(&probs[((1 + matchBit) << 8) + symbol]);
390        symbol = (symbol << 1) | bit;
391        if (matchBit != bit)
392          break;
393      }
394      while (symbol < 0x100);
395    }
396    while (symbol < 0x100)
397      symbol = (symbol << 1) | RangeDec.DecodeBit(&probs[symbol]);
398    OutWindow.PutByte((Byte)(symbol - 0x100));
399  }
400
401  CBitTreeDecoder<6> PosSlotDecoder[kNumLenToPosStates];
402  CBitTreeDecoder<kNumAlignBits> AlignDecoder;
403  CProb PosDecoders[1 + kNumFullDistances - kEndPosModelIndex];
404
405  void InitDist()
406  {
407    for (unsigned i = 0; i < kNumLenToPosStates; i++)
408      PosSlotDecoder[i].Init();
409    AlignDecoder.Init();
410    INIT_PROBS(PosDecoders);
411  }
412
413  unsigned DecodeDistance(unsigned len)
414  {
415    unsigned lenState = len;
416    if (lenState > kNumLenToPosStates - 1)
417      lenState = kNumLenToPosStates - 1;
418
419    unsigned posSlot = PosSlotDecoder[lenState].Decode(&RangeDec);
420    if (posSlot < 4)
421      return posSlot;
422
423    unsigned numDirectBits = (unsigned)((posSlot >> 1) - 1);
424    UInt32 dist = ((2 | (posSlot & 1)) << numDirectBits);
425    if (posSlot < kEndPosModelIndex)
426      dist += BitTreeReverseDecode(PosDecoders + dist - posSlot, numDirectBits, &RangeDec);
427    else
428    {
429      dist += RangeDec.DecodeDirectBits(numDirectBits - kNumAlignBits) << kNumAlignBits;
430      dist += AlignDecoder.ReverseDecode(&RangeDec);
431    }
432    return dist;
433  }
434
435  CProb IsMatch[kNumStates << kNumPosBitsMax];
436  CProb IsRep[kNumStates];
437  CProb IsRepG0[kNumStates];
438  CProb IsRepG1[kNumStates];
439  CProb IsRepG2[kNumStates];
440  CProb IsRep0Long[kNumStates << kNumPosBitsMax];
441
442  CLenDecoder LenDecoder;
443  CLenDecoder RepLenDecoder;
444
445  void Init()
446  {
447    InitLiterals();
448    InitDist();
449
450    INIT_PROBS(IsMatch);
451    INIT_PROBS(IsRep);
452    INIT_PROBS(IsRepG0);
453    INIT_PROBS(IsRepG1);
454    INIT_PROBS(IsRepG2);
455    INIT_PROBS(IsRep0Long);
456
457    LenDecoder.Init();
458    RepLenDecoder.Init();
459  }
460};
461
462
463#define LZMA_RES_ERROR                   0
464#define LZMA_RES_FINISHED_WITH_MARKER    1
465#define LZMA_RES_FINISHED_WITHOUT_MARKER 2
466
467int CLzmaDecoder::Decode(bool unpackSizeDefined, UInt64 unpackSize)
468{
469  if (!RangeDec.Init())
470    return LZMA_RES_ERROR;
471
472  Init();
473
474  UInt32 rep0 = 0, rep1 = 0, rep2 = 0, rep3 = 0;
475  unsigned state = 0;
476
477  for (;;)
478  {
479    if (unpackSizeDefined && unpackSize == 0 && !markerIsMandatory)
480      if (RangeDec.IsFinishedOK())
481        return LZMA_RES_FINISHED_WITHOUT_MARKER;
482
483    unsigned posState = OutWindow.TotalPos & ((1 << pb) - 1);
484
485    if (RangeDec.DecodeBit(&IsMatch[(state << kNumPosBitsMax) + posState]) == 0)
486    {
487      if (unpackSizeDefined && unpackSize == 0)
488        return LZMA_RES_ERROR;
489      DecodeLiteral(state, rep0);
490      state = UpdateState_Literal(state);
491      unpackSize--;
492      continue;
493    }
494
495    unsigned len;
496
497    if (RangeDec.DecodeBit(&IsRep[state]) != 0)
498    {
499      if (unpackSizeDefined && unpackSize == 0)
500        return LZMA_RES_ERROR;
501      if (OutWindow.IsEmpty())
502        return LZMA_RES_ERROR;
503      if (RangeDec.DecodeBit(&IsRepG0[state]) == 0)
504      {
505        if (RangeDec.DecodeBit(&IsRep0Long[(state << kNumPosBitsMax) + posState]) == 0)
506        {
507          state = UpdateState_ShortRep(state);
508          OutWindow.PutByte(OutWindow.GetByte(rep0 + 1));
509          unpackSize--;
510          continue;
511        }
512      }
513      else
514      {
515        UInt32 dist;
516        if (RangeDec.DecodeBit(&IsRepG1[state]) == 0)
517          dist = rep1;
518        else
519        {
520          if (RangeDec.DecodeBit(&IsRepG2[state]) == 0)
521            dist = rep2;
522          else
523          {
524            dist = rep3;
525            rep3 = rep2;
526          }
527          rep2 = rep1;
528        }
529        rep1 = rep0;
530        rep0 = dist;
531      }
532      len = RepLenDecoder.Decode(&RangeDec, posState);
533      state = UpdateState_Rep(state);
534    }
535    else
536    {
537      rep3 = rep2;
538      rep2 = rep1;
539      rep1 = rep0;
540      len = LenDecoder.Decode(&RangeDec, posState);
541      state = UpdateState_Match(state);
542      rep0 = DecodeDistance(len);
543      if (rep0 == 0xFFFFFFFF)
544        return RangeDec.IsFinishedOK() ?
545            LZMA_RES_FINISHED_WITH_MARKER :
546            LZMA_RES_ERROR;
547
548      if (unpackSizeDefined && unpackSize == 0)
549        return LZMA_RES_ERROR;
550      if (rep0 >= dictSize || !OutWindow.CheckDistance(rep0))
551        return LZMA_RES_ERROR;
552    }
553    len += kMatchMinLen;
554    bool isError = false;
555    if (unpackSizeDefined && unpackSize < len)
556    {
557      len = (unsigned)unpackSize;
558      isError = true;
559    }
560    OutWindow.CopyMatch(rep0 + 1, len);
561    unpackSize -= len;
562    if (isError)
563      return LZMA_RES_ERROR;
564  }
565}
566
567static void Print(const char *s)
568{
569  fputs(s, stdout);
570}
571
572static void PrintError(const char *s)
573{
574  fputs(s, stderr);
575}
576
577
578#define CONVERT_INT_TO_STR(charType, tempSize) \
579
580void ConvertUInt64ToString(UInt64 val, char *s)
581{
582  char temp[32];
583  unsigned i = 0;
584  while (val >= 10)
585  {
586    temp[i++] = (char)('0' + (unsigned)(val % 10));
587    val /= 10;
588  }
589  *s++ = (char)('0' + (unsigned)val);
590  while (i != 0)
591  {
592    i--;
593    *s++ = temp[i];
594  }
595  *s = 0;
596}
597
598void PrintUInt64(const char *title, UInt64 v)
599{
600  Print(title);
601  Print(" : ");
602  char s[32];
603  ConvertUInt64ToString(v, s);
604  Print(s);
605  Print(" bytes \n");
606}
607
608int main2(int numArgs, const char *args[])
609{
610  Print("\nLZMA Reference Decoder 15.00 : Igor Pavlov : Public domain : 2015-04-16\n");
611  if (numArgs == 1)
612    Print("\nUse: lzmaSpec a.lzma outFile");
613
614  if (numArgs != 3)
615    throw "you must specify two parameters";
616
617  CInputStream inStream;
618  inStream.File = fopen(args[1], "rb");
619  inStream.Init();
620  if (inStream.File == 0)
621    throw "Can't open input file";
622
623  CLzmaDecoder lzmaDecoder;
624  lzmaDecoder.OutWindow.OutStream.File = fopen(args[2], "wb+");
625  lzmaDecoder.OutWindow.OutStream.Init();
626  if (inStream.File == 0)
627    throw "Can't open output file";
628
629  Byte header[13];
630  int i;
631  for (i = 0; i < 13; i++)
632    header[i] = inStream.ReadByte();
633
634  lzmaDecoder.DecodeProperties(header);
635
636  printf("\nlc=%d, lp=%d, pb=%d", lzmaDecoder.lc, lzmaDecoder.lp, lzmaDecoder.pb);
637  printf("\nDictionary Size in properties = %u", lzmaDecoder.dictSizeInProperties);
638  printf("\nDictionary Size for decoding  = %u", lzmaDecoder.dictSize);
639
640  UInt64 unpackSize = 0;
641  bool unpackSizeDefined = false;
642  for (i = 0; i < 8; i++)
643  {
644    Byte b = header[5 + i];
645    if (b != 0xFF)
646      unpackSizeDefined = true;
647    unpackSize |= (UInt64)b << (8 * i);
648  }
649
650  lzmaDecoder.markerIsMandatory = !unpackSizeDefined;
651
652  Print("\n");
653  if (unpackSizeDefined)
654    PrintUInt64("Uncompressed Size", unpackSize);
655  else
656    Print("End marker is expected\n");
657  lzmaDecoder.RangeDec.InStream = &inStream;
658
659  Print("\n");
660
661  lzmaDecoder.Create();
662
663  int res = lzmaDecoder.Decode(unpackSizeDefined, unpackSize);
664
665  PrintUInt64("Read    ", inStream.Processed);
666  PrintUInt64("Written ", lzmaDecoder.OutWindow.OutStream.Processed);
667
668  if (res == LZMA_RES_ERROR)
669    throw "LZMA decoding error";
670  else if (res == LZMA_RES_FINISHED_WITHOUT_MARKER)
671    Print("Finished without end marker");
672  else if (res == LZMA_RES_FINISHED_WITH_MARKER)
673  {
674    if (unpackSizeDefined)
675    {
676      if (lzmaDecoder.OutWindow.OutStream.Processed != unpackSize)
677        throw "Finished with end marker before than specified size";
678      Print("Warning: ");
679    }
680    Print("Finished with end marker");
681  }
682  else
683    throw "Internal Error";
684
685  Print("\n");
686
687  if (lzmaDecoder.RangeDec.Corrupted)
688  {
689    Print("\nWarning: LZMA stream is corrupted\n");
690  }
691
692  return 0;
693}
694
695
696int
697  #ifdef _MSC_VER
698    __cdecl
699  #endif
700main(int numArgs, const char *args[])
701{
702  try { return main2(numArgs, args); }
703  catch (const char *s)
704  {
705    PrintError("\nError:\n");
706    PrintError(s);
707    PrintError("\n");
708    return 1;
709  }
710  catch(...)
711  {
712    PrintError("\nError\n");
713    return 1;
714  }
715}
716