1/* LzmaSpec.c -- LZMA Reference Decoder
22013-07-28 : Igor Pavlov : Public domain */
3
4// This code implements LZMA file decoding according to LZMA specification.
5// This code is not optimized for speed.
6
7#include <stdio.h>
8
9#ifdef _MSC_VER
10  #pragma warning(disable : 4710) // function not inlined
11  #pragma warning(disable : 4996) // This function or variable may be unsafe
12#endif
13
14typedef unsigned char Byte;
15typedef unsigned short UInt16;
16
17#ifdef _LZMA_UINT32_IS_ULONG
18  typedef unsigned long UInt32;
19#else
20  typedef unsigned int UInt32;
21#endif
22
23#if defined(_MSC_VER) || defined(__BORLANDC__)
24  typedef unsigned __int64 UInt64;
25#else
26  typedef unsigned long long int UInt64;
27#endif
28
29
30struct CInputStream
31{
32  FILE *File;
33  UInt64 Processed;
34
35  void Init() { Processed = 0; }
36
37  Byte ReadByte()
38  {
39    int c = getc(File);
40    if (c < 0)
41      throw "Unexpected end of file";
42    Processed++;
43    return (Byte)c;
44  }
45};
46
47
48struct COutStream
49{
50  FILE *File;
51  UInt64 Processed;
52
53  void Init() { Processed = 0; }
54
55  void WriteByte(Byte b)
56  {
57    if (putc(b, File) == EOF)
58      throw "File writing error";
59    Processed++;
60  }
61};
62
63
64class COutWindow
65{
66  Byte *Buf;
67  UInt32 Pos;
68  UInt32 Size;
69  bool IsFull;
70
71public:
72  unsigned TotalPos;
73  COutStream OutStream;
74
75  COutWindow(): Buf(NULL) {}
76  ~COutWindow() { delete []Buf; }
77
78  void Create(UInt32 dictSize)
79  {
80    Buf = new Byte[dictSize];
81    Pos = 0;
82    Size = dictSize;
83    IsFull = false;
84    TotalPos = 0;
85  }
86
87  void PutByte(Byte b)
88  {
89    TotalPos++;
90    Buf[Pos++] = b;
91    if (Pos == Size)
92    {
93      Pos = 0;
94      IsFull = true;
95    }
96    OutStream.WriteByte(b);
97  }
98
99  Byte GetByte(UInt32 dist) const
100  {
101    return Buf[dist <= Pos ? Pos - dist : Size - dist + Pos];
102  }
103
104  void CopyMatch(UInt32 dist, unsigned len)
105  {
106    for (; len > 0; len--)
107      PutByte(GetByte(dist));
108  }
109
110  bool CheckDistance(UInt32 dist) const
111  {
112    return dist <= Pos || IsFull;
113  }
114
115  bool IsEmpty() const
116  {
117    return Pos == 0 && !IsFull;
118  }
119};
120
121
122#define kNumBitModelTotalBits 11
123#define kNumMoveBits 5
124
125typedef UInt16 CProb;
126
127#define PROB_INIT_VAL ((1 << kNumBitModelTotalBits) / 2)
128
129#define INIT_PROBS(p) \
130 { for (unsigned i = 0; i < sizeof(p) / sizeof(p[0]); i++) p[i] = PROB_INIT_VAL; }
131
132class CRangeDecoder
133{
134  UInt32 Range;
135  UInt32 Code;
136
137  void Normalize();
138
139public:
140
141  CInputStream *InStream;
142  bool Corrupted;
143
144  void Init();
145  bool IsFinishedOK() const { return Code == 0; }
146
147  UInt32 DecodeDirectBits(unsigned numBits);
148  unsigned DecodeBit(CProb *prob);
149};
150
151void CRangeDecoder::Init()
152{
153  Corrupted = false;
154
155  if (InStream->ReadByte() != 0)
156    Corrupted = true;
157
158  Range = 0xFFFFFFFF;
159  Code = 0;
160  for (int i = 0; i < 4; i++)
161    Code = (Code << 8) | InStream->ReadByte();
162
163  if (Code == Range)
164    Corrupted = true;
165}
166
167#define kTopValue ((UInt32)1 << 24)
168
169void CRangeDecoder::Normalize()
170{
171  if (Range < kTopValue)
172  {
173    Range <<= 8;
174    Code = (Code << 8) | InStream->ReadByte();
175  }
176}
177
178UInt32 CRangeDecoder::DecodeDirectBits(unsigned numBits)
179{
180  UInt32 res = 0;
181  do
182  {
183    Range >>= 1;
184    Code -= Range;
185    UInt32 t = 0 - ((UInt32)Code >> 31);
186    Code += Range & t;
187
188    if (Code == Range)
189      Corrupted = true;
190
191    Normalize();
192    res <<= 1;
193    res += t + 1;
194  }
195  while (--numBits);
196  return res;
197}
198
199unsigned CRangeDecoder::DecodeBit(CProb *prob)
200{
201  unsigned v = *prob;
202  UInt32 bound = (Range >> kNumBitModelTotalBits) * v;
203  unsigned symbol;
204  if (Code < bound)
205  {
206    v += ((1 << kNumBitModelTotalBits) - v) >> kNumMoveBits;
207    Range = bound;
208    symbol = 0;
209  }
210  else
211  {
212    v -= v >> kNumMoveBits;
213    Code -= bound;
214    Range -= bound;
215    symbol = 1;
216  }
217  *prob = (CProb)v;
218  Normalize();
219  return symbol;
220}
221
222
223unsigned BitTreeReverseDecode(CProb *probs, unsigned numBits, CRangeDecoder *rc)
224{
225  unsigned m = 1;
226  unsigned symbol = 0;
227  for (unsigned i = 0; i < numBits; i++)
228  {
229    unsigned bit = rc->DecodeBit(&probs[m]);
230    m <<= 1;
231    m += bit;
232    symbol |= (bit << i);
233  }
234  return symbol;
235}
236
237template <unsigned NumBits>
238class CBitTreeDecoder
239{
240  CProb Probs[(unsigned)1 << NumBits];
241
242public:
243
244  void Init()
245  {
246    INIT_PROBS(Probs);
247  }
248
249  unsigned Decode(CRangeDecoder *rc)
250  {
251    unsigned m = 1;
252    for (unsigned i = 0; i < NumBits; i++)
253      m = (m << 1) + rc->DecodeBit(&Probs[m]);
254    return m - ((unsigned)1 << NumBits);
255  }
256
257  unsigned ReverseDecode(CRangeDecoder *rc)
258  {
259    return BitTreeReverseDecode(Probs, NumBits, rc);
260  }
261};
262
263#define kNumPosBitsMax 4
264
265#define kNumStates 12
266#define kNumLenToPosStates 4
267#define kNumAlignBits 4
268#define kStartPosModelIndex 4
269#define kEndPosModelIndex 14
270#define kNumFullDistances (1 << (kEndPosModelIndex >> 1))
271#define kMatchMinLen 2
272
273class CLenDecoder
274{
275  CProb Choice;
276  CProb Choice2;
277  CBitTreeDecoder<3> LowCoder[1 << kNumPosBitsMax];
278  CBitTreeDecoder<3> MidCoder[1 << kNumPosBitsMax];
279  CBitTreeDecoder<8> HighCoder;
280
281public:
282
283  void Init()
284  {
285    Choice = PROB_INIT_VAL;
286    Choice2 = PROB_INIT_VAL;
287    HighCoder.Init();
288    for (unsigned i = 0; i < (1 << kNumPosBitsMax); i++)
289    {
290      LowCoder[i].Init();
291      MidCoder[i].Init();
292    }
293  }
294
295  unsigned Decode(CRangeDecoder *rc, unsigned posState)
296  {
297    if (rc->DecodeBit(&Choice) == 0)
298      return LowCoder[posState].Decode(rc);
299    if (rc->DecodeBit(&Choice2) == 0)
300      return 8 + MidCoder[posState].Decode(rc);
301    return 16 + HighCoder.Decode(rc);
302  }
303};
304
305unsigned UpdateState_Literal(unsigned state)
306{
307  if (state < 4) return 0;
308  else if (state < 10) return state - 3;
309  else return state - 6;
310}
311unsigned UpdateState_Match   (unsigned state) { return state < 7 ? 7 : 10; }
312unsigned UpdateState_Rep     (unsigned state) { return state < 7 ? 8 : 11; }
313unsigned UpdateState_ShortRep(unsigned state) { return state < 7 ? 9 : 11; }
314
315#define LZMA_DIC_MIN (1 << 12)
316
317class CLzmaDecoder
318{
319public:
320  CRangeDecoder RangeDec;
321  COutWindow OutWindow;
322
323  bool markerIsMandatory;
324  unsigned lc, pb, lp;
325  UInt32 dictSize;
326  UInt32 dictSizeInProperties;
327
328  void DecodeProperties(const Byte *properties)
329  {
330    unsigned d = properties[0];
331    if (d >= (9 * 5 * 5))
332      throw "Incorrect LZMA properties";
333    lc = d % 9;
334    d /= 9;
335    pb = d / 5;
336    lp = d % 5;
337    dictSizeInProperties = 0;
338    for (int i = 0; i < 4; i++)
339      dictSizeInProperties |= (UInt32)properties[i + 1] << (8 * i);
340    dictSize = dictSizeInProperties;
341    if (dictSize < LZMA_DIC_MIN)
342      dictSize = LZMA_DIC_MIN;
343  }
344
345  CLzmaDecoder(): LitProbs(NULL) {}
346  ~CLzmaDecoder() { delete []LitProbs; }
347
348  void Create()
349  {
350    OutWindow.Create(dictSize);
351    CreateLiterals();
352  }
353
354  int Decode(bool unpackSizeDefined, UInt64 unpackSize);
355
356private:
357
358  CProb *LitProbs;
359
360  void CreateLiterals()
361  {
362    LitProbs = new CProb[(UInt32)0x300 << (lc + lp)];
363  }
364
365  void InitLiterals()
366  {
367    UInt32 num = (UInt32)0x300 << (lc + lp);
368    for (UInt32 i = 0; i < num; i++)
369      LitProbs[i] = PROB_INIT_VAL;
370  }
371
372  void DecodeLiteral(unsigned state, UInt32 rep0)
373  {
374    unsigned prevByte = 0;
375    if (!OutWindow.IsEmpty())
376      prevByte = OutWindow.GetByte(1);
377
378    unsigned symbol = 1;
379    unsigned litState = ((OutWindow.TotalPos & ((1 << lp) - 1)) << lc) + (prevByte >> (8 - lc));
380    CProb *probs = &LitProbs[(UInt32)0x300 * litState];
381
382    if (state >= 7)
383    {
384      unsigned matchByte = OutWindow.GetByte(rep0 + 1);
385      do
386      {
387        unsigned matchBit = (matchByte >> 7) & 1;
388        matchByte <<= 1;
389        unsigned bit = RangeDec.DecodeBit(&probs[((1 + matchBit) << 8) + symbol]);
390        symbol = (symbol << 1) | bit;
391        if (matchBit != bit)
392          break;
393      }
394      while (symbol < 0x100);
395    }
396    while (symbol < 0x100)
397      symbol = (symbol << 1) | RangeDec.DecodeBit(&probs[symbol]);
398    OutWindow.PutByte((Byte)(symbol - 0x100));
399  }
400
401  CBitTreeDecoder<6> PosSlotDecoder[kNumLenToPosStates];
402  CBitTreeDecoder<kNumAlignBits> AlignDecoder;
403  CProb PosDecoders[1 + kNumFullDistances - kEndPosModelIndex];
404
405  void InitDist()
406  {
407    for (unsigned i = 0; i < kNumLenToPosStates; i++)
408      PosSlotDecoder[i].Init();
409    AlignDecoder.Init();
410    INIT_PROBS(PosDecoders);
411  }
412
413  unsigned DecodeDistance(unsigned len)
414  {
415    unsigned lenState = len;
416    if (lenState > kNumLenToPosStates - 1)
417      lenState = kNumLenToPosStates - 1;
418
419    unsigned posSlot = PosSlotDecoder[lenState].Decode(&RangeDec);
420    if (posSlot < 4)
421      return posSlot;
422
423    unsigned numDirectBits = (unsigned)((posSlot >> 1) - 1);
424    UInt32 dist = ((2 | (posSlot & 1)) << numDirectBits);
425    if (posSlot < kEndPosModelIndex)
426      dist += BitTreeReverseDecode(PosDecoders + dist - posSlot, numDirectBits, &RangeDec);
427    else
428    {
429      dist += RangeDec.DecodeDirectBits(numDirectBits - kNumAlignBits) << kNumAlignBits;
430      dist += AlignDecoder.ReverseDecode(&RangeDec);
431    }
432    return dist;
433  }
434
435  CProb IsMatch[kNumStates << kNumPosBitsMax];
436  CProb IsRep[kNumStates];
437  CProb IsRepG0[kNumStates];
438  CProb IsRepG1[kNumStates];
439  CProb IsRepG2[kNumStates];
440  CProb IsRep0Long[kNumStates << kNumPosBitsMax];
441
442  CLenDecoder LenDecoder;
443  CLenDecoder RepLenDecoder;
444
445  void Init()
446  {
447    InitLiterals();
448    InitDist();
449
450    INIT_PROBS(IsMatch);
451    INIT_PROBS(IsRep);
452    INIT_PROBS(IsRepG0);
453    INIT_PROBS(IsRepG1);
454    INIT_PROBS(IsRepG2);
455    INIT_PROBS(IsRep0Long);
456
457    LenDecoder.Init();
458    RepLenDecoder.Init();
459  }
460};
461
462
463#define LZMA_RES_ERROR                   0
464#define LZMA_RES_FINISHED_WITH_MARKER    1
465#define LZMA_RES_FINISHED_WITHOUT_MARKER 2
466
467int CLzmaDecoder::Decode(bool unpackSizeDefined, UInt64 unpackSize)
468{
469  Init();
470  RangeDec.Init();
471
472  UInt32 rep0 = 0, rep1 = 0, rep2 = 0, rep3 = 0;
473  unsigned state = 0;
474
475  for (;;)
476  {
477    if (unpackSizeDefined && unpackSize == 0 && !markerIsMandatory)
478      if (RangeDec.IsFinishedOK())
479        return LZMA_RES_FINISHED_WITHOUT_MARKER;
480
481    unsigned posState = OutWindow.TotalPos & ((1 << pb) - 1);
482
483    if (RangeDec.DecodeBit(&IsMatch[(state << kNumPosBitsMax) + posState]) == 0)
484    {
485      if (unpackSizeDefined && unpackSize == 0)
486        return LZMA_RES_ERROR;
487      DecodeLiteral(state, rep0);
488      state = UpdateState_Literal(state);
489      unpackSize--;
490      continue;
491    }
492
493    unsigned len;
494
495    if (RangeDec.DecodeBit(&IsRep[state]) != 0)
496    {
497      if (unpackSizeDefined && unpackSize == 0)
498        return LZMA_RES_ERROR;
499      if (OutWindow.IsEmpty())
500        return LZMA_RES_ERROR;
501      if (RangeDec.DecodeBit(&IsRepG0[state]) == 0)
502      {
503        if (RangeDec.DecodeBit(&IsRep0Long[(state << kNumPosBitsMax) + posState]) == 0)
504        {
505          state = UpdateState_ShortRep(state);
506          OutWindow.PutByte(OutWindow.GetByte(rep0 + 1));
507          unpackSize--;
508          continue;
509        }
510      }
511      else
512      {
513        UInt32 dist;
514        if (RangeDec.DecodeBit(&IsRepG1[state]) == 0)
515          dist = rep1;
516        else
517        {
518          if (RangeDec.DecodeBit(&IsRepG2[state]) == 0)
519            dist = rep2;
520          else
521          {
522            dist = rep3;
523            rep3 = rep2;
524          }
525          rep2 = rep1;
526        }
527        rep1 = rep0;
528        rep0 = dist;
529      }
530      len = RepLenDecoder.Decode(&RangeDec, posState);
531      state = UpdateState_Rep(state);
532    }
533    else
534    {
535      rep3 = rep2;
536      rep2 = rep1;
537      rep1 = rep0;
538      len = LenDecoder.Decode(&RangeDec, posState);
539      state = UpdateState_Match(state);
540      rep0 = DecodeDistance(len);
541      if (rep0 == 0xFFFFFFFF)
542        return RangeDec.IsFinishedOK() ?
543            LZMA_RES_FINISHED_WITH_MARKER :
544            LZMA_RES_ERROR;
545
546      if (unpackSizeDefined && unpackSize == 0)
547        return LZMA_RES_ERROR;
548      if (rep0 >= dictSize || !OutWindow.CheckDistance(rep0))
549        return LZMA_RES_ERROR;
550    }
551    len += kMatchMinLen;
552    bool isError = false;
553    if (unpackSizeDefined && unpackSize < len)
554    {
555      len = (unsigned)unpackSize;
556      isError = true;
557    }
558    OutWindow.CopyMatch(rep0 + 1, len);
559    unpackSize -= len;
560    if (isError)
561      return LZMA_RES_ERROR;
562  }
563}
564
565static void Print(const char *s)
566{
567  fputs(s, stdout);
568}
569
570static void PrintError(const char *s)
571{
572  fputs(s, stderr);
573}
574
575
576#define CONVERT_INT_TO_STR(charType, tempSize) \
577
578void ConvertUInt64ToString(UInt64 val, char *s)
579{
580  char temp[32];
581  unsigned i = 0;
582  while (val >= 10)
583  {
584    temp[i++] = (char)('0' + (unsigned)(val % 10));
585    val /= 10;
586  }
587  *s++ = (char)('0' + (unsigned)val);
588  while (i != 0)
589  {
590    i--;
591    *s++ = temp[i];
592  }
593  *s = 0;
594}
595
596void PrintUInt64(const char *title, UInt64 v)
597{
598  Print(title);
599  Print(" : ");
600  char s[32];
601  ConvertUInt64ToString(v, s);
602  Print(s);
603  Print(" bytes \n");
604}
605
606int main2(int numArgs, const char *args[])
607{
608  Print("\nLZMA Reference Decoder 9.31 : Igor Pavlov : Public domain : 2013-02-06\n");
609  if (numArgs == 1)
610    Print("\nUse: lzmaSpec a.lzma outFile");
611
612  if (numArgs != 3)
613    throw "you must specify two parameters";
614
615  CInputStream inStream;
616  inStream.File = fopen(args[1], "rb");
617  inStream.Init();
618  if (inStream.File == 0)
619    throw "Can't open input file";
620
621  CLzmaDecoder lzmaDecoder;
622  lzmaDecoder.OutWindow.OutStream.File = fopen(args[2], "wb+");
623  lzmaDecoder.OutWindow.OutStream.Init();
624  if (inStream.File == 0)
625    throw "Can't open output file";
626
627  Byte header[13];
628  int i;
629  for (i = 0; i < 13; i++)
630    header[i] = inStream.ReadByte();
631
632  lzmaDecoder.DecodeProperties(header);
633
634  printf("\nlc=%d, lp=%d, pb=%d", lzmaDecoder.lc, lzmaDecoder.lp, lzmaDecoder.pb);
635  printf("\nDictionary Size in properties = %u", lzmaDecoder.dictSizeInProperties);
636  printf("\nDictionary Size for decoding  = %u", lzmaDecoder.dictSize);
637
638  UInt64 unpackSize = 0;
639  bool unpackSizeDefined = false;
640  for (i = 0; i < 8; i++)
641  {
642    Byte b = header[5 + i];
643    if (b != 0xFF)
644      unpackSizeDefined = true;
645    unpackSize |= (UInt64)b << (8 * i);
646  }
647
648  lzmaDecoder.markerIsMandatory = !unpackSizeDefined;
649
650  Print("\n");
651  if (unpackSizeDefined)
652    PrintUInt64("Uncompressed Size", unpackSize);
653  else
654    Print("End marker is expected\n");
655  lzmaDecoder.RangeDec.InStream = &inStream;
656
657  Print("\n");
658
659  lzmaDecoder.Create();
660  // we support the streams that have uncompressed size and marker.
661  int res = lzmaDecoder.Decode(unpackSizeDefined, unpackSize);
662
663  PrintUInt64("Read    ", inStream.Processed);
664  PrintUInt64("Written ", lzmaDecoder.OutWindow.OutStream.Processed);
665
666  if (res == LZMA_RES_ERROR)
667    throw "LZMA decoding error";
668  else if (res == LZMA_RES_FINISHED_WITHOUT_MARKER)
669    Print("Finished without end marker");
670  else if (res == LZMA_RES_FINISHED_WITH_MARKER)
671  {
672    if (unpackSizeDefined)
673    {
674      if (lzmaDecoder.OutWindow.OutStream.Processed != unpackSize)
675        throw "Finished with end marker before than specified size";
676      Print("Warning: ");
677    }
678    Print("Finished with end marker");
679  }
680  else
681    throw "Internal Error";
682
683  Print("\n");
684
685  if (lzmaDecoder.RangeDec.Corrupted)
686  {
687    Print("\nWarning: LZMA stream is corrupted\n");
688  }
689
690  return 0;
691}
692
693
694int
695  #ifdef _MSC_VER
696    __cdecl
697  #endif
698main(int numArgs, const char *args[])
699{
700  try { return main2(numArgs, args); }
701  catch (const char *s)
702  {
703    PrintError("\nError:\n");
704    PrintError(s);
705    PrintError("\n");
706    return 1;
707  }
708  catch(...)
709  {
710    PrintError("\nError\n");
711    return 1;
712  }
713}
714