1/* LzmaSpec.cpp -- LZMA Reference Decoder 22015-06-14 : Igor Pavlov : Public domain */ 3 4// This code implements LZMA file decoding according to LZMA specification. 5// This code is not optimized for speed. 6 7#include <stdio.h> 8 9#ifdef _MSC_VER 10 #pragma warning(disable : 4710) // function not inlined 11 #pragma warning(disable : 4996) // This function or variable may be unsafe 12#endif 13 14typedef unsigned char Byte; 15typedef unsigned short UInt16; 16 17#ifdef _LZMA_UINT32_IS_ULONG 18 typedef unsigned long UInt32; 19#else 20 typedef unsigned int UInt32; 21#endif 22 23#if defined(_MSC_VER) || defined(__BORLANDC__) 24 typedef unsigned __int64 UInt64; 25#else 26 typedef unsigned long long int UInt64; 27#endif 28 29 30struct CInputStream 31{ 32 FILE *File; 33 UInt64 Processed; 34 35 void Init() { Processed = 0; } 36 37 Byte ReadByte() 38 { 39 int c = getc(File); 40 if (c < 0) 41 throw "Unexpected end of file"; 42 Processed++; 43 return (Byte)c; 44 } 45}; 46 47 48struct COutStream 49{ 50 FILE *File; 51 UInt64 Processed; 52 53 void Init() { Processed = 0; } 54 55 void WriteByte(Byte b) 56 { 57 if (putc(b, File) == EOF) 58 throw "File writing error"; 59 Processed++; 60 } 61}; 62 63 64class COutWindow 65{ 66 Byte *Buf; 67 UInt32 Pos; 68 UInt32 Size; 69 bool IsFull; 70 71public: 72 unsigned TotalPos; 73 COutStream OutStream; 74 75 COutWindow(): Buf(NULL) {} 76 ~COutWindow() { delete []Buf; } 77 78 void Create(UInt32 dictSize) 79 { 80 Buf = new Byte[dictSize]; 81 Pos = 0; 82 Size = dictSize; 83 IsFull = false; 84 TotalPos = 0; 85 } 86 87 void PutByte(Byte b) 88 { 89 TotalPos++; 90 Buf[Pos++] = b; 91 if (Pos == Size) 92 { 93 Pos = 0; 94 IsFull = true; 95 } 96 OutStream.WriteByte(b); 97 } 98 99 Byte GetByte(UInt32 dist) const 100 { 101 return Buf[dist <= Pos ? Pos - dist : Size - dist + Pos]; 102 } 103 104 void CopyMatch(UInt32 dist, unsigned len) 105 { 106 for (; len > 0; len--) 107 PutByte(GetByte(dist)); 108 } 109 110 bool CheckDistance(UInt32 dist) const 111 { 112 return dist <= Pos || IsFull; 113 } 114 115 bool IsEmpty() const 116 { 117 return Pos == 0 && !IsFull; 118 } 119}; 120 121 122#define kNumBitModelTotalBits 11 123#define kNumMoveBits 5 124 125typedef UInt16 CProb; 126 127#define PROB_INIT_VAL ((1 << kNumBitModelTotalBits) / 2) 128 129#define INIT_PROBS(p) \ 130 { for (unsigned i = 0; i < sizeof(p) / sizeof(p[0]); i++) p[i] = PROB_INIT_VAL; } 131 132class CRangeDecoder 133{ 134 UInt32 Range; 135 UInt32 Code; 136 137 void Normalize(); 138 139public: 140 141 CInputStream *InStream; 142 bool Corrupted; 143 144 bool Init(); 145 bool IsFinishedOK() const { return Code == 0; } 146 147 UInt32 DecodeDirectBits(unsigned numBits); 148 unsigned DecodeBit(CProb *prob); 149}; 150 151bool CRangeDecoder::Init() 152{ 153 Corrupted = false; 154 Range = 0xFFFFFFFF; 155 Code = 0; 156 157 Byte b = InStream->ReadByte(); 158 159 for (int i = 0; i < 4; i++) 160 Code = (Code << 8) | InStream->ReadByte(); 161 162 if (b != 0 || Code == Range) 163 Corrupted = true; 164 return b == 0; 165} 166 167#define kTopValue ((UInt32)1 << 24) 168 169void CRangeDecoder::Normalize() 170{ 171 if (Range < kTopValue) 172 { 173 Range <<= 8; 174 Code = (Code << 8) | InStream->ReadByte(); 175 } 176} 177 178UInt32 CRangeDecoder::DecodeDirectBits(unsigned numBits) 179{ 180 UInt32 res = 0; 181 do 182 { 183 Range >>= 1; 184 Code -= Range; 185 UInt32 t = 0 - ((UInt32)Code >> 31); 186 Code += Range & t; 187 188 if (Code == Range) 189 Corrupted = true; 190 191 Normalize(); 192 res <<= 1; 193 res += t + 1; 194 } 195 while (--numBits); 196 return res; 197} 198 199unsigned CRangeDecoder::DecodeBit(CProb *prob) 200{ 201 unsigned v = *prob; 202 UInt32 bound = (Range >> kNumBitModelTotalBits) * v; 203 unsigned symbol; 204 if (Code < bound) 205 { 206 v += ((1 << kNumBitModelTotalBits) - v) >> kNumMoveBits; 207 Range = bound; 208 symbol = 0; 209 } 210 else 211 { 212 v -= v >> kNumMoveBits; 213 Code -= bound; 214 Range -= bound; 215 symbol = 1; 216 } 217 *prob = (CProb)v; 218 Normalize(); 219 return symbol; 220} 221 222 223unsigned BitTreeReverseDecode(CProb *probs, unsigned numBits, CRangeDecoder *rc) 224{ 225 unsigned m = 1; 226 unsigned symbol = 0; 227 for (unsigned i = 0; i < numBits; i++) 228 { 229 unsigned bit = rc->DecodeBit(&probs[m]); 230 m <<= 1; 231 m += bit; 232 symbol |= (bit << i); 233 } 234 return symbol; 235} 236 237template <unsigned NumBits> 238class CBitTreeDecoder 239{ 240 CProb Probs[(unsigned)1 << NumBits]; 241 242public: 243 244 void Init() 245 { 246 INIT_PROBS(Probs); 247 } 248 249 unsigned Decode(CRangeDecoder *rc) 250 { 251 unsigned m = 1; 252 for (unsigned i = 0; i < NumBits; i++) 253 m = (m << 1) + rc->DecodeBit(&Probs[m]); 254 return m - ((unsigned)1 << NumBits); 255 } 256 257 unsigned ReverseDecode(CRangeDecoder *rc) 258 { 259 return BitTreeReverseDecode(Probs, NumBits, rc); 260 } 261}; 262 263#define kNumPosBitsMax 4 264 265#define kNumStates 12 266#define kNumLenToPosStates 4 267#define kNumAlignBits 4 268#define kStartPosModelIndex 4 269#define kEndPosModelIndex 14 270#define kNumFullDistances (1 << (kEndPosModelIndex >> 1)) 271#define kMatchMinLen 2 272 273class CLenDecoder 274{ 275 CProb Choice; 276 CProb Choice2; 277 CBitTreeDecoder<3> LowCoder[1 << kNumPosBitsMax]; 278 CBitTreeDecoder<3> MidCoder[1 << kNumPosBitsMax]; 279 CBitTreeDecoder<8> HighCoder; 280 281public: 282 283 void Init() 284 { 285 Choice = PROB_INIT_VAL; 286 Choice2 = PROB_INIT_VAL; 287 HighCoder.Init(); 288 for (unsigned i = 0; i < (1 << kNumPosBitsMax); i++) 289 { 290 LowCoder[i].Init(); 291 MidCoder[i].Init(); 292 } 293 } 294 295 unsigned Decode(CRangeDecoder *rc, unsigned posState) 296 { 297 if (rc->DecodeBit(&Choice) == 0) 298 return LowCoder[posState].Decode(rc); 299 if (rc->DecodeBit(&Choice2) == 0) 300 return 8 + MidCoder[posState].Decode(rc); 301 return 16 + HighCoder.Decode(rc); 302 } 303}; 304 305unsigned UpdateState_Literal(unsigned state) 306{ 307 if (state < 4) return 0; 308 else if (state < 10) return state - 3; 309 else return state - 6; 310} 311unsigned UpdateState_Match (unsigned state) { return state < 7 ? 7 : 10; } 312unsigned UpdateState_Rep (unsigned state) { return state < 7 ? 8 : 11; } 313unsigned UpdateState_ShortRep(unsigned state) { return state < 7 ? 9 : 11; } 314 315#define LZMA_DIC_MIN (1 << 12) 316 317class CLzmaDecoder 318{ 319public: 320 CRangeDecoder RangeDec; 321 COutWindow OutWindow; 322 323 bool markerIsMandatory; 324 unsigned lc, pb, lp; 325 UInt32 dictSize; 326 UInt32 dictSizeInProperties; 327 328 void DecodeProperties(const Byte *properties) 329 { 330 unsigned d = properties[0]; 331 if (d >= (9 * 5 * 5)) 332 throw "Incorrect LZMA properties"; 333 lc = d % 9; 334 d /= 9; 335 pb = d / 5; 336 lp = d % 5; 337 dictSizeInProperties = 0; 338 for (int i = 0; i < 4; i++) 339 dictSizeInProperties |= (UInt32)properties[i + 1] << (8 * i); 340 dictSize = dictSizeInProperties; 341 if (dictSize < LZMA_DIC_MIN) 342 dictSize = LZMA_DIC_MIN; 343 } 344 345 CLzmaDecoder(): LitProbs(NULL) {} 346 ~CLzmaDecoder() { delete []LitProbs; } 347 348 void Create() 349 { 350 OutWindow.Create(dictSize); 351 CreateLiterals(); 352 } 353 354 int Decode(bool unpackSizeDefined, UInt64 unpackSize); 355 356private: 357 358 CProb *LitProbs; 359 360 void CreateLiterals() 361 { 362 LitProbs = new CProb[(UInt32)0x300 << (lc + lp)]; 363 } 364 365 void InitLiterals() 366 { 367 UInt32 num = (UInt32)0x300 << (lc + lp); 368 for (UInt32 i = 0; i < num; i++) 369 LitProbs[i] = PROB_INIT_VAL; 370 } 371 372 void DecodeLiteral(unsigned state, UInt32 rep0) 373 { 374 unsigned prevByte = 0; 375 if (!OutWindow.IsEmpty()) 376 prevByte = OutWindow.GetByte(1); 377 378 unsigned symbol = 1; 379 unsigned litState = ((OutWindow.TotalPos & ((1 << lp) - 1)) << lc) + (prevByte >> (8 - lc)); 380 CProb *probs = &LitProbs[(UInt32)0x300 * litState]; 381 382 if (state >= 7) 383 { 384 unsigned matchByte = OutWindow.GetByte(rep0 + 1); 385 do 386 { 387 unsigned matchBit = (matchByte >> 7) & 1; 388 matchByte <<= 1; 389 unsigned bit = RangeDec.DecodeBit(&probs[((1 + matchBit) << 8) + symbol]); 390 symbol = (symbol << 1) | bit; 391 if (matchBit != bit) 392 break; 393 } 394 while (symbol < 0x100); 395 } 396 while (symbol < 0x100) 397 symbol = (symbol << 1) | RangeDec.DecodeBit(&probs[symbol]); 398 OutWindow.PutByte((Byte)(symbol - 0x100)); 399 } 400 401 CBitTreeDecoder<6> PosSlotDecoder[kNumLenToPosStates]; 402 CBitTreeDecoder<kNumAlignBits> AlignDecoder; 403 CProb PosDecoders[1 + kNumFullDistances - kEndPosModelIndex]; 404 405 void InitDist() 406 { 407 for (unsigned i = 0; i < kNumLenToPosStates; i++) 408 PosSlotDecoder[i].Init(); 409 AlignDecoder.Init(); 410 INIT_PROBS(PosDecoders); 411 } 412 413 unsigned DecodeDistance(unsigned len) 414 { 415 unsigned lenState = len; 416 if (lenState > kNumLenToPosStates - 1) 417 lenState = kNumLenToPosStates - 1; 418 419 unsigned posSlot = PosSlotDecoder[lenState].Decode(&RangeDec); 420 if (posSlot < 4) 421 return posSlot; 422 423 unsigned numDirectBits = (unsigned)((posSlot >> 1) - 1); 424 UInt32 dist = ((2 | (posSlot & 1)) << numDirectBits); 425 if (posSlot < kEndPosModelIndex) 426 dist += BitTreeReverseDecode(PosDecoders + dist - posSlot, numDirectBits, &RangeDec); 427 else 428 { 429 dist += RangeDec.DecodeDirectBits(numDirectBits - kNumAlignBits) << kNumAlignBits; 430 dist += AlignDecoder.ReverseDecode(&RangeDec); 431 } 432 return dist; 433 } 434 435 CProb IsMatch[kNumStates << kNumPosBitsMax]; 436 CProb IsRep[kNumStates]; 437 CProb IsRepG0[kNumStates]; 438 CProb IsRepG1[kNumStates]; 439 CProb IsRepG2[kNumStates]; 440 CProb IsRep0Long[kNumStates << kNumPosBitsMax]; 441 442 CLenDecoder LenDecoder; 443 CLenDecoder RepLenDecoder; 444 445 void Init() 446 { 447 InitLiterals(); 448 InitDist(); 449 450 INIT_PROBS(IsMatch); 451 INIT_PROBS(IsRep); 452 INIT_PROBS(IsRepG0); 453 INIT_PROBS(IsRepG1); 454 INIT_PROBS(IsRepG2); 455 INIT_PROBS(IsRep0Long); 456 457 LenDecoder.Init(); 458 RepLenDecoder.Init(); 459 } 460}; 461 462 463#define LZMA_RES_ERROR 0 464#define LZMA_RES_FINISHED_WITH_MARKER 1 465#define LZMA_RES_FINISHED_WITHOUT_MARKER 2 466 467int CLzmaDecoder::Decode(bool unpackSizeDefined, UInt64 unpackSize) 468{ 469 if (!RangeDec.Init()) 470 return LZMA_RES_ERROR; 471 472 Init(); 473 474 UInt32 rep0 = 0, rep1 = 0, rep2 = 0, rep3 = 0; 475 unsigned state = 0; 476 477 for (;;) 478 { 479 if (unpackSizeDefined && unpackSize == 0 && !markerIsMandatory) 480 if (RangeDec.IsFinishedOK()) 481 return LZMA_RES_FINISHED_WITHOUT_MARKER; 482 483 unsigned posState = OutWindow.TotalPos & ((1 << pb) - 1); 484 485 if (RangeDec.DecodeBit(&IsMatch[(state << kNumPosBitsMax) + posState]) == 0) 486 { 487 if (unpackSizeDefined && unpackSize == 0) 488 return LZMA_RES_ERROR; 489 DecodeLiteral(state, rep0); 490 state = UpdateState_Literal(state); 491 unpackSize--; 492 continue; 493 } 494 495 unsigned len; 496 497 if (RangeDec.DecodeBit(&IsRep[state]) != 0) 498 { 499 if (unpackSizeDefined && unpackSize == 0) 500 return LZMA_RES_ERROR; 501 if (OutWindow.IsEmpty()) 502 return LZMA_RES_ERROR; 503 if (RangeDec.DecodeBit(&IsRepG0[state]) == 0) 504 { 505 if (RangeDec.DecodeBit(&IsRep0Long[(state << kNumPosBitsMax) + posState]) == 0) 506 { 507 state = UpdateState_ShortRep(state); 508 OutWindow.PutByte(OutWindow.GetByte(rep0 + 1)); 509 unpackSize--; 510 continue; 511 } 512 } 513 else 514 { 515 UInt32 dist; 516 if (RangeDec.DecodeBit(&IsRepG1[state]) == 0) 517 dist = rep1; 518 else 519 { 520 if (RangeDec.DecodeBit(&IsRepG2[state]) == 0) 521 dist = rep2; 522 else 523 { 524 dist = rep3; 525 rep3 = rep2; 526 } 527 rep2 = rep1; 528 } 529 rep1 = rep0; 530 rep0 = dist; 531 } 532 len = RepLenDecoder.Decode(&RangeDec, posState); 533 state = UpdateState_Rep(state); 534 } 535 else 536 { 537 rep3 = rep2; 538 rep2 = rep1; 539 rep1 = rep0; 540 len = LenDecoder.Decode(&RangeDec, posState); 541 state = UpdateState_Match(state); 542 rep0 = DecodeDistance(len); 543 if (rep0 == 0xFFFFFFFF) 544 return RangeDec.IsFinishedOK() ? 545 LZMA_RES_FINISHED_WITH_MARKER : 546 LZMA_RES_ERROR; 547 548 if (unpackSizeDefined && unpackSize == 0) 549 return LZMA_RES_ERROR; 550 if (rep0 >= dictSize || !OutWindow.CheckDistance(rep0)) 551 return LZMA_RES_ERROR; 552 } 553 len += kMatchMinLen; 554 bool isError = false; 555 if (unpackSizeDefined && unpackSize < len) 556 { 557 len = (unsigned)unpackSize; 558 isError = true; 559 } 560 OutWindow.CopyMatch(rep0 + 1, len); 561 unpackSize -= len; 562 if (isError) 563 return LZMA_RES_ERROR; 564 } 565} 566 567static void Print(const char *s) 568{ 569 fputs(s, stdout); 570} 571 572static void PrintError(const char *s) 573{ 574 fputs(s, stderr); 575} 576 577 578#define CONVERT_INT_TO_STR(charType, tempSize) \ 579 580void ConvertUInt64ToString(UInt64 val, char *s) 581{ 582 char temp[32]; 583 unsigned i = 0; 584 while (val >= 10) 585 { 586 temp[i++] = (char)('0' + (unsigned)(val % 10)); 587 val /= 10; 588 } 589 *s++ = (char)('0' + (unsigned)val); 590 while (i != 0) 591 { 592 i--; 593 *s++ = temp[i]; 594 } 595 *s = 0; 596} 597 598void PrintUInt64(const char *title, UInt64 v) 599{ 600 Print(title); 601 Print(" : "); 602 char s[32]; 603 ConvertUInt64ToString(v, s); 604 Print(s); 605 Print(" bytes \n"); 606} 607 608int main2(int numArgs, const char *args[]) 609{ 610 Print("\nLZMA Reference Decoder 15.00 : Igor Pavlov : Public domain : 2015-04-16\n"); 611 if (numArgs == 1) 612 Print("\nUse: lzmaSpec a.lzma outFile"); 613 614 if (numArgs != 3) 615 throw "you must specify two parameters"; 616 617 CInputStream inStream; 618 inStream.File = fopen(args[1], "rb"); 619 inStream.Init(); 620 if (inStream.File == 0) 621 throw "Can't open input file"; 622 623 CLzmaDecoder lzmaDecoder; 624 lzmaDecoder.OutWindow.OutStream.File = fopen(args[2], "wb+"); 625 lzmaDecoder.OutWindow.OutStream.Init(); 626 if (inStream.File == 0) 627 throw "Can't open output file"; 628 629 Byte header[13]; 630 int i; 631 for (i = 0; i < 13; i++) 632 header[i] = inStream.ReadByte(); 633 634 lzmaDecoder.DecodeProperties(header); 635 636 printf("\nlc=%d, lp=%d, pb=%d", lzmaDecoder.lc, lzmaDecoder.lp, lzmaDecoder.pb); 637 printf("\nDictionary Size in properties = %u", lzmaDecoder.dictSizeInProperties); 638 printf("\nDictionary Size for decoding = %u", lzmaDecoder.dictSize); 639 640 UInt64 unpackSize = 0; 641 bool unpackSizeDefined = false; 642 for (i = 0; i < 8; i++) 643 { 644 Byte b = header[5 + i]; 645 if (b != 0xFF) 646 unpackSizeDefined = true; 647 unpackSize |= (UInt64)b << (8 * i); 648 } 649 650 lzmaDecoder.markerIsMandatory = !unpackSizeDefined; 651 652 Print("\n"); 653 if (unpackSizeDefined) 654 PrintUInt64("Uncompressed Size", unpackSize); 655 else 656 Print("End marker is expected\n"); 657 lzmaDecoder.RangeDec.InStream = &inStream; 658 659 Print("\n"); 660 661 lzmaDecoder.Create(); 662 663 int res = lzmaDecoder.Decode(unpackSizeDefined, unpackSize); 664 665 PrintUInt64("Read ", inStream.Processed); 666 PrintUInt64("Written ", lzmaDecoder.OutWindow.OutStream.Processed); 667 668 if (res == LZMA_RES_ERROR) 669 throw "LZMA decoding error"; 670 else if (res == LZMA_RES_FINISHED_WITHOUT_MARKER) 671 Print("Finished without end marker"); 672 else if (res == LZMA_RES_FINISHED_WITH_MARKER) 673 { 674 if (unpackSizeDefined) 675 { 676 if (lzmaDecoder.OutWindow.OutStream.Processed != unpackSize) 677 throw "Finished with end marker before than specified size"; 678 Print("Warning: "); 679 } 680 Print("Finished with end marker"); 681 } 682 else 683 throw "Internal Error"; 684 685 Print("\n"); 686 687 if (lzmaDecoder.RangeDec.Corrupted) 688 { 689 Print("\nWarning: LZMA stream is corrupted\n"); 690 } 691 692 return 0; 693} 694 695 696int 697 #ifdef _MSC_VER 698 __cdecl 699 #endif 700main(int numArgs, const char *args[]) 701{ 702 try { return main2(numArgs, args); } 703 catch (const char *s) 704 { 705 PrintError("\nError:\n"); 706 PrintError(s); 707 PrintError("\n"); 708 return 1; 709 } 710 catch(...) 711 { 712 PrintError("\nError\n"); 713 return 1; 714 } 715} 716