1/*-----------------------------------------------------------*/
2/*--- Block recoverer program for bzip2                   ---*/
3/*---                                      bzip2recover.c ---*/
4/*-----------------------------------------------------------*/
5
6/* ------------------------------------------------------------------
7   This file is part of bzip2/libbzip2, a program and library for
8   lossless, block-sorting data compression.
9
10   bzip2/libbzip2 version 1.0.6 of 6 September 2010
11   Copyright (C) 1996-2010 Julian Seward <jseward@bzip.org>
12
13   Please read the WARNING, DISCLAIMER and PATENTS sections in the
14   README file.
15
16   This program is released under the terms of the license contained
17   in the file LICENSE.
18   ------------------------------------------------------------------ */
19
20/* This program is a complete hack and should be rewritten properly.
21	 It isn't very complicated. */
22
23#include <stdio.h>
24#include <errno.h>
25#include <stdlib.h>
26#include <string.h>
27
28
29/* This program records bit locations in the file to be recovered.
30   That means that if 64-bit ints are not supported, we will not
31   be able to recover .bz2 files over 512MB (2^32 bits) long.
32   On GNU supported platforms, we take advantage of the 64-bit
33   int support to circumvent this problem.  Ditto MSVC.
34
35   This change occurred in version 1.0.2; all prior versions have
36   the 512MB limitation.
37*/
/* MaybeUInt64 is the type used for bit positions and byte counts.
   It is 64 bits wide where the compiler is known to offer such a
   type (GNU C, MSVC) and falls back to plain unsigned int otherwise.
   MaybeUInt64_FMT is the matching printf conversion.

   Note: the GNU branch uses "%llu".  The "L" length modifier is only
   defined for long double conversions, so "%Lu" is not a portable
   way to print an unsigned long long.
*/
#if defined(__GNUC__)
   typedef  unsigned long long int  MaybeUInt64;
#  define MaybeUInt64_FMT "%llu"
#elif defined(_MSC_VER)
   typedef  unsigned __int64  MaybeUInt64;
#  define MaybeUInt64_FMT "%I64u"
#else
   typedef  unsigned int   MaybeUInt64;
#  define MaybeUInt64_FMT "%u"
#endif
50
/* Short aliases for the scalar types used throughout this file. */
typedef unsigned int  UInt32;
typedef int           Int32;
typedef unsigned char UChar;
typedef char          Char;

/* Boolean held in an unsigned char, plus its two canonical values. */
typedef unsigned char Bool;
#define False  ((Bool)0)
#define True   ((Bool)1)
58
59
60#define BZ_MAX_FILENAME 2000
61
62Char inFileName[BZ_MAX_FILENAME];
63Char outFileName[BZ_MAX_FILENAME];
64Char progName[BZ_MAX_FILENAME];
65
66MaybeUInt64 bytesOut = 0;
67MaybeUInt64 bytesIn  = 0;
68
69
70/*---------------------------------------------------*/
71/*--- Header bytes                                ---*/
72/*---------------------------------------------------*/
73
/* Character codes of the stream header.  main() starts every output
   file with BZ_HDR_B, BZ_HDR_Z, BZ_HDR_h and then BZ_HDR_0 + 9. */
#define BZ_HDR_B  0x42   /* 'B' */
#define BZ_HDR_Z  0x5a   /* 'Z' */
#define BZ_HDR_h  0x68   /* 'h' */
#define BZ_HDR_0  0x30   /* '0' */
78
79
80/*---------------------------------------------------*/
81/*--- I/O errors                                  ---*/
82/*---------------------------------------------------*/
83
84/*---------------------------------------------*/
85static void readError ( void )
86{
87   fprintf ( stderr,
88             "%s: I/O error reading `%s', possible reason follows.\n",
89            progName, inFileName );
90   perror ( progName );
91   fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
92             progName );
93   exit ( 1 );
94}
95
96
97/*---------------------------------------------*/
98static void writeError ( void )
99{
100   fprintf ( stderr,
101             "%s: I/O error reading `%s', possible reason follows.\n",
102            progName, inFileName );
103   perror ( progName );
104   fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
105             progName );
106   exit ( 1 );
107}
108
109
110/*---------------------------------------------*/
111static void mallocFail ( Int32 n )
112{
113   fprintf ( stderr,
114             "%s: malloc failed on request for %d bytes.\n",
115            progName, n );
116   fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
117             progName );
118   exit ( 1 );
119}
120
121
122/*---------------------------------------------*/
123static void tooManyBlocks ( Int32 max_handled_blocks )
124{
125   fprintf ( stderr,
126             "%s: `%s' appears to contain more than %d blocks\n",
127            progName, inFileName, max_handled_blocks );
128   fprintf ( stderr,
129             "%s: and cannot be handled.  To fix, increase\n",
130             progName );
131   fprintf ( stderr,
132             "%s: BZ_MAX_HANDLED_BLOCKS in bzip2recover.c, and recompile.\n",
133             progName );
134   exit ( 1 );
135}
136
137
138
139/*---------------------------------------------------*/
140/*--- Bit stream I/O                              ---*/
141/*---------------------------------------------------*/
142
143typedef
144   struct {
145      FILE*  handle;
146      Int32  buffer;
147      Int32  buffLive;
148      Char   mode;
149   }
150   BitStream;
151
152
153/*---------------------------------------------*/
154static BitStream* bsOpenReadStream ( FILE* stream )
155{
156   BitStream *bs = malloc ( sizeof(BitStream) );
157   if (bs == NULL) mallocFail ( sizeof(BitStream) );
158   bs->handle = stream;
159   bs->buffer = 0;
160   bs->buffLive = 0;
161   bs->mode = 'r';
162   return bs;
163}
164
165
166/*---------------------------------------------*/
167static BitStream* bsOpenWriteStream ( FILE* stream )
168{
169   BitStream *bs = malloc ( sizeof(BitStream) );
170   if (bs == NULL) mallocFail ( sizeof(BitStream) );
171   bs->handle = stream;
172   bs->buffer = 0;
173   bs->buffLive = 0;
174   bs->mode = 'w';
175   return bs;
176}
177
178
179/*---------------------------------------------*/
180static void bsPutBit ( BitStream* bs, Int32 bit )
181{
182   if (bs->buffLive == 8) {
183      Int32 retVal = putc ( (UChar) bs->buffer, bs->handle );
184      if (retVal == EOF) writeError();
185      bytesOut++;
186      bs->buffLive = 1;
187      bs->buffer = bit & 0x1;
188   } else {
189      bs->buffer = ( (bs->buffer << 1) | (bit & 0x1) );
190      bs->buffLive++;
191   };
192}
193
194
195/*---------------------------------------------*/
196/*--
197   Returns 0 or 1, or 2 to indicate EOF.
198--*/
199static Int32 bsGetBit ( BitStream* bs )
200{
201   if (bs->buffLive > 0) {
202      bs->buffLive --;
203      return ( ((bs->buffer) >> (bs->buffLive)) & 0x1 );
204   } else {
205      Int32 retVal = getc ( bs->handle );
206      if ( retVal == EOF ) {
207         if (errno != 0) readError();
208         return 2;
209      }
210      bs->buffLive = 7;
211      bs->buffer = retVal;
212      return ( ((bs->buffer) >> 7) & 0x1 );
213   }
214}
215
216
217/*---------------------------------------------*/
218static void bsClose ( BitStream* bs )
219{
220   Int32 retVal;
221
222   if ( bs->mode == 'w' ) {
223      while ( bs->buffLive < 8 ) {
224         bs->buffLive++;
225         bs->buffer <<= 1;
226      };
227      retVal = putc ( (UChar) (bs->buffer), bs->handle );
228      if (retVal == EOF) writeError();
229      bytesOut++;
230      retVal = fflush ( bs->handle );
231      if (retVal == EOF) writeError();
232   }
233   retVal = fclose ( bs->handle );
234   if (retVal == EOF) {
235      if (bs->mode == 'w') writeError(); else readError();
236   }
237   free ( bs );
238}
239
240
241/*---------------------------------------------*/
242static void bsPutUChar ( BitStream* bs, UChar c )
243{
244   Int32 i;
245   for (i = 7; i >= 0; i--)
246      bsPutBit ( bs, (((UInt32) c) >> i) & 0x1 );
247}
248
249
250/*---------------------------------------------*/
251static void bsPutUInt32 ( BitStream* bs, UInt32 c )
252{
253   Int32 i;
254
255   for (i = 31; i >= 0; i--)
256      bsPutBit ( bs, (c >> i) & 0x1 );
257}
258
259
260/*---------------------------------------------*/
261static Bool endsInBz2 ( Char* name )
262{
263   Int32 n = strlen ( name );
264   if (n <= 4) return False;
265   return
266      (name[n-4] == '.' &&
267       name[n-3] == 'b' &&
268       name[n-2] == 'z' &&
269       name[n-1] == '2');
270}
271
272
273/*---------------------------------------------------*/
274/*---                                             ---*/
275/*---------------------------------------------------*/
276
/* Directory separator used when locating the base name of the input
   path.  This logic isn't really right when it comes to Cygwin. */
#if defined(_WIN32)
#  define  BZ_SPLIT_SYM  '\\'  /* path splitter on Windows platform */
#else
#  define  BZ_SPLIT_SYM  '/'   /* path splitter on Unix platform */
#endif

/* The two 48-bit markers main() scans for, each split into its top
   16 bits (HI) and bottom 32 bits (LO). */
#define BLOCK_HEADER_HI   0x00003141UL
#define BLOCK_HEADER_LO   0x59265359UL

#define BLOCK_ENDMARK_HI  0x00001772UL
#define BLOCK_ENDMARK_LO  0x45385090UL
289
290/* Increase if necessary.  However, a .bz2 file with > 50000 blocks
291   would have an uncompressed size of at least 40GB, so the chances
292   are low you'll need to up this.
293*/
294#define BZ_MAX_HANDLED_BLOCKS 50000
295
296MaybeUInt64 bStart [BZ_MAX_HANDLED_BLOCKS];
297MaybeUInt64 bEnd   [BZ_MAX_HANDLED_BLOCKS];
298MaybeUInt64 rbStart[BZ_MAX_HANDLED_BLOCKS];
299MaybeUInt64 rbEnd  [BZ_MAX_HANDLED_BLOCKS];
300
301Int32 main ( Int32 argc, Char** argv )
302{
303   FILE*       inFile;
304   FILE*       outFile;
305   BitStream*  bsIn, *bsWr;
306   Int32       b, wrBlock, currBlock, rbCtr;
307   MaybeUInt64 bitsRead;
308
309   UInt32      buffHi, buffLo, blockCRC;
310   Char*       p;
311
312   strcpy ( progName, argv[0] );
313   inFileName[0] = outFileName[0] = 0;
314
315   fprintf ( stderr,
316             "bzip2recover 1.0.6: extracts blocks from damaged .bz2 files.\n" );
317
318   if (argc != 2) {
319      fprintf ( stderr, "%s: usage is `%s damaged_file_name'.\n",
320                        progName, progName );
321      switch (sizeof(MaybeUInt64)) {
322         case 8:
323            fprintf(stderr,
324                    "\trestrictions on size of recovered file: None\n");
325            break;
326         case 4:
327            fprintf(stderr,
328                    "\trestrictions on size of recovered file: 512 MB\n");
329            fprintf(stderr,
330                    "\tto circumvent, recompile with MaybeUInt64 as an\n"
331                    "\tunsigned 64-bit int.\n");
332            break;
333         default:
334            fprintf(stderr,
335                    "\tsizeof(MaybeUInt64) is not 4 or 8 -- "
336                    "configuration error.\n");
337            break;
338      }
339      exit(1);
340   }
341
342   if (strlen(argv[1]) >= BZ_MAX_FILENAME-20) {
343      fprintf ( stderr,
344                "%s: supplied filename is suspiciously (>= %d chars) long.  Bye!\n",
345                progName, (int)strlen(argv[1]) );
346      exit(1);
347   }
348
349   strcpy ( inFileName, argv[1] );
350
351   inFile = fopen ( inFileName, "rb" );
352   if (inFile == NULL) {
353      fprintf ( stderr, "%s: can't read `%s'\n", progName, inFileName );
354      exit(1);
355   }
356
357   bsIn = bsOpenReadStream ( inFile );
358   fprintf ( stderr, "%s: searching for block boundaries ...\n", progName );
359
360   bitsRead = 0;
361   buffHi = buffLo = 0;
362   currBlock = 0;
363   bStart[currBlock] = 0;
364
365   rbCtr = 0;
366
367   while (True) {
368      b = bsGetBit ( bsIn );
369      bitsRead++;
370      if (b == 2) {
371         if (bitsRead >= bStart[currBlock] &&
372            (bitsRead - bStart[currBlock]) >= 40) {
373            bEnd[currBlock] = bitsRead-1;
374            if (currBlock > 0)
375               fprintf ( stderr, "   block %d runs from " MaybeUInt64_FMT
376                                 " to " MaybeUInt64_FMT " (incomplete)\n",
377                         currBlock,  bStart[currBlock], bEnd[currBlock] );
378         } else
379            currBlock--;
380         break;
381      }
382      buffHi = (buffHi << 1) | (buffLo >> 31);
383      buffLo = (buffLo << 1) | (b & 1);
384      if ( ( (buffHi & 0x0000ffff) == BLOCK_HEADER_HI
385             && buffLo == BLOCK_HEADER_LO)
386           ||
387           ( (buffHi & 0x0000ffff) == BLOCK_ENDMARK_HI
388             && buffLo == BLOCK_ENDMARK_LO)
389         ) {
390         if (bitsRead > 49) {
391            bEnd[currBlock] = bitsRead-49;
392         } else {
393            bEnd[currBlock] = 0;
394         }
395         if (currBlock > 0 &&
396	     (bEnd[currBlock] - bStart[currBlock]) >= 130) {
397            fprintf ( stderr, "   block %d runs from " MaybeUInt64_FMT
398                              " to " MaybeUInt64_FMT "\n",
399                      rbCtr+1,  bStart[currBlock], bEnd[currBlock] );
400            rbStart[rbCtr] = bStart[currBlock];
401            rbEnd[rbCtr] = bEnd[currBlock];
402            rbCtr++;
403         }
404         if (currBlock >= BZ_MAX_HANDLED_BLOCKS)
405            tooManyBlocks(BZ_MAX_HANDLED_BLOCKS);
406         currBlock++;
407
408         bStart[currBlock] = bitsRead;
409      }
410   }
411
412   bsClose ( bsIn );
413
414   /*-- identified blocks run from 1 to rbCtr inclusive. --*/
415
416   if (rbCtr < 1) {
417      fprintf ( stderr,
418                "%s: sorry, I couldn't find any block boundaries.\n",
419                progName );
420      exit(1);
421   };
422
423   fprintf ( stderr, "%s: splitting into blocks\n", progName );
424
425   inFile = fopen ( inFileName, "rb" );
426   if (inFile == NULL) {
427      fprintf ( stderr, "%s: can't open `%s'\n", progName, inFileName );
428      exit(1);
429   }
430   bsIn = bsOpenReadStream ( inFile );
431
432   /*-- placate gcc's dataflow analyser --*/
433   blockCRC = 0; bsWr = 0;
434
435   bitsRead = 0;
436   outFile = NULL;
437   wrBlock = 0;
438   while (True) {
439      b = bsGetBit(bsIn);
440      if (b == 2) break;
441      buffHi = (buffHi << 1) | (buffLo >> 31);
442      buffLo = (buffLo << 1) | (b & 1);
443      if (bitsRead == 47+rbStart[wrBlock])
444         blockCRC = (buffHi << 16) | (buffLo >> 16);
445
446      if (outFile != NULL && bitsRead >= rbStart[wrBlock]
447                          && bitsRead <= rbEnd[wrBlock]) {
448         bsPutBit ( bsWr, b );
449      }
450
451      bitsRead++;
452
453      if (bitsRead == rbEnd[wrBlock]+1) {
454         if (outFile != NULL) {
455            bsPutUChar ( bsWr, 0x17 ); bsPutUChar ( bsWr, 0x72 );
456            bsPutUChar ( bsWr, 0x45 ); bsPutUChar ( bsWr, 0x38 );
457            bsPutUChar ( bsWr, 0x50 ); bsPutUChar ( bsWr, 0x90 );
458            bsPutUInt32 ( bsWr, blockCRC );
459            bsClose ( bsWr );
460         }
461         if (wrBlock >= rbCtr) break;
462         wrBlock++;
463      } else
464      if (bitsRead == rbStart[wrBlock]) {
465         /* Create the output file name, correctly handling leading paths.
466            (31.10.2001 by Sergey E. Kusikov) */
467         Char* split;
468         Int32 ofs, k;
469         for (k = 0; k < BZ_MAX_FILENAME; k++)
470            outFileName[k] = 0;
471         strcpy (outFileName, inFileName);
472         split = strrchr (outFileName, BZ_SPLIT_SYM);
473         if (split == NULL) {
474            split = outFileName;
475         } else {
476            ++split;
477	 }
478	 /* Now split points to the start of the basename. */
479         ofs  = split - outFileName;
480         sprintf (split, "rec%5d", wrBlock+1);
481         for (p = split; *p != 0; p++) if (*p == ' ') *p = '0';
482         strcat (outFileName, inFileName + ofs);
483
484         if ( !endsInBz2(outFileName)) strcat ( outFileName, ".bz2" );
485
486         fprintf ( stderr, "   writing block %d to `%s' ...\n",
487                           wrBlock+1, outFileName );
488
489         outFile = fopen ( outFileName, "wb" );
490         if (outFile == NULL) {
491            fprintf ( stderr, "%s: can't write `%s'\n",
492                      progName, outFileName );
493            exit(1);
494         }
495         bsWr = bsOpenWriteStream ( outFile );
496         bsPutUChar ( bsWr, BZ_HDR_B );
497         bsPutUChar ( bsWr, BZ_HDR_Z );
498         bsPutUChar ( bsWr, BZ_HDR_h );
499         bsPutUChar ( bsWr, BZ_HDR_0 + 9 );
500         bsPutUChar ( bsWr, 0x31 ); bsPutUChar ( bsWr, 0x41 );
501         bsPutUChar ( bsWr, 0x59 ); bsPutUChar ( bsWr, 0x26 );
502         bsPutUChar ( bsWr, 0x53 ); bsPutUChar ( bsWr, 0x59 );
503      }
504   }
505
506   fprintf ( stderr, "%s: finished\n", progName );
507   return 0;
508}
509
510
511
512/*-----------------------------------------------------------*/
513/*--- end                                  bzip2recover.c ---*/
514/*-----------------------------------------------------------*/
515