1/*-
2 * Copyright 2003-2005 Colin Percival
3 * All rights reserved
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted providing that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
16 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
18 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
22 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
23 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24 * POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#if 0
28__FBSDID("$FreeBSD: src/usr.bin/bsdiff/bsdiff/bsdiff.c,v 1.1 2005/08/06 01:59:05 cperciva Exp $");
29#endif
30
31#include <sys/types.h>
32
33#include <bzlib.h>
34#include <err.h>
35#include <fcntl.h>
36#include <lzma.h>
37#include <stdio.h>
38#include <stdlib.h>
39#include <string.h>
40#include <unistd.h>
41#include <zlib.h>
42
43#if defined(__APPLE__)
44#include <libkern/OSByteOrder.h>
45#define htole64(x) OSSwapHostToLittleInt64(x)
46#elif defined(__linux__)
47#include <endian.h>
48#elif defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64))
49#define htole64(x) (x)
50#else
51#error Provide htole64 for this platform
52#endif
53
54#include "chrome/installer/mac/third_party/bsdiff/sha1_adapter.h"
55
56#define MIN(x,y) (((x)<(y)) ? (x) : (y))
57
58static void split(off_t *I,off_t *V,off_t start,off_t len,off_t h)
59{
60	off_t i,j,k,x,tmp,jj,kk;
61
62	if(len<16) {
63		for(k=start;k<start+len;k+=j) {
64			j=1;x=V[I[k]+h];
65			for(i=1;k+i<start+len;i++) {
66				if(V[I[k+i]+h]<x) {
67					x=V[I[k+i]+h];
68					j=0;
69				};
70				if(V[I[k+i]+h]==x) {
71					tmp=I[k+j];I[k+j]=I[k+i];I[k+i]=tmp;
72					j++;
73				};
74			};
75			for(i=0;i<j;i++) V[I[k+i]]=k+j-1;
76			if(j==1) I[k]=-1;
77		};
78		return;
79	};
80
81	x=V[I[start+len/2]+h];
82	jj=0;kk=0;
83	for(i=start;i<start+len;i++) {
84		if(V[I[i]+h]<x) jj++;
85		if(V[I[i]+h]==x) kk++;
86	};
87	jj+=start;kk+=jj;
88
89	i=start;j=0;k=0;
90	while(i<jj) {
91		if(V[I[i]+h]<x) {
92			i++;
93		} else if(V[I[i]+h]==x) {
94			tmp=I[i];I[i]=I[jj+j];I[jj+j]=tmp;
95			j++;
96		} else {
97			tmp=I[i];I[i]=I[kk+k];I[kk+k]=tmp;
98			k++;
99		};
100	};
101
102	while(jj+j<kk) {
103		if(V[I[jj+j]+h]==x) {
104			j++;
105		} else {
106			tmp=I[jj+j];I[jj+j]=I[kk+k];I[kk+k]=tmp;
107			k++;
108		};
109	};
110
111	if(jj>start) split(I,V,start,jj-start,h);
112
113	for(i=0;i<kk-jj;i++) V[I[jj+i]]=kk-1;
114	if(jj==kk-1) I[jj]=-1;
115
116	if(start+len>kk) split(I,V,kk,start+len-kk,h);
117}
118
119static void qsufsort(off_t *I,off_t *V,u_char *old,off_t oldsize)
120{
121	off_t buckets[256];
122	off_t i,h,len;
123
124	for(i=0;i<256;i++) buckets[i]=0;
125	for(i=0;i<oldsize;i++) buckets[old[i]]++;
126	for(i=1;i<256;i++) buckets[i]+=buckets[i-1];
127	for(i=255;i>0;i--) buckets[i]=buckets[i-1];
128	buckets[0]=0;
129
130	for(i=0;i<oldsize;i++) I[++buckets[old[i]]]=i;
131	I[0]=oldsize;
132	for(i=0;i<oldsize;i++) V[i]=buckets[old[i]];
133	V[oldsize]=0;
134	for(i=1;i<256;i++) if(buckets[i]==buckets[i-1]+1) I[buckets[i]]=-1;
135	I[0]=-1;
136
137	for(h=1;I[0]!=-(oldsize+1);h+=h) {
138		len=0;
139		for(i=0;i<oldsize+1;) {
140			if(I[i]<0) {
141				len-=I[i];
142				i-=I[i];
143			} else {
144				if(len) I[i-len]=-len;
145				len=V[I[i]]+1-i;
146				split(I,V,i,len,h);
147				i+=len;
148				len=0;
149			};
150		};
151		if(len) I[i-len]=-len;
152	};
153
154	for(i=0;i<oldsize+1;i++) I[V[i]]=i;
155}
156
157static off_t matchlen(u_char *old,off_t oldsize,u_char *new,off_t newsize)
158{
159	off_t i;
160
161	for(i=0;(i<oldsize)&&(i<newsize);i++)
162		if(old[i]!=new[i]) break;
163
164	return i;
165}
166
167static off_t search(off_t *I,u_char *old,off_t oldsize,
168		u_char *new,off_t newsize,off_t st,off_t en,off_t *pos)
169{
170	off_t x,y;
171
172	if(en-st<2) {
173		x=matchlen(old+I[st],oldsize-I[st],new,newsize);
174		y=matchlen(old+I[en],oldsize-I[en],new,newsize);
175
176		if(x>y) {
177			*pos=I[st];
178			return x;
179		} else {
180			*pos=I[en];
181			return y;
182		}
183	};
184
185	x=st+(en-st)/2;
186	if(memcmp(old+I[x],new,MIN(oldsize-I[x],newsize))<0) {
187		return search(I,old,oldsize,new,newsize,x,en,pos);
188	} else {
189		return search(I,old,oldsize,new,newsize,st,x,pos);
190	};
191}
192
193static inline void offtout(off_t x,u_char *buf)
194{
195	*((off_t*)buf) = htole64(x);
196}
197
198/* zlib provides compress2, which deflates to deflate (zlib) format. This is
199 * unfortunately distinct from gzip format in that the headers wrapping the
200 * decompressed data are different. gbspatch reads gzip-compressed data using
201 * the file-oriented gzread interface, which only supports gzip format.
202 * compress2gzip is identical to zlib's compress2 except that it produces gzip
203 * output compatible with gzread. This change is achieved by calling
204 * deflateInit2 instead of deflateInit and specifying 31 for windowBits;
205 * numbers greater than 15 cause the addition of a gzip wrapper. */
206static int compress2gzip(Bytef *dest, uLongf *destLen,
207                         const Bytef *source, uLong sourceLen, int level)
208{
209	z_stream stream;
210	int err;
211
212	stream.next_in = (Bytef*)source;
213	stream.avail_in = (uInt)sourceLen;
214
215	stream.next_out = dest;
216	stream.avail_out = (uInt)*destLen;
217	if ((uLong)stream.avail_out != *destLen) return Z_BUF_ERROR;
218
219	stream.zalloc = (alloc_func)0;
220	stream.zfree = (free_func)0;
221	stream.opaque = (voidpf)0;
222
223	err = deflateInit2(&stream,
224	                   level, Z_DEFLATED, 31, 8, Z_DEFAULT_STRATEGY);
225	if (err != Z_OK) return err;
226
227	err = deflate(&stream, Z_FINISH);
228	if (err != Z_STREAM_END) {
229		deflateEnd(&stream);
230		return err == Z_OK ? Z_BUF_ERROR : err;
231	}
232	*destLen = stream.total_out;
233
234	err = deflateEnd(&stream);
235	return err;
236}
237
238/* Recompress buf of size buf_len using bzip2 or gzip. The smallest version is
239 * used. The original uncompressed variant may be the smallest. Returns a
240 * number identifying the encoding, 1 for uncompressed, 2 for bzip2, 3 for
241 * gzip, and 4 for xz/lzma2. If the original uncompressed variant is not
242 * smallest, it is freed. The caller must free any buf after this function
243 * returns. */
244static char make_small(u_char **buf, off_t *buf_len)
245{
246	u_char *source = *buf;
247	off_t source_len = *buf_len;
248	u_char *bz2, *gz, *lzma;
249	unsigned int bz2_len;
250	size_t gz_len, lzma_len, lzma_pos;
251	int bz2_err, gz_err;
252	lzma_ret lzma_err;
253	lzma_check lzma_ck;
254	char smallest;
255
256	smallest = 1;
257
258	bz2_len = source_len + 1;
259	bz2 = malloc(bz2_len);
260	bz2_err = BZ2_bzBuffToBuffCompress((char*)bz2, &bz2_len, (char*)source,
261	                                   source_len, 9, 0, 0);
262	if (bz2_err == BZ_OK) {
263		if (bz2_len < *buf_len) {
264			smallest = 2;
265			*buf = bz2;
266			*buf_len = bz2_len;
267		} else {
268			free(bz2);
269			bz2 = NULL;
270		}
271	} else if (bz2_err == BZ_OUTBUFF_FULL) {
272		free(bz2);
273		bz2 = NULL;
274	} else {
275		errx(1, "BZ2_bzBuffToBuffCompress: %d", bz2_err);
276	}
277
278	gz_len = source_len + 1;
279	gz = malloc(gz_len);
280	gz_err = compress2gzip(gz, &gz_len, source, source_len, 9);
281	if (gz_err == Z_OK) {
282		if (gz_len < *buf_len) {
283			smallest = 3;
284			*buf = gz;
285			*buf_len = gz_len;
286		} else {
287			free(gz);
288			gz = NULL;
289		}
290	} else if (gz_err == Z_BUF_ERROR) {
291		free(gz);
292		gz = NULL;
293	} else {
294		errx(1, "compress2gzip: %d", gz_err);
295	}
296
297	lzma_len = source_len + 1;
298	lzma = malloc(lzma_len);
299	lzma_pos = 0;
300
301	/* Equivalent to the options used by xz -9 -e. */
302	lzma_ck = LZMA_CHECK_CRC64;
303	if (!lzma_check_is_supported(lzma_ck))
304		lzma_ck = LZMA_CHECK_CRC32;
305	lzma_err = lzma_easy_buffer_encode(9 | LZMA_PRESET_EXTREME,
306	                                   lzma_ck, NULL,
307	                                   source, source_len,
308	                                   lzma, &lzma_pos, lzma_len);
309	if (lzma_err == LZMA_OK) {
310		if (lzma_pos < *buf_len) {
311			smallest = 4;
312			*buf = lzma;
313			*buf_len = lzma_pos;
314		} else {
315			free(lzma);
316			lzma = NULL;
317		}
318	} else if (lzma_err == LZMA_BUF_ERROR) {
319		free(lzma);
320		lzma = NULL;
321	} else {
322		errx(1, "lzma_easy_buffer_encode: %d", lzma_err);
323	}
324
325	if (smallest != 1) {
326		free(source);
327	}
328
329	return smallest;
330}
331
332int main(int argc,char *argv[])
333{
334	int fd;
335	u_char *old,*new;
336	off_t oldsize,newsize;
337	off_t *I,*V;
338	off_t scan,pos,len;
339	off_t lastscan,lastpos,lastoffset;
340	off_t oldscore,scsc;
341	off_t s,Sf,lenf,Sb,lenb;
342	off_t overlap,Ss,lens;
343	off_t i;
344	off_t cblen, dblen, eblen;
345	u_char *cb, *db, *eb;
346	u_char header[96];
347	FILE * pf;
348
349	if(argc!=4) errx(1,"usage: %s oldfile newfile patchfile",argv[0]);
350
351	/* Allocate oldsize+1 bytes instead of oldsize bytes to ensure
352		that we never try to malloc(0) and get a NULL pointer */
353	if(((fd=open(argv[1],O_RDONLY,0))<0) ||
354		((oldsize=lseek(fd,0,SEEK_END))==-1) ||
355		((old=malloc(oldsize+1))==NULL) ||
356		(lseek(fd,0,SEEK_SET)!=0) ||
357		(read(fd,old,oldsize)!=oldsize) ||
358		(close(fd)==-1)) err(1,"%s",argv[1]);
359
360	if(((I=malloc((oldsize+1)*sizeof(off_t)))==NULL) ||
361		((V=malloc((oldsize+1)*sizeof(off_t)))==NULL)) err(1,NULL);
362
363	qsufsort(I,V,old,oldsize);
364
365	free(V);
366
367	/* Allocate newsize+1 bytes instead of newsize bytes to ensure
368		that we never try to malloc(0) and get a NULL pointer */
369	if(((fd=open(argv[2],O_RDONLY,0))<0) ||
370		((newsize=lseek(fd,0,SEEK_END))==-1) ||
371		((new=malloc(newsize+1))==NULL) ||
372		(lseek(fd,0,SEEK_SET)!=0) ||
373		(read(fd,new,newsize)!=newsize) ||
374		(close(fd)==-1)) err(1,"%s",argv[2]);
375
376	if(((cb=malloc(newsize+1))==NULL) ||
377		((db=malloc(newsize+1))==NULL) ||
378		((eb=malloc(newsize+1))==NULL)) err(1,NULL);
379	cblen=0;
380	dblen=0;
381	eblen=0;
382
383	/* Create the patch file */
384	if ((pf = fopen(argv[3], "wb")) == NULL)
385		err(1, "%s", argv[3]);
386
387	/* File format:
388		0	8	"BSDIFF4G"
389		8	8	length of compressed control block (x)
390		16	8	length of compressed diff block (y)
391		24	8	length of compressed extra block (z)
392		32	8	length of old file
393		40	8	length of new file
394		48	20	SHA1 of old file
395		68	20	SHA1 of new file
396		88	1	encoding of control block
397		89	1	encoding of diff block
398		90	1	encoding of extra block
399		91	5	unused
400		96	x	compressed control block
401		96+x	y	compressed diff block
402		96+x+y	z	compressed extra block
403	Encodings are 1 (uncompressed), 2 (bzip2), 3 (gzip), and 4 (xz/lzma2).
404	*/
405
406	memset(header, 0, sizeof(header));
407	if (fwrite(header, sizeof(header), 1, pf) != 1)
408		err(1, "fwrite(%s)", argv[3]);
409	memcpy(header, "BSDIFF4G", 8);
410	offtout(oldsize, header + 32);
411	offtout(newsize, header + 40);
412	SHA1(old, oldsize, header + 48);
413	SHA1(new, newsize, header + 68);
414
415	/* Compute the differences */
416	scan=0;len=0;
417	lastscan=0;lastpos=0;lastoffset=0;
418	while(scan<newsize) {
419		oldscore=0;
420
421		for(scsc=scan+=len;scan<newsize;scan++) {
422			len=search(I,old,oldsize,new+scan,newsize-scan,
423					0,oldsize,&pos);
424
425			for(;scsc<scan+len;scsc++)
426			if((scsc+lastoffset<oldsize) &&
427				(old[scsc+lastoffset] == new[scsc]))
428				oldscore++;
429
430			if(((len==oldscore) && (len!=0)) ||
431				(len>oldscore+8)) break;
432
433			if((scan+lastoffset<oldsize) &&
434				(old[scan+lastoffset] == new[scan]))
435				oldscore--;
436		};
437
438		if((len!=oldscore) || (scan==newsize)) {
439			s=0;Sf=0;lenf=0;
440			for(i=0;(lastscan+i<scan)&&(lastpos+i<oldsize);) {
441				if(old[lastpos+i]==new[lastscan+i]) s++;
442				i++;
443				if(s*2-i>Sf*2-lenf) { Sf=s; lenf=i; };
444			};
445
446			lenb=0;
447			if(scan<newsize) {
448				s=0;Sb=0;
449				for(i=1;(scan>=lastscan+i)&&(pos>=i);i++) {
450					if(old[pos-i]==new[scan-i]) s++;
451					if(s*2-i>Sb*2-lenb) { Sb=s; lenb=i; };
452				};
453			};
454
455			if(lastscan+lenf>scan-lenb) {
456				overlap=(lastscan+lenf)-(scan-lenb);
457				s=0;Ss=0;lens=0;
458				for(i=0;i<overlap;i++) {
459					if(new[lastscan+lenf-overlap+i]==
460					   old[lastpos+lenf-overlap+i]) s++;
461					if(new[scan-lenb+i]==
462					   old[pos-lenb+i]) s--;
463					if(s>Ss) { Ss=s; lens=i+1; };
464				};
465
466				lenf+=lens-overlap;
467				lenb-=lens;
468			};
469
470			for(i=0;i<lenf;i++)
471				db[dblen+i]=new[lastscan+i]-old[lastpos+i];
472			for(i=0;i<(scan-lenb)-(lastscan+lenf);i++)
473				eb[eblen+i]=new[lastscan+lenf+i];
474
475			dblen+=lenf;
476			eblen+=(scan-lenb)-(lastscan+lenf);
477
478			offtout(lenf, cb + cblen);
479			cblen += 8;
480
481			offtout((scan - lenb) - (lastscan + lenf), cb + cblen);
482			cblen += 8;
483
484			offtout((pos - lenb) - (lastpos + lenf), cb + cblen);
485			cblen += 8;
486
487			lastscan=scan-lenb;
488			lastpos=pos-lenb;
489			lastoffset=pos-scan;
490		};
491	};
492
493	header[88] = make_small(&cb, &cblen);
494	header[89] = make_small(&db, &dblen);
495	header[90] = make_small(&eb, &eblen);
496
497	if (fwrite(cb, 1, cblen, pf) != cblen)
498		err(1, "fwrite");
499	if (fwrite(db, 1, dblen, pf) != dblen)
500		err(1, "fwrite");
501	if (fwrite(eb, 1, eblen, pf) != eblen)
502		err(1, "fwrite");
503
504	offtout(cblen, header + 8);
505	offtout(dblen, header + 16);
506	offtout(eblen, header + 24);
507
508	/* Seek to the beginning, write the header, and close the file */
509	if (fseeko(pf, 0, SEEK_SET))
510		err(1, "fseeko");
511	if (fwrite(header, sizeof(header), 1, pf) != 1)
512		err(1, "fwrite(%s)", argv[3]);
513	if (fclose(pf))
514		err(1, "fclose");
515
516	/* Free the memory we used */
517	free(cb);
518	free(db);
519	free(eb);
520	free(I);
521	free(old);
522	free(new);
523
524	return 0;
525}
526