tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

bsdiff.c (9069B)


      1 /* vim:set ts=8 sw=8 sts=8 noet: */
      2 /*
      3  bsdiff.c -- Binary patch generator.
      4 
      5  Copyright 2003 Colin Percival
      6 
      7  For the terms under which this work may be distributed, please see
      8  the adjoining file "LICENSE".
      9 
     10  ChangeLog:
     11  2005-05-05 - Use the modified header struct from bspatch.h; use 32-bit
     12               values throughout.
     13                 --Benjamin Smedberg <benjamin@smedbergs.us>
     14  2005-05-18 - Use the same CRC algorithm as bzip2, and leverage the CRC table
     15               provided by libbz2.
     16                 --Darin Fisher <darin@meer.net>
     17 */
     18 
     19 #include "bspatch.h"
     20 
     21 #include <stdlib.h>
     22 #include <stdio.h>
     23 #include <string.h>
     24 #include <fcntl.h>
     25 #include <stdarg.h>
     26 #ifdef XP_WIN
     27 #include <io.h>
     28 #include <winsock2.h>
     29 #define open _open
     30 #define close _close
     31 #define read _read
     32 #define lseek _lseek
     33 #define write _write
     34 #else
     35 #include <unistd.h>
     36 #include <arpa/inet.h>
     37 #define _O_BINARY 0
     38 #endif
     39 
     40 #include "crctable.h"
     41 
     42 #undef MIN
     43 #define MIN(x,y) (((x)<(y)) ? (x) : (y))
     44 
     45 /*---------------------------------------------------------------------------*/
     46 
     47 /* This variable lives in libbz2.  It's declared in bzlib_private.h, so we just
     48 * declare it here to avoid including that entire header file.
     49 */
     50 extern unsigned int BZ2_crc32Table[256];
     51 
     52 static unsigned int
     53 crc32(const unsigned char *buf, unsigned int len)
     54 {
     55 unsigned int crc = 0xffffffffL;
     56 
     57 const unsigned char *end = buf + len;
     58 for (; buf != end; ++buf)
     59 	crc = (crc << 8) ^ BZ2_crc32Table[(crc >> 24) ^ *buf];
     60 
     61 crc = ~crc;
     62 return crc;
     63 }
     64 
     65 /*---------------------------------------------------------------------------*/
     66 
     67 static void
     68 reporterr(int e, const char *fmt, ...)
     69 {
     70 if (fmt) {
     71 	va_list args;
     72 	va_start(args, fmt);
     73 	vfprintf(stderr, fmt, args);
     74 	va_end(args);
     75 }
     76 
     77 exit(e);
     78 }
     79 
     80 static void
     81 split(int32_t *I,int32_t *V,int32_t start,int32_t len,int32_t h)
     82 {
     83 int32_t i,j,k,x,tmp,jj,kk;
     84 
     85 if(len<16) {
     86 	for(k=start;k<start+len;k+=j) {
     87 		j=1;x=V[I[k]+h];
     88 		for(i=1;k+i<start+len;i++) {
     89 			if(V[I[k+i]+h]<x) {
     90 				x=V[I[k+i]+h];
     91 				j=0;
     92 			};
     93 			if(V[I[k+i]+h]==x) {
     94 				tmp=I[k+j];I[k+j]=I[k+i];I[k+i]=tmp;
     95 				j++;
     96 			};
     97 		};
     98 		for(i=0;i<j;i++) V[I[k+i]]=k+j-1;
     99 		if(j==1) I[k]=-1;
    100 	};
    101 	return;
    102 };
    103 
    104 x=V[I[start+len/2]+h];
    105 jj=0;kk=0;
    106 for(i=start;i<start+len;i++) {
    107 	if(V[I[i]+h]<x) jj++;
    108 	if(V[I[i]+h]==x) kk++;
    109 };
    110 jj+=start;kk+=jj;
    111 
    112 i=start;j=0;k=0;
    113 while(i<jj) {
    114 	if(V[I[i]+h]<x) {
    115 		i++;
    116 	} else if(V[I[i]+h]==x) {
    117 		tmp=I[i];I[i]=I[jj+j];I[jj+j]=tmp;
    118 		j++;
    119 	} else {
    120 		tmp=I[i];I[i]=I[kk+k];I[kk+k]=tmp;
    121 		k++;
    122 	};
    123 };
    124 
    125 while(jj+j<kk) {
    126 	if(V[I[jj+j]+h]==x) {
    127 		j++;
    128 	} else {
    129 		tmp=I[jj+j];I[jj+j]=I[kk+k];I[kk+k]=tmp;
    130 		k++;
    131 	};
    132 };
    133 
    134 if(jj>start) split(I,V,start,jj-start,h);
    135 
    136 for(i=0;i<kk-jj;i++) V[I[jj+i]]=kk-1;
    137 if(jj==kk-1) I[jj]=-1;
    138 
    139 if(start+len>kk) split(I,V,kk,start+len-kk,h);
    140 }
    141 
    142 static void
    143 qsufsort(int32_t *I,int32_t *V,unsigned char *old,int32_t oldsize)
    144 {
    145 int32_t buckets[256];
    146 int32_t i,h,len;
    147 
    148 for(i=0;i<256;i++) buckets[i]=0;
    149 for(i=0;i<oldsize;i++) buckets[old[i]]++;
    150 for(i=1;i<256;i++) buckets[i]+=buckets[i-1];
    151 for(i=255;i>0;i--) buckets[i]=buckets[i-1];
    152 buckets[0]=0;
    153 
    154 for(i=0;i<oldsize;i++) I[++buckets[old[i]]]=i;
    155 I[0]=oldsize;
    156 for(i=0;i<oldsize;i++) V[i]=buckets[old[i]];
    157 V[oldsize]=0;
    158 for(i=1;i<256;i++) if(buckets[i]==buckets[i-1]+1) I[buckets[i]]=-1;
    159 I[0]=-1;
    160 
    161 for(h=1;I[0]!=-(oldsize+1);h+=h) {
    162 	len=0;
    163 	for(i=0;i<oldsize+1;) {
    164 		if(I[i]<0) {
    165 			len-=I[i];
    166 			i-=I[i];
    167 		} else {
    168 			if(len) I[i-len]=-len;
    169 			len=V[I[i]]+1-i;
    170 			split(I,V,i,len,h);
    171 			i+=len;
    172 			len=0;
    173 		};
    174 	};
    175 	if(len) I[i-len]=-len;
    176 };
    177 
    178 for(i=0;i<oldsize+1;i++) I[V[i]]=i;
    179 }
    180 
    181 static int32_t
    182 matchlen(unsigned char *old,int32_t oldsize,unsigned char *newbuf,int32_t newsize)
    183 {
    184 int32_t i;
    185 
    186 for(i=0;(i<oldsize)&&(i<newsize);i++)
    187 	if(old[i]!=newbuf[i]) break;
    188 
    189 return i;
    190 }
    191 
    192 static int32_t
    193 search(int32_t *I,unsigned char *old,int32_t oldsize,
    194       unsigned char *newbuf,int32_t newsize,int32_t st,int32_t en,int32_t *pos)
    195 {
    196 int32_t x,y;
    197 
    198 if(en-st<2) {
    199 	x=matchlen(old+I[st],oldsize-I[st],newbuf,newsize);
    200 	y=matchlen(old+I[en],oldsize-I[en],newbuf,newsize);
    201 
    202 	if(x>y) {
    203 		*pos=I[st];
    204 		return x;
    205 	} else {
    206 		*pos=I[en];
    207 		return y;
    208 	}
    209 };
    210 
    211 x=st+(en-st)/2;
    212 if(memcmp(old+I[x],newbuf,MIN(oldsize-I[x],newsize))<0) {
    213 	return search(I,old,oldsize,newbuf,newsize,x,en,pos);
    214 } else {
    215 	return search(I,old,oldsize,newbuf,newsize,st,x,pos);
    216 };
    217 }
    218 
    219 int main(int argc,char *argv[])
    220 {
    221 int fd;
    222 unsigned char *old,*newbuf;
    223 int32_t oldsize,newsize;
    224 int32_t *I,*V;
    225 
    226 int32_t scan,pos,len;
    227 int32_t lastscan,lastpos,lastoffset;
    228 int32_t oldscore,scsc;
    229 
    230 int32_t s,Sf,lenf,Sb,lenb;
    231 int32_t overlap,Ss,lens;
    232 int32_t i;
    233 
    234 int32_t dblen,eblen;
    235 unsigned char *db,*eb;
    236 
    237 unsigned int scrc;
    238 
    239 MBSPatchHeader header = {
    240 	{'M','B','D','I','F','F','1','0'},
    241 	0, 0, 0, 0, 0, 0
    242 };
    243 
    244 uint32_t numtriples;
    245 
    246 if(argc!=4)
    247 	reporterr(1,"usage: %s <oldfile> <newfile> <patchfile>\n",argv[0]);
    248 
    249 /* Allocate oldsize+1 bytes instead of oldsize bytes to ensure
    250 	that we never try to malloc(0) and get a NULL pointer */
    251 if(((fd=open(argv[1],O_RDONLY|_O_BINARY,0))<0) ||
    252 	((oldsize=lseek(fd,0,SEEK_END))==-1) ||
    253 	((old=(unsigned char*) malloc(oldsize+1))==NULL) ||
    254 	(lseek(fd,0,SEEK_SET)!=0) ||
    255 	(read(fd,old,oldsize)!=oldsize) ||
    256 	(close(fd)==-1))
    257 	reporterr(1,"%s\n",argv[1]);
    258 
    259 scrc = crc32(old, oldsize);
    260 
    261 if(((I=(int32_t*) malloc((oldsize+1)*sizeof(int32_t)))==NULL) ||
    262 	((V=(int32_t*) malloc((oldsize+1)*sizeof(int32_t)))==NULL))
    263 	reporterr(1,NULL);
    264 
    265 qsufsort(I,V,old,oldsize);
    266 
    267 free(V);
    268 
    269 /* Allocate newsize+1 bytes instead of newsize bytes to ensure
    270 	that we never try to malloc(0) and get a NULL pointer */
    271 if(((fd=open(argv[2],O_RDONLY|_O_BINARY,0))<0) ||
    272 	((newsize=lseek(fd,0,SEEK_END))==-1) ||
    273 	((newbuf=(unsigned char*) malloc(newsize+1))==NULL) ||
    274 	(lseek(fd,0,SEEK_SET)!=0) ||
    275 	(read(fd,newbuf,newsize)!=newsize) ||
    276 	(close(fd)==-1)) reporterr(1,"%s\n",argv[2]);
    277 
    278 if(((db=(unsigned char*) malloc(newsize+1))==NULL) ||
    279 	((eb=(unsigned char*) malloc(newsize+1))==NULL))
    280 	reporterr(1,NULL);
    281 
    282 dblen=0;
    283 eblen=0;
    284 
    285 if((fd=open(argv[3],O_CREAT|O_TRUNC|O_WRONLY|_O_BINARY,0666))<0)
    286 	reporterr(1,"%s\n",argv[3]);
    287 
    288 /* start writing here */
    289 
    290 /* We don't know the lengths yet, so we will write the header again
    291 	 at the end */
    292 
    293 if(write(fd,&header,sizeof(MBSPatchHeader))!=sizeof(MBSPatchHeader))
    294 	reporterr(1,"%s\n",argv[3]);
    295 
    296 scan=0;len=0;
    297 lastscan=0;lastpos=0;lastoffset=0;
    298 numtriples = 0;
    299 while(scan<newsize) {
    300 	oldscore=0;
    301 
    302 	for(scsc=scan+=len;scan<newsize;scan++) {
    303 		len=search(I,old,oldsize,newbuf+scan,newsize-scan,
    304 				0,oldsize,&pos);
    305 
    306 		for(;scsc<scan+len;scsc++)
    307 		if((scsc+lastoffset<oldsize) &&
    308 			(old[scsc+lastoffset] == newbuf[scsc]))
    309 			oldscore++;
    310 
    311 		if(((len==oldscore) && (len!=0)) || 
    312 			(len>oldscore+10)) break;
    313 
    314 		if((scan+lastoffset<oldsize) &&
    315 			(old[scan+lastoffset] == newbuf[scan]))
    316 			oldscore--;
    317 	};
    318 
    319 	if((len!=oldscore) || (scan==newsize)) {
    320 		MBSPatchTriple triple;
    321 
    322 		s=0;Sf=0;lenf=0;
    323 		for(i=0;(lastscan+i<scan)&&(lastpos+i<oldsize);) {
    324 			if(old[lastpos+i]==newbuf[lastscan+i]) s++;
    325 			i++;
    326 			if(s*3-i*2>Sf*3-lenf*2) { Sf=s; lenf=i; };
    327 		};
    328 
    329 		lenb=0;
    330 		if(scan<newsize) {
    331 			s=0;Sb=0;
    332 			for(i=1;(scan>=lastscan+i)&&(pos>=i);i++) {
    333 				if(old[pos-i]==newbuf[scan-i]) s++;
    334 				if(s*3-i*2>Sb*3-lenb*2) { Sb=s; lenb=i; };
    335 			};
    336 		};
    337 
    338 		if(lastscan+lenf>scan-lenb) {
    339 			overlap=(lastscan+lenf)-(scan-lenb);
    340 			s=0;Ss=0;lens=0;
    341 			for(i=0;i<overlap;i++) {
    342 				if(newbuf[lastscan+lenf-overlap+i]==
    343 				   old[lastpos+lenf-overlap+i]) s++;
    344 				if(newbuf[scan-lenb+i]==
    345 				   old[pos-lenb+i]) s--;
    346 				if(s>Ss) { Ss=s; lens=i+1; };
    347 			};
    348 
    349 			lenf+=lens-overlap;
    350 			lenb-=lens;
    351 		};
    352 
    353 		for(i=0;i<lenf;i++)
    354 			db[dblen+i]=newbuf[lastscan+i]-old[lastpos+i];
    355 		for(i=0;i<(scan-lenb)-(lastscan+lenf);i++)
    356 			eb[eblen+i]=newbuf[lastscan+lenf+i];
    357 
    358 		dblen+=lenf;
    359 		eblen+=(scan-lenb)-(lastscan+lenf);
    360 
    361 		triple.x = htonl(lenf);
    362 		triple.y = htonl((scan-lenb)-(lastscan+lenf));
    363 		triple.z = htonl((pos-lenb)-(lastpos+lenf));
    364 		if (write(fd,&triple,sizeof(triple)) != sizeof(triple))
    365 			reporterr(1,NULL);
    366 
    367 #ifdef DEBUG_bsmedberg
    368 		printf("Writing a block:\n"
    369 		       "	X: %u\n"
    370 		       "	Y: %u\n"
    371 		       "	Z: %i\n",
    372 		       (uint32_t) lenf,
    373 		       (uint32_t) ((scan-lenb)-(lastscan+lenf)),
    374 		       (uint32_t) ((pos-lenb)-(lastpos+lenf)));
    375 #endif
    376 
    377 		++numtriples;
    378 
    379 		lastscan=scan-lenb;
    380 		lastpos=pos-lenb;
    381 		lastoffset=pos-scan;
    382 	};
    383 };
    384 
    385 if(write(fd,db,dblen)!=dblen)
    386 	reporterr(1,NULL);
    387 
    388 if(write(fd,eb,eblen)!=eblen)
    389 	reporterr(1,NULL);
    390 
    391 header.slen	= htonl(oldsize);
    392 header.scrc32	= htonl(scrc);
    393 header.dlen	= htonl(newsize);
    394 header.cblen	= htonl(numtriples * sizeof(MBSPatchTriple));
    395 header.difflen	= htonl(dblen);
    396 header.extralen = htonl(eblen);
    397 
    398 if (lseek(fd,0,SEEK_SET) == -1 ||
    399     write(fd,&header,sizeof(header)) != sizeof(header) ||
    400     close(fd) == -1)
    401 	reporterr(1,NULL);
    402 
    403 free(db);
    404 free(eb);
    405 free(I);
    406 free(old);
    407 free(newbuf);
    408 
    409 return 0;
    410 }