bsdiff.c (9069B)
1 /* vim:set ts=8 sw=8 sts=8 noet: */ 2 /* 3 bsdiff.c -- Binary patch generator. 4 5 Copyright 2003 Colin Percival 6 7 For the terms under which this work may be distributed, please see 8 the adjoining file "LICENSE". 9 10 ChangeLog: 11 2005-05-05 - Use the modified header struct from bspatch.h; use 32-bit 12 values throughout. 13 --Benjamin Smedberg <benjamin@smedbergs.us> 14 2005-05-18 - Use the same CRC algorithm as bzip2, and leverage the CRC table 15 provided by libbz2. 16 --Darin Fisher <darin@meer.net> 17 */ 18 19 #include "bspatch.h" 20 21 #include <stdlib.h> 22 #include <stdio.h> 23 #include <string.h> 24 #include <fcntl.h> 25 #include <stdarg.h> 26 #ifdef XP_WIN 27 #include <io.h> 28 #include <winsock2.h> 29 #define open _open 30 #define close _close 31 #define read _read 32 #define lseek _lseek 33 #define write _write 34 #else 35 #include <unistd.h> 36 #include <arpa/inet.h> 37 #define _O_BINARY 0 38 #endif 39 40 #include "crctable.h" 41 42 #undef MIN 43 #define MIN(x,y) (((x)<(y)) ? (x) : (y)) 44 45 /*---------------------------------------------------------------------------*/ 46 47 /* This variable lives in libbz2. It's declared in bzlib_private.h, so we just 48 * declare it here to avoid including that entire header file. 49 */ 50 extern unsigned int BZ2_crc32Table[256]; 51 52 static unsigned int 53 crc32(const unsigned char *buf, unsigned int len) 54 { 55 unsigned int crc = 0xffffffffL; 56 57 const unsigned char *end = buf + len; 58 for (; buf != end; ++buf) 59 crc = (crc << 8) ^ BZ2_crc32Table[(crc >> 24) ^ *buf]; 60 61 crc = ~crc; 62 return crc; 63 } 64 65 /*---------------------------------------------------------------------------*/ 66 67 static void 68 reporterr(int e, const char *fmt, ...) 69 { 70 if (fmt) { 71 va_list args; 72 va_start(args, fmt); 73 vfprintf(stderr, fmt, args); 74 va_end(args); 75 } 76 77 exit(e); 78 } 79 80 static void 81 split(int32_t *I,int32_t *V,int32_t start,int32_t len,int32_t h) 82 { 83 int32_t i,j,k,x,tmp,jj,kk; 84 85 if(len<16) { 86 for(k=start;k<start+len;k+=j) { 87 j=1;x=V[I[k]+h]; 88 for(i=1;k+i<start+len;i++) { 89 if(V[I[k+i]+h]<x) { 90 x=V[I[k+i]+h]; 91 j=0; 92 }; 93 if(V[I[k+i]+h]==x) { 94 tmp=I[k+j];I[k+j]=I[k+i];I[k+i]=tmp; 95 j++; 96 }; 97 }; 98 for(i=0;i<j;i++) V[I[k+i]]=k+j-1; 99 if(j==1) I[k]=-1; 100 }; 101 return; 102 }; 103 104 x=V[I[start+len/2]+h]; 105 jj=0;kk=0; 106 for(i=start;i<start+len;i++) { 107 if(V[I[i]+h]<x) jj++; 108 if(V[I[i]+h]==x) kk++; 109 }; 110 jj+=start;kk+=jj; 111 112 i=start;j=0;k=0; 113 while(i<jj) { 114 if(V[I[i]+h]<x) { 115 i++; 116 } else if(V[I[i]+h]==x) { 117 tmp=I[i];I[i]=I[jj+j];I[jj+j]=tmp; 118 j++; 119 } else { 120 tmp=I[i];I[i]=I[kk+k];I[kk+k]=tmp; 121 k++; 122 }; 123 }; 124 125 while(jj+j<kk) { 126 if(V[I[jj+j]+h]==x) { 127 j++; 128 } else { 129 tmp=I[jj+j];I[jj+j]=I[kk+k];I[kk+k]=tmp; 130 k++; 131 }; 132 }; 133 134 if(jj>start) split(I,V,start,jj-start,h); 135 136 for(i=0;i<kk-jj;i++) V[I[jj+i]]=kk-1; 137 if(jj==kk-1) I[jj]=-1; 138 139 if(start+len>kk) split(I,V,kk,start+len-kk,h); 140 } 141 142 static void 143 qsufsort(int32_t *I,int32_t *V,unsigned char *old,int32_t oldsize) 144 { 145 int32_t buckets[256]; 146 int32_t i,h,len; 147 148 for(i=0;i<256;i++) buckets[i]=0; 149 for(i=0;i<oldsize;i++) buckets[old[i]]++; 150 for(i=1;i<256;i++) buckets[i]+=buckets[i-1]; 151 for(i=255;i>0;i--) buckets[i]=buckets[i-1]; 152 buckets[0]=0; 153 154 for(i=0;i<oldsize;i++) I[++buckets[old[i]]]=i; 155 I[0]=oldsize; 156 for(i=0;i<oldsize;i++) V[i]=buckets[old[i]]; 157 V[oldsize]=0; 158 for(i=1;i<256;i++) if(buckets[i]==buckets[i-1]+1) I[buckets[i]]=-1; 159 I[0]=-1; 160 161 for(h=1;I[0]!=-(oldsize+1);h+=h) { 162 len=0; 163 for(i=0;i<oldsize+1;) { 164 if(I[i]<0) { 165 len-=I[i]; 166 i-=I[i]; 167 } else { 168 if(len) I[i-len]=-len; 169 len=V[I[i]]+1-i; 170 split(I,V,i,len,h); 171 i+=len; 172 len=0; 173 }; 174 }; 175 if(len) I[i-len]=-len; 176 }; 177 178 for(i=0;i<oldsize+1;i++) I[V[i]]=i; 179 } 180 181 static int32_t 182 matchlen(unsigned char *old,int32_t oldsize,unsigned char *newbuf,int32_t newsize) 183 { 184 int32_t i; 185 186 for(i=0;(i<oldsize)&&(i<newsize);i++) 187 if(old[i]!=newbuf[i]) break; 188 189 return i; 190 } 191 192 static int32_t 193 search(int32_t *I,unsigned char *old,int32_t oldsize, 194 unsigned char *newbuf,int32_t newsize,int32_t st,int32_t en,int32_t *pos) 195 { 196 int32_t x,y; 197 198 if(en-st<2) { 199 x=matchlen(old+I[st],oldsize-I[st],newbuf,newsize); 200 y=matchlen(old+I[en],oldsize-I[en],newbuf,newsize); 201 202 if(x>y) { 203 *pos=I[st]; 204 return x; 205 } else { 206 *pos=I[en]; 207 return y; 208 } 209 }; 210 211 x=st+(en-st)/2; 212 if(memcmp(old+I[x],newbuf,MIN(oldsize-I[x],newsize))<0) { 213 return search(I,old,oldsize,newbuf,newsize,x,en,pos); 214 } else { 215 return search(I,old,oldsize,newbuf,newsize,st,x,pos); 216 }; 217 } 218 219 int main(int argc,char *argv[]) 220 { 221 int fd; 222 unsigned char *old,*newbuf; 223 int32_t oldsize,newsize; 224 int32_t *I,*V; 225 226 int32_t scan,pos,len; 227 int32_t lastscan,lastpos,lastoffset; 228 int32_t oldscore,scsc; 229 230 int32_t s,Sf,lenf,Sb,lenb; 231 int32_t overlap,Ss,lens; 232 int32_t i; 233 234 int32_t dblen,eblen; 235 unsigned char *db,*eb; 236 237 unsigned int scrc; 238 239 MBSPatchHeader header = { 240 {'M','B','D','I','F','F','1','0'}, 241 0, 0, 0, 0, 0, 0 242 }; 243 244 uint32_t numtriples; 245 246 if(argc!=4) 247 reporterr(1,"usage: %s <oldfile> <newfile> <patchfile>\n",argv[0]); 248 249 /* Allocate oldsize+1 bytes instead of oldsize bytes to ensure 250 that we never try to malloc(0) and get a NULL pointer */ 251 if(((fd=open(argv[1],O_RDONLY|_O_BINARY,0))<0) || 252 ((oldsize=lseek(fd,0,SEEK_END))==-1) || 253 ((old=(unsigned char*) malloc(oldsize+1))==NULL) || 254 (lseek(fd,0,SEEK_SET)!=0) || 255 (read(fd,old,oldsize)!=oldsize) || 256 (close(fd)==-1)) 257 reporterr(1,"%s\n",argv[1]); 258 259 scrc = crc32(old, oldsize); 260 261 if(((I=(int32_t*) malloc((oldsize+1)*sizeof(int32_t)))==NULL) || 262 ((V=(int32_t*) malloc((oldsize+1)*sizeof(int32_t)))==NULL)) 263 reporterr(1,NULL); 264 265 qsufsort(I,V,old,oldsize); 266 267 free(V); 268 269 /* Allocate newsize+1 bytes instead of newsize bytes to ensure 270 that we never try to malloc(0) and get a NULL pointer */ 271 if(((fd=open(argv[2],O_RDONLY|_O_BINARY,0))<0) || 272 ((newsize=lseek(fd,0,SEEK_END))==-1) || 273 ((newbuf=(unsigned char*) malloc(newsize+1))==NULL) || 274 (lseek(fd,0,SEEK_SET)!=0) || 275 (read(fd,newbuf,newsize)!=newsize) || 276 (close(fd)==-1)) reporterr(1,"%s\n",argv[2]); 277 278 if(((db=(unsigned char*) malloc(newsize+1))==NULL) || 279 ((eb=(unsigned char*) malloc(newsize+1))==NULL)) 280 reporterr(1,NULL); 281 282 dblen=0; 283 eblen=0; 284 285 if((fd=open(argv[3],O_CREAT|O_TRUNC|O_WRONLY|_O_BINARY,0666))<0) 286 reporterr(1,"%s\n",argv[3]); 287 288 /* start writing here */ 289 290 /* We don't know the lengths yet, so we will write the header again 291 at the end */ 292 293 if(write(fd,&header,sizeof(MBSPatchHeader))!=sizeof(MBSPatchHeader)) 294 reporterr(1,"%s\n",argv[3]); 295 296 scan=0;len=0; 297 lastscan=0;lastpos=0;lastoffset=0; 298 numtriples = 0; 299 while(scan<newsize) { 300 oldscore=0; 301 302 for(scsc=scan+=len;scan<newsize;scan++) { 303 len=search(I,old,oldsize,newbuf+scan,newsize-scan, 304 0,oldsize,&pos); 305 306 for(;scsc<scan+len;scsc++) 307 if((scsc+lastoffset<oldsize) && 308 (old[scsc+lastoffset] == newbuf[scsc])) 309 oldscore++; 310 311 if(((len==oldscore) && (len!=0)) || 312 (len>oldscore+10)) break; 313 314 if((scan+lastoffset<oldsize) && 315 (old[scan+lastoffset] == newbuf[scan])) 316 oldscore--; 317 }; 318 319 if((len!=oldscore) || (scan==newsize)) { 320 MBSPatchTriple triple; 321 322 s=0;Sf=0;lenf=0; 323 for(i=0;(lastscan+i<scan)&&(lastpos+i<oldsize);) { 324 if(old[lastpos+i]==newbuf[lastscan+i]) s++; 325 i++; 326 if(s*3-i*2>Sf*3-lenf*2) { Sf=s; lenf=i; }; 327 }; 328 329 lenb=0; 330 if(scan<newsize) { 331 s=0;Sb=0; 332 for(i=1;(scan>=lastscan+i)&&(pos>=i);i++) { 333 if(old[pos-i]==newbuf[scan-i]) s++; 334 if(s*3-i*2>Sb*3-lenb*2) { Sb=s; lenb=i; }; 335 }; 336 }; 337 338 if(lastscan+lenf>scan-lenb) { 339 overlap=(lastscan+lenf)-(scan-lenb); 340 s=0;Ss=0;lens=0; 341 for(i=0;i<overlap;i++) { 342 if(newbuf[lastscan+lenf-overlap+i]== 343 old[lastpos+lenf-overlap+i]) s++; 344 if(newbuf[scan-lenb+i]== 345 old[pos-lenb+i]) s--; 346 if(s>Ss) { Ss=s; lens=i+1; }; 347 }; 348 349 lenf+=lens-overlap; 350 lenb-=lens; 351 }; 352 353 for(i=0;i<lenf;i++) 354 db[dblen+i]=newbuf[lastscan+i]-old[lastpos+i]; 355 for(i=0;i<(scan-lenb)-(lastscan+lenf);i++) 356 eb[eblen+i]=newbuf[lastscan+lenf+i]; 357 358 dblen+=lenf; 359 eblen+=(scan-lenb)-(lastscan+lenf); 360 361 triple.x = htonl(lenf); 362 triple.y = htonl((scan-lenb)-(lastscan+lenf)); 363 triple.z = htonl((pos-lenb)-(lastpos+lenf)); 364 if (write(fd,&triple,sizeof(triple)) != sizeof(triple)) 365 reporterr(1,NULL); 366 367 #ifdef DEBUG_bsmedberg 368 printf("Writing a block:\n" 369 " X: %u\n" 370 " Y: %u\n" 371 " Z: %i\n", 372 (uint32_t) lenf, 373 (uint32_t) ((scan-lenb)-(lastscan+lenf)), 374 (uint32_t) ((pos-lenb)-(lastpos+lenf))); 375 #endif 376 377 ++numtriples; 378 379 lastscan=scan-lenb; 380 lastpos=pos-lenb; 381 lastoffset=pos-scan; 382 }; 383 }; 384 385 if(write(fd,db,dblen)!=dblen) 386 reporterr(1,NULL); 387 388 if(write(fd,eb,eblen)!=eblen) 389 reporterr(1,NULL); 390 391 header.slen = htonl(oldsize); 392 header.scrc32 = htonl(scrc); 393 header.dlen = htonl(newsize); 394 header.cblen = htonl(numtriples * sizeof(MBSPatchTriple)); 395 header.difflen = htonl(dblen); 396 header.extralen = htonl(eblen); 397 398 if (lseek(fd,0,SEEK_SET) == -1 || 399 write(fd,&header,sizeof(header)) != sizeof(header) || 400 close(fd) == -1) 401 reporterr(1,NULL); 402 403 free(db); 404 free(eb); 405 free(I); 406 free(old); 407 free(newbuf); 408 409 return 0; 410 }