xhistogram.c (9639B)
1 /* 2 * Copyright (C) 2010, Google Inc. 3 * and other copyright owners as documented in JGit's IP log. 4 * 5 * This program and the accompanying materials are made available 6 * under the terms of the Eclipse Distribution License v1.0 which 7 * accompanies this distribution, is reproduced below, and is 8 * available at http://www.eclipse.org/org/documents/edl-v10.php 9 * 10 * All rights reserved. 11 * 12 * Redistribution and use in source and binary forms, with or 13 * without modification, are permitted provided that the following 14 * conditions are met: 15 * 16 * - Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 19 * - Redistributions in binary form must reproduce the above 20 * copyright notice, this list of conditions and the following 21 * disclaimer in the documentation and/or other materials provided 22 * with the distribution. 23 * 24 * - Neither the name of the Eclipse Foundation, Inc. nor the 25 * names of its contributors may be used to endorse or promote 26 * products derived from this software without specific prior 27 * written permission. 28 * 29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND 30 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, 31 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 32 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 33 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 34 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 35 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 36 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 37 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 38 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 39 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 40 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 41 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 42 */ 43 44 #include "xinclude.h" 45 46 #define MAX_PTR INT_MAX 47 #define MAX_CNT INT_MAX 48 49 #define LINE_END(n) (line##n + count##n - 1) 50 #define LINE_END_PTR(n) (*line##n + *count##n - 1) 51 52 struct histindex { 53 struct record { 54 unsigned int ptr, cnt; 55 struct record *next; 56 } **records, /* an occurrence */ 57 **line_map; /* map of line to record chain */ 58 chastore_t rcha; 59 unsigned int *next_ptrs; 60 unsigned int table_bits, 61 records_size, 62 line_map_size; 63 64 unsigned int max_chain_length, 65 key_shift, 66 ptr_shift; 67 68 unsigned int cnt, 69 has_common; 70 71 xdfenv_t *env; 72 xpparam_t const *xpp; 73 }; 74 75 struct region { 76 unsigned int begin1, end1; 77 unsigned int begin2, end2; 78 }; 79 80 #define LINE_MAP(i, a) (i->line_map[(a) - i->ptr_shift]) 81 82 #define NEXT_PTR(index, ptr) \ 83 (index->next_ptrs[(ptr) - index->ptr_shift]) 84 85 #define CNT(index, ptr) \ 86 ((LINE_MAP(index, ptr))->cnt) 87 88 #define REC(env, s, l) \ 89 (env->xdf##s.recs[l - 1]) 90 91 static int cmp_recs(xpparam_t const *xpp, 92 xrecord_t *r1, xrecord_t *r2) 93 { 94 return r1->ha == r2->ha && 95 xdl_recmatch(r1->ptr, r1->size, r2->ptr, r2->size, 96 xpp->flags); 97 } 98 99 #define CMP_ENV(xpp, env, s1, l1, s2, l2) \ 100 (cmp_recs(xpp, REC(env, s1, l1), REC(env, s2, l2))) 101 102 #define CMP(i, s1, l1, s2, l2) \ 103 (cmp_recs(i->xpp, REC(i->env, s1, l1), REC(i->env, s2, l2))) 104 105 #define TABLE_HASH(index, side, line) \ 106 XDL_HASHLONG((REC(index->env, side, line))->ha, index->table_bits) 107 108 static int scanA(struct histindex *index, int line1, int count1) 109 { 110 int ptr, tbl_idx; 111 unsigned int chain_len; 112 struct record **rec_chain, *rec; 113 114 for (ptr = LINE_END(1); line1 <= ptr; ptr--) { 115 tbl_idx = TABLE_HASH(index, 1, ptr); 116 rec_chain = index->records + tbl_idx; 117 rec = *rec_chain; 118 119 chain_len = 0; 120 while (rec) { 121 if (CMP(index, 1, rec->ptr, 1, ptr)) { 122 /* 123 * ptr is identical to another element. Insert 124 * it onto the front of the existing element 125 * chain. 126 */ 127 NEXT_PTR(index, ptr) = rec->ptr; 128 rec->ptr = ptr; 129 /* cap rec->cnt at MAX_CNT */ 130 rec->cnt = XDL_MIN(MAX_CNT, rec->cnt + 1); 131 LINE_MAP(index, ptr) = rec; 132 goto continue_scan; 133 } 134 135 rec = rec->next; 136 chain_len++; 137 } 138 139 if (chain_len == index->max_chain_length) 140 return -1; 141 142 /* 143 * This is the first time we have ever seen this particular 144 * element in the sequence. Construct a new chain for it. 145 */ 146 if (!(rec = xdl_cha_alloc(&index->rcha))) 147 return -1; 148 rec->ptr = ptr; 149 rec->cnt = 1; 150 rec->next = *rec_chain; 151 *rec_chain = rec; 152 LINE_MAP(index, ptr) = rec; 153 154 continue_scan: 155 ; /* no op */ 156 } 157 158 return 0; 159 } 160 161 static int try_lcs(struct histindex *index, struct region *lcs, int b_ptr, 162 int line1, int count1, int line2, int count2) 163 { 164 unsigned int b_next = b_ptr + 1; 165 struct record *rec = index->records[TABLE_HASH(index, 2, b_ptr)]; 166 unsigned int as, ae, bs, be, np, rc; 167 int should_break; 168 169 for (; rec; rec = rec->next) { 170 if (rec->cnt > index->cnt) { 171 if (!index->has_common) 172 index->has_common = CMP(index, 1, rec->ptr, 2, b_ptr); 173 continue; 174 } 175 176 as = rec->ptr; 177 if (!CMP(index, 1, as, 2, b_ptr)) 178 continue; 179 180 index->has_common = 1; 181 for (;;) { 182 should_break = 0; 183 np = NEXT_PTR(index, as); 184 bs = b_ptr; 185 ae = as; 186 be = bs; 187 rc = rec->cnt; 188 189 while (line1 < (int)as && line2 < (int)bs 190 && CMP(index, 1, as - 1, 2, bs - 1)) { 191 as--; 192 bs--; 193 if (1 < rc) 194 rc = XDL_MIN(rc, CNT(index, as)); 195 } 196 while ((int)ae < LINE_END(1) && (int)be < LINE_END(2) 197 && CMP(index, 1, ae + 1, 2, be + 1)) { 198 ae++; 199 be++; 200 if (1 < rc) 201 rc = XDL_MIN(rc, CNT(index, ae)); 202 } 203 204 if (b_next <= be) 205 b_next = be + 1; 206 if (lcs->end1 - lcs->begin1 < ae - as || rc < index->cnt) { 207 lcs->begin1 = as; 208 lcs->begin2 = bs; 209 lcs->end1 = ae; 210 lcs->end2 = be; 211 index->cnt = rc; 212 } 213 214 if (np == 0) 215 break; 216 217 while (np <= ae) { 218 np = NEXT_PTR(index, np); 219 if (np == 0) { 220 should_break = 1; 221 break; 222 } 223 } 224 225 if (should_break) 226 break; 227 228 as = np; 229 } 230 } 231 return b_next; 232 } 233 234 static int fall_back_to_classic_diff(xpparam_t const *xpp, xdfenv_t *env, 235 int line1, int count1, int line2, int count2) 236 { 237 xpparam_t xpparam; 238 239 memset(&xpparam, 0, sizeof(xpparam)); 240 xpparam.flags = xpp->flags & ~XDF_DIFF_ALGORITHM_MASK; 241 242 return xdl_fall_back_diff(env, &xpparam, 243 line1, count1, line2, count2); 244 } 245 246 static inline void free_index(struct histindex *index) 247 { 248 xdl_free(index->records); 249 xdl_free(index->line_map); 250 xdl_free(index->next_ptrs); 251 xdl_cha_free(&index->rcha); 252 } 253 254 static int find_lcs(xpparam_t const *xpp, xdfenv_t *env, 255 struct region *lcs, 256 int line1, int count1, int line2, int count2) 257 { 258 int b_ptr; 259 int sz, ret = -1; 260 struct histindex index; 261 262 memset(&index, 0, sizeof(index)); 263 264 index.env = env; 265 index.xpp = xpp; 266 267 index.records = NULL; 268 index.line_map = NULL; 269 /* in case of early xdl_cha_free() */ 270 index.rcha.head = NULL; 271 272 index.table_bits = xdl_hashbits(count1); 273 sz = index.records_size = 1 << index.table_bits; 274 sz *= sizeof(struct record *); 275 if (!(index.records = (struct record **) xdl_malloc(sz))) 276 goto cleanup; 277 memset(index.records, 0, sz); 278 279 sz = index.line_map_size = count1; 280 sz *= sizeof(struct record *); 281 if (!(index.line_map = (struct record **) xdl_malloc(sz))) 282 goto cleanup; 283 memset(index.line_map, 0, sz); 284 285 sz = index.line_map_size; 286 sz *= sizeof(unsigned int); 287 if (!(index.next_ptrs = (unsigned int *) xdl_malloc(sz))) 288 goto cleanup; 289 memset(index.next_ptrs, 0, sz); 290 291 /* lines / 4 + 1 comes from xprepare.c:xdl_prepare_ctx() */ 292 if (xdl_cha_init(&index.rcha, sizeof(struct record), count1 / 4 + 1) < 0) 293 goto cleanup; 294 295 index.ptr_shift = line1; 296 index.max_chain_length = 64; 297 298 if (scanA(&index, line1, count1)) 299 goto cleanup; 300 301 index.cnt = index.max_chain_length + 1; 302 303 for (b_ptr = line2; b_ptr <= LINE_END(2); ) 304 b_ptr = try_lcs(&index, lcs, b_ptr, line1, count1, line2, count2); 305 306 if (index.has_common && index.max_chain_length < index.cnt) 307 ret = 1; 308 else 309 ret = 0; 310 311 cleanup: 312 free_index(&index); 313 return ret; 314 } 315 316 static int histogram_diff(xpparam_t const *xpp, xdfenv_t *env, 317 int line1, int count1, int line2, int count2) 318 { 319 struct region lcs; 320 int lcs_found; 321 int result; 322 redo: 323 result = -1; 324 325 if (count1 <= 0 && count2 <= 0) 326 return 0; 327 328 if (LINE_END(1) >= MAX_PTR) 329 return -1; 330 331 if (!count1) { 332 while(count2--) 333 env->xdf2.rchg[line2++ - 1] = 1; 334 return 0; 335 } else if (!count2) { 336 while(count1--) 337 env->xdf1.rchg[line1++ - 1] = 1; 338 return 0; 339 } 340 341 memset(&lcs, 0, sizeof(lcs)); 342 lcs_found = find_lcs(xpp, env, &lcs, line1, count1, line2, count2); 343 if (lcs_found < 0) 344 goto out; 345 else if (lcs_found) 346 result = fall_back_to_classic_diff(xpp, env, line1, count1, line2, count2); 347 else { 348 if (lcs.begin1 == 0 && lcs.begin2 == 0) { 349 while (count1--) 350 env->xdf1.rchg[line1++ - 1] = 1; 351 while (count2--) 352 env->xdf2.rchg[line2++ - 1] = 1; 353 result = 0; 354 } else { 355 result = histogram_diff(xpp, env, 356 line1, lcs.begin1 - line1, 357 line2, lcs.begin2 - line2); 358 if (result) 359 goto out; 360 /* 361 * result = histogram_diff(xpp, env, 362 * lcs.end1 + 1, LINE_END(1) - lcs.end1, 363 * lcs.end2 + 1, LINE_END(2) - lcs.end2); 364 * but let's optimize tail recursion ourself: 365 */ 366 count1 = LINE_END(1) - lcs.end1; 367 line1 = lcs.end1 + 1; 368 count2 = LINE_END(2) - lcs.end2; 369 line2 = lcs.end2 + 1; 370 goto redo; 371 } 372 } 373 out: 374 return result; 375 } 376 377 int xdl_do_histogram_diff(mmfile_t *file1, mmfile_t *file2, 378 xpparam_t const *xpp, xdfenv_t *env) 379 { 380 if (xdl_prepare_env(file1, file2, xpp, env) < 0) 381 return -1; 382 383 return histogram_diff(xpp, env, 384 env->xdf1.dstart + 1, env->xdf1.dend - env->xdf1.dstart + 1, 385 env->xdf2.dstart + 1, env->xdf2.dend - env->xdf2.dstart + 1); 386 }