h_bigkey.c (19923B)
1 /*- 2 * Copyright (c) 1990, 1993, 1994 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Margo Seltzer. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. ***REMOVED*** - see 17 * ftp://ftp.cs.berkeley.edu/pub/4bsd/README.Impt.License.Change 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 #if defined(LIBC_SCCS) && !defined(lint) 36 static char sccsid[] = "@(#)hash_bigkey.c 8.3 (Berkeley) 5/31/94"; 37 #endif /* LIBC_SCCS and not lint */ 38 39 /* 40 * PACKAGE: hash 41 * DESCRIPTION: 42 * Big key/data handling for the hashing package. 43 * 44 * ROUTINES: 45 * External 46 * __big_keydata 47 * __big_split 48 * __big_insert 49 * __big_return 50 * __big_delete 51 * __find_last_page 52 * Internal 53 * collect_key 54 * collect_data 55 */ 56 57 #if !defined(_WIN32) && !defined(_WINDOWS) && !defined(macintosh) 58 #include <sys/param.h> 59 #endif 60 61 #include <errno.h> 62 #include <stdio.h> 63 #include <stdlib.h> 64 #include <string.h> 65 66 #ifdef DEBUG 67 #include <assert.h> 68 #endif 69 70 #include "mcom_db.h" 71 #include "hash.h" 72 #include "page.h" 73 /* #include "extern.h" */ 74 75 static int collect_key(HTAB *, BUFHEAD *, int, DBT *, int); 76 static int collect_data(HTAB *, BUFHEAD *, int, int); 77 78 /* 79 * Big_insert 80 * 81 * You need to do an insert and the key/data pair is too big 82 * 83 * Returns: 84 * 0 ==> OK 85 *-1 ==> ERROR 86 */ 87 extern int 88 dbm_big_insert(HTAB *hashp, BUFHEAD *bufp, const DBT *key, const DBT *val) 89 { 90 register uint16 *p; 91 uint key_size, n, val_size; 92 uint16 space, move_bytes, off; 93 char *cp, *key_data, *val_data; 94 95 cp = bufp->page; /* Character pointer of p. */ 96 p = (uint16 *)cp; 97 98 key_data = (char *)key->data; 99 key_size = key->size; 100 val_data = (char *)val->data; 101 val_size = val->size; 102 103 /* First move the Key */ 104 for (space = FREESPACE(p) - BIGOVERHEAD; key_size; 105 space = FREESPACE(p) - BIGOVERHEAD) { 106 move_bytes = PR_MIN(space, key_size); 107 off = OFFSET(p) - move_bytes; 108 memmove(cp + off, key_data, move_bytes); 109 key_size -= move_bytes; 110 key_data += move_bytes; 111 n = p[0]; 112 p[++n] = off; 113 p[0] = ++n; 114 FREESPACE(p) = off - PAGE_META(n); 115 OFFSET(p) = off; 116 p[n] = PARTIAL_KEY; 117 bufp = dbm_add_ovflpage(hashp, bufp); 118 if (!bufp) 119 return (-1); 120 n = p[0]; 121 if (!key_size) { 122 if (FREESPACE(p)) { 123 move_bytes = PR_MIN(FREESPACE(p), val_size); 124 off = OFFSET(p) - move_bytes; 125 p[n] = off; 126 memmove(cp + off, val_data, move_bytes); 127 val_data += move_bytes; 128 val_size -= move_bytes; 129 p[n - 2] = FULL_KEY_DATA; 130 FREESPACE(p) = FREESPACE(p) - move_bytes; 131 OFFSET(p) = off; 132 } else 133 p[n - 2] = FULL_KEY; 134 } 135 p = (uint16 *)bufp->page; 136 cp = bufp->page; 137 bufp->flags |= BUF_MOD; 138 } 139 140 /* Now move the data */ 141 for (space = FREESPACE(p) - BIGOVERHEAD; val_size; 142 space = FREESPACE(p) - BIGOVERHEAD) { 143 move_bytes = PR_MIN(space, val_size); 144 /* 145 * Here's the hack to make sure that if the data ends on the 146 * same page as the key ends, FREESPACE is at least one. 147 */ 148 if (space == val_size && val_size == val->size) 149 move_bytes--; 150 off = OFFSET(p) - move_bytes; 151 memmove(cp + off, val_data, move_bytes); 152 val_size -= move_bytes; 153 val_data += move_bytes; 154 n = p[0]; 155 p[++n] = off; 156 p[0] = ++n; 157 FREESPACE(p) = off - PAGE_META(n); 158 OFFSET(p) = off; 159 if (val_size) { 160 p[n] = FULL_KEY; 161 bufp = dbm_add_ovflpage(hashp, bufp); 162 if (!bufp) 163 return (-1); 164 cp = bufp->page; 165 p = (uint16 *)cp; 166 } else 167 p[n] = FULL_KEY_DATA; 168 bufp->flags |= BUF_MOD; 169 } 170 return (0); 171 } 172 173 /* 174 * Called when bufp's page contains a partial key (index should be 1) 175 * 176 * All pages in the big key/data pair except bufp are freed. We cannot 177 * free bufp because the page pointing to it is lost and we can't get rid 178 * of its pointer. 179 * 180 * Returns: 181 * 0 => OK 182 *-1 => ERROR 183 */ 184 extern int 185 dbm_big_delete(HTAB *hashp, BUFHEAD *bufp) 186 { 187 register BUFHEAD *last_bfp, *rbufp; 188 uint16 *bp, pageno; 189 int key_done, n; 190 191 rbufp = bufp; 192 last_bfp = NULL; 193 bp = (uint16 *)bufp->page; 194 key_done = 0; 195 196 while (!key_done || (bp[2] != FULL_KEY_DATA)) { 197 if (bp[2] == FULL_KEY || bp[2] == FULL_KEY_DATA) 198 key_done = 1; 199 200 /* 201 * If there is freespace left on a FULL_KEY_DATA page, then 202 * the data is short and fits entirely on this page, and this 203 * is the last page. 204 */ 205 if (bp[2] == FULL_KEY_DATA && FREESPACE(bp)) 206 break; 207 pageno = bp[bp[0] - 1]; 208 rbufp->flags |= BUF_MOD; 209 rbufp = dbm_get_buf(hashp, pageno, rbufp, 0); 210 if (last_bfp) 211 dbm_free_ovflpage(hashp, last_bfp); 212 last_bfp = rbufp; 213 if (!rbufp) 214 return (-1); /* Error. */ 215 bp = (uint16 *)rbufp->page; 216 } 217 218 /* 219 * If we get here then rbufp points to the last page of the big 220 * key/data pair. Bufp points to the first one -- it should now be 221 * empty pointing to the next page after this pair. Can't free it 222 * because we don't have the page pointing to it. 223 */ 224 225 /* This is information from the last page of the pair. */ 226 n = bp[0]; 227 pageno = bp[n - 1]; 228 229 /* Now, bp is the first page of the pair. */ 230 bp = (uint16 *)bufp->page; 231 if (n > 2) { 232 /* There is an overflow page. */ 233 bp[1] = pageno; 234 bp[2] = OVFLPAGE; 235 bufp->ovfl = rbufp->ovfl; 236 } else 237 /* This is the last page. */ 238 bufp->ovfl = NULL; 239 n -= 2; 240 bp[0] = n; 241 FREESPACE(bp) = hashp->BSIZE - PAGE_META(n); 242 OFFSET(bp) = hashp->BSIZE - 1; 243 244 bufp->flags |= BUF_MOD; 245 if (rbufp) 246 dbm_free_ovflpage(hashp, rbufp); 247 if (last_bfp != rbufp) 248 dbm_free_ovflpage(hashp, last_bfp); 249 250 hashp->NKEYS--; 251 return (0); 252 } 253 /* 254 * Returns: 255 * 0 = key not found 256 * -1 = get next overflow page 257 * -2 means key not found and this is big key/data 258 * -3 error 259 */ 260 extern int 261 dbm_find_bigpair(HTAB *hashp, BUFHEAD *bufp, int ndx, char *key, int size) 262 { 263 register uint16 *bp; 264 register char *p; 265 int ksize; 266 uint16 bytes; 267 char *kkey; 268 269 bp = (uint16 *)bufp->page; 270 p = bufp->page; 271 ksize = size; 272 kkey = key; 273 274 for (bytes = hashp->BSIZE - bp[ndx]; 275 bytes <= size && bp[ndx + 1] == PARTIAL_KEY; 276 bytes = hashp->BSIZE - bp[ndx]) { 277 if (memcmp(p + bp[ndx], kkey, bytes)) 278 return (-2); 279 kkey += bytes; 280 ksize -= bytes; 281 bufp = dbm_get_buf(hashp, bp[ndx + 2], bufp, 0); 282 if (!bufp) 283 return (-3); 284 p = bufp->page; 285 bp = (uint16 *)p; 286 ndx = 1; 287 } 288 289 if (bytes != ksize || memcmp(p + bp[ndx], kkey, bytes)) { 290 #ifdef HASH_STATISTICS 291 ++hash_collisions; 292 #endif 293 return (-2); 294 } else 295 return (ndx); 296 } 297 298 /* 299 * Given the buffer pointer of the first overflow page of a big pair, 300 * find the end of the big pair 301 * 302 * This will set bpp to the buffer header of the last page of the big pair. 303 * It will return the pageno of the overflow page following the last page 304 * of the pair; 0 if there isn't any (i.e. big pair is the last key in the 305 * bucket) 306 */ 307 extern uint16 308 dbm_find_last_page(HTAB *hashp, BUFHEAD **bpp) 309 { 310 BUFHEAD *bufp; 311 uint16 *bp, pageno; 312 uint n; 313 314 bufp = *bpp; 315 bp = (uint16 *)bufp->page; 316 for (;;) { 317 n = bp[0]; 318 319 /* 320 * This is the last page if: the tag is FULL_KEY_DATA and 321 * either only 2 entries OVFLPAGE marker is explicit there 322 * is freespace on the page. 323 */ 324 if (bp[2] == FULL_KEY_DATA && 325 ((n == 2) || (bp[n] == OVFLPAGE) || (FREESPACE(bp)))) 326 break; 327 328 /* LJM bound the size of n to reasonable limits 329 */ 330 if (n > hashp->BSIZE / sizeof(uint16)) 331 return (0); 332 333 pageno = bp[n - 1]; 334 bufp = dbm_get_buf(hashp, pageno, bufp, 0); 335 if (!bufp) 336 return (0); /* Need to indicate an error! */ 337 bp = (uint16 *)bufp->page; 338 } 339 340 *bpp = bufp; 341 if (bp[0] > 2) 342 return (bp[3]); 343 else 344 return (0); 345 } 346 347 /* 348 * Return the data for the key/data pair that begins on this page at this 349 * index (index should always be 1). 350 */ 351 extern int 352 dbm_big_return( 353 HTAB *hashp, 354 BUFHEAD *bufp, 355 int ndx, 356 DBT *val, 357 int set_current) 358 { 359 BUFHEAD *save_p; 360 uint16 *bp, len, off, save_addr; 361 char *tp; 362 int save_flags; 363 364 bp = (uint16 *)bufp->page; 365 while (bp[ndx + 1] == PARTIAL_KEY) { 366 bufp = dbm_get_buf(hashp, bp[bp[0] - 1], bufp, 0); 367 if (!bufp) 368 return (-1); 369 bp = (uint16 *)bufp->page; 370 ndx = 1; 371 } 372 373 if (bp[ndx + 1] == FULL_KEY) { 374 bufp = dbm_get_buf(hashp, bp[bp[0] - 1], bufp, 0); 375 if (!bufp) 376 return (-1); 377 bp = (uint16 *)bufp->page; 378 save_p = bufp; 379 save_addr = save_p->addr; 380 off = bp[1]; 381 len = 0; 382 } else if (!FREESPACE(bp)) { 383 /* 384 * This is a hack. We can't distinguish between 385 * FULL_KEY_DATA that contains complete data or 386 * incomplete data, so we require that if the data 387 * is complete, there is at least 1 byte of free 388 * space left. 389 */ 390 off = bp[bp[0]]; 391 len = bp[1] - off; 392 save_p = bufp; 393 save_addr = bufp->addr; 394 bufp = dbm_get_buf(hashp, bp[bp[0] - 1], bufp, 0); 395 if (!bufp) 396 return (-1); 397 bp = (uint16 *)bufp->page; 398 } else { 399 /* The data is all on one page. */ 400 tp = (char *)bp; 401 off = bp[bp[0]]; 402 val->data = (uint8 *)tp + off; 403 val->size = bp[1] - off; 404 if (set_current) { 405 if (bp[0] == 2) { /* No more buckets in 406 * chain */ 407 hashp->cpage = NULL; 408 hashp->cbucket++; 409 hashp->cndx = 1; 410 } else { 411 hashp->cpage = dbm_get_buf(hashp, 412 bp[bp[0] - 1], bufp, 0); 413 if (!hashp->cpage) 414 return (-1); 415 hashp->cndx = 1; 416 if (!((uint16 *) 417 hashp->cpage->page)[0]) { 418 hashp->cbucket++; 419 hashp->cpage = NULL; 420 } 421 } 422 } 423 return (0); 424 } 425 426 /* pin our saved buf so that we don't lose if 427 * we run out of buffers */ 428 save_flags = save_p->flags; 429 save_p->flags |= BUF_PIN; 430 val->size = collect_data(hashp, bufp, (int)len, set_current); 431 save_p->flags = save_flags; 432 if (val->size == (size_t)-1) 433 return (-1); 434 if (save_p->addr != save_addr) { 435 /* We are pretty short on buffers. */ 436 errno = EINVAL; /* OUT OF BUFFERS */ 437 return (-1); 438 } 439 memmove(hashp->tmp_buf, (save_p->page) + off, len); 440 val->data = (uint8 *)hashp->tmp_buf; 441 return (0); 442 } 443 444 /* 445 * Count how big the total datasize is by looping through the pages. Then 446 * allocate a buffer and copy the data in the second loop. NOTE: Our caller 447 * may already have a bp which it is holding onto. The caller is 448 * responsible for copying that bp into our temp buffer. 'len' is how much 449 * space to reserve for that buffer. 450 */ 451 static int 452 collect_data( 453 HTAB *hashp, 454 BUFHEAD *bufp, 455 int len, int set) 456 { 457 register uint16 *bp; 458 BUFHEAD *save_bufp; 459 int save_flags; 460 int mylen, totlen; 461 462 /* 463 * save the input buf head because we need to walk the list twice. 464 * pin it to make sure it doesn't leave the buffer pool. 465 * This has the effect of growing the buffer pool if necessary. 466 */ 467 save_bufp = bufp; 468 save_flags = save_bufp->flags; 469 save_bufp->flags |= BUF_PIN; 470 471 /* read the length of the buffer */ 472 for (totlen = len; bufp; bufp = dbm_get_buf(hashp, bp[bp[0] - 1], bufp, 0)) { 473 bp = (uint16 *)bufp->page; 474 mylen = hashp->BSIZE - bp[1]; 475 476 /* if mylen ever goes negative it means that the 477 * page is screwed up. 478 */ 479 if (mylen < 0) { 480 save_bufp->flags = save_flags; 481 return (-1); 482 } 483 totlen += mylen; 484 if (bp[2] == FULL_KEY_DATA) { /* End of Data */ 485 break; 486 } 487 } 488 489 if (!bufp) { 490 save_bufp->flags = save_flags; 491 return (-1); 492 } 493 494 /* allocate a temp buf */ 495 if (hashp->tmp_buf) 496 free(hashp->tmp_buf); 497 if ((hashp->tmp_buf = (char *)malloc((size_t)totlen)) == NULL) { 498 save_bufp->flags = save_flags; 499 return (-1); 500 } 501 502 /* copy the buffers back into temp buf */ 503 for (bufp = save_bufp; bufp; 504 bufp = dbm_get_buf(hashp, bp[bp[0] - 1], bufp, 0)) { 505 bp = (uint16 *)bufp->page; 506 mylen = hashp->BSIZE - bp[1]; 507 memmove(&hashp->tmp_buf[len], (bufp->page) + bp[1], (size_t)mylen); 508 len += mylen; 509 if (bp[2] == FULL_KEY_DATA) { 510 break; 511 } 512 } 513 514 /* 'clear' the pin flags */ 515 save_bufp->flags = save_flags; 516 517 /* update the database cursor */ 518 if (set) { 519 hashp->cndx = 1; 520 if (bp[0] == 2) { /* No more buckets in chain */ 521 hashp->cpage = NULL; 522 hashp->cbucket++; 523 } else { 524 hashp->cpage = dbm_get_buf(hashp, bp[bp[0] - 1], bufp, 0); 525 if (!hashp->cpage) 526 return (-1); 527 else if (!((uint16 *)hashp->cpage->page)[0]) { 528 hashp->cbucket++; 529 hashp->cpage = NULL; 530 } 531 } 532 } 533 return (totlen); 534 } 535 536 /* 537 * Fill in the key and data for this big pair. 538 */ 539 extern int 540 dbm_big_keydata( 541 HTAB *hashp, 542 BUFHEAD *bufp, 543 DBT *key, DBT *val, 544 int set) 545 { 546 key->size = collect_key(hashp, bufp, 0, val, set); 547 if (key->size == (size_t)-1) 548 return (-1); 549 key->data = (uint8 *)hashp->tmp_key; 550 return (0); 551 } 552 553 /* 554 * Count how big the total key size is by recursing through the pages. Then 555 * collect the data, allocate a buffer and copy the key as you recurse up. 556 */ 557 static int 558 collect_key( 559 HTAB *hashp, 560 BUFHEAD *bufp, 561 int len, 562 DBT *val, 563 int set) 564 { 565 BUFHEAD *xbp; 566 char *p; 567 int mylen, totlen; 568 uint16 *bp, save_addr; 569 570 p = bufp->page; 571 bp = (uint16 *)p; 572 mylen = hashp->BSIZE - bp[1]; 573 574 save_addr = bufp->addr; 575 totlen = len + mylen; 576 if (bp[2] == FULL_KEY || bp[2] == FULL_KEY_DATA) { /* End of Key. */ 577 if (hashp->tmp_key != NULL) 578 free(hashp->tmp_key); 579 if ((hashp->tmp_key = (char *)malloc((size_t)totlen)) == NULL) 580 return (-1); 581 if (dbm_big_return(hashp, bufp, 1, val, set)) 582 return (-1); 583 } else { 584 xbp = dbm_get_buf(hashp, bp[bp[0] - 1], bufp, 0); 585 if (!xbp || ((totlen = 586 collect_key(hashp, xbp, totlen, val, set)) < 1)) 587 return (-1); 588 } 589 if (bufp->addr != save_addr) { 590 errno = EINVAL; /* MIS -- OUT OF BUFFERS */ 591 return (-1); 592 } 593 memmove(&hashp->tmp_key[len], (bufp->page) + bp[1], (size_t)mylen); 594 return (totlen); 595 } 596 597 /* 598 * Returns: 599 * 0 => OK 600 * -1 => error 601 */ 602 extern int 603 dbm_big_split( 604 HTAB *hashp, 605 BUFHEAD *op, /* Pointer to where to put keys that go in old bucket */ 606 BUFHEAD *np, /* Pointer to new bucket page */ 607 /* Pointer to first page containing the big key/data */ 608 BUFHEAD *big_keyp, 609 uint32 addr, /* Address of big_keyp */ 610 uint32 obucket, /* Old Bucket */ 611 SPLIT_RETURN *ret) 612 { 613 register BUFHEAD *tmpp; 614 register uint16 *tp; 615 BUFHEAD *bp; 616 DBT key, val; 617 uint32 change; 618 uint16 free_space, n, off; 619 620 bp = big_keyp; 621 622 /* Now figure out where the big key/data goes */ 623 if (dbm_big_keydata(hashp, big_keyp, &key, &val, 0)) 624 return (-1); 625 change = (dbm_call_hash(hashp, (char *)key.data, key.size) != obucket); 626 627 if ((ret->next_addr = dbm_find_last_page(hashp, &big_keyp))) { 628 if (!(ret->nextp = 629 dbm_get_buf(hashp, ret->next_addr, big_keyp, 0))) 630 return (-1); 631 ; 632 } else 633 ret->nextp = NULL; 634 635 /* Now make one of np/op point to the big key/data pair */ 636 #ifdef DEBUG 637 assert(np->ovfl == NULL); 638 #endif 639 if (change) 640 tmpp = np; 641 else 642 tmpp = op; 643 644 tmpp->flags |= BUF_MOD; 645 #ifdef DEBUG1 646 (void)fprintf(stderr, 647 "BIG_SPLIT: %d->ovfl was %d is now %d\n", tmpp->addr, 648 (tmpp->ovfl ? tmpp->ovfl->addr : 0), (bp ? bp->addr : 0)); 649 #endif 650 tmpp->ovfl = bp; /* one of op/np point to big_keyp */ 651 tp = (uint16 *)tmpp->page; 652 653 #if 0 /* this get's tripped on database corrupted error */ 654 assert(FREESPACE(tp) >= OVFLSIZE); 655 #endif 656 if (FREESPACE(tp) < OVFLSIZE) 657 return (DATABASE_CORRUPTED_ERROR); 658 659 n = tp[0]; 660 off = OFFSET(tp); 661 free_space = FREESPACE(tp); 662 tp[++n] = (uint16)addr; 663 tp[++n] = OVFLPAGE; 664 tp[0] = n; 665 OFFSET(tp) = off; 666 FREESPACE(tp) = free_space - OVFLSIZE; 667 668 /* 669 * Finally, set the new and old return values. BIG_KEYP contains a 670 * pointer to the last page of the big key_data pair. Make sure that 671 * big_keyp has no following page (2 elements) or create an empty 672 * following page. 673 */ 674 675 ret->newp = np; 676 ret->oldp = op; 677 678 tp = (uint16 *)big_keyp->page; 679 big_keyp->flags |= BUF_MOD; 680 if (tp[0] > 2) { 681 /* 682 * There may be either one or two offsets on this page. If 683 * there is one, then the overflow page is linked on normally 684 * and tp[4] is OVFLPAGE. If there are two, tp[4] contains 685 * the second offset and needs to get stuffed in after the 686 * next overflow page is added. 687 */ 688 n = tp[4]; 689 free_space = FREESPACE(tp); 690 off = OFFSET(tp); 691 tp[0] -= 2; 692 FREESPACE(tp) = free_space + OVFLSIZE; 693 OFFSET(tp) = off; 694 tmpp = dbm_add_ovflpage(hashp, big_keyp); 695 if (!tmpp) 696 return (-1); 697 tp[4] = n; 698 } else 699 tmpp = big_keyp; 700 701 if (change) 702 ret->newp = tmpp; 703 else 704 ret->oldp = tmpp; 705 return (0); 706 }