uhash.cpp (35882B)
1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ****************************************************************************** 5 * Copyright (C) 1997-2016, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ****************************************************************************** 8 * Date Name Description 9 * 03/22/00 aliu Adapted from original C++ ICU Hashtable. 10 * 07/06/01 aliu Modified to support int32_t keys on 11 * platforms with sizeof(void*) < 32. 12 ****************************************************************************** 13 */ 14 15 #include <string_view> 16 17 #include "uhash.h" 18 #include "unicode/ustring.h" 19 #include "cstring.h" 20 #include "cmemory.h" 21 #include "uassert.h" 22 #include "ustr_imp.h" 23 24 /* This hashtable is implemented as a double hash. All elements are 25 * stored in a single array with no secondary storage for collision 26 * resolution (no linked list, etc.). When there is a hash collision 27 * (when two unequal keys have the same hashcode) we resolve this by 28 * using a secondary hash. The secondary hash is an increment 29 * computed as a hash function (a different one) of the primary 30 * hashcode. This increment is added to the initial hash value to 31 * obtain further slots assigned to the same hash code. For this to 32 * work, the length of the array and the increment must be relatively 33 * prime. The easiest way to achieve this is to have the length of 34 * the array be prime, and the increment be any value from 35 * 1..length-1. 36 * 37 * Hashcodes are 32-bit integers. We make sure all hashcodes are 38 * non-negative by masking off the top bit. This has two effects: (1) 39 * modulo arithmetic is simplified. If we allowed negative hashcodes, 40 * then when we computed hashcode % length, we could get a negative 41 * result, which we would then have to adjust back into range. It's 42 * simpler to just make hashcodes non-negative. (2) It makes it easy 43 * to check for empty vs. occupied slots in the table. We just mark 44 * empty or deleted slots with a negative hashcode. 45 * 46 * The central function is _uhash_find(). This function looks for a 47 * slot matching the given key and hashcode. If one is found, it 48 * returns a pointer to that slot. If the table is full, and no match 49 * is found, it returns nullptr -- in theory. This would make the code 50 * more complicated, since all callers of _uhash_find() would then 51 * have to check for a nullptr result. To keep this from happening, we 52 * don't allow the table to fill. When there is only one 53 * empty/deleted slot left, uhash_put() will refuse to increase the 54 * count, and fail. This simplifies the code. In practice, one will 55 * seldom encounter this using default UHashtables. However, if a 56 * hashtable is set to a U_FIXED resize policy, or if memory is 57 * exhausted, then the table may fill. 58 * 59 * High and low water ratios control rehashing. They establish levels 60 * of fullness (from 0 to 1) outside of which the data array is 61 * reallocated and repopulated. Setting the low water ratio to zero 62 * means the table will never shrink. Setting the high water ratio to 63 * one means the table will never grow. The ratios should be 64 * coordinated with the ratio between successive elements of the 65 * PRIMES table, so that when the primeIndex is incremented or 66 * decremented during rehashing, it brings the ratio of count / length 67 * back into the desired range (between low and high water ratios). 68 */ 69 70 /******************************************************************** 71 * PRIVATE Constants, Macros 72 ********************************************************************/ 73 74 /* This is a list of non-consecutive primes chosen such that 75 * PRIMES[i+1] ~ 2*PRIMES[i]. (Currently, the ratio ranges from 1.81 76 * to 2.18; the inverse ratio ranges from 0.459 to 0.552.) If this 77 * ratio is changed, the low and high water ratios should also be 78 * adjusted to suit. 79 * 80 * These prime numbers were also chosen so that they are the largest 81 * prime number while being less than a power of two. 82 */ 83 static const int32_t PRIMES[] = { 84 7, 13, 31, 61, 127, 251, 509, 1021, 2039, 4093, 8191, 16381, 32749, 85 65521, 131071, 262139, 524287, 1048573, 2097143, 4194301, 8388593, 86 16777213, 33554393, 67108859, 134217689, 268435399, 536870909, 87 1073741789, 2147483647 /*, 4294967291 */ 88 }; 89 90 #define PRIMES_LENGTH UPRV_LENGTHOF(PRIMES) 91 #define DEFAULT_PRIME_INDEX 4 92 93 /* These ratios are tuned to the PRIMES array such that a resize 94 * places the table back into the zone of non-resizing. That is, 95 * after a call to _uhash_rehash(), a subsequent call to 96 * _uhash_rehash() should do nothing (should not churn). This is only 97 * a potential problem with U_GROW_AND_SHRINK. 98 */ 99 static const float RESIZE_POLICY_RATIO_TABLE[6] = { 100 /* low, high water ratio */ 101 0.0F, 0.5F, /* U_GROW: Grow on demand, do not shrink */ 102 0.1F, 0.5F, /* U_GROW_AND_SHRINK: Grow and shrink on demand */ 103 0.0F, 1.0F /* U_FIXED: Never change size */ 104 }; 105 106 /* 107 Invariants for hashcode values: 108 109 * DELETED < 0 110 * EMPTY < 0 111 * Real hashes >= 0 112 113 Hashcodes may not start out this way, but internally they are 114 adjusted so that they are always positive. We assume 32-bit 115 hashcodes; adjust these constants for other hashcode sizes. 116 */ 117 #define HASH_DELETED ((int32_t) 0x80000000) 118 #define HASH_EMPTY ((int32_t) HASH_DELETED + 1) 119 120 #define IS_EMPTY_OR_DELETED(x) ((x) < 0) 121 122 /* This macro expects a UHashTok.pointer as its keypointer and 123 valuepointer parameters */ 124 #define HASH_DELETE_KEY_VALUE(hash, keypointer, valuepointer) UPRV_BLOCK_MACRO_BEGIN { \ 125 if (hash->keyDeleter != nullptr && keypointer != nullptr) { \ 126 (*hash->keyDeleter)(keypointer); \ 127 } \ 128 if (hash->valueDeleter != nullptr && valuepointer != nullptr) { \ 129 (*hash->valueDeleter)(valuepointer); \ 130 } \ 131 } UPRV_BLOCK_MACRO_END 132 133 /* 134 * Constants for hinting whether a key or value is an integer 135 * or a pointer. If a hint bit is zero, then the associated 136 * token is assumed to be an integer. 137 */ 138 #define HINT_BOTH_INTEGERS (0) 139 #define HINT_KEY_POINTER (1) 140 #define HINT_VALUE_POINTER (2) 141 #define HINT_ALLOW_ZERO (4) 142 143 /******************************************************************** 144 * PRIVATE Implementation 145 ********************************************************************/ 146 147 static UHashTok 148 _uhash_setElement(UHashtable *hash, UHashElement* e, 149 int32_t hashcode, 150 UHashTok key, UHashTok value, int8_t hint) { 151 152 UHashTok oldValue = e->value; 153 if (hash->keyDeleter != nullptr && e->key.pointer != nullptr && 154 e->key.pointer != key.pointer) { /* Avoid double deletion */ 155 (*hash->keyDeleter)(e->key.pointer); 156 } 157 if (hash->valueDeleter != nullptr) { 158 if (oldValue.pointer != nullptr && 159 oldValue.pointer != value.pointer) { /* Avoid double deletion */ 160 (*hash->valueDeleter)(oldValue.pointer); 161 } 162 oldValue.pointer = nullptr; 163 } 164 /* Compilers should copy the UHashTok union correctly, but even if 165 * they do, memory heap tools (e.g. BoundsChecker) can get 166 * confused when a pointer is cloaked in a union and then copied. 167 * TO ALLEVIATE THIS, we use hints (based on what API the user is 168 * calling) to copy pointers when we know the user thinks 169 * something is a pointer. */ 170 if (hint & HINT_KEY_POINTER) { 171 e->key.pointer = key.pointer; 172 } else { 173 e->key = key; 174 } 175 if (hint & HINT_VALUE_POINTER) { 176 e->value.pointer = value.pointer; 177 } else { 178 e->value = value; 179 } 180 e->hashcode = hashcode; 181 return oldValue; 182 } 183 184 /** 185 * Assumes that the given element is not empty or deleted. 186 */ 187 static UHashTok 188 _uhash_internalRemoveElement(UHashtable *hash, UHashElement* e) { 189 UHashTok empty; 190 U_ASSERT(!IS_EMPTY_OR_DELETED(e->hashcode)); 191 --hash->count; 192 empty.pointer = nullptr; empty.integer = 0; 193 return _uhash_setElement(hash, e, HASH_DELETED, empty, empty, 0); 194 } 195 196 static void 197 _uhash_internalSetResizePolicy(UHashtable *hash, enum UHashResizePolicy policy) { 198 U_ASSERT(hash != nullptr); 199 U_ASSERT(((int32_t)policy) >= 0); 200 U_ASSERT(((int32_t)policy) < 3); 201 hash->lowWaterRatio = RESIZE_POLICY_RATIO_TABLE[policy * 2]; 202 hash->highWaterRatio = RESIZE_POLICY_RATIO_TABLE[policy * 2 + 1]; 203 } 204 205 /** 206 * Allocate internal data array of a size determined by the given 207 * prime index. If the index is out of range it is pinned into range. 208 * If the allocation fails the status is set to 209 * U_MEMORY_ALLOCATION_ERROR and all array storage is freed. In 210 * either case the previous array pointer is overwritten. 211 * 212 * Caller must ensure primeIndex is in range 0..PRIME_LENGTH-1. 213 */ 214 static void 215 _uhash_allocate(UHashtable *hash, 216 int32_t primeIndex, 217 UErrorCode *status) { 218 219 UHashElement *p, *limit; 220 UHashTok emptytok; 221 222 if (U_FAILURE(*status)) return; 223 224 U_ASSERT(primeIndex >= 0 && primeIndex < PRIMES_LENGTH); 225 226 hash->primeIndex = static_cast<int8_t>(primeIndex); 227 hash->length = PRIMES[primeIndex]; 228 229 p = hash->elements = static_cast<UHashElement*>( 230 uprv_malloc(sizeof(UHashElement) * hash->length)); 231 232 if (hash->elements == nullptr) { 233 *status = U_MEMORY_ALLOCATION_ERROR; 234 return; 235 } 236 237 emptytok.pointer = nullptr; /* Only one of these two is needed */ 238 emptytok.integer = 0; /* but we don't know which one. */ 239 240 limit = p + hash->length; 241 while (p < limit) { 242 p->key = emptytok; 243 p->value = emptytok; 244 p->hashcode = HASH_EMPTY; 245 ++p; 246 } 247 248 hash->count = 0; 249 hash->lowWaterMark = static_cast<int32_t>(hash->length * hash->lowWaterRatio); 250 hash->highWaterMark = static_cast<int32_t>(hash->length * hash->highWaterRatio); 251 } 252 253 static UHashtable* 254 _uhash_init(UHashtable *result, 255 UHashFunction *keyHash, 256 UKeyComparator *keyComp, 257 UValueComparator *valueComp, 258 int32_t primeIndex, 259 UErrorCode *status) 260 { 261 if (U_FAILURE(*status)) return nullptr; 262 U_ASSERT(keyHash != nullptr); 263 U_ASSERT(keyComp != nullptr); 264 265 result->keyHasher = keyHash; 266 result->keyComparator = keyComp; 267 result->valueComparator = valueComp; 268 result->keyDeleter = nullptr; 269 result->valueDeleter = nullptr; 270 result->allocated = false; 271 _uhash_internalSetResizePolicy(result, U_GROW); 272 273 _uhash_allocate(result, primeIndex, status); 274 275 if (U_FAILURE(*status)) { 276 return nullptr; 277 } 278 279 return result; 280 } 281 282 static UHashtable* 283 _uhash_create(UHashFunction *keyHash, 284 UKeyComparator *keyComp, 285 UValueComparator *valueComp, 286 int32_t primeIndex, 287 UErrorCode *status) { 288 UHashtable *result; 289 290 if (U_FAILURE(*status)) return nullptr; 291 292 result = static_cast<UHashtable*>(uprv_malloc(sizeof(UHashtable))); 293 if (result == nullptr) { 294 *status = U_MEMORY_ALLOCATION_ERROR; 295 return nullptr; 296 } 297 298 _uhash_init(result, keyHash, keyComp, valueComp, primeIndex, status); 299 result->allocated = true; 300 301 if (U_FAILURE(*status)) { 302 uprv_free(result); 303 return nullptr; 304 } 305 306 return result; 307 } 308 309 /** 310 * Look for a key in the table, or if no such key exists, the first 311 * empty slot matching the given hashcode. Keys are compared using 312 * the keyComparator function. 313 * 314 * First find the start position, which is the hashcode modulo 315 * the length. Test it to see if it is: 316 * 317 * a. identical: First check the hash values for a quick check, 318 * then compare keys for equality using keyComparator. 319 * b. deleted 320 * c. empty 321 * 322 * Stop if it is identical or empty, otherwise continue by adding a 323 * "jump" value (moduloing by the length again to keep it within 324 * range) and retesting. For efficiency, there need enough empty 325 * values so that the searches stop within a reasonable amount of time. 326 * This can be changed by changing the high/low water marks. 327 * 328 * In theory, this function can return nullptr, if it is full (no empty 329 * or deleted slots) and if no matching key is found. In practice, we 330 * prevent this elsewhere (in uhash_put) by making sure the last slot 331 * in the table is never filled. 332 * 333 * The size of the table should be prime for this algorithm to work; 334 * otherwise we are not guaranteed that the jump value (the secondary 335 * hash) is relatively prime to the table length. 336 */ 337 static UHashElement* 338 _uhash_find(const UHashtable *hash, UHashTok key, 339 int32_t hashcode) { 340 341 int32_t firstDeleted = -1; /* assume invalid index */ 342 int32_t theIndex, startIndex; 343 int32_t jump = 0; /* lazy evaluate */ 344 int32_t tableHash; 345 UHashElement *elements = hash->elements; 346 347 hashcode &= 0x7FFFFFFF; /* must be positive */ 348 startIndex = theIndex = (hashcode ^ 0x4000000) % hash->length; 349 350 do { 351 tableHash = elements[theIndex].hashcode; 352 if (tableHash == hashcode) { /* quick check */ 353 if ((*hash->keyComparator)(key, elements[theIndex].key)) { 354 return &(elements[theIndex]); 355 } 356 } else if (!IS_EMPTY_OR_DELETED(tableHash)) { 357 /* We have hit a slot which contains a key-value pair, 358 * but for which the hash code does not match. Keep 359 * looking. 360 */ 361 } else if (tableHash == HASH_EMPTY) { /* empty, end o' the line */ 362 break; 363 } else if (firstDeleted < 0) { /* remember first deleted */ 364 firstDeleted = theIndex; 365 } 366 if (jump == 0) { /* lazy compute jump */ 367 /* The jump value must be relatively prime to the table 368 * length. As long as the length is prime, then any value 369 * 1..length-1 will be relatively prime to it. 370 */ 371 jump = (hashcode % (hash->length - 1)) + 1; 372 } 373 theIndex = (theIndex + jump) % hash->length; 374 } while (theIndex != startIndex); 375 376 if (firstDeleted >= 0) { 377 theIndex = firstDeleted; /* reset if had deleted slot */ 378 } else if (tableHash != HASH_EMPTY) { 379 /* We get to this point if the hashtable is full (no empty or 380 * deleted slots), and we've failed to find a match. THIS 381 * WILL NEVER HAPPEN as long as uhash_put() makes sure that 382 * count is always < length. 383 */ 384 UPRV_UNREACHABLE_EXIT; 385 } 386 return &(elements[theIndex]); 387 } 388 389 /** 390 * Attempt to grow or shrink the data arrays in order to make the 391 * count fit between the high and low water marks. hash_put() and 392 * hash_remove() call this method when the count exceeds the high or 393 * low water marks. This method may do nothing, if memory allocation 394 * fails, or if the count is already in range, or if the length is 395 * already at the low or high limit. In any case, upon return the 396 * arrays will be valid. 397 */ 398 static void 399 _uhash_rehash(UHashtable *hash, UErrorCode *status) { 400 401 UHashElement *old = hash->elements; 402 int32_t oldLength = hash->length; 403 int32_t newPrimeIndex = hash->primeIndex; 404 int32_t i; 405 406 if (hash->count > hash->highWaterMark) { 407 if (++newPrimeIndex >= PRIMES_LENGTH) { 408 return; 409 } 410 } else if (hash->count < hash->lowWaterMark) { 411 if (--newPrimeIndex < 0) { 412 return; 413 } 414 } else { 415 return; 416 } 417 418 _uhash_allocate(hash, newPrimeIndex, status); 419 420 if (U_FAILURE(*status)) { 421 hash->elements = old; 422 hash->length = oldLength; 423 return; 424 } 425 426 for (i = oldLength - 1; i >= 0; --i) { 427 if (!IS_EMPTY_OR_DELETED(old[i].hashcode)) { 428 UHashElement *e = _uhash_find(hash, old[i].key, old[i].hashcode); 429 U_ASSERT(e != nullptr); 430 U_ASSERT(e->hashcode == HASH_EMPTY); 431 e->key = old[i].key; 432 e->value = old[i].value; 433 e->hashcode = old[i].hashcode; 434 ++hash->count; 435 } 436 } 437 438 uprv_free(old); 439 } 440 441 static UHashTok 442 _uhash_remove(UHashtable *hash, 443 UHashTok key) { 444 /* First find the position of the key in the table. If the object 445 * has not been removed already, remove it. If the user wanted 446 * keys deleted, then delete it also. We have to put a special 447 * hashcode in that position that means that something has been 448 * deleted, since when we do a find, we have to continue PAST any 449 * deleted values. 450 */ 451 UHashTok result; 452 UHashElement* e = _uhash_find(hash, key, hash->keyHasher(key)); 453 U_ASSERT(e != nullptr); 454 result.pointer = nullptr; 455 result.integer = 0; 456 if (!IS_EMPTY_OR_DELETED(e->hashcode)) { 457 result = _uhash_internalRemoveElement(hash, e); 458 if (hash->count < hash->lowWaterMark) { 459 UErrorCode status = U_ZERO_ERROR; 460 _uhash_rehash(hash, &status); 461 } 462 } 463 return result; 464 } 465 466 static UHashTok 467 _uhash_put(UHashtable *hash, 468 UHashTok key, 469 UHashTok value, 470 int8_t hint, 471 UErrorCode *status) { 472 473 /* Put finds the position in the table for the new value. If the 474 * key is already in the table, it is deleted, if there is a 475 * non-nullptr keyDeleter. Then the key, the hash and the value are 476 * all put at the position in their respective arrays. 477 */ 478 int32_t hashcode; 479 UHashElement* e; 480 UHashTok emptytok; 481 482 if (U_FAILURE(*status)) { 483 goto err; 484 } 485 U_ASSERT(hash != nullptr); 486 if ((hint & HINT_VALUE_POINTER) ? 487 value.pointer == nullptr : 488 value.integer == 0 && (hint & HINT_ALLOW_ZERO) == 0) { 489 /* Disallow storage of nullptr values, since nullptr is returned by 490 * get() to indicate an absent key. Storing nullptr == removing. 491 */ 492 return _uhash_remove(hash, key); 493 } 494 if (hash->count > hash->highWaterMark) { 495 _uhash_rehash(hash, status); 496 if (U_FAILURE(*status)) { 497 goto err; 498 } 499 } 500 501 hashcode = (*hash->keyHasher)(key); 502 e = _uhash_find(hash, key, hashcode); 503 U_ASSERT(e != nullptr); 504 505 if (IS_EMPTY_OR_DELETED(e->hashcode)) { 506 /* Important: We must never actually fill the table up. If we 507 * do so, then _uhash_find() will return nullptr, and we'll have 508 * to check for nullptr after every call to _uhash_find(). To 509 * avoid this we make sure there is always at least one empty 510 * or deleted slot in the table. This only is a problem if we 511 * are out of memory and rehash isn't working. 512 */ 513 ++hash->count; 514 if (hash->count == hash->length) { 515 /* Don't allow count to reach length */ 516 --hash->count; 517 *status = U_MEMORY_ALLOCATION_ERROR; 518 goto err; 519 } 520 } 521 522 /* We must in all cases handle storage properly. If there was an 523 * old key, then it must be deleted (if the deleter != nullptr). 524 * Make hashcodes stored in table positive. 525 */ 526 return _uhash_setElement(hash, e, hashcode & 0x7FFFFFFF, key, value, hint); 527 528 err: 529 /* If the deleters are non-nullptr, this method adopts its key and/or 530 * value arguments, and we must be sure to delete the key and/or 531 * value in all cases, even upon failure. 532 */ 533 HASH_DELETE_KEY_VALUE(hash, key.pointer, value.pointer); 534 emptytok.pointer = nullptr; emptytok.integer = 0; 535 return emptytok; 536 } 537 538 539 /******************************************************************** 540 * PUBLIC API 541 ********************************************************************/ 542 543 U_CAPI UHashtable* U_EXPORT2 544 uhash_open(UHashFunction *keyHash, 545 UKeyComparator *keyComp, 546 UValueComparator *valueComp, 547 UErrorCode *status) { 548 549 return _uhash_create(keyHash, keyComp, valueComp, DEFAULT_PRIME_INDEX, status); 550 } 551 552 U_CAPI UHashtable* U_EXPORT2 553 uhash_openSize(UHashFunction *keyHash, 554 UKeyComparator *keyComp, 555 UValueComparator *valueComp, 556 int32_t size, 557 UErrorCode *status) { 558 559 /* Find the smallest index i for which PRIMES[i] >= size. */ 560 int32_t i = 0; 561 while (i<(PRIMES_LENGTH-1) && PRIMES[i]<size) { 562 ++i; 563 } 564 565 return _uhash_create(keyHash, keyComp, valueComp, i, status); 566 } 567 568 U_CAPI UHashtable* U_EXPORT2 569 uhash_init(UHashtable *fillinResult, 570 UHashFunction *keyHash, 571 UKeyComparator *keyComp, 572 UValueComparator *valueComp, 573 UErrorCode *status) { 574 575 return _uhash_init(fillinResult, keyHash, keyComp, valueComp, DEFAULT_PRIME_INDEX, status); 576 } 577 578 U_CAPI UHashtable* U_EXPORT2 579 uhash_initSize(UHashtable *fillinResult, 580 UHashFunction *keyHash, 581 UKeyComparator *keyComp, 582 UValueComparator *valueComp, 583 int32_t size, 584 UErrorCode *status) { 585 586 // Find the smallest index i for which PRIMES[i] >= size. 587 int32_t i = 0; 588 while (i<(PRIMES_LENGTH-1) && PRIMES[i]<size) { 589 ++i; 590 } 591 return _uhash_init(fillinResult, keyHash, keyComp, valueComp, i, status); 592 } 593 594 U_CAPI void U_EXPORT2 595 uhash_close(UHashtable *hash) { 596 if (hash == nullptr) { 597 return; 598 } 599 if (hash->elements != nullptr) { 600 if (hash->keyDeleter != nullptr || hash->valueDeleter != nullptr) { 601 int32_t pos=UHASH_FIRST; 602 UHashElement *e; 603 while ((e = (UHashElement*) uhash_nextElement(hash, &pos)) != nullptr) { 604 HASH_DELETE_KEY_VALUE(hash, e->key.pointer, e->value.pointer); 605 } 606 } 607 uprv_free(hash->elements); 608 hash->elements = nullptr; 609 } 610 if (hash->allocated) { 611 uprv_free(hash); 612 } 613 } 614 615 U_CAPI UHashFunction *U_EXPORT2 616 uhash_setKeyHasher(UHashtable *hash, UHashFunction *fn) { 617 UHashFunction *result = hash->keyHasher; 618 hash->keyHasher = fn; 619 return result; 620 } 621 622 U_CAPI UKeyComparator *U_EXPORT2 623 uhash_setKeyComparator(UHashtable *hash, UKeyComparator *fn) { 624 UKeyComparator *result = hash->keyComparator; 625 hash->keyComparator = fn; 626 return result; 627 } 628 U_CAPI UValueComparator *U_EXPORT2 629 uhash_setValueComparator(UHashtable *hash, UValueComparator *fn){ 630 UValueComparator *result = hash->valueComparator; 631 hash->valueComparator = fn; 632 return result; 633 } 634 635 U_CAPI UObjectDeleter *U_EXPORT2 636 uhash_setKeyDeleter(UHashtable *hash, UObjectDeleter *fn) { 637 UObjectDeleter *result = hash->keyDeleter; 638 hash->keyDeleter = fn; 639 return result; 640 } 641 642 U_CAPI UObjectDeleter *U_EXPORT2 643 uhash_setValueDeleter(UHashtable *hash, UObjectDeleter *fn) { 644 UObjectDeleter *result = hash->valueDeleter; 645 hash->valueDeleter = fn; 646 return result; 647 } 648 649 U_CAPI void U_EXPORT2 650 uhash_setResizePolicy(UHashtable *hash, enum UHashResizePolicy policy) { 651 UErrorCode status = U_ZERO_ERROR; 652 _uhash_internalSetResizePolicy(hash, policy); 653 hash->lowWaterMark = (int32_t)(hash->length * hash->lowWaterRatio); 654 hash->highWaterMark = (int32_t)(hash->length * hash->highWaterRatio); 655 _uhash_rehash(hash, &status); 656 } 657 658 U_CAPI int32_t U_EXPORT2 659 uhash_count(const UHashtable *hash) { 660 return hash->count; 661 } 662 663 U_CAPI void* U_EXPORT2 664 uhash_get(const UHashtable *hash, 665 const void* key) { 666 UHashTok keyholder; 667 keyholder.pointer = (void*) key; 668 return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.pointer; 669 } 670 671 U_CAPI void* U_EXPORT2 672 uhash_iget(const UHashtable *hash, 673 int32_t key) { 674 UHashTok keyholder; 675 keyholder.integer = key; 676 return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.pointer; 677 } 678 679 U_CAPI int32_t U_EXPORT2 680 uhash_geti(const UHashtable *hash, 681 const void* key) { 682 UHashTok keyholder; 683 keyholder.pointer = (void*) key; 684 return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.integer; 685 } 686 687 U_CAPI int32_t U_EXPORT2 688 uhash_igeti(const UHashtable *hash, 689 int32_t key) { 690 UHashTok keyholder; 691 keyholder.integer = key; 692 return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.integer; 693 } 694 695 U_CAPI int32_t U_EXPORT2 696 uhash_getiAndFound(const UHashtable *hash, 697 const void *key, 698 UBool *found) { 699 UHashTok keyholder; 700 keyholder.pointer = (void *)key; 701 const UHashElement *e = _uhash_find(hash, keyholder, hash->keyHasher(keyholder)); 702 *found = !IS_EMPTY_OR_DELETED(e->hashcode); 703 return e->value.integer; 704 } 705 706 U_CAPI int32_t U_EXPORT2 707 uhash_igetiAndFound(const UHashtable *hash, 708 int32_t key, 709 UBool *found) { 710 UHashTok keyholder; 711 keyholder.integer = key; 712 const UHashElement *e = _uhash_find(hash, keyholder, hash->keyHasher(keyholder)); 713 *found = !IS_EMPTY_OR_DELETED(e->hashcode); 714 return e->value.integer; 715 } 716 717 U_CAPI void* U_EXPORT2 718 uhash_put(UHashtable *hash, 719 void* key, 720 void* value, 721 UErrorCode *status) { 722 UHashTok keyholder, valueholder; 723 keyholder.pointer = key; 724 valueholder.pointer = value; 725 return _uhash_put(hash, keyholder, valueholder, 726 HINT_KEY_POINTER | HINT_VALUE_POINTER, 727 status).pointer; 728 } 729 730 U_CAPI void* U_EXPORT2 731 uhash_iput(UHashtable *hash, 732 int32_t key, 733 void* value, 734 UErrorCode *status) { 735 UHashTok keyholder, valueholder; 736 keyholder.integer = key; 737 valueholder.pointer = value; 738 return _uhash_put(hash, keyholder, valueholder, 739 HINT_VALUE_POINTER, 740 status).pointer; 741 } 742 743 U_CAPI int32_t U_EXPORT2 744 uhash_puti(UHashtable *hash, 745 void* key, 746 int32_t value, 747 UErrorCode *status) { 748 UHashTok keyholder, valueholder; 749 keyholder.pointer = key; 750 valueholder.integer = value; 751 return _uhash_put(hash, keyholder, valueholder, 752 HINT_KEY_POINTER, 753 status).integer; 754 } 755 756 757 U_CAPI int32_t U_EXPORT2 758 uhash_iputi(UHashtable *hash, 759 int32_t key, 760 int32_t value, 761 UErrorCode *status) { 762 UHashTok keyholder, valueholder; 763 keyholder.integer = key; 764 valueholder.integer = value; 765 return _uhash_put(hash, keyholder, valueholder, 766 HINT_BOTH_INTEGERS, 767 status).integer; 768 } 769 770 U_CAPI int32_t U_EXPORT2 771 uhash_putiAllowZero(UHashtable *hash, 772 void *key, 773 int32_t value, 774 UErrorCode *status) { 775 UHashTok keyholder, valueholder; 776 keyholder.pointer = key; 777 valueholder.integer = value; 778 return _uhash_put(hash, keyholder, valueholder, 779 HINT_KEY_POINTER | HINT_ALLOW_ZERO, 780 status).integer; 781 } 782 783 784 U_CAPI int32_t U_EXPORT2 785 uhash_iputiAllowZero(UHashtable *hash, 786 int32_t key, 787 int32_t value, 788 UErrorCode *status) { 789 UHashTok keyholder, valueholder; 790 keyholder.integer = key; 791 valueholder.integer = value; 792 return _uhash_put(hash, keyholder, valueholder, 793 HINT_BOTH_INTEGERS | HINT_ALLOW_ZERO, 794 status).integer; 795 } 796 797 U_CAPI void* U_EXPORT2 798 uhash_remove(UHashtable *hash, 799 const void* key) { 800 UHashTok keyholder; 801 keyholder.pointer = (void*) key; 802 return _uhash_remove(hash, keyholder).pointer; 803 } 804 805 U_CAPI void* U_EXPORT2 806 uhash_iremove(UHashtable *hash, 807 int32_t key) { 808 UHashTok keyholder; 809 keyholder.integer = key; 810 return _uhash_remove(hash, keyholder).pointer; 811 } 812 813 U_CAPI int32_t U_EXPORT2 814 uhash_removei(UHashtable *hash, 815 const void* key) { 816 UHashTok keyholder; 817 keyholder.pointer = (void*) key; 818 return _uhash_remove(hash, keyholder).integer; 819 } 820 821 U_CAPI int32_t U_EXPORT2 822 uhash_iremovei(UHashtable *hash, 823 int32_t key) { 824 UHashTok keyholder; 825 keyholder.integer = key; 826 return _uhash_remove(hash, keyholder).integer; 827 } 828 829 U_CAPI void U_EXPORT2 830 uhash_removeAll(UHashtable *hash) { 831 int32_t pos = UHASH_FIRST; 832 const UHashElement *e; 833 U_ASSERT(hash != nullptr); 834 if (hash->count != 0) { 835 while ((e = uhash_nextElement(hash, &pos)) != nullptr) { 836 uhash_removeElement(hash, e); 837 } 838 } 839 U_ASSERT(hash->count == 0); 840 } 841 842 U_CAPI UBool U_EXPORT2 843 uhash_containsKey(const UHashtable *hash, const void *key) { 844 UHashTok keyholder; 845 keyholder.pointer = (void *)key; 846 const UHashElement *e = _uhash_find(hash, keyholder, hash->keyHasher(keyholder)); 847 return !IS_EMPTY_OR_DELETED(e->hashcode); 848 } 849 850 /** 851 * Returns true if the UHashtable contains an item with this integer key. 852 * 853 * @param hash The target UHashtable. 854 * @param key An integer key stored in a hashtable 855 * @return true if the key is found. 856 */ 857 U_CAPI UBool U_EXPORT2 858 uhash_icontainsKey(const UHashtable *hash, int32_t key) { 859 UHashTok keyholder; 860 keyholder.integer = key; 861 const UHashElement *e = _uhash_find(hash, keyholder, hash->keyHasher(keyholder)); 862 return !IS_EMPTY_OR_DELETED(e->hashcode); 863 } 864 865 U_CAPI const UHashElement* U_EXPORT2 866 uhash_find(const UHashtable *hash, const void* key) { 867 UHashTok keyholder; 868 const UHashElement *e; 869 keyholder.pointer = (void*) key; 870 e = _uhash_find(hash, keyholder, hash->keyHasher(keyholder)); 871 return IS_EMPTY_OR_DELETED(e->hashcode) ? nullptr : e; 872 } 873 874 U_CAPI const UHashElement* U_EXPORT2 875 uhash_nextElement(const UHashtable *hash, int32_t *pos) { 876 /* Walk through the array until we find an element that is not 877 * EMPTY and not DELETED. 878 */ 879 int32_t i; 880 U_ASSERT(hash != nullptr); 881 for (i = *pos + 1; i < hash->length; ++i) { 882 if (!IS_EMPTY_OR_DELETED(hash->elements[i].hashcode)) { 883 *pos = i; 884 return &(hash->elements[i]); 885 } 886 } 887 888 /* No more elements */ 889 return nullptr; 890 } 891 892 U_CAPI void* U_EXPORT2 893 uhash_removeElement(UHashtable *hash, const UHashElement* e) { 894 U_ASSERT(hash != nullptr); 895 U_ASSERT(e != nullptr); 896 if (!IS_EMPTY_OR_DELETED(e->hashcode)) { 897 UHashElement *nce = (UHashElement *)e; 898 return _uhash_internalRemoveElement(hash, nce).pointer; 899 } 900 return nullptr; 901 } 902 903 /******************************************************************** 904 * UHashTok convenience 905 ********************************************************************/ 906 907 /** 908 * Return a UHashTok for an integer. 909 */ 910 /*U_CAPI UHashTok U_EXPORT2 911 uhash_toki(int32_t i) { 912 UHashTok tok; 913 tok.integer = i; 914 return tok; 915 }*/ 916 917 /** 918 * Return a UHashTok for a pointer. 919 */ 920 /*U_CAPI UHashTok U_EXPORT2 921 uhash_tokp(void* p) { 922 UHashTok tok; 923 tok.pointer = p; 924 return tok; 925 }*/ 926 927 /******************************************************************** 928 * PUBLIC Key Hash Functions 929 ********************************************************************/ 930 931 U_CAPI int32_t U_EXPORT2 932 uhash_hashUChars(const UHashTok key) { 933 const char16_t *s = (const char16_t *)key.pointer; 934 return s == nullptr ? 0 : ustr_hashUCharsN(s, u_strlen(s)); 935 } 936 937 U_CAPI int32_t U_EXPORT2 938 uhash_hashChars(const UHashTok key) { 939 const char *s = (const char *)key.pointer; 940 return s == nullptr ? 0 : static_cast<int32_t>(ustr_hashCharsN(s, static_cast<int32_t>(uprv_strlen(s)))); 941 } 942 943 U_CAPI int32_t U_EXPORT2 944 uhash_hashIChars(const UHashTok key) { 945 const char *s = (const char *)key.pointer; 946 return s == nullptr ? 0 : ustr_hashICharsN(s, static_cast<int32_t>(uprv_strlen(s))); 947 } 948 949 U_CAPI int32_t U_EXPORT2 950 uhash_hashIStringView(const UHashTok key) { 951 const std::string_view* s = static_cast<std::string_view*>(key.pointer); 952 return s == nullptr ? 0 : ustr_hashICharsN(s->data(), static_cast<int32_t>(s->size())); 953 } 954 955 U_CAPI UBool U_EXPORT2 956 uhash_equals(const UHashtable* hash1, const UHashtable* hash2){ 957 int32_t count1, count2, pos, i; 958 959 if(hash1==hash2){ 960 return true; 961 } 962 963 /* 964 * Make sure that we are comparing 2 valid hashes of the same type 965 * with valid comparison functions. 966 * Without valid comparison functions, a binary comparison 967 * of the hash values will yield random results on machines 968 * with 64-bit pointers and 32-bit integer hashes. 969 * A valueComparator is normally optional. 970 */ 971 if (hash1==nullptr || hash2==nullptr || 972 hash1->keyComparator != hash2->keyComparator || 973 hash1->valueComparator != hash2->valueComparator || 974 hash1->valueComparator == nullptr) 975 { 976 /* 977 Normally we would return an error here about incompatible hash tables, 978 but we return false instead. 979 */ 980 return false; 981 } 982 983 count1 = uhash_count(hash1); 984 count2 = uhash_count(hash2); 985 if(count1!=count2){ 986 return false; 987 } 988 989 pos=UHASH_FIRST; 990 for(i=0; i<count1; i++){ 991 const UHashElement* elem1 = uhash_nextElement(hash1, &pos); 992 const UHashTok key1 = elem1->key; 993 const UHashTok val1 = elem1->value; 994 /* here the keys are not compared, instead the key form hash1 is used to fetch 995 * value from hash2. If the hashes are equal then then both hashes should 996 * contain equal values for the same key! 997 */ 998 const UHashElement* elem2 = _uhash_find(hash2, key1, hash2->keyHasher(key1)); 999 const UHashTok val2 = elem2->value; 1000 if(hash1->valueComparator(val1, val2)==false){ 1001 return false; 1002 } 1003 } 1004 return true; 1005 } 1006 1007 /******************************************************************** 1008 * PUBLIC Comparator Functions 1009 ********************************************************************/ 1010 1011 U_CAPI UBool U_EXPORT2 1012 uhash_compareUChars(const UHashTok key1, const UHashTok key2) { 1013 const char16_t *p1 = (const char16_t*) key1.pointer; 1014 const char16_t *p2 = (const char16_t*) key2.pointer; 1015 if (p1 == p2) { 1016 return true; 1017 } 1018 if (p1 == nullptr || p2 == nullptr) { 1019 return false; 1020 } 1021 while (*p1 != 0 && *p1 == *p2) { 1022 ++p1; 1023 ++p2; 1024 } 1025 return *p1 == *p2; 1026 } 1027 1028 U_CAPI UBool U_EXPORT2 1029 uhash_compareChars(const UHashTok key1, const UHashTok key2) { 1030 const char *p1 = (const char*) key1.pointer; 1031 const char *p2 = (const char*) key2.pointer; 1032 if (p1 == p2) { 1033 return true; 1034 } 1035 if (p1 == nullptr || p2 == nullptr) { 1036 return false; 1037 } 1038 while (*p1 != 0 && *p1 == *p2) { 1039 ++p1; 1040 ++p2; 1041 } 1042 return *p1 == *p2; 1043 } 1044 1045 U_CAPI UBool U_EXPORT2 1046 uhash_compareIChars(const UHashTok key1, const UHashTok key2) { 1047 const char *p1 = (const char*) key1.pointer; 1048 const char *p2 = (const char*) key2.pointer; 1049 if (p1 == p2) { 1050 return true; 1051 } 1052 if (p1 == nullptr || p2 == nullptr) { 1053 return false; 1054 } 1055 while (*p1 != 0 && uprv_tolower(*p1) == uprv_tolower(*p2)) { 1056 ++p1; 1057 ++p2; 1058 } 1059 return *p1 == *p2; 1060 } 1061 1062 U_CAPI UBool U_EXPORT2 1063 uhash_compareIStringView(const UHashTok key1, const UHashTok key2) { 1064 const std::string_view* p1 = static_cast<std::string_view*>(key1.pointer); 1065 const std::string_view* p2 = static_cast<std::string_view*>(key2.pointer); 1066 if (p1 == p2) { 1067 return true; 1068 } 1069 if (p1 == nullptr || p2 == nullptr) { 1070 return false; 1071 } 1072 const std::string_view& v1 = *p1; 1073 const std::string_view& v2 = *p2; 1074 if (v1.size() != v2.size()) { 1075 return false; 1076 } 1077 for (size_t i = 0; i < v1.size(); ++i) { 1078 if (uprv_tolower(v1[i]) != uprv_tolower(v2[i])) { 1079 return false; 1080 } 1081 } 1082 return true; 1083 } 1084 1085 /******************************************************************** 1086 * PUBLIC int32_t Support Functions 1087 ********************************************************************/ 1088 1089 U_CAPI int32_t U_EXPORT2 1090 uhash_hashLong(const UHashTok key) { 1091 return key.integer; 1092 } 1093 1094 U_CAPI UBool U_EXPORT2 1095 uhash_compareLong(const UHashTok key1, const UHashTok key2) { 1096 return key1.integer == key2.integer; 1097 }