raw_hash_set.h (151430B)
1 // Copyright 2018 The Abseil Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // https://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 // 15 // An open-addressing 16 // hashtable with quadratic probing. 17 // 18 // This is a low level hashtable on top of which different interfaces can be 19 // implemented, like flat_hash_set, node_hash_set, string_hash_set, etc. 20 // 21 // The table interface is similar to that of std::unordered_set. Notable 22 // differences are that most member functions support heterogeneous keys when 23 // BOTH the hash and eq functions are marked as transparent. They do so by 24 // providing a typedef called `is_transparent`. 25 // 26 // When heterogeneous lookup is enabled, functions that take key_type act as if 27 // they have an overload set like: 28 // 29 // iterator find(const key_type& key); 30 // template <class K> 31 // iterator find(const K& key); 32 // 33 // size_type erase(const key_type& key); 34 // template <class K> 35 // size_type erase(const K& key); 36 // 37 // std::pair<iterator, iterator> equal_range(const key_type& key); 38 // template <class K> 39 // std::pair<iterator, iterator> equal_range(const K& key); 40 // 41 // When heterogeneous lookup is disabled, only the explicit `key_type` overloads 42 // exist. 43 // 44 // In addition the pointer to element and iterator stability guarantees are 45 // weaker: all iterators and pointers are invalidated after a new element is 46 // inserted. 
47 // 48 // IMPLEMENTATION DETAILS 49 // 50 // # Table Layout 51 // 52 // A raw_hash_set's backing array consists of control bytes followed by slots 53 // that may or may not contain objects. 54 // 55 // The layout of the backing array, for `capacity` slots, is thus, as a 56 // pseudo-struct: 57 // 58 // struct BackingArray { 59 // // Sampling handler. This field isn't present when the sampling is 60 // // disabled or this allocation hasn't been selected for sampling. 61 // HashtablezInfoHandle infoz_; 62 // // The number of elements we can insert before growing the capacity. 63 // size_t growth_left; 64 // // Control bytes for the "real" slots. 65 // ctrl_t ctrl[capacity]; 66 // // Always `ctrl_t::kSentinel`. This is used by iterators to find when to 67 // // stop and serves no other purpose. 68 // ctrl_t sentinel; 69 // // A copy of the first `kWidth - 1` elements of `ctrl`. This is used so 70 // // that if a probe sequence picks a value near the end of `ctrl`, 71 // // `Group` will have valid control bytes to look at. 72 // ctrl_t clones[kWidth - 1]; 73 // // The actual slot data. 74 // slot_type slots[capacity]; 75 // }; 76 // 77 // The length of this array is computed by `RawHashSetLayout::alloc_size` below. 78 // 79 // Control bytes (`ctrl_t`) are bytes (collected into groups of a 80 // platform-specific size) that define the state of the corresponding slot in 81 // the slot array. Group manipulation is tightly optimized to be as efficient 82 // as possible: SSE and friends on x86, clever bit operations on other arches. 
83 // 84 // Group 1 Group 2 Group 3 85 // +---------------+---------------+---------------+ 86 // | | | | | | | | | | | | | | | | | | | | | | | | | 87 // +---------------+---------------+---------------+ 88 // 89 // Each control byte is either a special value for empty slots, deleted slots 90 // (sometimes called *tombstones*), and a special end-of-table marker used by 91 // iterators, or, if occupied, seven bits (H2) from the hash of the value in the 92 // corresponding slot. 93 // 94 // Storing control bytes in a separate array also has beneficial cache effects, 95 // since more logical slots will fit into a cache line. 96 // 97 // # Small Object Optimization (SOO) 98 // 99 // When the size/alignment of the value_type and the capacity of the table are 100 // small, we enable small object optimization and store the values inline in 101 // the raw_hash_set object. This optimization allows us to avoid 102 // allocation/deallocation as well as cache/dTLB misses. 103 // 104 // # Hashing 105 // 106 // We compute two separate hashes, `H1` and `H2`, from the hash of an object. 107 // `H1(hash(x))` is an index into `slots`, and essentially the starting point 108 // for the probe sequence. `H2(hash(x))` is a 7-bit value used to filter out 109 // objects that cannot possibly be the one we are looking for. 110 // 111 // # Table operations. 112 // 113 // The key operations are `insert`, `find`, and `erase`. 114 // 115 // Since `insert` and `erase` are implemented in terms of `find`, we describe 116 // `find` first. To `find` a value `x`, we compute `hash(x)`. From 117 // `H1(hash(x))` and the capacity, we construct a `probe_seq` that visits every 118 // group of slots in some interesting order. 119 // 120 // We now walk through these indices. At each index, we select the entire group 121 // starting with that index and extract potential candidates: occupied slots 122 // with a control byte equal to `H2(hash(x))`. 
// If we find an empty slot in the
// group, we stop and return an error. Each candidate slot `y` is compared with
// `x`; if `x == y`, we are done and return `&y`; otherwise we continue to the
// next probe index. Tombstones effectively behave like full slots that never
// match the value we're looking for.
//
// The `H2` bits ensure when we compare a slot to an object with `==`, we are
// likely to have actually found the object. That is, the chance is low that
// `==` is called and returns `false`. Thus, when we search for an object, we
// are unlikely to call `==` many times. This likelihood can be analyzed as
// follows (assuming that H2 is a random enough hash function).
//
// Let's assume that there are `k` "wrong" objects that must be examined in a
// probe sequence. For example, when doing a `find` on an object that is in the
// table, `k` is the number of objects between the start of the probe sequence
// and the final found object (not including the final found object). The
// expected number of objects with an H2 match is then `k/128`. Measurements
// and analysis indicate that even at high load factors, `k` is less than 32,
// meaning that the number of "false positive" comparisons we must perform is
// less than 1/8 per `find`.
//
// `insert` is implemented in terms of `unchecked_insert`, which inserts a
// value presumed to not be in the table (violating this requirement will cause
// the table to behave erratically). Given `x` and its hash `hash(x)`, to insert
// it, we construct a `probe_seq` once again, and use it to find the first
// group with an unoccupied (empty *or* deleted) slot. We place `x` into the
// first such slot in the group and mark it as full with `x`'s H2.
//
// To `insert`, we compose `unchecked_insert` with `find`. We compute `h(x)` and
// perform a `find` to see if it's already present; if it is, we're done.
// If it's not, we may decide the table is getting overcrowded (i.e. the load
// factor is greater than 7/8 for big tables; `is_small()` tables use a max load
// factor of 1); in this case, we allocate a bigger array, `unchecked_insert`
// each element of the table into the new array (we know that no insertion here
// will insert an already-present value), and discard the old backing array. At
// this point, we may `unchecked_insert` the value `x`.
//
// Below, `unchecked_insert` is partly implemented by `prepare_insert`, which
// presents a viable, initialized slot pointee to the caller.
//
// `erase` is implemented in terms of `erase_at`, which takes an index to a
// slot. Given an offset, we simply create a tombstone and destroy its contents.
// If we can prove that the slot would not appear in a probe sequence, we can
// mark the slot as empty, instead. We can prove this by observing that if a
// group has any empty slots, it has never been full (assuming we never create
// an empty slot in a group with no empties, which this heuristic guarantees we
// never do) and find would stop at this group anyways (since it does not probe
// beyond groups with empties).
//
// `erase` is `erase_at` composed with `find`: if we
// have a value `x`, we can perform a `find`, and then `erase_at` the resulting
// slot.
//
// To iterate, we simply traverse the array, skipping empty and deleted slots
// and stopping when we hit a `kSentinel`.
177 178 #ifndef ABSL_CONTAINER_INTERNAL_RAW_HASH_SET_H_ 179 #define ABSL_CONTAINER_INTERNAL_RAW_HASH_SET_H_ 180 181 #include <algorithm> 182 #include <cassert> 183 #include <cmath> 184 #include <cstddef> 185 #include <cstdint> 186 #include <cstring> 187 #include <functional> 188 #include <initializer_list> 189 #include <iterator> 190 #include <limits> 191 #include <memory> 192 #include <tuple> 193 #include <type_traits> 194 #include <utility> 195 196 #include "absl/base/attributes.h" 197 #include "absl/base/config.h" 198 #include "absl/base/internal/endian.h" 199 #include "absl/base/internal/iterator_traits.h" 200 #include "absl/base/internal/raw_logging.h" 201 #include "absl/base/macros.h" 202 #include "absl/base/optimization.h" 203 #include "absl/base/options.h" 204 #include "absl/base/port.h" 205 #include "absl/base/prefetch.h" 206 #include "absl/container/internal/common.h" // IWYU pragma: export // for node_handle 207 #include "absl/container/internal/common_policy_traits.h" 208 #include "absl/container/internal/compressed_tuple.h" 209 #include "absl/container/internal/container_memory.h" 210 #include "absl/container/internal/hash_function_defaults.h" 211 #include "absl/container/internal/hash_policy_traits.h" 212 #include "absl/container/internal/hashtable_control_bytes.h" 213 #include "absl/container/internal/hashtable_debug_hooks.h" 214 #include "absl/container/internal/hashtablez_sampler.h" 215 #include "absl/functional/function_ref.h" 216 #include "absl/hash/hash.h" 217 #include "absl/memory/memory.h" 218 #include "absl/meta/type_traits.h" 219 #include "absl/numeric/bits.h" 220 #include "absl/utility/utility.h" 221 222 namespace absl { 223 ABSL_NAMESPACE_BEGIN 224 namespace container_internal { 225 226 #ifdef ABSL_SWISSTABLE_ENABLE_GENERATIONS 227 #error ABSL_SWISSTABLE_ENABLE_GENERATIONS cannot be directly set 228 #elif (defined(ABSL_HAVE_ADDRESS_SANITIZER) || \ 229 defined(ABSL_HAVE_HWADDRESS_SANITIZER) || \ 230 defined(ABSL_HAVE_MEMORY_SANITIZER)) && \ 
231 !defined(NDEBUG_SANITIZER) // If defined, performance is important. 232 // When compiled in sanitizer mode, we add generation integers to the backing 233 // array and iterators. In the backing array, we store the generation between 234 // the control bytes and the slots. When iterators are dereferenced, we assert 235 // that the container has not been mutated in a way that could cause iterator 236 // invalidation since the iterator was initialized. 237 #define ABSL_SWISSTABLE_ENABLE_GENERATIONS 238 #endif 239 240 #ifdef ABSL_SWISSTABLE_ASSERT 241 #error ABSL_SWISSTABLE_ASSERT cannot be directly set 242 #else 243 // We use this macro for assertions that users may see when the table is in an 244 // invalid state that sanitizers may help diagnose. 245 #define ABSL_SWISSTABLE_ASSERT(CONDITION) \ 246 assert((CONDITION) && "Try enabling sanitizers.") 247 #endif 248 249 // We use uint8_t so we don't need to worry about padding. 250 using GenerationType = uint8_t; 251 252 // A sentinel value for empty generations. Using 0 makes it easy to constexpr 253 // initialize an array of this value. 254 constexpr GenerationType SentinelEmptyGeneration() { return 0; } 255 256 constexpr GenerationType NextGeneration(GenerationType generation) { 257 return ++generation == SentinelEmptyGeneration() ? ++generation : generation; 258 } 259 260 #ifdef ABSL_SWISSTABLE_ENABLE_GENERATIONS 261 constexpr bool SwisstableGenerationsEnabled() { return true; } 262 constexpr size_t NumGenerationBytes() { return sizeof(GenerationType); } 263 #else 264 constexpr bool SwisstableGenerationsEnabled() { return false; } 265 constexpr size_t NumGenerationBytes() { return 0; } 266 #endif 267 268 // Returns true if we should assert that the table is not accessed after it has 269 // been destroyed or during the destruction of the table. 
270 constexpr bool SwisstableAssertAccessToDestroyedTable() { 271 #ifndef NDEBUG 272 return true; 273 #endif 274 return SwisstableGenerationsEnabled(); 275 } 276 277 template <typename AllocType> 278 void SwapAlloc(AllocType& lhs, AllocType& rhs, 279 std::true_type /* propagate_on_container_swap */) { 280 using std::swap; 281 swap(lhs, rhs); 282 } 283 template <typename AllocType> 284 void SwapAlloc(AllocType& lhs, AllocType& rhs, 285 std::false_type /* propagate_on_container_swap */) { 286 (void)lhs; 287 (void)rhs; 288 assert(lhs == rhs && 289 "It's UB to call swap with unequal non-propagating allocators."); 290 } 291 292 template <typename AllocType> 293 void CopyAlloc(AllocType& lhs, AllocType& rhs, 294 std::true_type /* propagate_alloc */) { 295 lhs = rhs; 296 } 297 template <typename AllocType> 298 void CopyAlloc(AllocType&, AllocType&, std::false_type /* propagate_alloc */) {} 299 300 // The state for a probe sequence. 301 // 302 // Currently, the sequence is a triangular progression of the form 303 // 304 // p(i) := Width * (i^2 + i)/2 + hash (mod mask + 1) 305 // 306 // The use of `Width` ensures that each probe step does not overlap groups; 307 // the sequence effectively outputs the addresses of *groups* (although not 308 // necessarily aligned to any boundary). The `Group` machinery allows us 309 // to check an entire group with minimal branching. 310 // 311 // Wrapping around at `mask + 1` is important, but not for the obvious reason. 312 // As described above, the first few entries of the control byte array 313 // are mirrored at the end of the array, which `Group` will find and use 314 // for selecting candidates. However, when those candidates' slots are 315 // actually inspected, there are no corresponding slots for the cloned bytes, 316 // so we need to make sure we've treated those offsets as "wrapping around". 
317 // 318 // It turns out that this probe sequence visits every group exactly once if the 319 // number of groups is a power of two, since (i^2+i)/2 is a bijection in 320 // Z/(2^m). See https://en.wikipedia.org/wiki/Quadratic_probing 321 template <size_t Width> 322 class probe_seq { 323 public: 324 // Creates a new probe sequence using `hash` as the initial value of the 325 // sequence and `mask` (usually the capacity of the table) as the mask to 326 // apply to each value in the progression. 327 probe_seq(size_t hash, size_t mask) { 328 ABSL_SWISSTABLE_ASSERT(((mask + 1) & mask) == 0 && "not a mask"); 329 mask_ = mask; 330 offset_ = hash & mask_; 331 } 332 333 // The offset within the table, i.e., the value `p(i)` above. 334 size_t offset() const { return offset_; } 335 size_t offset(size_t i) const { return (offset_ + i) & mask_; } 336 337 void next() { 338 index_ += Width; 339 offset_ += index_; 340 offset_ &= mask_; 341 } 342 // 0-based probe index, a multiple of `Width`. 343 size_t index() const { return index_; } 344 345 private: 346 size_t mask_; 347 size_t offset_; 348 size_t index_ = 0; 349 }; 350 351 template <class ContainerKey, class Hash, class Eq> 352 struct RequireUsableKey { 353 template <class PassedKey, class... Args> 354 std::pair< 355 decltype(std::declval<const Hash&>()(std::declval<const PassedKey&>())), 356 decltype(std::declval<const Eq&>()(std::declval<const ContainerKey&>(), 357 std::declval<const PassedKey&>()))>* 358 operator()(const PassedKey&, const Args&...) const; 359 }; 360 361 template <class E, class Policy, class Hash, class Eq, class... Ts> 362 struct IsDecomposable : std::false_type {}; 363 364 template <class Policy, class Hash, class Eq, class... 
Ts> 365 struct IsDecomposable< 366 absl::void_t<decltype(Policy::apply( 367 RequireUsableKey<typename Policy::key_type, Hash, Eq>(), 368 std::declval<Ts>()...))>, 369 Policy, Hash, Eq, Ts...> : std::true_type {}; 370 371 // TODO(alkis): Switch to std::is_nothrow_swappable when gcc/clang supports it. 372 template <class T> 373 constexpr bool IsNoThrowSwappable(std::true_type = {} /* is_swappable */) { 374 using std::swap; 375 return noexcept(swap(std::declval<T&>(), std::declval<T&>())); 376 } 377 template <class T> 378 constexpr bool IsNoThrowSwappable(std::false_type /* is_swappable */) { 379 return false; 380 } 381 382 // See definition comment for why this is size 32. 383 ABSL_DLL extern const ctrl_t kEmptyGroup[32]; 384 385 // We use these sentinel capacity values in debug mode to indicate different 386 // classes of bugs. 387 enum InvalidCapacity : size_t { 388 kAboveMaxValidCapacity = ~size_t{} - 100, 389 kReentrance, 390 kDestroyed, 391 392 // These two must be last because we use `>= kMovedFrom` to mean moved-from. 393 kMovedFrom, 394 kSelfMovedFrom, 395 }; 396 397 // Returns a pointer to a control byte group that can be used by empty tables. 398 inline ctrl_t* EmptyGroup() { 399 // Const must be cast away here; no uses of this function will actually write 400 // to it because it is only used for empty tables. 401 return const_cast<ctrl_t*>(kEmptyGroup + 16); 402 } 403 404 // For use in SOO iterators. 405 // TODO(b/289225379): we could potentially get rid of this by adding an is_soo 406 // bit in iterators. This would add branches but reduce cache misses. 407 ABSL_DLL extern const ctrl_t kSooControl[17]; 408 409 // Returns a pointer to a full byte followed by a sentinel byte. 410 inline ctrl_t* SooControl() { 411 // Const must be cast away here; no uses of this function will actually write 412 // to it because it is only used for SOO iterators. 413 return const_cast<ctrl_t*>(kSooControl); 414 } 415 // Whether ctrl is from the SooControl array. 
416 inline bool IsSooControl(const ctrl_t* ctrl) { return ctrl == SooControl(); } 417 418 // Returns a pointer to a generation to use for an empty hashtable. 419 GenerationType* EmptyGeneration(); 420 421 // Returns whether `generation` is a generation for an empty hashtable that 422 // could be returned by EmptyGeneration(). 423 inline bool IsEmptyGeneration(const GenerationType* generation) { 424 return *generation == SentinelEmptyGeneration(); 425 } 426 427 // We only allow a maximum of 1 SOO element, which makes the implementation 428 // much simpler. Complications with multiple SOO elements include: 429 // - Satisfying the guarantee that erasing one element doesn't invalidate 430 // iterators to other elements means we would probably need actual SOO 431 // control bytes. 432 // - In order to prevent user code from depending on iteration order for small 433 // tables, we would need to randomize the iteration order somehow. 434 constexpr size_t SooCapacity() { return 1; } 435 // Sentinel type to indicate SOO CommonFields construction. 436 struct soo_tag_t {}; 437 // Sentinel type to indicate SOO CommonFields construction with full size. 438 struct full_soo_tag_t {}; 439 // Sentinel type to indicate non-SOO CommonFields construction. 440 struct non_soo_tag_t {}; 441 // Sentinel value to indicate an uninitialized value explicitly. 442 struct uninitialized_tag_t {}; 443 // Sentinel value to indicate creation of an empty table without a seed. 444 struct no_seed_empty_tag_t {}; 445 446 // Per table hash salt. This gets mixed into H1 to randomize iteration order 447 // per-table. 448 // The seed is needed to ensure non-determinism of iteration order. 449 class PerTableSeed { 450 public: 451 // The number of bits in the seed. 452 // It is big enough to ensure non-determinism of iteration order. 453 // We store the seed inside a uint64_t together with size and other metadata. 
454 // Using 16 bits allows us to save one `and` instruction in H1 (we use movzwl 455 // instead of movq+and). 456 static constexpr size_t kBitCount = 16; 457 458 // Returns the seed for the table. Only the lowest kBitCount are non zero. 459 size_t seed() const { return seed_; } 460 461 private: 462 friend class HashtableSize; 463 explicit PerTableSeed(size_t seed) : seed_(seed) {} 464 465 const size_t seed_; 466 }; 467 468 // Returns next seed base number that is used for generating per-table seeds. 469 // Only the lowest PerTableSeed::kBitCount bits are used for actual hash table 470 // seed. 471 inline size_t NextSeedBaseNumber() { 472 thread_local size_t seed = reinterpret_cast<uintptr_t>(&seed); 473 if constexpr (sizeof(size_t) == 4) { 474 seed += uint32_t{0xcc9e2d51}; 475 } else { 476 seed += uint64_t{0xdcb22ca68cb134ed}; 477 } 478 return seed; 479 } 480 481 // The size and also has additionally 482 // 1) one bit that stores whether we have infoz. 483 // 2) PerTableSeed::kBitCount bits for the seed. 484 class HashtableSize { 485 public: 486 static constexpr size_t kSizeBitCount = 64 - PerTableSeed::kBitCount - 1; 487 488 explicit HashtableSize(uninitialized_tag_t) {} 489 explicit HashtableSize(no_seed_empty_tag_t) : data_(0) {} 490 explicit HashtableSize(full_soo_tag_t) : data_(kSizeOneNoMetadata) {} 491 492 // Returns actual size of the table. 493 size_t size() const { return static_cast<size_t>(data_ >> kSizeShift); } 494 void increment_size() { data_ += kSizeOneNoMetadata; } 495 void increment_size(size_t size) { 496 data_ += static_cast<uint64_t>(size) * kSizeOneNoMetadata; 497 } 498 void decrement_size() { data_ -= kSizeOneNoMetadata; } 499 // Returns true if the table is empty. 500 bool empty() const { return data_ < kSizeOneNoMetadata; } 501 // Sets the size to zero, but keeps all the metadata bits. 
502 void set_size_to_zero_keep_metadata() { data_ = data_ & kMetadataMask; } 503 504 PerTableSeed seed() const { 505 return PerTableSeed(static_cast<size_t>(data_) & kSeedMask); 506 } 507 508 void generate_new_seed() { 509 size_t seed = NextSeedBaseNumber(); 510 data_ = (data_ & ~kSeedMask) | (seed & kSeedMask); 511 } 512 513 // Returns true if the table has infoz. 514 bool has_infoz() const { 515 return ABSL_PREDICT_FALSE((data_ & kHasInfozMask) != 0); 516 } 517 518 // Sets the has_infoz bit. 519 void set_has_infoz() { data_ |= kHasInfozMask; } 520 521 private: 522 static constexpr size_t kSizeShift = 64 - kSizeBitCount; 523 static constexpr uint64_t kSizeOneNoMetadata = uint64_t{1} << kSizeShift; 524 static constexpr uint64_t kMetadataMask = kSizeOneNoMetadata - 1; 525 static constexpr uint64_t kSeedMask = 526 (uint64_t{1} << PerTableSeed::kBitCount) - 1; 527 // The next bit after the seed. 528 static constexpr uint64_t kHasInfozMask = kSeedMask + 1; 529 uint64_t data_; 530 }; 531 532 // Extracts the H1 portion of a hash: 57 bits mixed with a per-table seed. 533 inline size_t H1(size_t hash, PerTableSeed seed) { 534 return (hash >> 7) ^ seed.seed(); 535 } 536 537 // Extracts the H2 portion of a hash: the 7 bits not used for H1. 538 // 539 // These are used as an occupied control byte. 540 inline h2_t H2(size_t hash) { return hash & 0x7F; } 541 542 // Mixes a randomly generated per-process seed with `hash` and `ctrl` to 543 // randomize insertion order within groups. 544 bool ShouldInsertBackwardsForDebug(size_t capacity, size_t hash, 545 PerTableSeed seed); 546 547 ABSL_ATTRIBUTE_ALWAYS_INLINE inline bool ShouldInsertBackwards( 548 ABSL_ATTRIBUTE_UNUSED size_t capacity, ABSL_ATTRIBUTE_UNUSED size_t hash, 549 ABSL_ATTRIBUTE_UNUSED PerTableSeed seed) { 550 #if defined(NDEBUG) 551 return false; 552 #else 553 return ShouldInsertBackwardsForDebug(capacity, hash, seed); 554 #endif 555 } 556 557 // Returns insert position for the given mask. 
558 // We want to add entropy even when ASLR is not enabled. 559 // In debug build we will randomly insert in either the front or back of 560 // the group. 561 // TODO(kfm,sbenza): revisit after we do unconditional mixing 562 template <class Mask> 563 ABSL_ATTRIBUTE_ALWAYS_INLINE inline auto GetInsertionOffset( 564 Mask mask, ABSL_ATTRIBUTE_UNUSED size_t capacity, 565 ABSL_ATTRIBUTE_UNUSED size_t hash, 566 ABSL_ATTRIBUTE_UNUSED PerTableSeed seed) { 567 #if defined(NDEBUG) 568 return mask.LowestBitSet(); 569 #else 570 return ShouldInsertBackwardsForDebug(capacity, hash, seed) 571 ? mask.HighestBitSet() 572 : mask.LowestBitSet(); 573 #endif 574 } 575 576 // When there is an insertion with no reserved growth, we rehash with 577 // probability `min(1, RehashProbabilityConstant() / capacity())`. Using a 578 // constant divided by capacity ensures that inserting N elements is still O(N) 579 // in the average case. Using the constant 16 means that we expect to rehash ~8 580 // times more often than when generations are disabled. We are adding expected 581 // rehash_probability * #insertions/capacity_growth = 16/capacity * ((7/8 - 582 // 7/16) * capacity)/capacity_growth = ~7 extra rehashes per capacity growth. 583 inline size_t RehashProbabilityConstant() { return 16; } 584 585 class CommonFieldsGenerationInfoEnabled { 586 // A sentinel value for reserved_growth_ indicating that we just ran out of 587 // reserved growth on the last insertion. When reserve is called and then 588 // insertions take place, reserved_growth_'s state machine is N, ..., 1, 589 // kReservedGrowthJustRanOut, 0. 
590 static constexpr size_t kReservedGrowthJustRanOut = 591 (std::numeric_limits<size_t>::max)(); 592 593 public: 594 CommonFieldsGenerationInfoEnabled() = default; 595 CommonFieldsGenerationInfoEnabled(CommonFieldsGenerationInfoEnabled&& that) 596 : reserved_growth_(that.reserved_growth_), 597 reservation_size_(that.reservation_size_), 598 generation_(that.generation_) { 599 that.reserved_growth_ = 0; 600 that.reservation_size_ = 0; 601 that.generation_ = EmptyGeneration(); 602 } 603 CommonFieldsGenerationInfoEnabled& operator=( 604 CommonFieldsGenerationInfoEnabled&&) = default; 605 606 // Whether we should rehash on insert in order to detect bugs of using invalid 607 // references. We rehash on the first insertion after reserved_growth_ reaches 608 // 0 after a call to reserve. We also do a rehash with low probability 609 // whenever reserved_growth_ is zero. 610 bool should_rehash_for_bug_detection_on_insert(PerTableSeed seed, 611 size_t capacity) const; 612 // Similar to above, except that we don't depend on reserved_growth_. 
  bool should_rehash_for_bug_detection_on_move(PerTableSeed seed,
                                               size_t capacity) const;
  // Called on every successful insertion. Consumes one unit of reserved
  // growth if any remains; otherwise bumps the generation, which invalidates
  // all outstanding iterators.
  void maybe_increment_generation_on_insert() {
    if (reserved_growth_ == kReservedGrowthJustRanOut) reserved_growth_ = 0;

    if (reserved_growth_ > 0) {
      if (--reserved_growth_ == 0) reserved_growth_ = kReservedGrowthJustRanOut;
    } else {
      increment_generation();
    }
  }
  // Advances the shared generation counter, invalidating iterators that
  // captured the previous value.
  void increment_generation() { *generation_ = NextGeneration(*generation_); }
  void reset_reserved_growth(size_t reservation, size_t size) {
    reserved_growth_ = reservation - size;
  }
  size_t reserved_growth() const { return reserved_growth_; }
  void set_reserved_growth(size_t r) { reserved_growth_ = r; }
  size_t reservation_size() const { return reservation_size_; }
  void set_reservation_size(size_t r) { reservation_size_ = r; }
  GenerationType generation() const { return *generation_; }
  void set_generation(GenerationType g) { *generation_ = g; }
  GenerationType* generation_ptr() const { return generation_; }
  void set_generation_ptr(GenerationType* g) { generation_ = g; }

 private:
  // The number of insertions remaining that are guaranteed to not rehash due to
  // a prior call to reserve. Note: we store reserved growth in addition to
  // reservation size because calls to erase() decrease size_ but don't decrease
  // reserved growth.
  size_t reserved_growth_ = 0;
  // The maximum argument to reserve() since the container was cleared. We need
  // to keep track of this, in addition to reserved growth, because we reset
  // reserved growth to this when erase(begin(), end()) is called.
  size_t reservation_size_ = 0;
  // Pointer to the generation counter, which is used to validate iterators and
  // is stored in the backing array between the control bytes and the slots.
  // Note that we can't store the generation inside the container itself and
  // keep a pointer to the container in the iterators because iterators must
  // remain valid when the container is moved.
  // Note: we could derive this pointer from the control pointer, but it makes
  // the code more complicated, and there's a benefit in having the sizes of
  // raw_hash_set in sanitizer mode and non-sanitizer mode a bit more different,
  // which is that tests are less likely to rely on the size remaining the same.
  GenerationType* generation_ = EmptyGeneration();
};

// No-op counterpart of CommonFieldsGenerationInfoEnabled, selected when
// generation checking is compiled out. All queries report "nothing to do".
class CommonFieldsGenerationInfoDisabled {
 public:
  CommonFieldsGenerationInfoDisabled() = default;
  CommonFieldsGenerationInfoDisabled(CommonFieldsGenerationInfoDisabled&&) =
      default;
  CommonFieldsGenerationInfoDisabled& operator=(
      CommonFieldsGenerationInfoDisabled&&) = default;

  bool should_rehash_for_bug_detection_on_insert(PerTableSeed, size_t) const {
    return false;
  }
  bool should_rehash_for_bug_detection_on_move(PerTableSeed, size_t) const {
    return false;
  }
  void maybe_increment_generation_on_insert() {}
  void increment_generation() {}
  void reset_reserved_growth(size_t, size_t) {}
  size_t reserved_growth() const { return 0; }
  void set_reserved_growth(size_t) {}
  size_t reservation_size() const { return 0; }
  void set_reservation_size(size_t) {}
  GenerationType generation() const { return 0; }
  void set_generation(GenerationType) {}
  GenerationType* generation_ptr() const { return nullptr; }
  void set_generation_ptr(GenerationType*) {}
};

// Iterator-side generation bookkeeping: captures the table's generation at
// construction time so that later use of the iterator can detect rehashes
// and moves that happened in between.
class HashSetIteratorGenerationInfoEnabled {
 public:
  HashSetIteratorGenerationInfoEnabled() = default;
  explicit HashSetIteratorGenerationInfoEnabled(
      const GenerationType* generation_ptr)
      : generation_ptr_(generation_ptr), generation_(*generation_ptr) {}

  GenerationType generation() const { return generation_; }

  // Re-reads the table's current generation, revalidating this iterator.
  void reset_generation() { generation_ = *generation_ptr_; }
  const GenerationType* generation_ptr() const { return generation_ptr_; }
  void set_generation_ptr(const GenerationType* ptr) { generation_ptr_ = ptr; }

 private:
  const GenerationType* generation_ptr_ = EmptyGeneration();
  GenerationType generation_ = *generation_ptr_;
};

// No-op counterpart of HashSetIteratorGenerationInfoEnabled.
class HashSetIteratorGenerationInfoDisabled {
 public:
  HashSetIteratorGenerationInfoDisabled() = default;
  explicit HashSetIteratorGenerationInfoDisabled(const GenerationType*) {}

  GenerationType generation() const { return 0; }
  void reset_generation() {}
  const GenerationType* generation_ptr() const { return nullptr; }
  void set_generation_ptr(const GenerationType*) {}
};

#ifdef ABSL_SWISSTABLE_ENABLE_GENERATIONS
using CommonFieldsGenerationInfo = CommonFieldsGenerationInfoEnabled;
using HashSetIteratorGenerationInfo = HashSetIteratorGenerationInfoEnabled;
#else
using CommonFieldsGenerationInfo = CommonFieldsGenerationInfoDisabled;
using HashSetIteratorGenerationInfo = HashSetIteratorGenerationInfoDisabled;
#endif

// Stores the information regarding the number of slots we can still fill
// without needing to rehash.
//
// We want to ensure sufficient number of empty slots in the table in order
// to keep probe sequences relatively short. Empty slot in the probe group
// is required to stop probing.
//
// Tombstones (kDeleted slots) are not included in the growth capacity,
// because we'd like to rehash when the table is filled with tombstones and/or
// full slots.
//
// GrowthInfo also stores a bit that encodes whether table may have any
// deleted slots.
// Most of the tables (>95%) have no deleted slots, so some functions can
// be more efficient with this information.
737 // 738 // Callers can also force a rehash via the standard `rehash(0)`, 739 // which will recompute this value as a side-effect. 740 // 741 // See also `CapacityToGrowth()`. 742 class GrowthInfo { 743 public: 744 // Leaves data member uninitialized. 745 GrowthInfo() = default; 746 747 // Initializes the GrowthInfo assuming we can grow `growth_left` elements 748 // and there are no kDeleted slots in the table. 749 void InitGrowthLeftNoDeleted(size_t growth_left) { 750 growth_left_info_ = growth_left; 751 } 752 753 // Overwrites single full slot with an empty slot. 754 void OverwriteFullAsEmpty() { ++growth_left_info_; } 755 756 // Overwrites single empty slot with a full slot. 757 void OverwriteEmptyAsFull() { 758 ABSL_SWISSTABLE_ASSERT(GetGrowthLeft() > 0); 759 --growth_left_info_; 760 } 761 762 // Overwrites several empty slots with full slots. 763 void OverwriteManyEmptyAsFull(size_t count) { 764 ABSL_SWISSTABLE_ASSERT(GetGrowthLeft() >= count); 765 growth_left_info_ -= count; 766 } 767 768 // Overwrites specified control element with full slot. 769 void OverwriteControlAsFull(ctrl_t ctrl) { 770 ABSL_SWISSTABLE_ASSERT(GetGrowthLeft() >= 771 static_cast<size_t>(IsEmpty(ctrl))); 772 growth_left_info_ -= static_cast<size_t>(IsEmpty(ctrl)); 773 } 774 775 // Overwrites single full slot with a deleted slot. 776 void OverwriteFullAsDeleted() { growth_left_info_ |= kDeletedBit; } 777 778 // Returns true if table satisfies two properties: 779 // 1. Guaranteed to have no kDeleted slots. 780 // 2. There is a place for at least one element to grow. 781 bool HasNoDeletedAndGrowthLeft() const { 782 return static_cast<std::make_signed_t<size_t>>(growth_left_info_) > 0; 783 } 784 785 // Returns true if the table satisfies two properties: 786 // 1. Guaranteed to have no kDeleted slots. 787 // 2. There is no growth left. 
788 bool HasNoGrowthLeftAndNoDeleted() const { return growth_left_info_ == 0; } 789 790 // Returns true if GetGrowthLeft() == 0, but must be called only if 791 // HasNoDeleted() is false. It is slightly more efficient. 792 bool HasNoGrowthLeftAssumingMayHaveDeleted() const { 793 ABSL_SWISSTABLE_ASSERT(!HasNoDeleted()); 794 return growth_left_info_ == kDeletedBit; 795 } 796 797 // Returns true if table guaranteed to have no kDeleted slots. 798 bool HasNoDeleted() const { 799 return static_cast<std::make_signed_t<size_t>>(growth_left_info_) >= 0; 800 } 801 802 // Returns the number of elements left to grow. 803 size_t GetGrowthLeft() const { return growth_left_info_ & kGrowthLeftMask; } 804 805 private: 806 static constexpr size_t kGrowthLeftMask = ((~size_t{}) >> 1); 807 static constexpr size_t kDeletedBit = ~kGrowthLeftMask; 808 // Topmost bit signal whenever there are deleted slots. 809 size_t growth_left_info_; 810 }; 811 812 static_assert(sizeof(GrowthInfo) == sizeof(size_t), ""); 813 static_assert(alignof(GrowthInfo) == alignof(size_t), ""); 814 815 // Returns whether `n` is a valid capacity (i.e., number of slots). 816 // 817 // A valid capacity is a non-zero integer `2^m - 1`. 818 constexpr bool IsValidCapacity(size_t n) { return ((n + 1) & n) == 0 && n > 0; } 819 820 // Returns the number of "cloned control bytes". 821 // 822 // This is the number of control bytes that are present both at the beginning 823 // of the control byte array and at the end, such that we can create a 824 // `Group::kWidth`-width probe window starting from any control byte. 825 constexpr size_t NumClonedBytes() { return Group::kWidth - 1; } 826 827 // Returns the number of control bytes including cloned. 828 constexpr size_t NumControlBytes(size_t capacity) { 829 return capacity + 1 + NumClonedBytes(); 830 } 831 832 // Computes the offset from the start of the backing allocation of control. 833 // infoz and growth_info are stored at the beginning of the backing array. 
constexpr size_t ControlOffset(bool has_infoz) {
  return (has_infoz ? sizeof(HashtablezInfoHandle) : 0) + sizeof(GrowthInfo);
}

// Helper class for computing offsets and allocation size of hash set fields.
class RawHashSetLayout {
 public:
  // Precomputes all offsets for a table with `capacity` slots of `slot_size`
  // bytes each, aligned to `slot_align`. `has_infoz` reserves room for the
  // sampling handle at the front of the allocation.
  explicit RawHashSetLayout(size_t capacity, size_t slot_size,
                            size_t slot_align, bool has_infoz)
      : control_offset_(ControlOffset(has_infoz)),
        generation_offset_(control_offset_ + NumControlBytes(capacity)),
        // Round up to `slot_align`: for unsigned arithmetic
        // `~slot_align + 1 == -slot_align`, so this is the usual
        // add-then-mask round-up idiom (slot_align must be a power of two).
        slot_offset_(
            (generation_offset_ + NumGenerationBytes() + slot_align - 1) &
            (~slot_align + 1)),
        alloc_size_(slot_offset_ + capacity * slot_size) {
    ABSL_SWISSTABLE_ASSERT(IsValidCapacity(capacity));
    // Guard against overflow of `slot_offset_ + capacity * slot_size`.
    ABSL_SWISSTABLE_ASSERT(
        slot_size <=
        ((std::numeric_limits<size_t>::max)() - slot_offset_) / capacity);
  }

  // Returns precomputed offset from the start of the backing allocation of
  // control.
  size_t control_offset() const { return control_offset_; }

  // Given the capacity of a table, computes the offset (from the start of the
  // backing allocation) of the generation counter (if it exists).
  size_t generation_offset() const { return generation_offset_; }

  // Given the capacity of a table, computes the offset (from the start of the
  // backing allocation) at which the slots begin.
  size_t slot_offset() const { return slot_offset_; }

  // Given the capacity of a table, computes the total size of the backing
  // array.
  size_t alloc_size() const { return alloc_size_; }

 private:
  size_t control_offset_;
  size_t generation_offset_;
  size_t slot_offset_;
  size_t alloc_size_;
};

struct HashtableFreeFunctionsAccess;

// Suppress erroneous uninitialized memory errors on GCC. For example, GCC
// thinks that the call to slot_array() in find_or_prepare_insert() is reading
// uninitialized memory, but slot_array is only called there when the table is
// non-empty and this memory is initialized when the table is non-empty.
#if !defined(__clang__) && defined(__GNUC__)
#define ABSL_SWISSTABLE_IGNORE_UNINITIALIZED(x)                    \
  _Pragma("GCC diagnostic push")                                   \
      _Pragma("GCC diagnostic ignored \"-Wmaybe-uninitialized\"")  \
          _Pragma("GCC diagnostic ignored \"-Wuninitialized\"") x; \
  _Pragma("GCC diagnostic pop")
#define ABSL_SWISSTABLE_IGNORE_UNINITIALIZED_RETURN(x) \
  ABSL_SWISSTABLE_IGNORE_UNINITIALIZED(return x)
#else
#define ABSL_SWISSTABLE_IGNORE_UNINITIALIZED(x) x
#define ABSL_SWISSTABLE_IGNORE_UNINITIALIZED_RETURN(x) return x
#endif

// This allows us to work around an uninitialized memory warning when
// constructing begin() iterators in empty hashtables.
union MaybeInitializedPtr {
  void* get() const { ABSL_SWISSTABLE_IGNORE_UNINITIALIZED_RETURN(p); }
  void set(void* ptr) { p = ptr; }

  void* p;
};

// The heap-allocated portion of a table's state: control bytes and slots.
struct HeapPtrs {
  explicit HeapPtrs(uninitialized_tag_t) {}
  explicit HeapPtrs(ctrl_t* c) : control(c) {}

  // The control bytes (and, also, a pointer near to the base of the backing
  // array).
  //
  // This contains `capacity + 1 + NumClonedBytes()` entries, even
  // when the table is empty (hence EmptyGroup).
  //
  // Note that growth_info is stored immediately before this pointer.
  // May be uninitialized for SOO tables.
  ctrl_t* control;

  // The beginning of the slots, located at `SlotOffset()` bytes after
  // `control`. May be uninitialized for empty tables.
  // Note: we can't use `slots` because Qt defines "slots" as a macro.
  MaybeInitializedPtr slot_array;
};

// Returns the maximum size of the SOO slot.
constexpr size_t MaxSooSlotSize() { return sizeof(HeapPtrs); }

// Manages the backing array pointers or the SOO slot. When raw_hash_set::is_soo
// is true, the SOO slot is stored in `soo_data`. Otherwise, we use `heap`.
union HeapOrSoo {
  explicit HeapOrSoo(uninitialized_tag_t) : heap(uninitialized_tag_t{}) {}
  explicit HeapOrSoo(ctrl_t* c) : heap(c) {}

  ctrl_t*& control() {
    ABSL_SWISSTABLE_IGNORE_UNINITIALIZED_RETURN(heap.control);
  }
  ctrl_t* control() const {
    ABSL_SWISSTABLE_IGNORE_UNINITIALIZED_RETURN(heap.control);
  }
  MaybeInitializedPtr& slot_array() {
    ABSL_SWISSTABLE_IGNORE_UNINITIALIZED_RETURN(heap.slot_array);
  }
  MaybeInitializedPtr slot_array() const {
    ABSL_SWISSTABLE_IGNORE_UNINITIALIZED_RETURN(heap.slot_array);
  }
  void* get_soo_data() {
    ABSL_SWISSTABLE_IGNORE_UNINITIALIZED_RETURN(soo_data);
  }
  const void* get_soo_data() const {
    ABSL_SWISSTABLE_IGNORE_UNINITIALIZED_RETURN(soo_data);
  }

  HeapPtrs heap;
  unsigned char soo_data[MaxSooSlotSize()];
};

// CommonFields hold the fields in raw_hash_set that do not depend
// on template parameters. This allows us to conveniently pass all
// of this state to helper functions as a single argument.
class CommonFields : public CommonFieldsGenerationInfo {
 public:
  // Constructs an empty SOO-capable table.
  explicit CommonFields(soo_tag_t)
      : capacity_(SooCapacity()),
        size_(no_seed_empty_tag_t{}),
        heap_or_soo_(uninitialized_tag_t{}) {}
  // Constructs an SOO table already holding its single element.
  explicit CommonFields(full_soo_tag_t)
      : capacity_(SooCapacity()),
        size_(full_soo_tag_t{}),
        heap_or_soo_(uninitialized_tag_t{}) {}
  // Constructs an empty non-SOO table pointing at the shared EmptyGroup().
  explicit CommonFields(non_soo_tag_t)
      : capacity_(0),
        size_(no_seed_empty_tag_t{}),
        heap_or_soo_(EmptyGroup()) {}
  // For use in swapping.
  explicit CommonFields(uninitialized_tag_t)
      : size_(uninitialized_tag_t{}), heap_or_soo_(uninitialized_tag_t{}) {}

  // Not copyable
  CommonFields(const CommonFields&) = delete;
  CommonFields& operator=(const CommonFields&) = delete;

  // Copy with guarantee that it is not SOO.
  CommonFields(non_soo_tag_t, const CommonFields& that)
      : capacity_(that.capacity_),
        size_(that.size_),
        heap_or_soo_(that.heap_or_soo_) {
  }

  // Movable
  CommonFields(CommonFields&& that) = default;
  CommonFields& operator=(CommonFields&&) = default;

  template <bool kSooEnabled>
  static CommonFields CreateDefault() {
    return kSooEnabled ? CommonFields{soo_tag_t{}}
                       : CommonFields{non_soo_tag_t{}};
  }

  // The inline data for SOO is written on top of control_/slots_.
  const void* soo_data() const { return heap_or_soo_.get_soo_data(); }
  void* soo_data() { return heap_or_soo_.get_soo_data(); }

  ctrl_t* control() const { return heap_or_soo_.control(); }

  // When we set the control bytes, we also often want to generate a new seed.
  // So we bundle these two operations together to make sure we don't forget to
  // generate a new seed.
  // The table will be invalidated if
  // `kGenerateSeed && !empty() && !is_single_group(capacity())` because H1 is
  // being changed. In such cases, we will need to rehash the table.
  template <bool kGenerateSeed>
  void set_control(ctrl_t* c) {
    heap_or_soo_.control() = c;
    if constexpr (kGenerateSeed) {
      generate_new_seed();
    }
  }
  void* backing_array_start() const {
    // growth_info (and maybe infoz) is stored before control bytes.
    ABSL_SWISSTABLE_ASSERT(
        reinterpret_cast<uintptr_t>(control()) % alignof(size_t) == 0);
    return control() - ControlOffset(has_infoz());
  }

  // Note: we can't use slots() because Qt defines "slots" as a macro.
  void* slot_array() const { return heap_or_soo_.slot_array().get(); }
  MaybeInitializedPtr slots_union() const { return heap_or_soo_.slot_array(); }
  void set_slots(void* s) { heap_or_soo_.slot_array().set(s); }

  // The number of filled slots.
  size_t size() const { return size_.size(); }
  // Sets the size to zero, but keeps hashinfoz bit and seed.
  void set_size_to_zero() { size_.set_size_to_zero_keep_metadata(); }
  void set_empty_soo() {
    AssertInSooMode();
    size_ = HashtableSize(no_seed_empty_tag_t{});
  }
  void set_full_soo() {
    AssertInSooMode();
    size_ = HashtableSize(full_soo_tag_t{});
  }
  void increment_size() {
    ABSL_SWISSTABLE_ASSERT(size() < capacity());
    size_.increment_size();
  }
  void increment_size(size_t n) {
    ABSL_SWISSTABLE_ASSERT(size() + n <= capacity());
    size_.increment_size(n);
  }
  void decrement_size() {
    ABSL_SWISSTABLE_ASSERT(!empty());
    size_.decrement_size();
  }
  bool empty() const { return size_.empty(); }

  // The seed used for the H1 part of the hash function.
  PerTableSeed seed() const { return size_.seed(); }
  // Generates a new seed for the H1 part of the hash function.
  // The table will be invalidated if
  // `kGenerateSeed && !empty() && !is_single_group(capacity())` because H1 is
  // being changed. In such cases, we will need to rehash the table.
  void generate_new_seed() { size_.generate_new_seed(); }

  // The total number of available slots.
  size_t capacity() const { return capacity_; }
  void set_capacity(size_t c) {
    // We allow setting above the max valid capacity for debugging purposes.
    ABSL_SWISSTABLE_ASSERT(c == 0 || IsValidCapacity(c) ||
                           c > kAboveMaxValidCapacity);
    capacity_ = c;
  }

  // The number of slots we can still fill without needing to rehash.
  // This is stored in the heap allocation before the control bytes.
  // TODO(b/289225379): experiment with moving growth_info back inline to
  // increase room for SOO.
  size_t growth_left() const { return growth_info().GetGrowthLeft(); }

  GrowthInfo& growth_info() {
    // GrowthInfo lives immediately before the control bytes (see
    // BackingArray layout at the top of the file).
    auto* gl_ptr = reinterpret_cast<GrowthInfo*>(control()) - 1;
    ABSL_SWISSTABLE_ASSERT(
        reinterpret_cast<uintptr_t>(gl_ptr) % alignof(GrowthInfo) == 0);
    return *gl_ptr;
  }
  GrowthInfo growth_info() const {
    return const_cast<CommonFields*>(this)->growth_info();
  }

  bool has_infoz() const { return size_.has_infoz(); }
  void set_has_infoz() { size_.set_has_infoz(); }

  HashtablezInfoHandle infoz() {
    return has_infoz()
               ? *reinterpret_cast<HashtablezInfoHandle*>(backing_array_start())
               : HashtablezInfoHandle();
  }
  void set_infoz(HashtablezInfoHandle infoz) {
    ABSL_SWISSTABLE_ASSERT(has_infoz());
    *reinterpret_cast<HashtablezInfoHandle*>(backing_array_start()) = infoz;
  }

  bool should_rehash_for_bug_detection_on_insert() const {
    if constexpr (!SwisstableGenerationsEnabled()) {
      return false;
    }
    // As an optimization, we avoid calling ShouldRehashForBugDetection if we
    // will end up rehashing anyways.
    if (growth_left() == 0) return false;
    return CommonFieldsGenerationInfo::
        should_rehash_for_bug_detection_on_insert(seed(), capacity());
  }
  bool should_rehash_for_bug_detection_on_move() const {
    return CommonFieldsGenerationInfo::should_rehash_for_bug_detection_on_move(
        seed(), capacity());
  }
  void reset_reserved_growth(size_t reservation) {
    CommonFieldsGenerationInfo::reset_reserved_growth(reservation, size());
  }

  // The size of the backing array allocation.
  size_t alloc_size(size_t slot_size, size_t slot_align) const {
    return RawHashSetLayout(capacity(), slot_size, slot_align, has_infoz())
        .alloc_size();
  }

  // Move fields other than heap_or_soo_.
  void move_non_heap_or_soo_fields(CommonFields& that) {
    static_cast<CommonFieldsGenerationInfo&>(*this) =
        std::move(static_cast<CommonFieldsGenerationInfo&>(that));
    capacity_ = that.capacity_;
    size_ = that.size_;
  }

  // Returns the number of control bytes set to kDeleted. For testing only.
  size_t TombstonesCount() const {
    return static_cast<size_t>(
        std::count(control(), control() + capacity(), ctrl_t::kDeleted));
  }

  // Helper to enable sanitizer mode validation to protect against reentrant
  // calls during element constructor/destructor.
  template <typename F>
  void RunWithReentrancyGuard(F f) {
#ifdef NDEBUG
    f();
    return;
#endif
    // In debug builds, temporarily poison capacity so that any reentrant
    // table access inside `f` trips an assertion.
    const size_t cap = capacity();
    set_capacity(InvalidCapacity::kReentrance);
    f();
    set_capacity(cap);
  }

 private:
  // We store the has_infoz bit in the lowest bit of size_.
  static constexpr size_t HasInfozShift() { return 1; }
  static constexpr size_t HasInfozMask() {
    return (size_t{1} << HasInfozShift()) - 1;
  }

  // We can't assert that SOO is enabled because we don't have SooEnabled(), but
  // we assert what we can.
  void AssertInSooMode() const {
    ABSL_SWISSTABLE_ASSERT(capacity() == SooCapacity());
    ABSL_SWISSTABLE_ASSERT(!has_infoz());
  }

  // The number of slots in the backing array. This is always 2^N-1 for an
  // integer N. NOTE: we tried experimenting with compressing the capacity and
  // storing it together with size_: (a) using 6 bits to store the corresponding
  // power (N in 2^N-1), and (b) storing 2^N as the most significant bit of
  // size_ and storing size in the low bits. Both of these experiments were
  // regressions, presumably because we need capacity to do find operations.
  size_t capacity_;

  // TODO(b/289225379): we could put size_ into HeapOrSoo and make capacity_
  // encode the size in SOO case. We would be making size()/capacity() more
  // expensive in order to have more SOO space.
  HashtableSize size_;

  // Either the control/slots pointers or the SOO slot.
  HeapOrSoo heap_or_soo_;
};

template <class Policy, class Hash, class Eq, class Alloc>
class raw_hash_set;

// Returns the next valid capacity after `n`.
constexpr size_t NextCapacity(size_t n) {
  ABSL_SWISSTABLE_ASSERT(IsValidCapacity(n) || n == 0);
  return n * 2 + 1;
}

// Applies the following mapping to every byte in the control array:
//   * kDeleted -> kEmpty
//   * kEmpty -> kEmpty
//   * _ -> kDeleted
// PRECONDITION:
//   IsValidCapacity(capacity)
//   ctrl[capacity] == ctrl_t::kSentinel
//   ctrl[i] != ctrl_t::kSentinel for all i < capacity
void ConvertDeletedToEmptyAndFullToDeleted(ctrl_t* ctrl, size_t capacity);

// Converts `n` into the next valid capacity, per `IsValidCapacity`.
constexpr size_t NormalizeCapacity(size_t n) {
  return n ? ~size_t{} >> countl_zero(n) : 1;
}

// General notes on capacity/growth methods below:
// - We use 7/8th as maximum load factor. For 16-wide groups, that gives an
//   average of two empty slots per group.
// - For (capacity+1) >= Group::kWidth, growth is 7/8*capacity.
// - For (capacity+1) < Group::kWidth, growth == capacity. In this case, we
//   never need to probe (the whole table fits in one group) so we don't need a
//   load factor less than 1.

// Given `capacity`, applies the load factor; i.e., it returns the maximum
// number of values we should put into the table before a resizing rehash.
constexpr size_t CapacityToGrowth(size_t capacity) {
  ABSL_SWISSTABLE_ASSERT(IsValidCapacity(capacity));
  // `capacity*7/8`
  if (Group::kWidth == 8 && capacity == 7) {
    // x-x/8 does not work when x==7.
    return 6;
  }
  return capacity - capacity / 8;
}

// Given `growth`, "unapplies" the load factor to find how large the capacity
// should be to stay within the load factor.
//
// This might not be a valid capacity and `NormalizeCapacity()` should be
// called on this.
constexpr size_t GrowthToLowerboundCapacity(size_t growth) {
  // `growth*8/7`
  if (Group::kWidth == 8 && growth == 7) {
    // x+(x-1)/7 does not work when x==7.
    return 8;
  }
  return growth + static_cast<size_t>((static_cast<int64_t>(growth) - 1) / 7);
}

// Selects an initial bucket count when constructing a table from the range
// [first, last). An explicit nonzero `bucket_count` wins. Otherwise, for
// random-access iterators the exact distance is used to compute a lower
// bound; weaker iterator categories yield 0 and the table grows on demand.
template <class InputIter>
size_t SelectBucketCountForIterRange(InputIter first, InputIter last,
                                     size_t bucket_count) {
  if (bucket_count != 0) {
    return bucket_count;
  }
  if (base_internal::IsAtLeastIterator<std::random_access_iterator_tag,
                                       InputIter>()) {
    return GrowthToLowerboundCapacity(
        static_cast<size_t>(std::distance(first, last)));
  }
  return 0;
}

// True when extra validity checking is compiled in: generations mode,
// hardened builds, or any non-NDEBUG build.
constexpr bool SwisstableDebugEnabled() {
#if defined(ABSL_SWISSTABLE_ENABLE_GENERATIONS) || \
    ABSL_OPTION_HARDENED == 1 || !defined(NDEBUG)
  return true;
#else
  return false;
#endif
}

// Crashes with a FATAL log unless the iterator described by `ctrl`,
// `generation` and `generation_ptr` points at a full (occupied) slot.
// `operation` names the calling operation for the error message.
// No-op when debug checks are disabled.
inline void AssertIsFull(const ctrl_t* ctrl, GenerationType generation,
                         const GenerationType* generation_ptr,
                         const char* operation) {
  if (!SwisstableDebugEnabled()) return;
  // `SwisstableDebugEnabled()` is also true for release builds with hardening
  // enabled. To minimize their impact in those builds:
  // - use `ABSL_PREDICT_FALSE()` to provide a compiler hint for code layout
  // - use `ABSL_RAW_LOG()` with a format string to reduce code size and improve
  //   the chances that the hot paths will be inlined.
  if (ABSL_PREDICT_FALSE(ctrl == nullptr)) {
    ABSL_RAW_LOG(FATAL, "%s called on end() iterator.", operation);
  }
  if (ABSL_PREDICT_FALSE(ctrl == EmptyGroup())) {
    ABSL_RAW_LOG(FATAL, "%s called on default-constructed iterator.",
                 operation);
  }
  if (SwisstableGenerationsEnabled()) {
    // With generations, a stale generation pinpoints rehash/move as the cause.
    if (ABSL_PREDICT_FALSE(generation != *generation_ptr)) {
      ABSL_RAW_LOG(FATAL,
                   "%s called on invalid iterator. The table could have "
                   "rehashed or moved since this iterator was initialized.",
                   operation);
    }
    if (ABSL_PREDICT_FALSE(!IsFull(*ctrl))) {
      ABSL_RAW_LOG(
          FATAL,
          "%s called on invalid iterator. The element was likely erased.",
          operation);
    }
  } else {
    if (ABSL_PREDICT_FALSE(!IsFull(*ctrl))) {
      ABSL_RAW_LOG(
          FATAL,
          "%s called on invalid iterator. The element might have been erased "
          "or the table might have rehashed. Consider running with "
          "--config=asan to diagnose rehashing issues.",
          operation);
    }
  }
}

// Note that for comparisons, null/end iterators are valid.
inline void AssertIsValidForComparison(const ctrl_t* ctrl,
                                       GenerationType generation,
                                       const GenerationType* generation_ptr) {
  if (!SwisstableDebugEnabled()) return;
  const bool ctrl_is_valid_for_comparison =
      ctrl == nullptr || ctrl == EmptyGroup() || IsFull(*ctrl);
  if (SwisstableGenerationsEnabled()) {
    if (ABSL_PREDICT_FALSE(generation != *generation_ptr)) {
      ABSL_RAW_LOG(FATAL,
                   "Invalid iterator comparison. The table could have rehashed "
                   "or moved since this iterator was initialized.");
    }
    if (ABSL_PREDICT_FALSE(!ctrl_is_valid_for_comparison)) {
      ABSL_RAW_LOG(
          FATAL, "Invalid iterator comparison. The element was likely erased.");
    }
  } else {
    ABSL_HARDENING_ASSERT(
        ctrl_is_valid_for_comparison &&
        "Invalid iterator comparison. The element might have been erased or "
        "the table might have rehashed. Consider running with --config=asan to "
        "diagnose rehashing issues.");
  }
}

// If the two iterators come from the same container, then their pointers will
// interleave such that ctrl_a <= ctrl_b < slot_a <= slot_b or vice/versa.
// Note: we take slots by reference so that it's not UB if they're uninitialized
// as long as we don't read them (when ctrl is null).
inline bool AreItersFromSameContainer(const ctrl_t* ctrl_a,
                                      const ctrl_t* ctrl_b,
                                      const void* const& slot_a,
                                      const void* const& slot_b) {
  // If either control byte is null, then we can't tell.
  if (ctrl_a == nullptr || ctrl_b == nullptr) return true;
  const bool a_is_soo = IsSooControl(ctrl_a);
  if (a_is_soo != IsSooControl(ctrl_b)) return false;
  // Two SOO iterators belong to the same table iff they share the SOO slot.
  if (a_is_soo) return slot_a == slot_b;

  const void* low_slot = slot_a;
  const void* hi_slot = slot_b;
  if (ctrl_a > ctrl_b) {
    std::swap(ctrl_a, ctrl_b);
    std::swap(low_slot, hi_slot);
  }
  return ctrl_b < low_slot && low_slot <= hi_slot;
}

// Asserts that two iterators come from the same container.
// Note: we take slots by reference so that it's not UB if they're uninitialized
// as long as we don't read them (when ctrl is null).
inline void AssertSameContainer(const ctrl_t* ctrl_a, const ctrl_t* ctrl_b,
                                const void* const& slot_a,
                                const void* const& slot_b,
                                const GenerationType* generation_ptr_a,
                                const GenerationType* generation_ptr_b) {
  if (!SwisstableDebugEnabled()) return;
  // `SwisstableDebugEnabled()` is also true for release builds with hardening
  // enabled. To minimize their impact in those builds:
  // - use `ABSL_PREDICT_FALSE()` to provide a compiler hint for code layout
  // - use `ABSL_RAW_LOG()` with a format string to reduce code size and improve
  //   the chances that the hot paths will be inlined.

  // fail_if(is_invalid, message) crashes when is_invalid is true and provides
  // an error message based on `message`.
  const auto fail_if = [](bool is_invalid, const char* message) {
    if (ABSL_PREDICT_FALSE(is_invalid)) {
      ABSL_RAW_LOG(FATAL, "Invalid iterator comparison. %s", message);
    }
  };

  const bool a_is_default = ctrl_a == EmptyGroup();
  const bool b_is_default = ctrl_b == EmptyGroup();
  if (a_is_default && b_is_default) return;
  fail_if(a_is_default != b_is_default,
          "Comparing default-constructed hashtable iterator with a "
          "non-default-constructed hashtable iterator.");

  if (SwisstableGenerationsEnabled()) {
    if (ABSL_PREDICT_TRUE(generation_ptr_a == generation_ptr_b)) return;
    // Users don't need to know whether the tables are SOO so don't mention SOO
    // in the debug message.
    const bool a_is_soo = IsSooControl(ctrl_a);
    const bool b_is_soo = IsSooControl(ctrl_b);
    fail_if(a_is_soo != b_is_soo || (a_is_soo && b_is_soo),
            "Comparing iterators from different hashtables.");

    const bool a_is_empty = IsEmptyGeneration(generation_ptr_a);
    const bool b_is_empty = IsEmptyGeneration(generation_ptr_b);
    fail_if(a_is_empty != b_is_empty,
            "Comparing an iterator from an empty hashtable with an iterator "
            "from a non-empty hashtable.");
    fail_if(a_is_empty && b_is_empty,
            "Comparing iterators from different empty hashtables.");

    const bool a_is_end = ctrl_a == nullptr;
    const bool b_is_end = ctrl_b == nullptr;
    fail_if(a_is_end || b_is_end,
            "Comparing iterator with an end() iterator from a different "
            "hashtable.");
    fail_if(true, "Comparing non-end() iterators from different hashtables.");
  } else {
    ABSL_HARDENING_ASSERT_SLOW(
        AreItersFromSameContainer(ctrl_a, ctrl_b, slot_a, slot_b) &&
        "Invalid iterator comparison. The iterators may be from different "
        "containers or the container might have rehashed or moved. Consider "
        "running with --config=asan to diagnose issues.");
  }
}

// Result of a probe: the slot index found and how many groups were examined.
struct FindInfo {
  size_t offset;
  size_t probe_length;
};

// Whether a table is "small". A small table fits entirely into a probing
// group, i.e., has a capacity < `Group::kWidth`.
//
// In small mode we are able to use the whole capacity. The extra control
// bytes give us at least one "empty" control byte to stop the iteration.
// This is important to make 1 a valid capacity.
//
// In small mode only the first `capacity` control bytes after the sentinel
// are valid. The rest contain dummy ctrl_t::kEmpty values that do not
// represent a real slot. This is important to take into account on
// `find_first_non_full()`, where we never try
// `ShouldInsertBackwards()` for small tables.
constexpr bool is_small(size_t capacity) {
  return capacity < Group::kWidth - 1;
}

// Whether a table fits entirely into a probing group.
// Arbitrary order of elements in such tables is correct.
constexpr bool is_single_group(size_t capacity) {
  return capacity <= Group::kWidth;
}

// Begins a probing operation on `common.control`, using `hash`.
inline probe_seq<Group::kWidth> probe(PerTableSeed seed, const size_t capacity,
                                      size_t hash) {
  return probe_seq<Group::kWidth>(H1(hash, seed), capacity);
}
inline probe_seq<Group::kWidth> probe(const CommonFields& common, size_t hash) {
  return probe(common.seed(), common.capacity(), hash);
}

// Probes an array of control bits using a probe sequence derived from `hash`,
// and returns the offset corresponding to the first deleted or empty slot.
//
// Behavior when the entire table is full is undefined.
//
// NOTE: this function must work with tables having both empty and deleted
// slots in the same group. Such tables appear during `erase()`.
template <typename = void>
inline FindInfo find_first_non_full(const CommonFields& common, size_t hash) {
  auto seq = probe(common, hash);
  const ctrl_t* ctrl = common.control();
  const PerTableSeed seed = common.seed();
  // Fast path: the very first probed byte is usable and we do not want to
  // insert backwards within the group.
  if (IsEmptyOrDeleted(ctrl[seq.offset()]) &&
      !ShouldInsertBackwards(common.capacity(), hash, seed)) {
    return {seq.offset(), /*probe_length=*/0};
  }
  while (true) {
    GroupFullEmptyOrDeleted g{ctrl + seq.offset()};
    auto mask = g.MaskEmptyOrDeleted();
    if (mask) {
      return {
          seq.offset(GetInsertionOffset(mask, common.capacity(), hash, seed)),
          seq.index()};
    }
    seq.next();
    ABSL_SWISSTABLE_ASSERT(seq.index() <= common.capacity() && "full table!");
  }
}

// The extern template declaration keeps the possibility of inlining this
// function. When the compiler decides not to inline it, no symbols are added
// to the corresponding translation unit.
extern template FindInfo find_first_non_full(const CommonFields&, size_t);

// Non-inlined version of find_first_non_full for use in less
// performance critical routines.
FindInfo find_first_non_full_outofline(const CommonFields&, size_t);

// Sets sanitizer poisoning for slot corresponding to control byte being set:
// a full control byte unpoisons its slot, any other value poisons it.
inline void DoSanitizeOnSetCtrl(const CommonFields& c, size_t i, ctrl_t h,
                                size_t slot_size) {
  ABSL_SWISSTABLE_ASSERT(i < c.capacity());
  auto* slot_i = static_cast<const char*>(c.slot_array()) + i * slot_size;
  if (IsFull(h)) {
    SanitizerUnpoisonMemoryRegion(slot_i, slot_size);
  } else {
    SanitizerPoisonMemoryRegion(slot_i, slot_size);
  }
}

// Sets `ctrl[i]` to `h`.
//
// Unlike setting it directly, this function will perform bounds checks and
// mirror the value to the cloned tail if necessary.
inline void SetCtrl(const CommonFields& c, size_t i, ctrl_t h,
                    size_t slot_size) {
  DoSanitizeOnSetCtrl(c, i, h, slot_size);
  ctrl_t* ctrl = c.control();
  ctrl[i] = h;
  // Mirror the byte into the cloned tail. Because `capacity + 1` is a power
  // of two (see the single-group-copy comment in the copy constructor),
  // masking with `capacity` maps `i` either to its clone position or back to
  // `i` itself, so one unconditional store covers both cases.
  ctrl[((i - NumClonedBytes()) & c.capacity()) +
       (NumClonedBytes() & c.capacity())] = h;
}
// Overload for setting to an occupied `h2_t` rather than a special `ctrl_t`.
inline void SetCtrl(const CommonFields& c, size_t i, h2_t h, size_t slot_size) {
  SetCtrl(c, i, static_cast<ctrl_t>(h), slot_size);
}

// Like SetCtrl, but in a single group table, we can save some operations when
// setting the cloned control byte.
inline void SetCtrlInSingleGroupTable(const CommonFields& c, size_t i, ctrl_t h,
                                      size_t slot_size) {
  ABSL_SWISSTABLE_ASSERT(is_single_group(c.capacity()));
  DoSanitizeOnSetCtrl(c, i, h, slot_size);
  ctrl_t* ctrl = c.control();
  ctrl[i] = h;
  // In a single-group table every slot has a clone exactly `capacity + 1`
  // bytes later (just past the sentinel), so no masking is needed.
  ctrl[i + c.capacity() + 1] = h;
}
// Overload for setting to an occupied `h2_t` rather than a special `ctrl_t`.
inline void SetCtrlInSingleGroupTable(const CommonFields& c, size_t i, h2_t h,
                                      size_t slot_size) {
  SetCtrlInSingleGroupTable(c, i, static_cast<ctrl_t>(h), slot_size);
}

// Like SetCtrl, but in a table with capacity >= Group::kWidth - 1,
// we can save some operations when setting the cloned control byte.
inline void SetCtrlInLargeTable(const CommonFields& c, size_t i, ctrl_t h,
                                size_t slot_size) {
  ABSL_SWISSTABLE_ASSERT(c.capacity() >= Group::kWidth - 1);
  DoSanitizeOnSetCtrl(c, i, h, slot_size);
  ctrl_t* ctrl = c.control();
  ctrl[i] = h;
  // With capacity >= kWidth - 1 the clone-count term `NumClonedBytes() &
  // c.capacity()` from SetCtrl is always just `NumClonedBytes()`, so the
  // second mask can be dropped.
  ctrl[((i - NumClonedBytes()) & c.capacity()) + NumClonedBytes()] = h;
}
// Overload for setting to an occupied `h2_t` rather than a special `ctrl_t`.
inline void SetCtrlInLargeTable(const CommonFields& c, size_t i, h2_t h,
                                size_t slot_size) {
  SetCtrlInLargeTable(c, i, static_cast<ctrl_t>(h), slot_size);
}

// growth_info (which is a size_t) is stored with the backing array, so the
// array's alignment must satisfy both the slot type and GrowthInfo.
constexpr size_t BackingArrayAlignment(size_t align_of_slot) {
  return (std::max)(align_of_slot, alignof(GrowthInfo));
}

// Returns the address of the ith slot in slots where each slot occupies
// slot_size.
inline void* SlotAddress(void* slot_array, size_t slot, size_t slot_size) {
  return static_cast<void*>(static_cast<char*>(slot_array) +
                            (slot * slot_size));
}

// Iterates over all full slots and calls `cb(const ctrl_t*, void*)`.
// No insertion to the table is allowed during `cb` call.
// Erasure is allowed only for the element passed to the callback.
// The table must not be in SOO mode.
void IterateOverFullSlots(const CommonFields& c, size_t slot_size,
                          absl::FunctionRef<void(const ctrl_t*, void*)> cb);

// Whether hashtablez sampling is permitted for a table using `CharAlloc`:
// only the plain std::allocator<char> qualifies.
template <typename CharAlloc>
constexpr bool ShouldSampleHashtablezInfoForAlloc() {
  // Folks with custom allocators often make unwarranted assumptions about the
  // behavior of their classes vis-a-vis trivial destructability and what
  // calls they will or won't make. Avoid sampling for people with custom
  // allocators to get us out of this mess. This is not a hard guarantee but
  // a workaround while we plan the exact guarantee we want to provide.
  return std::is_same_v<CharAlloc, std::allocator<char>>;
}

// Decides whether a resize away from `old_capacity` should sample this table
// for hashtablez. Sampling happens on the first transition off the empty /
// SOO state, or unconditionally when `force_sampling` is set for SOO tables.
template <bool kSooEnabled>
bool ShouldSampleHashtablezInfoOnResize(bool force_sampling,
                                        bool is_hashtablez_eligible,
                                        size_t old_capacity, CommonFields& c) {
  if (!is_hashtablez_eligible) return false;
  // Force sampling is only allowed for SOO tables.
  ABSL_SWISSTABLE_ASSERT(kSooEnabled || !force_sampling);
  if (kSooEnabled && force_sampling) {
    return true;
  }
  // In SOO, we sample on the first insertion so if this is an empty SOO case
  // (e.g. when reserve is called), then we still need to sample.
  if (kSooEnabled && old_capacity == SooCapacity() && c.empty()) {
    return ShouldSampleNextTable();
  }
  if (!kSooEnabled && old_capacity == 0) {
    return ShouldSampleNextTable();
  }
  return false;
}

// Allocates `n` bytes for a backing array.
// NOINLINE: type-erased allocation is shared across instantiations via the
// PolicyFunctions table below, so the body must exist as a real symbol.
template <size_t AlignOfBackingArray, typename Alloc>
ABSL_ATTRIBUTE_NOINLINE void* AllocateBackingArray(void* alloc, size_t n) {
  return Allocate<AlignOfBackingArray>(static_cast<Alloc*>(alloc), n);
}

// Deallocates the backing array described by (`capacity`, `slot_size`,
// `slot_align`, `had_infoz`); `ctrl` points into the middle of the allocation
// so the start is recovered via the layout's control offset.
template <size_t AlignOfBackingArray, typename Alloc>
ABSL_ATTRIBUTE_NOINLINE void DeallocateBackingArray(
    void* alloc, size_t capacity, ctrl_t* ctrl, size_t slot_size,
    size_t slot_align, bool had_infoz) {
  RawHashSetLayout layout(capacity, slot_size, slot_align, had_infoz);
  void* backing_array = ctrl - layout.control_offset();
  // Unpoison before returning the memory to the allocator.
  SanitizerUnpoisonMemoryRegion(backing_array, layout.alloc_size());
  Deallocate<AlignOfBackingArray>(static_cast<Alloc*>(alloc), backing_array,
                                  layout.alloc_size());
}

// PolicyFunctions bundles together some information for a particular
// raw_hash_set<T, ...> instantiation. This information is passed to
// type-erased functions that want to do small amounts of type-specific
// work.
struct PolicyFunctions {
  uint32_t key_size;
  uint32_t value_size;
  uint32_t slot_size;
  uint16_t slot_align;
  uint8_t soo_capacity;
  bool is_hashtablez_eligible;

  // Returns the pointer to the hash function stored in the set.
  void* (*hash_fn)(CommonFields& common);

  // Returns the hash of the pointed-to slot.
  size_t (*hash_slot)(const void* hash_fn, void* slot);

  // Transfers the contents of `count` slots from src_slot to dst_slot.
  // We use the ability to transfer several slots in single group table growth.
  void (*transfer_n)(void* set, void* dst_slot, void* src_slot, size_t count);

  // Returns the pointer to the CharAlloc stored in the set.
  void* (*get_char_alloc)(CommonFields& common);

  // Allocates n bytes for the backing store for common.
  void* (*alloc)(void* alloc, size_t n);

  // Deallocates the backing store from common.
  void (*dealloc)(void* alloc, size_t capacity, ctrl_t* ctrl, size_t slot_size,
                  size_t slot_align, bool had_infoz);

  // Iterates over full slots in old table, finds new positions
  // for them and transfers the slots.
  // Returns the total probe length.
  size_t (*find_new_positions_and_transfer_slots)(CommonFields& common,
                                                  ctrl_t* old_ctrl,
                                                  void* old_slots,
                                                  size_t old_capacity);
};

// Returns the maximum valid size for a table with 1-byte slots.
// This function is a utility shared by MaxValidSize and IsAboveValidSize.
// Template parameter is only used to enable testing.
template <size_t kSizeOfSizeT = sizeof(size_t)>
constexpr size_t MaxValidSizeFor1ByteSlot() {
  if constexpr (kSizeOfSizeT == 8) {
    // 64-bit: limited by the bit width of the stored size field.
    return CapacityToGrowth(
        static_cast<size_t>(uint64_t{1} << HashtableSize::kSizeBitCount) - 1);
  } else {
    static_assert(kSizeOfSizeT == 4);
    // 32-bit: limited to a quarter of the address space (2^30 - 1 capacity).
    return CapacityToGrowth((size_t{1} << (kSizeOfSizeT * 8 - 2)) - 1);
  }
}

// Returns the maximum valid size for a table with provided slot size.
// Template parameter is only used to enable testing.
template <size_t kSizeOfSizeT = sizeof(size_t)>
constexpr size_t MaxValidSize(size_t slot_size) {
  if constexpr (kSizeOfSizeT == 8) {
    // For small slot sizes we are limited by HashtableSize::kSizeBitCount.
    if (slot_size < size_t{1} << (64 - HashtableSize::kSizeBitCount)) {
      return MaxValidSizeFor1ByteSlot<kSizeOfSizeT>();
    }
    // Large slots: limit total slot memory to a quarter of the address space.
    return (size_t{1} << (kSizeOfSizeT * 8 - 2)) / slot_size;
  } else {
    return MaxValidSizeFor1ByteSlot<kSizeOfSizeT>() / slot_size;
  }
}

// Returns true if size is larger than the maximum valid size.
// It is an optimization to avoid the division operation in the common case.
// Template parameter is only used to enable testing.
template <size_t kSizeOfSizeT = sizeof(size_t)>
constexpr bool IsAboveValidSize(size_t size, size_t slot_size) {
  if constexpr (kSizeOfSizeT == 8) {
    // For small slot sizes we are limited by HashtableSize::kSizeBitCount.
    if (ABSL_PREDICT_TRUE(slot_size <
                          (size_t{1} << (64 - HashtableSize::kSizeBitCount)))) {
      return size > MaxValidSizeFor1ByteSlot<kSizeOfSizeT>();
    }
    return size > MaxValidSize<kSizeOfSizeT>(slot_size);
  } else {
    // 32-bit: widen to 64 bits so `size * slot_size` cannot overflow before
    // the comparison.
    return uint64_t{size} * slot_size >
           MaxValidSizeFor1ByteSlot<kSizeOfSizeT>();
  }
}

// Returns the index of the SOO slot when growing from SOO to non-SOO in a
// single group. See also InitializeSmallControlBytesAfterSoo(). It's important
// to use index 1 so that when resizing from capacity 1 to 3, we can still have
// random iteration order between the first two inserted elements.
// I.e. it allows inserting the second element at either index 0 or 2.
constexpr size_t SooSlotIndex() { return 1; }

// Maximum capacity for the algorithm for small table after SOO.
// Note that typical size after SOO is 3, but we allow up to 7.
// Allowing till 16 would require an additional store that can be avoided.
constexpr size_t MaxSmallAfterSooCapacity() { return 7; }

// Resizes empty non-allocated table to the capacity to fit new_size elements.
// Requires:
//   1. `c.capacity() == policy.soo_capacity`.
//   2. `c.empty()`.
//   3. `new_size > policy.soo_capacity`.
// The table will be attempted to be sampled.
void ReserveEmptyNonAllocatedTableToFitNewSize(CommonFields& common,
                                               const PolicyFunctions& policy,
                                               size_t new_size);

// The same as ReserveEmptyNonAllocatedTableToFitNewSize, but resizes to the
// next valid capacity after `bucket_count`.
void ReserveEmptyNonAllocatedTableToFitBucketCount(
    CommonFields& common, const PolicyFunctions& policy, size_t bucket_count);

// Resizes empty non-allocated SOO table to NextCapacity(SooCapacity()) and
// forces the table to be sampled.
// SOO tables need to switch from SOO to heap in order to store the infoz.
// Requires:
//   1. `c.capacity() == SooCapacity()`.
//   2. `c.empty()`.
void GrowEmptySooTableToNextCapacityForceSampling(
    CommonFields& common, const PolicyFunctions& policy);

// Type erased version of raw_hash_set::rehash.
void Rehash(CommonFields& common, const PolicyFunctions& policy, size_t n);

// Type erased version of raw_hash_set::reserve for tables that have an
// allocated backing array.
//
// Requires:
//   1. `c.capacity() > policy.soo_capacity` OR `!c.empty()`.
// Reserving already allocated tables is considered to be a rare case.
void ReserveAllocatedTable(CommonFields& common, const PolicyFunctions& policy,
                           size_t n);

// Returns the optimal size for memcpy when transferring SOO slot.
// Otherwise, returns the optimal size for memcpy SOO slot transfer
// to SooSlotIndex().
// At the destination we are allowed to copy up to twice more bytes,
// because there is at least one more slot after SooSlotIndex().
// The result must not exceed MaxSooSlotSize().
// Some of the cases are merged to minimize the number of function
// instantiations.
constexpr size_t OptimalMemcpySizeForSooSlotTransfer(
    size_t slot_size, size_t max_soo_slot_size = MaxSooSlotSize()) {
  static_assert(MaxSooSlotSize() >= 8, "unexpectedly small SOO slot size");
  if (slot_size == 1) {
    return 1;
  }
  // 2- and 3-byte slots round up to a 4-byte copy (over-copy is allowed).
  if (slot_size <= 3) {
    return 4;
  }
  // We are merging 4 and 8 into one case because we expect them to be the
  // hottest cases. Copying 8 bytes is as fast on common architectures.
  if (slot_size <= 8) {
    return 8;
  }
  if (max_soo_slot_size <= 16) {
    return max_soo_slot_size;
  }
  if (slot_size <= 16) {
    return 16;
  }
  if (max_soo_slot_size <= 24) {
    return max_soo_slot_size;
  }
  static_assert(MaxSooSlotSize() <= 24, "unexpectedly large SOO slot size");
  return 24;
}

// Resizes a full SOO table to the NextCapacity(SooCapacity()).
// All possible template combinations are defined in cc file to improve
// compilation time.
template <size_t SooSlotMemcpySize, bool TransferUsesMemcpy>
void GrowFullSooTableToNextCapacity(CommonFields& common,
                                    const PolicyFunctions& policy,
                                    size_t soo_slot_hash);

// As `GrowFullSooTableToNextCapacity`, except that we also force the SOO
// table to be sampled. SOO tables need to switch from SOO to heap in order to
// store the infoz. No-op if sampling is disabled or not possible.
void GrowFullSooTableToNextCapacityForceSampling(CommonFields& common,
                                                 const PolicyFunctions& policy);

// Resizes table with allocated slots and changes the table seed.
// Tables with SOO enabled must have capacity > policy.soo_capacity.
// No sampling will be performed since table is already allocated.
void ResizeAllocatedTableWithSeedChange(CommonFields& common,
                                        const PolicyFunctions& policy,
                                        size_t new_capacity);

// Common bookkeeping for every successful insertion: bump the element count
// and (in generational builds) the generation used for iterator validation.
inline void PrepareInsertCommon(CommonFields& common) {
  common.increment_size();
  common.maybe_increment_generation_on_insert();
}

// Like prepare_insert, but for the case of inserting into a full SOO table.
size_t PrepareInsertAfterSoo(size_t hash, size_t slot_size,
                             CommonFields& common);

// ClearBackingArray clears the backing array, either modifying it in place,
// or creating a new one based on the value of "reuse".
// REQUIRES: c.capacity > 0
void ClearBackingArray(CommonFields& c, const PolicyFunctions& policy,
                       void* alloc, bool reuse, bool soo_enabled);

// Type-erased version of raw_hash_set::erase_meta_only.
void EraseMetaOnly(CommonFields& c, size_t index, size_t slot_size);

// For trivially relocatable types we use memcpy directly. This allows us to
// share the same function body for raw_hash_set instantiations that have the
// same slot size as long as they are relocatable.
// A separate function for relocating a single slot causes significant binary
// bloat, hence the count-based interface.
template <size_t SizeOfSlot>
ABSL_ATTRIBUTE_NOINLINE void TransferNRelocatable(void*, void* dst, void* src,
                                                 size_t count) {
  // TODO(b/382423690): Experiment with making specialization for power of 2 and
  // non power of 2. This would require passing the size of the slot.
  memcpy(dst, src, SizeOfSlot * count);
}

// Returns a pointer to `common`. This is used to implement type erased
// raw_hash_set::get_hash_ref_fn and raw_hash_set::get_alloc_ref_fn for the
// empty class cases.
void* GetRefForEmptyClass(CommonFields& common);

// Given the hash of a value not currently in the table and the first empty
// slot in the probe sequence, finds a viable slot index to insert it at.
//
// In case there's no space left, the table can be resized or rehashed
// (for tables with deleted slots, see FindInsertPositionWithGrowthOrRehash).
//
// In the case of absence of deleted slots and positive growth_left, the element
// can be inserted in the provided `target` position.
//
// When the table has deleted slots (according to GrowthInfo), the target
// position will be searched one more time using `find_first_non_full`.
//
// REQUIRES: Table is not SOO.
// REQUIRES: At least one non-full slot available.
// REQUIRES: `target` is a valid empty position to insert.
size_t PrepareInsertNonSoo(CommonFields& common, const PolicyFunctions& policy,
                           size_t hash, FindInfo target);

// A SwissTable.
//
// Policy: a policy defines how to perform different operations on
// the slots of the hashtable (see hash_policy_traits.h for the full interface
// of policy).
//
// Hash: a (possibly polymorphic) functor that hashes keys of the hashtable. The
// functor should accept a key and return size_t as hash. For best performance
// it is important that the hash function provides high entropy across all bits
// of the hash.
//
// Eq: a (possibly polymorphic) functor that compares two keys for equality. It
// should accept two (of possibly different type) keys and return a bool: true
// if they are equal, false if they are not. If two keys compare equal, then
// their hash values as defined by Hash MUST be equal.
//
// Allocator: an Allocator
// [https://en.cppreference.com/w/cpp/named_req/Allocator] with which
// the storage of the hashtable will be allocated and the elements will be
// constructed and destroyed.
1887 template <class Policy, class Hash, class Eq, class Alloc> 1888 class raw_hash_set { 1889 using PolicyTraits = hash_policy_traits<Policy>; 1890 using KeyArgImpl = 1891 KeyArg<IsTransparent<Eq>::value && IsTransparent<Hash>::value>; 1892 1893 public: 1894 using init_type = typename PolicyTraits::init_type; 1895 using key_type = typename PolicyTraits::key_type; 1896 using allocator_type = Alloc; 1897 using size_type = size_t; 1898 using difference_type = ptrdiff_t; 1899 using hasher = Hash; 1900 using key_equal = Eq; 1901 using policy_type = Policy; 1902 using value_type = typename PolicyTraits::value_type; 1903 using reference = value_type&; 1904 using const_reference = const value_type&; 1905 using pointer = typename absl::allocator_traits< 1906 allocator_type>::template rebind_traits<value_type>::pointer; 1907 using const_pointer = typename absl::allocator_traits< 1908 allocator_type>::template rebind_traits<value_type>::const_pointer; 1909 1910 private: 1911 // Alias used for heterogeneous lookup functions. 1912 // `key_arg<K>` evaluates to `K` when the functors are transparent and to 1913 // `key_type` otherwise. It permits template argument deduction on `K` for the 1914 // transparent case. 1915 template <class K> 1916 using key_arg = typename KeyArgImpl::template type<K, key_type>; 1917 1918 using slot_type = typename PolicyTraits::slot_type; 1919 1920 // TODO(b/289225379): we could add extra SOO space inside raw_hash_set 1921 // after CommonFields to allow inlining larger slot_types (e.g. std::string), 1922 // but it's a bit complicated if we want to support incomplete mapped_type in 1923 // flat_hash_map. We could potentially do this for flat_hash_set and for an 1924 // allowlist of `mapped_type`s of flat_hash_map that includes e.g. arithmetic 1925 // types, strings, cords, and pairs/tuples of allowlisted types. 
1926 constexpr static bool SooEnabled() { 1927 return PolicyTraits::soo_enabled() && 1928 sizeof(slot_type) <= sizeof(HeapOrSoo) && 1929 alignof(slot_type) <= alignof(HeapOrSoo); 1930 } 1931 1932 constexpr static size_t DefaultCapacity() { 1933 return SooEnabled() ? SooCapacity() : 0; 1934 } 1935 1936 // Whether `size` fits in the SOO capacity of this table. 1937 bool fits_in_soo(size_t size) const { 1938 return SooEnabled() && size <= SooCapacity(); 1939 } 1940 // Whether this table is in SOO mode or non-SOO mode. 1941 bool is_soo() const { return fits_in_soo(capacity()); } 1942 bool is_full_soo() const { return is_soo() && !empty(); } 1943 1944 // Give an early error when key_type is not hashable/eq. 1945 auto KeyTypeCanBeHashed(const Hash& h, const key_type& k) -> decltype(h(k)); 1946 auto KeyTypeCanBeEq(const Eq& eq, const key_type& k) -> decltype(eq(k, k)); 1947 1948 using AllocTraits = absl::allocator_traits<allocator_type>; 1949 using SlotAlloc = typename absl::allocator_traits< 1950 allocator_type>::template rebind_alloc<slot_type>; 1951 // People are often sloppy with the exact type of their allocator (sometimes 1952 // it has an extra const or is missing the pair, but rebinds made it work 1953 // anyway). 1954 using CharAlloc = 1955 typename absl::allocator_traits<Alloc>::template rebind_alloc<char>; 1956 using SlotAllocTraits = typename absl::allocator_traits< 1957 allocator_type>::template rebind_traits<slot_type>; 1958 1959 static_assert(std::is_lvalue_reference<reference>::value, 1960 "Policy::element() must return a reference"); 1961 1962 // An enabler for insert(T&&): T must be convertible to init_type or be the 1963 // same as [cv] value_type [ref]. 
1964 template <class T> 1965 using Insertable = absl::disjunction< 1966 std::is_same<absl::remove_cvref_t<reference>, absl::remove_cvref_t<T>>, 1967 std::is_convertible<T, init_type>>; 1968 template <class T> 1969 using IsNotBitField = std::is_pointer<T*>; 1970 1971 // RequiresNotInit is a workaround for gcc prior to 7.1. 1972 // See https://godbolt.org/g/Y4xsUh. 1973 template <class T> 1974 using RequiresNotInit = 1975 typename std::enable_if<!std::is_same<T, init_type>::value, int>::type; 1976 1977 template <class... Ts> 1978 using IsDecomposable = IsDecomposable<void, PolicyTraits, Hash, Eq, Ts...>; 1979 1980 template <class T> 1981 using IsDecomposableAndInsertable = 1982 IsDecomposable<std::enable_if_t<Insertable<T>::value, T>>; 1983 1984 // Evaluates to true if an assignment from the given type would require the 1985 // source object to remain alive for the life of the element. 1986 template <class U> 1987 using IsLifetimeBoundAssignmentFrom = std::conditional_t< 1988 policy_trait_element_is_owner<Policy>::value, std::false_type, 1989 type_traits_internal::IsLifetimeBoundAssignment<init_type, U>>; 1990 1991 public: 1992 static_assert(std::is_same<pointer, value_type*>::value, 1993 "Allocators with custom pointer types are not supported"); 1994 static_assert(std::is_same<const_pointer, const value_type*>::value, 1995 "Allocators with custom pointer types are not supported"); 1996 1997 class iterator : private HashSetIteratorGenerationInfo { 1998 friend class raw_hash_set; 1999 friend struct HashtableFreeFunctionsAccess; 2000 2001 public: 2002 using iterator_category = std::forward_iterator_tag; 2003 using value_type = typename raw_hash_set::value_type; 2004 using reference = 2005 absl::conditional_t<PolicyTraits::constant_iterators::value, 2006 const value_type&, value_type&>; 2007 using pointer = absl::remove_reference_t<reference>*; 2008 using difference_type = typename raw_hash_set::difference_type; 2009 2010 iterator() {} 2011 2012 // PRECONDITION: not an 
end() iterator. 2013 reference operator*() const { 2014 AssertIsFull(ctrl_, generation(), generation_ptr(), "operator*()"); 2015 return unchecked_deref(); 2016 } 2017 2018 // PRECONDITION: not an end() iterator. 2019 pointer operator->() const { 2020 AssertIsFull(ctrl_, generation(), generation_ptr(), "operator->"); 2021 return &operator*(); 2022 } 2023 2024 // PRECONDITION: not an end() iterator. 2025 iterator& operator++() { 2026 AssertIsFull(ctrl_, generation(), generation_ptr(), "operator++"); 2027 ++ctrl_; 2028 ++slot_; 2029 skip_empty_or_deleted(); 2030 if (ABSL_PREDICT_FALSE(*ctrl_ == ctrl_t::kSentinel)) ctrl_ = nullptr; 2031 return *this; 2032 } 2033 // PRECONDITION: not an end() iterator. 2034 iterator operator++(int) { 2035 auto tmp = *this; 2036 ++*this; 2037 return tmp; 2038 } 2039 2040 friend bool operator==(const iterator& a, const iterator& b) { 2041 AssertIsValidForComparison(a.ctrl_, a.generation(), a.generation_ptr()); 2042 AssertIsValidForComparison(b.ctrl_, b.generation(), b.generation_ptr()); 2043 AssertSameContainer(a.ctrl_, b.ctrl_, a.slot_, b.slot_, 2044 a.generation_ptr(), b.generation_ptr()); 2045 return a.ctrl_ == b.ctrl_; 2046 } 2047 friend bool operator!=(const iterator& a, const iterator& b) { 2048 return !(a == b); 2049 } 2050 2051 private: 2052 iterator(ctrl_t* ctrl, slot_type* slot, 2053 const GenerationType* generation_ptr) 2054 : HashSetIteratorGenerationInfo(generation_ptr), 2055 ctrl_(ctrl), 2056 slot_(slot) { 2057 // This assumption helps the compiler know that any non-end iterator is 2058 // not equal to any end iterator. 2059 ABSL_ASSUME(ctrl != nullptr); 2060 } 2061 // This constructor is used in begin() to avoid an MSan 2062 // use-of-uninitialized-value error. Delegating from this constructor to 2063 // the previous one doesn't avoid the error. 
2064 iterator(ctrl_t* ctrl, MaybeInitializedPtr slot, 2065 const GenerationType* generation_ptr) 2066 : HashSetIteratorGenerationInfo(generation_ptr), 2067 ctrl_(ctrl), 2068 slot_(to_slot(slot.get())) { 2069 // This assumption helps the compiler know that any non-end iterator is 2070 // not equal to any end iterator. 2071 ABSL_ASSUME(ctrl != nullptr); 2072 } 2073 // For end() iterators. 2074 explicit iterator(const GenerationType* generation_ptr) 2075 : HashSetIteratorGenerationInfo(generation_ptr), ctrl_(nullptr) {} 2076 2077 // Fixes up `ctrl_` to point to a full or sentinel by advancing `ctrl_` and 2078 // `slot_` until they reach one. 2079 void skip_empty_or_deleted() { 2080 while (IsEmptyOrDeleted(*ctrl_)) { 2081 uint32_t shift = 2082 GroupFullEmptyOrDeleted{ctrl_}.CountLeadingEmptyOrDeleted(); 2083 ctrl_ += shift; 2084 slot_ += shift; 2085 } 2086 } 2087 2088 ctrl_t* control() const { return ctrl_; } 2089 slot_type* slot() const { return slot_; } 2090 2091 // We use EmptyGroup() for default-constructed iterators so that they can 2092 // be distinguished from end iterators, which have nullptr ctrl_. 2093 ctrl_t* ctrl_ = EmptyGroup(); 2094 // To avoid uninitialized member warnings, put slot_ in an anonymous union. 2095 // The member is not initialized on singleton and end iterators. 2096 union { 2097 slot_type* slot_; 2098 }; 2099 2100 // An equality check which skips ABSL Hardening iterator invalidation 2101 // checks. 2102 // Should be used when the lifetimes of the iterators are well-enough 2103 // understood to prove that they cannot be invalid. 2104 bool unchecked_equals(const iterator& b) { return ctrl_ == b.control(); } 2105 2106 // Dereferences the iterator without ABSL Hardening iterator invalidation 2107 // checks. 
2108 reference unchecked_deref() const { return PolicyTraits::element(slot_); } 2109 }; 2110 2111 class const_iterator { 2112 friend class raw_hash_set; 2113 template <class Container, typename Enabler> 2114 friend struct absl::container_internal::hashtable_debug_internal:: 2115 HashtableDebugAccess; 2116 2117 public: 2118 using iterator_category = typename iterator::iterator_category; 2119 using value_type = typename raw_hash_set::value_type; 2120 using reference = typename raw_hash_set::const_reference; 2121 using pointer = typename raw_hash_set::const_pointer; 2122 using difference_type = typename raw_hash_set::difference_type; 2123 2124 const_iterator() = default; 2125 // Implicit construction from iterator. 2126 const_iterator(iterator i) : inner_(std::move(i)) {} // NOLINT 2127 2128 reference operator*() const { return *inner_; } 2129 pointer operator->() const { return inner_.operator->(); } 2130 2131 const_iterator& operator++() { 2132 ++inner_; 2133 return *this; 2134 } 2135 const_iterator operator++(int) { return inner_++; } 2136 2137 friend bool operator==(const const_iterator& a, const const_iterator& b) { 2138 return a.inner_ == b.inner_; 2139 } 2140 friend bool operator!=(const const_iterator& a, const const_iterator& b) { 2141 return !(a == b); 2142 } 2143 2144 private: 2145 const_iterator(const ctrl_t* ctrl, const slot_type* slot, 2146 const GenerationType* gen) 2147 : inner_(const_cast<ctrl_t*>(ctrl), const_cast<slot_type*>(slot), gen) { 2148 } 2149 ctrl_t* control() const { return inner_.control(); } 2150 slot_type* slot() const { return inner_.slot(); } 2151 2152 iterator inner_; 2153 2154 bool unchecked_equals(const const_iterator& b) { 2155 return inner_.unchecked_equals(b.inner_); 2156 } 2157 }; 2158 2159 using node_type = node_handle<Policy, hash_policy_traits<Policy>, Alloc>; 2160 using insert_return_type = InsertReturnType<iterator, node_type>; 2161 2162 // Note: can't use `= default` due to non-default noexcept (causes 2163 // problems for 
some compilers). NOLINTNEXTLINE 2164 raw_hash_set() noexcept( 2165 std::is_nothrow_default_constructible<hasher>::value && 2166 std::is_nothrow_default_constructible<key_equal>::value && 2167 std::is_nothrow_default_constructible<allocator_type>::value) {} 2168 2169 ABSL_ATTRIBUTE_NOINLINE explicit raw_hash_set( 2170 size_t bucket_count, const hasher& hash = hasher(), 2171 const key_equal& eq = key_equal(), 2172 const allocator_type& alloc = allocator_type()) 2173 : settings_(CommonFields::CreateDefault<SooEnabled()>(), hash, eq, 2174 alloc) { 2175 if (bucket_count > DefaultCapacity()) { 2176 ReserveEmptyNonAllocatedTableToFitBucketCount( 2177 common(), GetPolicyFunctions(), bucket_count); 2178 } 2179 } 2180 2181 raw_hash_set(size_t bucket_count, const hasher& hash, 2182 const allocator_type& alloc) 2183 : raw_hash_set(bucket_count, hash, key_equal(), alloc) {} 2184 2185 raw_hash_set(size_t bucket_count, const allocator_type& alloc) 2186 : raw_hash_set(bucket_count, hasher(), key_equal(), alloc) {} 2187 2188 explicit raw_hash_set(const allocator_type& alloc) 2189 : raw_hash_set(0, hasher(), key_equal(), alloc) {} 2190 2191 template <class InputIter> 2192 raw_hash_set(InputIter first, InputIter last, size_t bucket_count = 0, 2193 const hasher& hash = hasher(), const key_equal& eq = key_equal(), 2194 const allocator_type& alloc = allocator_type()) 2195 : raw_hash_set(SelectBucketCountForIterRange(first, last, bucket_count), 2196 hash, eq, alloc) { 2197 insert(first, last); 2198 } 2199 2200 template <class InputIter> 2201 raw_hash_set(InputIter first, InputIter last, size_t bucket_count, 2202 const hasher& hash, const allocator_type& alloc) 2203 : raw_hash_set(first, last, bucket_count, hash, key_equal(), alloc) {} 2204 2205 template <class InputIter> 2206 raw_hash_set(InputIter first, InputIter last, size_t bucket_count, 2207 const allocator_type& alloc) 2208 : raw_hash_set(first, last, bucket_count, hasher(), key_equal(), alloc) {} 2209 2210 template <class 
InputIter> 2211 raw_hash_set(InputIter first, InputIter last, const allocator_type& alloc) 2212 : raw_hash_set(first, last, 0, hasher(), key_equal(), alloc) {} 2213 2214 // Instead of accepting std::initializer_list<value_type> as the first 2215 // argument like std::unordered_set<value_type> does, we have two overloads 2216 // that accept std::initializer_list<T> and std::initializer_list<init_type>. 2217 // This is advantageous for performance. 2218 // 2219 // // Turns {"abc", "def"} into std::initializer_list<std::string>, then 2220 // // copies the strings into the set. 2221 // std::unordered_set<std::string> s = {"abc", "def"}; 2222 // 2223 // // Turns {"abc", "def"} into std::initializer_list<const char*>, then 2224 // // copies the strings into the set. 2225 // absl::flat_hash_set<std::string> s = {"abc", "def"}; 2226 // 2227 // The same trick is used in insert(). 2228 // 2229 // The enabler is necessary to prevent this constructor from triggering where 2230 // the copy constructor is meant to be called. 2231 // 2232 // absl::flat_hash_set<int> a, b{a}; 2233 // 2234 // RequiresNotInit<T> is a workaround for gcc prior to 7.1. 
  // Initializer-list constructors. The `T` overloads accept lists of any
  // insertable element type; the `init_type` overloads handle brace-init
  // lists. All delegate to the iterator-range constructor.
  template <class T, RequiresNotInit<T> = 0,
            std::enable_if_t<Insertable<T>::value, int> = 0>
  raw_hash_set(std::initializer_list<T> init, size_t bucket_count = 0,
               const hasher& hash = hasher(), const key_equal& eq = key_equal(),
               const allocator_type& alloc = allocator_type())
      : raw_hash_set(init.begin(), init.end(), bucket_count, hash, eq, alloc) {}

  raw_hash_set(std::initializer_list<init_type> init, size_t bucket_count = 0,
               const hasher& hash = hasher(), const key_equal& eq = key_equal(),
               const allocator_type& alloc = allocator_type())
      : raw_hash_set(init.begin(), init.end(), bucket_count, hash, eq, alloc) {}

  template <class T, RequiresNotInit<T> = 0,
            std::enable_if_t<Insertable<T>::value, int> = 0>
  raw_hash_set(std::initializer_list<T> init, size_t bucket_count,
               const hasher& hash, const allocator_type& alloc)
      : raw_hash_set(init, bucket_count, hash, key_equal(), alloc) {}

  raw_hash_set(std::initializer_list<init_type> init, size_t bucket_count,
               const hasher& hash, const allocator_type& alloc)
      : raw_hash_set(init, bucket_count, hash, key_equal(), alloc) {}

  template <class T, RequiresNotInit<T> = 0,
            std::enable_if_t<Insertable<T>::value, int> = 0>
  raw_hash_set(std::initializer_list<T> init, size_t bucket_count,
               const allocator_type& alloc)
      : raw_hash_set(init, bucket_count, hasher(), key_equal(), alloc) {}

  raw_hash_set(std::initializer_list<init_type> init, size_t bucket_count,
               const allocator_type& alloc)
      : raw_hash_set(init, bucket_count, hasher(), key_equal(), alloc) {}

  template <class T, RequiresNotInit<T> = 0,
            std::enable_if_t<Insertable<T>::value, int> = 0>
  raw_hash_set(std::initializer_list<T> init, const allocator_type& alloc)
      : raw_hash_set(init, 0, hasher(), key_equal(), alloc) {}

  raw_hash_set(std::initializer_list<init_type> init,
               const allocator_type& alloc)
      : raw_hash_set(init, 0, hasher(), key_equal(), alloc) {}

  // Copy constructor. Obtains the allocator via
  // select_on_container_copy_construction, per the standard container
  // requirements.
  raw_hash_set(const raw_hash_set& that)
      : raw_hash_set(that, AllocTraits::select_on_container_copy_construction(
                               allocator_type(that.char_alloc_ref()))) {}

  // Allocator-extended copy constructor. Pre-sizes the table to fit
  // `that.size()` elements and then copies every full slot from `that`,
  // placing each element directly (no duplicate-key probing is needed since
  // the destination starts empty).
  raw_hash_set(const raw_hash_set& that, const allocator_type& a)
      : raw_hash_set(GrowthToLowerboundCapacity(that.size()), that.hash_ref(),
                     that.eq_ref(), a) {
    that.AssertNotDebugCapacity();
    const size_t size = that.size();
    if (size == 0) {
      return;
    }
    // We don't use `that.is_soo()` here because `that` can have non-SOO
    // capacity but have a size that fits into SOO capacity.
    if (fits_in_soo(size)) {
      ABSL_SWISSTABLE_ASSERT(size == 1);
      common().set_full_soo();
      emplace_at(soo_iterator(), *that.begin());
      if (should_sample_soo()) {
        GrowFullSooTableToNextCapacityForceSampling(common(),
                                                    GetPolicyFunctions());
      }
      return;
    }
    ABSL_SWISSTABLE_ASSERT(!that.is_soo());
    const size_t cap = capacity();
    // Note about single group tables:
    // 1. It is correct to have any order of elements.
    // 2. Order has to be non deterministic.
    // 3. We are assigning elements with arbitrary `shift` starting from
    //    `capacity + shift` position.
    // 4. `shift` must be coprime with `capacity + 1` in order to be able to use
    //     modular arithmetic to traverse all positions, instead if cycling
    //     through a subset of positions. Odd numbers are coprime with any
    //     `capacity + 1` (2^N).
    size_t offset = cap;
    // `shift == 0` selects the probing path (big tables); a seed-derived odd
    // shift selects the modular-walk path (single-group tables).
    const size_t shift =
        is_single_group(cap) ? (common().seed().seed() | 1) : 0;
    IterateOverFullSlots(
        that.common(), sizeof(slot_type),
        [&](const ctrl_t* that_ctrl, void* that_slot_void) {
          slot_type* that_slot = static_cast<slot_type*>(that_slot_void);
          if (shift == 0) {
            // Big tables case. Position must be searched via probing.
            // The table is guaranteed to be empty, so we can do faster than
            // a full `insert`.
            const size_t hash = PolicyTraits::apply(
                HashElement{hash_ref()}, PolicyTraits::element(that_slot));
            FindInfo target = find_first_non_full_outofline(common(), hash);
            infoz().RecordInsert(hash, target.probe_length);
            offset = target.offset;
          } else {
            // Small tables case. Next position is computed via shift.
            offset = (offset + shift) & cap;
          }
          const h2_t h2 = static_cast<h2_t>(*that_ctrl);
          ABSL_SWISSTABLE_ASSERT(  // We rely that hash is not changed for small
                                   // tables.
              H2(PolicyTraits::apply(HashElement{hash_ref()},
                                     PolicyTraits::element(that_slot))) == h2 &&
              "hash function value changed unexpectedly during the copy");
          SetCtrl(common(), offset, h2, sizeof(slot_type));
          emplace_at(iterator_at(offset), PolicyTraits::element(that_slot));
          common().maybe_increment_generation_on_insert();
        });
    if (shift != 0) {
      // On small table copy we do not record individual inserts.
      // RecordInsert requires hash, but it is unknown for small tables.
      infoz().RecordStorageChanged(size, cap);
    }
    common().increment_size(size);
    growth_info().OverwriteManyEmptyAsFull(size);
  }

  // Move constructor. Steals `that`'s backing storage (or transfers the SOO
  // slot when memcpy-transfer is not allowed) and resets `that` to a valid
  // default state.
  ABSL_ATTRIBUTE_NOINLINE raw_hash_set(raw_hash_set&& that) noexcept(
      std::is_nothrow_copy_constructible<hasher>::value &&
      std::is_nothrow_copy_constructible<key_equal>::value &&
      std::is_nothrow_copy_constructible<allocator_type>::value)
      :  // Hash, equality and allocator are copied instead of moved because
         // `that` must be left valid. If Hash is std::function<Key>, moving it
         // would create a nullptr functor that cannot be called.
         // Note: we avoid using exchange for better generated code.
        settings_(PolicyTraits::transfer_uses_memcpy() || !that.is_full_soo()
                      ? std::move(that.common())
                      : CommonFields{full_soo_tag_t{}},
                  that.hash_ref(), that.eq_ref(), that.char_alloc_ref()) {
    if (!PolicyTraits::transfer_uses_memcpy() && that.is_full_soo()) {
      transfer(soo_slot(), that.soo_slot());
    }
    that.common() = CommonFields::CreateDefault<SooEnabled()>();
    annotate_for_bug_detection_on_move(that);
  }

  // Allocator-extended move constructor. Equal allocators allow an O(1)
  // storage swap; unequal allocators force element-by-element moves.
  raw_hash_set(raw_hash_set&& that, const allocator_type& a)
      : settings_(CommonFields::CreateDefault<SooEnabled()>(), that.hash_ref(),
                  that.eq_ref(), a) {
    if (CharAlloc(a) == that.char_alloc_ref()) {
      swap_common(that);
      annotate_for_bug_detection_on_move(that);
    } else {
      move_elements_allocs_unequal(std::move(that));
    }
  }

  // Copy assignment via copy-and-move: builds a temporary copy with the
  // appropriate allocator, then moves it into *this.
  raw_hash_set& operator=(const raw_hash_set& that) {
    that.AssertNotDebugCapacity();
    if (ABSL_PREDICT_FALSE(this == &that)) return *this;
    constexpr bool propagate_alloc =
        AllocTraits::propagate_on_container_copy_assignment::value;
    // TODO(ezb): maybe avoid allocating a new backing array if this->capacity()
    // is an exact match for that.size(). If this->capacity() is too big, then
    // it would make iteration very slow to reuse the allocation. Maybe we can
    // do the same heuristic as clear() and reuse if it's small enough.
    allocator_type alloc(propagate_alloc ? that.char_alloc_ref()
                                         : char_alloc_ref());
    raw_hash_set tmp(that, alloc);
    // NOLINTNEXTLINE: not returning *this for performance.
    return assign_impl<propagate_alloc>(std::move(tmp));
  }

  raw_hash_set& operator=(raw_hash_set&& that) noexcept(
      absl::allocator_traits<allocator_type>::is_always_equal::value &&
      std::is_nothrow_move_assignable<hasher>::value &&
      std::is_nothrow_move_assignable<key_equal>::value) {
    // TODO(sbenza): We should only use the operations from the noexcept clause
    // to make sure we actually adhere to that contract.
    // NOLINTNEXTLINE: not returning *this for performance.
    return move_assign(
        std::move(that),
        typename AllocTraits::propagate_on_container_move_assignment());
  }

  ~raw_hash_set() {
    destructor_impl();
    // Poison the capacity so later accesses through dangling handles trip an
    // assertion in debug builds.
    if constexpr (SwisstableAssertAccessToDestroyedTable()) {
      common().set_capacity(InvalidCapacity::kDestroyed);
    }
  }

  // Returns an iterator to the first full slot, or end() if the table is
  // empty. Note: finding the first element is O(capacity) in the worst case
  // because empty/deleted slots must be skipped.
  iterator begin() ABSL_ATTRIBUTE_LIFETIME_BOUND {
    if (ABSL_PREDICT_FALSE(empty())) return end();
    if (is_soo()) return soo_iterator();
    iterator it = {control(), common().slots_union(),
                   common().generation_ptr()};
    it.skip_empty_or_deleted();
    ABSL_SWISSTABLE_ASSERT(IsFull(*it.control()));
    return it;
  }
  iterator end() ABSL_ATTRIBUTE_LIFETIME_BOUND {
    AssertNotDebugCapacity();
    return iterator(common().generation_ptr());
  }

  const_iterator begin() const ABSL_ATTRIBUTE_LIFETIME_BOUND {
    return const_cast<raw_hash_set*>(this)->begin();
  }
  const_iterator end() const ABSL_ATTRIBUTE_LIFETIME_BOUND {
    return const_cast<raw_hash_set*>(this)->end();
  }
  const_iterator cbegin() const ABSL_ATTRIBUTE_LIFETIME_BOUND {
    return begin();
  }
  const_iterator cend() const ABSL_ATTRIBUTE_LIFETIME_BOUND { return end(); }

  bool empty() const { return !size(); }
  size_t size() const {
    AssertNotDebugCapacity();
    return common().size();
  }
  size_t capacity() const {
    const size_t cap = common().capacity();
    // Compiler complains when using functions in ASSUME so use local variable.
    ABSL_ATTRIBUTE_UNUSED static constexpr size_t kDefaultCapacity =
        DefaultCapacity();
    ABSL_ASSUME(cap >= kDefaultCapacity);
    return cap;
  }
  size_t max_size() const { return MaxValidSize(sizeof(slot_type)); }

  ABSL_ATTRIBUTE_REINITIALIZES void clear() {
    // clear() on a moved-from table is allowed: restore a valid capacity
    // before the debug-capacity assertion below.
    if (SwisstableGenerationsEnabled() &&
        capacity() >= InvalidCapacity::kMovedFrom) {
      common().set_capacity(DefaultCapacity());
    }
    AssertNotDebugCapacity();
    // Iterating over this container is O(bucket_count()). When bucket_count()
    // is much greater than size(), iteration becomes prohibitively expensive.
    // For clear() it is more important to reuse the allocated array when the
    // container is small because allocation takes comparatively long time
    // compared to destruction of the elements of the container. So we pick the
    // largest bucket_count() threshold for which iteration is still fast and
    // past that we simply deallocate the array.
    const size_t cap = capacity();
    if (cap == 0) {
      // Already guaranteed to be empty; so nothing to do.
    } else if (is_soo()) {
      if (!empty()) destroy(soo_slot());
      common().set_empty_soo();
    } else {
      destroy_slots();
      clear_backing_array(/*reuse=*/cap < 128);
    }
    common().set_reserved_growth(0);
    common().set_reservation_size(0);
  }

  // This overload kicks in when the argument is an rvalue of insertable and
  // decomposable type other than init_type.
  //
  //   flat_hash_map<std::string, int> m;
  //   m.insert(std::make_pair("abc", 42));
  template <class T,
            int = std::enable_if_t<IsDecomposableAndInsertable<T>::value &&
                                       IsNotBitField<T>::value &&
                                       !IsLifetimeBoundAssignmentFrom<T>::value,
                                   int>()>
  std::pair<iterator, bool> insert(T&& value) ABSL_ATTRIBUTE_LIFETIME_BOUND {
    return emplace(std::forward<T>(value));
  }

  // Lifetime-bound variant of the overload above: annotates `value` as
  // captured by the container for dangling-reference diagnostics, then
  // forwards to the primary overload.
  template <class T, int&...,
            std::enable_if_t<IsDecomposableAndInsertable<T>::value &&
                                 IsNotBitField<T>::value &&
                                 IsLifetimeBoundAssignmentFrom<T>::value,
                             int> = 0>
  std::pair<iterator, bool> insert(
      T&& value ABSL_INTERNAL_ATTRIBUTE_CAPTURED_BY(this))
      ABSL_ATTRIBUTE_LIFETIME_BOUND {
    return this->template insert<T, 0>(std::forward<T>(value));
  }

  // This overload kicks in when the argument is a bitfield or an lvalue of
  // insertable and decomposable type.
  //
  //   union { int n : 1; };
  //   flat_hash_set<int> s;
  //   s.insert(n);
  //
  //   flat_hash_set<std::string> s;
  //   const char* p = "hello";
  //   s.insert(p);
  //
  template <class T, int = std::enable_if_t<
                         IsDecomposableAndInsertable<const T&>::value &&
                             !IsLifetimeBoundAssignmentFrom<const T&>::value,
                         int>()>
  std::pair<iterator, bool> insert(const T& value)
      ABSL_ATTRIBUTE_LIFETIME_BOUND {
    return emplace(value);
  }
  template <class T, int&...,
            std::enable_if_t<IsDecomposableAndInsertable<const T&>::value &&
                                 IsLifetimeBoundAssignmentFrom<const T&>::value,
                             int> = 0>
  std::pair<iterator, bool> insert(
      const T& value ABSL_INTERNAL_ATTRIBUTE_CAPTURED_BY(this))
      ABSL_ATTRIBUTE_LIFETIME_BOUND {
    return this->template insert<T, 0>(value);
  }

  // This overload kicks in when the argument is an rvalue of init_type. Its
  // purpose is to handle brace-init-list arguments.
  //
  //   flat_hash_map<std::string, int> s;
  //   s.insert({"abc", 42});
  std::pair<iterator, bool> insert(init_type&& value)
      ABSL_ATTRIBUTE_LIFETIME_BOUND
#if __cplusplus >= 202002L
    requires(!IsLifetimeBoundAssignmentFrom<init_type>::value)
#endif
  {
    return emplace(std::move(value));
  }
#if __cplusplus >= 202002L
  // C++20-only lifetime-bound counterpart of the init_type overload above.
  std::pair<iterator, bool> insert(
      init_type&& value ABSL_INTERNAL_ATTRIBUTE_CAPTURED_BY(this))
      ABSL_ATTRIBUTE_LIFETIME_BOUND
    requires(IsLifetimeBoundAssignmentFrom<init_type>::value)
  {
    return emplace(std::move(value));
  }
#endif

  // Hinted insert overloads. The hint iterator is ignored; they forward to
  // the corresponding non-hinted overload and return only the position.
  template <class T,
            int = std::enable_if_t<IsDecomposableAndInsertable<T>::value &&
                                       IsNotBitField<T>::value &&
                                       !IsLifetimeBoundAssignmentFrom<T>::value,
                                   int>()>
  iterator insert(const_iterator, T&& value) ABSL_ATTRIBUTE_LIFETIME_BOUND {
    return insert(std::forward<T>(value)).first;
  }
  template <class T, int&...,
            std::enable_if_t<IsDecomposableAndInsertable<T>::value &&
                                 IsNotBitField<T>::value &&
                                 IsLifetimeBoundAssignmentFrom<T>::value,
                             int> = 0>
  iterator insert(const_iterator hint,
                  T&& value ABSL_INTERNAL_ATTRIBUTE_CAPTURED_BY(this))
      ABSL_ATTRIBUTE_LIFETIME_BOUND {
    return this->template insert<T, 0>(hint, std::forward<T>(value));
  }

  template <class T, std::enable_if_t<
                         IsDecomposableAndInsertable<const T&>::value, int> = 0>
  iterator insert(const_iterator,
                  const T& value) ABSL_ATTRIBUTE_LIFETIME_BOUND {
    return insert(value).first;
  }

  iterator insert(const_iterator,
                  init_type&& value) ABSL_ATTRIBUTE_LIFETIME_BOUND {
    return insert(std::move(value)).first;
  }

  // Range insert: emplaces each element in [first, last) one at a time.
  template <class InputIt>
  void insert(InputIt first, InputIt last) {
    for (; first != last; ++first) emplace(*first);
  }

  template <class T, RequiresNotInit<T> = 0,
            std::enable_if_t<Insertable<const T&>::value, int> = 0>
  void insert(std::initializer_list<T> ilist) {
    insert(ilist.begin(), ilist.end());
  }

  void insert(std::initializer_list<init_type> ilist) {
    insert(ilist.begin(), ilist.end());
  }

  // Node-handle insert. If the key is already present the node is returned
  // unconsumed in the result; otherwise the node's slot is transferred into
  // the table and an empty node is returned.
  insert_return_type insert(node_type&& node) ABSL_ATTRIBUTE_LIFETIME_BOUND {
    if (!node) return {end(), false, node_type()};
    const auto& elem = PolicyTraits::element(CommonAccess::GetSlot(node));
    auto res = PolicyTraits::apply(
        InsertSlot<false>{*this, std::move(*CommonAccess::GetSlot(node))},
        elem);
    if (res.second) {
      CommonAccess::Reset(&node);
      return {res.first, true, node_type()};
    } else {
      return {res.first, false, std::move(node)};
    }
  }

  iterator insert(const_iterator,
                  node_type&& node) ABSL_ATTRIBUTE_LIFETIME_BOUND {
    auto res = insert(std::move(node));
    node = std::move(res.node);
    return res.position;
  }

  // This overload kicks in if we can deduce the key from args. This enables us
  // to avoid constructing value_type if an entry with the same key already
  // exists.
  //
  // For example:
  //
  //   flat_hash_map<std::string, std::string> m = {{"abc", "def"}};
  //   // Creates no std::string copies and makes no heap allocations.
  //   m.emplace("abc", "xyz");
  template <class... Args,
            std::enable_if_t<IsDecomposable<Args...>::value, int> = 0>
  std::pair<iterator, bool> emplace(Args&&... args)
      ABSL_ATTRIBUTE_LIFETIME_BOUND {
    return PolicyTraits::apply(EmplaceDecomposable{*this},
                               std::forward<Args>(args)...);
  }

  // This overload kicks in if we cannot deduce the key from args. It constructs
  // value_type unconditionally and then either moves it into the table or
  // destroys.
  template <class... Args,
            std::enable_if_t<!IsDecomposable<Args...>::value, int> = 0>
  std::pair<iterator, bool> emplace(Args&&... args)
      ABSL_ATTRIBUTE_LIFETIME_BOUND {
    // Construct into a stack-allocated slot first; InsertSlot<true> moves it
    // into the table on success or destroys it on a duplicate key.
    alignas(slot_type) unsigned char raw[sizeof(slot_type)];
    slot_type* slot = to_slot(&raw);

    construct(slot, std::forward<Args>(args)...);
    const auto& elem = PolicyTraits::element(slot);
    return PolicyTraits::apply(InsertSlot<true>{*this, std::move(*slot)}, elem);
  }

  template <class... Args>
  iterator emplace_hint(const_iterator,
                        Args&&... args) ABSL_ATTRIBUTE_LIFETIME_BOUND {
    return emplace(std::forward<Args>(args)...).first;
  }

  // Extension API: support for lazy emplace.
  //
  // Looks up key in the table. If found, returns the iterator to the element.
  // Otherwise calls `f` with one argument of type `raw_hash_set::constructor`,
  // and returns an iterator to the new element.
  //
  // `f` must abide by several restrictions:
  //  - it MUST call `raw_hash_set::constructor` with arguments as if a
  //    `raw_hash_set::value_type` is constructed,
  //  - it MUST NOT access the container before the call to
  //    `raw_hash_set::constructor`, and
  //  - it MUST NOT erase the lazily emplaced element.
  // Doing any of these is undefined behavior.
  //
  // For example:
  //
  //   std::unordered_set<ArenaString> s;
  //   // Makes ArenaStr even if "abc" is in the map.
  //   s.insert(ArenaString(&arena, "abc"));
  //
  //   flat_hash_set<ArenaStr> s;
  //   // Makes ArenaStr only if "abc" is not in the map.
  //   s.lazy_emplace("abc", [&](const constructor& ctor) {
  //     ctor(&arena, "abc");
  //   });
  //
  // WARNING: This API is currently experimental. If there is a way to implement
  // the same thing with the rest of the API, prefer that.
  class constructor {
    friend class raw_hash_set;

   public:
    // Constructs the value in the reserved slot. Must be called exactly once;
    // nulling `*slot_` lets lazy_emplace assert the call happened.
    template <class... Args>
    void operator()(Args&&... args) const {
      ABSL_SWISSTABLE_ASSERT(*slot_);
      PolicyTraits::construct(alloc_, *slot_, std::forward<Args>(args)...);
      *slot_ = nullptr;
    }

   private:
    constructor(allocator_type* a, slot_type** slot) : alloc_(a), slot_(slot) {}

    allocator_type* alloc_;
    slot_type** slot_;
  };

  template <class K = key_type, class F>
  iterator lazy_emplace(const key_arg<K>& key,
                        F&& f) ABSL_ATTRIBUTE_LIFETIME_BOUND {
    auto res = find_or_prepare_insert(key);
    if (res.second) {
      slot_type* slot = res.first.slot();
      allocator_type alloc(char_alloc_ref());
      std::forward<F>(f)(constructor(&alloc, &slot));
      // `f` must have invoked the constructor (which resets `slot`).
      ABSL_SWISSTABLE_ASSERT(!slot);
    }
    return res.first;
  }

  // Extension API: support for heterogeneous keys.
  //
  //   std::unordered_set<std::string> s;
  //   // Turns "abc" into std::string.
  //   s.erase("abc");
  //
  //   flat_hash_set<std::string> s;
  //   // Uses "abc" directly without copying it into std::string.
  //   s.erase("abc");
  template <class K = key_type>
  size_type erase(const key_arg<K>& key) {
    auto it = find(key);
    if (it == end()) return 0;
    erase(it);
    return 1;
  }

  // Erases the element pointed to by `it`. Unlike `std::unordered_set::erase`,
  // this method returns void to reduce algorithmic complexity to O(1). The
  // iterator is invalidated so any increment should be done before calling
  // erase (e.g. `erase(it++)`).
  void erase(const_iterator cit) { erase(cit.inner_); }

  // This overload is necessary because otherwise erase<K>(const K&) would be
  // a better match if non-const iterator is passed as an argument.
  void erase(iterator it) {
    AssertNotDebugCapacity();
    AssertIsFull(it.control(), it.generation(), it.generation_ptr(), "erase()");
    destroy(it.slot());
    // SOO tables have no control bytes to update; just mark the table empty.
    if (is_soo()) {
      common().set_empty_soo();
    } else {
      erase_meta_only(it);
    }
  }

  // Erases all elements in [first, last). Uses the bulk destroy/clear path
  // when the whole table is being erased.
  iterator erase(const_iterator first,
                 const_iterator last) ABSL_ATTRIBUTE_LIFETIME_BOUND {
    AssertNotDebugCapacity();
    // We check for empty first because clear_backing_array requires that
    // capacity() > 0 as a precondition.
    if (empty()) return end();
    if (first == last) return last.inner_;
    if (is_soo()) {
      destroy(soo_slot());
      common().set_empty_soo();
      return end();
    }
    if (first == begin() && last == end()) {
      // TODO(ezb): we access control bytes in destroy_slots so it could make
      // sense to combine destroy_slots and clear_backing_array to avoid cache
      // misses when the table is large. Note that we also do this in clear().
      destroy_slots();
      clear_backing_array(/*reuse=*/true);
      common().set_reserved_growth(common().reservation_size());
      return end();
    }
    while (first != last) {
      erase(first++);
    }
    return last.inner_;
  }

  // Moves elements from `src` into `this`.
  // If the element already exists in `this`, it is left unmodified in `src`.
  template <typename H, typename E>
  void merge(raw_hash_set<Policy, H, E, Alloc>& src) {  // NOLINT
    AssertNotDebugCapacity();
    src.AssertNotDebugCapacity();
    assert(this != &src);
    // Returns whether insertion took place.
    const auto insert_slot = [this](slot_type* src_slot) {
      return PolicyTraits::apply(InsertSlot<false>{*this, std::move(*src_slot)},
                                 PolicyTraits::element(src_slot))
          .second;
    };

    if (src.is_soo()) {
      if (src.empty()) return;
      if (insert_slot(src.soo_slot())) src.common().set_empty_soo();
      return;
    }
    for (auto it = src.begin(), e = src.end(); it != e;) {
      // Advance before a potential erase: erase_meta_only invalidates `it`.
      auto next = std::next(it);
      if (insert_slot(it.slot())) src.erase_meta_only(it);
      it = next;
    }
  }

  template <typename H, typename E>
  void merge(raw_hash_set<Policy, H, E, Alloc>&& src) {
    merge(src);
  }

  // Removes the element at `position` from the table and returns it as a
  // node handle; the slot's contents are transferred, not copied.
  node_type extract(const_iterator position) {
    AssertNotDebugCapacity();
    AssertIsFull(position.control(), position.inner_.generation(),
                 position.inner_.generation_ptr(), "extract()");
    allocator_type alloc(char_alloc_ref());
    auto node = CommonAccess::Transfer<node_type>(alloc, position.slot());
    if (is_soo()) {
      common().set_empty_soo();
    } else {
      erase_meta_only(position);
    }
    return node;
  }

  // Heterogeneous-key extract. Returns an empty node if the key is absent.
  template <class K = key_type,
            std::enable_if_t<!std::is_same<K, iterator>::value, int> = 0>
  node_type extract(const key_arg<K>& key) {
    auto it = find(key);
    return it == end() ? node_type() : extract(const_iterator{it});
  }

  // Swaps all state with `that`, propagating the allocator according to
  // propagate_on_container_swap.
  void swap(raw_hash_set& that) noexcept(
      IsNoThrowSwappable<hasher>() && IsNoThrowSwappable<key_equal>() &&
      IsNoThrowSwappable<allocator_type>(
          typename AllocTraits::propagate_on_container_swap{})) {
    AssertNotDebugCapacity();
    that.AssertNotDebugCapacity();
    using std::swap;
    swap_common(that);
    swap(hash_ref(), that.hash_ref());
    swap(eq_ref(), that.eq_ref());
    SwapAlloc(char_alloc_ref(), that.char_alloc_ref(),
              typename AllocTraits::propagate_on_container_swap{});
  }

  void rehash(size_t n) { Rehash(common(), GetPolicyFunctions(), n); }

  // Ensures capacity for at least `n` elements, choosing between the
  // already-allocated and the empty/non-allocated resize paths.
  void reserve(size_t n) {
    const size_t cap = capacity();
    if (ABSL_PREDICT_TRUE(cap > DefaultCapacity() ||
                          // !SooEnabled() implies empty(), so we can skip the
                          // check for optimization.
                          (SooEnabled() && !empty()))) {
      ReserveAllocatedTable(common(), GetPolicyFunctions(), n);
    } else {
      if (ABSL_PREDICT_TRUE(n > DefaultCapacity())) {
        ReserveEmptyNonAllocatedTableToFitNewSize(common(),
                                                  GetPolicyFunctions(), n);
      }
    }
  }

  // Extension API: support for heterogeneous keys.
  //
  //   std::unordered_set<std::string> s;
  //   // Turns "abc" into std::string.
  //   s.count("abc");
  //
  //   ch_set<std::string> s;
  //   // Uses "abc" directly without copying it into std::string.
  //   s.count("abc");
  template <class K = key_type>
  size_t count(const key_arg<K>& key) const {
    return find(key) == end() ? 0 : 1;
  }

  // Issues CPU prefetch instructions for the memory needed to find or insert
  // a key. Like all lookup functions, this support heterogeneous keys.
  //
  // NOTE: This is a very low level operation and should not be used without
  // specific benchmarks indicating its importance.
2888 template <class K = key_type> 2889 void prefetch(const key_arg<K>& key) const { 2890 if (capacity() == DefaultCapacity()) return; 2891 (void)key; 2892 // Avoid probing if we won't be able to prefetch the addresses received. 2893 #ifdef ABSL_HAVE_PREFETCH 2894 prefetch_heap_block(); 2895 auto seq = probe(common(), hash_ref()(key)); 2896 PrefetchToLocalCache(control() + seq.offset()); 2897 PrefetchToLocalCache(slot_array() + seq.offset()); 2898 #endif // ABSL_HAVE_PREFETCH 2899 } 2900 2901 template <class K = key_type> 2902 ABSL_DEPRECATE_AND_INLINE() 2903 iterator find(const key_arg<K>& key, 2904 size_t) ABSL_ATTRIBUTE_LIFETIME_BOUND { 2905 return find(key); 2906 } 2907 // The API of find() has one extension: the type of the key argument doesn't 2908 // have to be key_type. This is so called heterogeneous key support. 2909 template <class K = key_type> 2910 iterator find(const key_arg<K>& key) ABSL_ATTRIBUTE_LIFETIME_BOUND { 2911 AssertOnFind(key); 2912 if (is_soo()) return find_soo(key); 2913 prefetch_heap_block(); 2914 return find_non_soo(key, hash_ref()(key)); 2915 } 2916 2917 template <class K = key_type> 2918 ABSL_DEPRECATE_AND_INLINE() 2919 const_iterator find(const key_arg<K>& key, 2920 size_t) const ABSL_ATTRIBUTE_LIFETIME_BOUND { 2921 return find(key); 2922 } 2923 template <class K = key_type> 2924 const_iterator find(const key_arg<K>& key) const 2925 ABSL_ATTRIBUTE_LIFETIME_BOUND { 2926 return const_cast<raw_hash_set*>(this)->find(key); 2927 } 2928 2929 template <class K = key_type> 2930 bool contains(const key_arg<K>& key) const { 2931 // Here neither the iterator returned by `find()` nor `end()` can be invalid 2932 // outside of potential thread-safety issues. 2933 // `find()`'s return value is constructed, used, and then destructed 2934 // all in this context. 
2935 return !find(key).unchecked_equals(end()); 2936 } 2937 2938 template <class K = key_type> 2939 std::pair<iterator, iterator> equal_range(const key_arg<K>& key) 2940 ABSL_ATTRIBUTE_LIFETIME_BOUND { 2941 auto it = find(key); 2942 if (it != end()) return {it, std::next(it)}; 2943 return {it, it}; 2944 } 2945 template <class K = key_type> 2946 std::pair<const_iterator, const_iterator> equal_range( 2947 const key_arg<K>& key) const ABSL_ATTRIBUTE_LIFETIME_BOUND { 2948 auto it = find(key); 2949 if (it != end()) return {it, std::next(it)}; 2950 return {it, it}; 2951 } 2952 2953 size_t bucket_count() const { return capacity(); } 2954 float load_factor() const { 2955 return capacity() ? static_cast<double>(size()) / capacity() : 0.0; 2956 } 2957 float max_load_factor() const { return 1.0f; } 2958 void max_load_factor(float) { 2959 // Does nothing. 2960 } 2961 2962 hasher hash_function() const { return hash_ref(); } 2963 key_equal key_eq() const { return eq_ref(); } 2964 allocator_type get_allocator() const { 2965 return allocator_type(char_alloc_ref()); 2966 } 2967 2968 friend bool operator==(const raw_hash_set& a, const raw_hash_set& b) { 2969 if (a.size() != b.size()) return false; 2970 const raw_hash_set* outer = &a; 2971 const raw_hash_set* inner = &b; 2972 if (outer->capacity() > inner->capacity()) std::swap(outer, inner); 2973 for (const value_type& elem : *outer) { 2974 auto it = PolicyTraits::apply(FindElement{*inner}, elem); 2975 if (it == inner->end()) return false; 2976 // Note: we used key_equal to check for key equality in FindElement, but 2977 // we may need to do an additional comparison using 2978 // value_type::operator==. E.g. the keys could be equal and the 2979 // mapped_types could be unequal in a map or even in a set, key_equal 2980 // could ignore some fields that aren't ignored by operator==. 
2981 static constexpr bool kKeyEqIsValueEq = 2982 std::is_same<key_type, value_type>::value && 2983 std::is_same<key_equal, hash_default_eq<key_type>>::value; 2984 if (!kKeyEqIsValueEq && !(*it == elem)) return false; 2985 } 2986 return true; 2987 } 2988 2989 friend bool operator!=(const raw_hash_set& a, const raw_hash_set& b) { 2990 return !(a == b); 2991 } 2992 2993 template <typename H> 2994 friend typename std::enable_if<H::template is_hashable<value_type>::value, 2995 H>::type 2996 AbslHashValue(H h, const raw_hash_set& s) { 2997 return H::combine(H::combine_unordered(std::move(h), s.begin(), s.end()), 2998 s.size()); 2999 } 3000 3001 friend void swap(raw_hash_set& a, 3002 raw_hash_set& b) noexcept(noexcept(a.swap(b))) { 3003 a.swap(b); 3004 } 3005 3006 private: 3007 template <class Container, typename Enabler> 3008 friend struct absl::container_internal::hashtable_debug_internal:: 3009 HashtableDebugAccess; 3010 3011 friend struct absl::container_internal::HashtableFreeFunctionsAccess; 3012 3013 struct FindElement { 3014 template <class K, class... Args> 3015 const_iterator operator()(const K& key, Args&&...) const { 3016 return s.find(key); 3017 } 3018 const raw_hash_set& s; 3019 }; 3020 3021 struct HashElement { 3022 template <class K, class... Args> 3023 size_t operator()(const K& key, Args&&...) const { 3024 return h(key); 3025 } 3026 const hasher& h; 3027 }; 3028 3029 template <class K1> 3030 struct EqualElement { 3031 template <class K2, class... Args> 3032 bool operator()(const K2& lhs, Args&&...) const { 3033 ABSL_SWISSTABLE_IGNORE_UNINITIALIZED_RETURN(eq(lhs, rhs)); 3034 } 3035 const K1& rhs; 3036 const key_equal& eq; 3037 }; 3038 3039 struct EmplaceDecomposable { 3040 template <class K, class... Args> 3041 std::pair<iterator, bool> operator()(const K& key, Args&&... 
args) const {
      auto res = s.find_or_prepare_insert(key);
      if (res.second) {
        s.emplace_at(res.first, std::forward<Args>(args)...);
      }
      return res;
    }
    raw_hash_set& s;
  };

  // Functor that inserts an already-constructed slot: on success the slot is
  // transferred into the table; on a duplicate key it is destroyed iff
  // `do_destroy` is true.
  template <bool do_destroy>
  struct InsertSlot {
    template <class K, class... Args>
    std::pair<iterator, bool> operator()(const K& key, Args&&...) && {
      auto res = s.find_or_prepare_insert(key);
      if (res.second) {
        s.transfer(res.first.slot(), &slot);
      } else if (do_destroy) {
        s.destroy(&slot);
      }
      return res;
    }
    raw_hash_set& s;
    // Constructed slot. Either moved into place or destroyed.
    slot_type&& slot;
  };

  // Constructs an element in `slot` via the policy. The reentrancy guard
  // catches container access from within the element's constructor.
  template <typename... Args>
  inline void construct(slot_type* slot, Args&&... args) {
    common().RunWithReentrancyGuard([&] {
      allocator_type alloc(char_alloc_ref());
      PolicyTraits::construct(&alloc, slot, std::forward<Args>(args)...);
    });
  }
  // Destroys the element in `slot` via the policy, with the same reentrancy
  // protection as construct().
  inline void destroy(slot_type* slot) {
    common().RunWithReentrancyGuard([&] {
      allocator_type alloc(char_alloc_ref());
      PolicyTraits::destroy(&alloc, slot);
    });
  }
  // Relocates the element in `from` into `to` via the policy.
  inline void transfer(slot_type* to, slot_type* from) {
    common().RunWithReentrancyGuard([&] {
      allocator_type alloc(char_alloc_ref());
      PolicyTraits::transfer(&alloc, to, from);
    });
  }

  // TODO(b/289225379): consider having a helper class that has the impls for
  // SOO functionality.
  // Lookup while the table is in SOO (small-object-optimized, inline storage)
  // mode: at most one element exists, so compare against it directly.
  template <class K = key_type>
  iterator find_soo(const key_arg<K>& key) {
    ABSL_SWISSTABLE_ASSERT(is_soo());
    return empty() || !PolicyTraits::apply(EqualElement<K>{key, eq_ref()},
                                           PolicyTraits::element(soo_slot()))
               ? end()
               : soo_iterator();
  }

  // Lookup in the heap-allocated backing array: probe group-by-group, matching
  // the 7-bit H2 hash fragment before doing a full key comparison.
  template <class K = key_type>
  iterator find_non_soo(const key_arg<K>& key, size_t hash) {
    ABSL_SWISSTABLE_ASSERT(!is_soo());
    auto seq = probe(common(), hash);
    const h2_t h2 = H2(hash);
    const ctrl_t* ctrl = control();
    while (true) {
      Group g{ctrl + seq.offset()};
      for (uint32_t i : g.Match(h2)) {
        if (ABSL_PREDICT_TRUE(PolicyTraits::apply(
                EqualElement<K>{key, eq_ref()},
                PolicyTraits::element(slot_array() + seq.offset(i)))))
          return iterator_at(seq.offset(i));
      }
      // An empty byte in the group means the key cannot appear further along
      // this probe sequence.
      if (ABSL_PREDICT_TRUE(g.MaskEmpty())) return end();
      seq.next();
      ABSL_SWISSTABLE_ASSERT(seq.index() <= capacity() && "full table!");
    }
  }

  // Returns true if the table needs to be sampled.
  // This should be called on insertion into an empty SOO table and in copy
  // construction when the size can fit in SOO capacity.
  bool should_sample_soo() const {
    ABSL_SWISSTABLE_ASSERT(is_soo());
    if (!ShouldSampleHashtablezInfoForAlloc<CharAlloc>()) return false;
    return ABSL_PREDICT_FALSE(ShouldSampleNextTable());
  }

  // Resets the heap-allocated backing array to the empty state; `reuse`
  // controls whether the allocation is kept for reuse.
  void clear_backing_array(bool reuse) {
    ABSL_SWISSTABLE_ASSERT(capacity() > DefaultCapacity());
    ClearBackingArray(common(), GetPolicyFunctions(), &char_alloc_ref(), reuse,
                      SooEnabled());
  }

  // Destroys every full slot in the backing array (no-op when the policy's
  // destroy is trivial). Does not deallocate the array itself.
  void destroy_slots() {
    ABSL_SWISSTABLE_ASSERT(!is_soo());
    if (PolicyTraits::template destroy_is_trivial<Alloc>()) return;
    auto destroy_slot = [&](const ctrl_t*, void* slot) {
      this->destroy(static_cast<slot_type*>(slot));
    };
    if constexpr (SwisstableAssertAccessToDestroyedTable()) {
      // Iterate over a copy while the live table is marked destroyed, so that
      // reentrant access from element destructors trips the debug check.
      CommonFields common_copy(non_soo_tag_t{}, this->common());
      common().set_capacity(InvalidCapacity::kDestroyed);
      IterateOverFullSlots(common_copy, sizeof(slot_type), destroy_slot);
      common().set_capacity(common_copy.capacity());
    } else {
      IterateOverFullSlots(common(), sizeof(slot_type), destroy_slot);
    }
  }

  // Returns the heap-allocated backing array to the allocator.
  void dealloc() {
    ABSL_SWISSTABLE_ASSERT(capacity() > DefaultCapacity());
    // Unpoison before returning the memory to the allocator.
    SanitizerUnpoisonMemoryRegion(slot_array(), sizeof(slot_type) * capacity());
    infoz().Unregister();
    DeallocateBackingArray<BackingArrayAlignment(alignof(slot_type)),
                           CharAlloc>(&char_alloc_ref(), capacity(), control(),
                                      sizeof(slot_type), alignof(slot_type),
                                      common().has_infoz());
  }

  // Shared teardown for the destructor and assignment: destroys elements and
  // releases the backing array, handling SOO and moved-from states.
  void destructor_impl() {
    if (SwisstableGenerationsEnabled() &&
        capacity() >= InvalidCapacity::kMovedFrom) {
      // Moved-from tables own nothing; there is nothing to destroy.
      return;
    }
    if (capacity() == 0) return;
    if (is_soo()) {
      if (!empty()) {
        ABSL_SWISSTABLE_IGNORE_UNINITIALIZED(destroy(soo_slot()));
      }
      return;
    }
    destroy_slots();
    dealloc();
  }

  // Erases, but does not destroy, the value pointed to by `it`.
  //
  // This merely updates the pertinent control byte. This can be used in
  // conjunction with Policy::transfer to move the object to another place.
  void erase_meta_only(const_iterator it) {
    ABSL_SWISSTABLE_ASSERT(!is_soo());
    EraseMetaOnly(common(), static_cast<size_t>(it.control() - control()),
                  sizeof(slot_type));
  }

  // Computes the hash of the element stored in `slot` via the policy.
  size_t hash_of(slot_type* slot) const {
    return PolicyTraits::apply(HashElement{hash_ref()},
                               PolicyTraits::element(slot));
  }

  // Grows a full SOO table into its first heap allocation. The memcpy size is
  // selected at compile time when the policy permits memcpy transfer.
  void resize_full_soo_table_to_next_capacity() {
    ABSL_SWISSTABLE_ASSERT(SooEnabled());
    ABSL_SWISSTABLE_ASSERT(capacity() == SooCapacity());
    ABSL_SWISSTABLE_ASSERT(!empty());
    if constexpr (SooEnabled()) {
      GrowFullSooTableToNextCapacity<PolicyTraits::transfer_uses_memcpy()
                                         ? OptimalMemcpySizeForSooSlotTransfer(
                                               sizeof(slot_type))
                                         : 0,
                                     PolicyTraits::transfer_uses_memcpy()>(
          common(), GetPolicyFunctions(), hash_of(soo_slot()));
    }
  }

  // Casting directly from e.g.
// char* to slot_type* can cause compilation errors
  // on objective-C. This function converts to void* first, avoiding the issue.
  static slot_type* to_slot(void* buf) { return static_cast<slot_type*>(buf); }

  // Requires that lhs does not have a full SOO slot.
  static void move_common(bool rhs_is_full_soo, CharAlloc& rhs_alloc,
                          CommonFields& lhs, CommonFields&& rhs) {
    if (PolicyTraits::transfer_uses_memcpy() || !rhs_is_full_soo) {
      lhs = std::move(rhs);
    } else {
      // A full SOO slot that is not memcpy-transferable must be relocated via
      // the policy; only the non-slot fields can be moved wholesale.
      lhs.move_non_heap_or_soo_fields(rhs);
      rhs.RunWithReentrancyGuard([&] {
        lhs.RunWithReentrancyGuard([&] {
          PolicyTraits::transfer(&rhs_alloc, to_slot(lhs.soo_data()),
                                 to_slot(rhs.soo_data()));
        });
      });
    }
  }

  // Swaps common fields making sure to avoid memcpy'ing a full SOO slot if we
  // aren't allowed to do so.
  void swap_common(raw_hash_set& that) {
    using std::swap;
    if (PolicyTraits::transfer_uses_memcpy()) {
      swap(common(), that.common());
      return;
    }
    // Three-way move through a temporary, routed through move_common so SOO
    // slots are transferred with the policy.
    CommonFields tmp = CommonFields(uninitialized_tag_t{});
    const bool that_is_full_soo = that.is_full_soo();
    move_common(that_is_full_soo, that.char_alloc_ref(), tmp,
                std::move(that.common()));
    move_common(is_full_soo(), char_alloc_ref(), that.common(),
                std::move(common()));
    move_common(that_is_full_soo, that.char_alloc_ref(), common(),
                std::move(tmp));
  }

  // Marks `that` as moved-from and optionally rehashes `this` so that bugs
  // which depend on iteration order after a move are surfaced in debug builds.
  void annotate_for_bug_detection_on_move(
      ABSL_ATTRIBUTE_UNUSED raw_hash_set& that) {
    // We only enable moved-from validation when generations are enabled (rather
    // than using NDEBUG) to avoid issues in which NDEBUG is enabled in some
    // translation units but not in others.
    if (SwisstableGenerationsEnabled()) {
      that.common().set_capacity(this == &that ? InvalidCapacity::kSelfMovedFrom
                                               : InvalidCapacity::kMovedFrom);
    }
    if (!SwisstableGenerationsEnabled() || capacity() == DefaultCapacity() ||
        capacity() > kAboveMaxValidCapacity) {
      return;
    }
    common().increment_generation();
    if (!empty() && common().should_rehash_for_bug_detection_on_move()) {
      ResizeAllocatedTableWithSeedChange(common(), GetPolicyFunctions(),
                                         capacity());
    }
  }

  // Move-assignment core: takes over `that`'s storage wholesale. Used when the
  // allocator propagates or compares equal.
  template <bool propagate_alloc>
  raw_hash_set& assign_impl(raw_hash_set&& that) {
    // We don't bother checking for this/that aliasing. We just need to avoid
    // breaking the invariants in that case.
    destructor_impl();
    move_common(that.is_full_soo(), that.char_alloc_ref(), common(),
                std::move(that.common()));
    hash_ref() = that.hash_ref();
    eq_ref() = that.eq_ref();
    CopyAlloc(char_alloc_ref(), that.char_alloc_ref(),
              std::integral_constant<bool, propagate_alloc>());
    that.common() = CommonFields::CreateDefault<SooEnabled()>();
    annotate_for_bug_detection_on_move(that);
    return *this;
  }

  // Move-assignment fallback for unequal, non-propagating allocators: we
  // cannot adopt `that`'s memory, so each element is moved individually.
  raw_hash_set& move_elements_allocs_unequal(raw_hash_set&& that) {
    const size_t size = that.size();
    if (size == 0) return *this;
    reserve(size);
    for (iterator it = that.begin(); it != that.end(); ++it) {
      insert(std::move(PolicyTraits::element(it.slot())));
      that.destroy(it.slot());
    }
    if (!that.is_soo()) that.dealloc();
    that.common() = CommonFields::CreateDefault<SooEnabled()>();
    annotate_for_bug_detection_on_move(that);
    return *this;
  }

  raw_hash_set& move_assign(raw_hash_set&& that,
                            std::true_type /*propagate_alloc*/) {
    return assign_impl<true>(std::move(that));
  }
  raw_hash_set& move_assign(raw_hash_set&& that,
                            std::false_type /*propagate_alloc*/) {
    if (char_alloc_ref() == that.char_alloc_ref()) {
      return assign_impl<false>(std::move(that));
    }
    // Aliasing can't happen here because allocs would compare equal above.
    assert(this != &that);
    destructor_impl();
    // We can't take over that's memory so we need to move each element.
    // While moving elements, this should have that's hash/eq so copy hash/eq
    // before moving elements.
    hash_ref() = that.hash_ref();
    eq_ref() = that.eq_ref();
    return move_elements_allocs_unequal(std::move(that));
  }

  // find_or_prepare_insert for a table in SOO mode. May leave SOO mode (grow
  // to a heap allocation) when sampling is required or a second distinct key
  // arrives.
  template <class K>
  std::pair<iterator, bool> find_or_prepare_insert_soo(const K& key) {
    if (empty()) {
      if (should_sample_soo()) {
        // Sampled tables cannot remain SOO; force a heap allocation, then
        // fall through to the non-SOO insertion path below.
        GrowEmptySooTableToNextCapacityForceSampling(common(),
                                                     GetPolicyFunctions());
      } else {
        common().set_full_soo();
        return {soo_iterator(), true};
      }
    } else if (PolicyTraits::apply(EqualElement<K>{key, eq_ref()},
                                   PolicyTraits::element(soo_slot()))) {
      return {soo_iterator(), false};
    } else {
      resize_full_soo_table_to_next_capacity();
    }
    const size_t index =
        PrepareInsertAfterSoo(hash_ref()(key), sizeof(slot_type), common());
    return {iterator_at(index), true};
  }

  // find_or_prepare_insert for a heap-allocated table: probes for the key and,
  // if absent, reserves an insertion slot along the probe sequence.
  template <class K>
  std::pair<iterator, bool> find_or_prepare_insert_non_soo(const K& key) {
    ABSL_SWISSTABLE_ASSERT(!is_soo());
    prefetch_heap_block();
    const size_t hash = hash_ref()(key);
    auto seq = probe(common(), hash);
    const h2_t h2 = H2(hash);
    const ctrl_t* ctrl = control();
    while (true) {
      Group g{ctrl + seq.offset()};
      for (uint32_t i : g.Match(h2)) {
        if (ABSL_PREDICT_TRUE(PolicyTraits::apply(
                EqualElement<K>{key, eq_ref()},
                PolicyTraits::element(slot_array() + seq.offset(i)))))
          return {iterator_at(seq.offset(i)), false};
      }
      auto mask_empty = g.MaskEmpty();
      if (ABSL_PREDICT_TRUE(mask_empty)) {
        size_t target = seq.offset(
            GetInsertionOffset(mask_empty, capacity(), hash, common().seed()));
        return {iterator_at(PrepareInsertNonSoo(common(), GetPolicyFunctions(),
                                                hash,
                                                FindInfo{target, seq.index()})),
                true};
      }
      seq.next();
      ABSL_SWISSTABLE_ASSERT(seq.index() <= capacity() && "full table!");
    }
  }

 protected:
  // Asserts for correctness that we run on find/find_or_prepare_insert.
  template <class K>
  void AssertOnFind(ABSL_ATTRIBUTE_UNUSED const K& key) {
    AssertHashEqConsistent(key);
    AssertNotDebugCapacity();
  }

  // Asserts that the capacity is not a sentinel invalid value.
  void AssertNotDebugCapacity() const {
#ifdef NDEBUG
    if (!SwisstableGenerationsEnabled()) {
      return;
    }
#endif
    // Valid capacities fall below all of the sentinel values, so the common
    // case is a single predicted-true comparison.
    if (ABSL_PREDICT_TRUE(capacity() <
                          InvalidCapacity::kAboveMaxValidCapacity)) {
      return;
    }
    assert(capacity() != InvalidCapacity::kReentrance &&
           "Reentrant container access during element construction/destruction "
           "is not allowed.");
    if constexpr (SwisstableAssertAccessToDestroyedTable()) {
      if (capacity() == InvalidCapacity::kDestroyed) {
        ABSL_RAW_LOG(FATAL, "Use of destroyed hash table.");
      }
    }
    if (SwisstableGenerationsEnabled() &&
        ABSL_PREDICT_FALSE(capacity() >= InvalidCapacity::kMovedFrom)) {
      if (capacity() == InvalidCapacity::kSelfMovedFrom) {
        // If this log triggers, then a hash table was move-assigned to itself
        // and then used again later without being reinitialized.
        ABSL_RAW_LOG(FATAL, "Use of self-move-assigned hash table.");
      }
      ABSL_RAW_LOG(FATAL, "Use of moved-from hash table.");
    }
  }

  // Asserts that hash and equal functors provided by the user are consistent,
  // meaning that `eq(k1, k2)` implies `hash(k1)==hash(k2)`.
  template <class K>
  void AssertHashEqConsistent(const K& key) {
#ifdef NDEBUG
    return;
#endif
    // If the hash/eq functors are known to be consistent, then skip validation.
if (std::is_same<hasher, absl::container_internal::StringHash>::value &&
        std::is_same<key_equal, absl::container_internal::StringEq>::value) {
      return;
    }
    if (std::is_scalar<key_type>::value &&
        std::is_same<hasher, absl::Hash<key_type>>::value &&
        std::is_same<key_equal, std::equal_to<key_type>>::value) {
      return;
    }
    if (empty()) return;

    const size_t hash_of_arg = hash_ref()(key);
    // For each stored element that compares equal to `key`, its hash must
    // match the hash of `key`.
    const auto assert_consistent = [&](const ctrl_t*, void* slot) {
      const value_type& element =
          PolicyTraits::element(static_cast<slot_type*>(slot));
      const bool is_key_equal =
          PolicyTraits::apply(EqualElement<K>{key, eq_ref()}, element);
      if (!is_key_equal) return;

      const size_t hash_of_slot =
          PolicyTraits::apply(HashElement{hash_ref()}, element);
      ABSL_ATTRIBUTE_UNUSED const bool is_hash_equal =
          hash_of_arg == hash_of_slot;
      assert((!is_key_equal || is_hash_equal) &&
             "eq(k1, k2) must imply that hash(k1) == hash(k2). "
             "hash/eq functors are inconsistent.");
    };

    if (is_soo()) {
      assert_consistent(/*unused*/ nullptr, soo_slot());
      return;
    }
    // We only do validation for small tables so that it's constant time.
    if (capacity() > 16) return;
    IterateOverFullSlots(common(), sizeof(slot_type), assert_consistent);
  }

  // Attempts to find `key` in the table; if it isn't found, returns an iterator
  // where the value can be inserted into, with the control byte already set to
  // `key`'s H2. Returns a bool indicating whether an insertion can take place.
  template <class K>
  std::pair<iterator, bool> find_or_prepare_insert(const K& key) {
    AssertOnFind(key);
    if (is_soo()) return find_or_prepare_insert_soo(key);
    return find_or_prepare_insert_non_soo(key);
  }

  // Constructs the value in the space pointed by the iterator. This only works
  // after an unsuccessful find_or_prepare_insert() and before any other
  // modifications happen in the raw_hash_set.
  //
  // PRECONDITION: iter was returned from find_or_prepare_insert(k), where k is
  // the key decomposed from `forward<Args>(args)...`, and the bool returned by
  // find_or_prepare_insert(k) was true.
  // POSTCONDITION: *m.iterator_at(i) == value_type(forward<Args>(args)...).
  template <class... Args>
  void emplace_at(iterator iter, Args&&... args) {
    construct(iter.slot(), std::forward<Args>(args)...);

    assert(PolicyTraits::apply(FindElement{*this}, *iter) == iter &&
           "constructed value does not match the lookup key");
  }

  // Builds an iterator over slot index `i` of the backing array.
  iterator iterator_at(size_t i) ABSL_ATTRIBUTE_LIFETIME_BOUND {
    return {control() + i, slot_array() + i, common().generation_ptr()};
  }
  const_iterator iterator_at(size_t i) const ABSL_ATTRIBUTE_LIFETIME_BOUND {
    return const_cast<raw_hash_set*>(this)->iterator_at(i);
  }

  // Dereferences `it` without the debug validity checks.
  reference unchecked_deref(iterator it) { return it.unchecked_deref(); }

 private:
  friend struct RawHashSetTestOnlyAccess;

  // The number of slots we can still fill without needing to rehash.
  //
  // This is stored separately due to tombstones: we do not include tombstones
  // in the growth capacity, because we'd like to rehash when the table is
  // otherwise filled with tombstones: otherwise, probe sequences might get
  // unacceptably long without triggering a rehash. Callers can also force a
  // rehash via the standard `rehash(0)`, which will recompute this value as a
  // side-effect.
  //
  // See `CapacityToGrowth()`.
  size_t growth_left() const {
    ABSL_SWISSTABLE_ASSERT(!is_soo());
    return common().growth_left();
  }

  GrowthInfo& growth_info() {
    ABSL_SWISSTABLE_ASSERT(!is_soo());
    return common().growth_info();
  }
  GrowthInfo growth_info() const {
    ABSL_SWISSTABLE_ASSERT(!is_soo());
    return common().growth_info();
  }

  // Prefetch the heap-allocated memory region to resolve potential TLB and
  // cache misses. This is intended to overlap with execution of calculating the
  // hash for a key.
  void prefetch_heap_block() const {
    ABSL_SWISSTABLE_ASSERT(!is_soo());
#if ABSL_HAVE_BUILTIN(__builtin_prefetch) || defined(__GNUC__)
    __builtin_prefetch(control(), 0, 1);
#endif
  }

  // Accessors for the pieces packed into settings_ (see settings_ below).
  CommonFields& common() { return settings_.template get<0>(); }
  const CommonFields& common() const { return settings_.template get<0>(); }

  // Heap-mode accessors; these assert the table is not in SOO mode.
  ctrl_t* control() const {
    ABSL_SWISSTABLE_ASSERT(!is_soo());
    return common().control();
  }
  slot_type* slot_array() const {
    ABSL_SWISSTABLE_ASSERT(!is_soo());
    return static_cast<slot_type*>(common().slot_array());
  }
  // SOO-mode accessors for the single inline slot.
  slot_type* soo_slot() {
    ABSL_SWISSTABLE_ASSERT(is_soo());
    ABSL_SWISSTABLE_IGNORE_UNINITIALIZED_RETURN(
        static_cast<slot_type*>(common().soo_data()));
  }
  const slot_type* soo_slot() const {
    ABSL_SWISSTABLE_IGNORE_UNINITIALIZED_RETURN(
        const_cast<raw_hash_set*>(this)->soo_slot());
  }
  iterator soo_iterator() {
    return {SooControl(), soo_slot(), common().generation_ptr()};
  }
  const_iterator soo_iterator() const {
    return const_cast<raw_hash_set*>(this)->soo_iterator();
  }
  HashtablezInfoHandle infoz() {
    ABSL_SWISSTABLE_ASSERT(!is_soo());
    return common().infoz();
  }

  hasher& hash_ref() { return settings_.template get<1>(); }
  const hasher& hash_ref() const { return settings_.template get<1>(); }
  key_equal& eq_ref() {
return settings_.template get<2>(); }
  const key_equal& eq_ref() const { return settings_.template get<2>(); }
  CharAlloc& char_alloc_ref() { return settings_.template get<3>(); }
  const CharAlloc& char_alloc_ref() const {
    return settings_.template get<3>();
  }

  // Type-erased callbacks for PolicyFunctions. They recover the raw_hash_set
  // from a CommonFields reference; CommonFields is the first element of
  // settings_, so the addresses coincide.
  static void* get_char_alloc_ref_fn(CommonFields& common) {
    auto* h = reinterpret_cast<raw_hash_set*>(&common);
    return &h->char_alloc_ref();
  }
  static void* get_hash_ref_fn(CommonFields& common) {
    auto* h = reinterpret_cast<raw_hash_set*>(&common);
    // TODO(b/397453582): Remove support for const hasher.
    return const_cast<std::remove_const_t<hasher>*>(&h->hash_ref());
  }
  // Transfers `count` consecutive slots from `src` to `dst` one by one via the
  // policy; used when slots are not memcpy-relocatable.
  static void transfer_n_slots_fn(void* set, void* dst, void* src,
                                  size_t count) {
    auto* src_slot = to_slot(src);
    auto* dst_slot = to_slot(dst);

    auto* h = static_cast<raw_hash_set*>(set);
    for (; count > 0; --count, ++src_slot, ++dst_slot) {
      h->transfer(dst_slot, src_slot);
    }
  }

  // TODO(b/382423690): Type erase by GetKey + Hash for memcpyable types.
  // Rehash helper: re-inserts every full slot of the old backing array into
  // the freshly allocated one, returning the total probe length for stats.
  static size_t find_new_positions_and_transfer_slots_fn(CommonFields& common,
                                                         ctrl_t* old_ctrl,
                                                         void* old_slots,
                                                         size_t old_capacity) {
    auto* set = reinterpret_cast<raw_hash_set*>(&common);
    slot_type* new_slots = set->slot_array();
    slot_type* old_slots_ptr = to_slot(old_slots);
    const auto insert_slot = [&](slot_type* slot) {
      size_t hash = PolicyTraits::apply(HashElement{set->hash_ref()},
                                        PolicyTraits::element(slot));
      auto target = find_first_non_full(common, hash);
      SetCtrl(common, target.offset, H2(hash), sizeof(slot_type));
      set->transfer(new_slots + target.offset, slot);
      return target.probe_length;
    };
    size_t total_probe_length = 0;
    for (size_t i = 0; i < old_capacity; ++i) {
      if (IsFull(old_ctrl[i])) {
        total_probe_length += insert_slot(old_slots_ptr + i);
      }
    }
    return total_probe_length;
  }

  // Returns the table of type-erased operations for this instantiation. The
  // non-templated implementation in raw_hash_set.cc works through this table.
  static const PolicyFunctions& GetPolicyFunctions() {
    static_assert(sizeof(slot_type) <= (std::numeric_limits<uint32_t>::max)(),
                  "Slot size is too large. Use std::unique_ptr for value type "
                  "or use absl::node_hash_{map,set}.");
    static_assert(alignof(slot_type) <=
                  size_t{(std::numeric_limits<uint16_t>::max)()});
    static_assert(sizeof(key_type) <=
                  size_t{(std::numeric_limits<uint32_t>::max)()});
    static_assert(sizeof(value_type) <=
                  size_t{(std::numeric_limits<uint32_t>::max)()});
    static constexpr size_t kBackingArrayAlignment =
        BackingArrayAlignment(alignof(slot_type));
    static constexpr PolicyFunctions value = {
        static_cast<uint32_t>(sizeof(key_type)),
        static_cast<uint32_t>(sizeof(value_type)),
        static_cast<uint16_t>(sizeof(slot_type)),
        static_cast<uint16_t>(alignof(slot_type)),
        static_cast<uint8_t>(SooEnabled() ? SooCapacity() : 0),
        ShouldSampleHashtablezInfoForAlloc<CharAlloc>(),
        // TODO(b/328722020): try to type erase
        // for standard layout and alignof(Hash) <= alignof(CommonFields).
        std::is_empty_v<hasher> ? &GetRefForEmptyClass
                                : &raw_hash_set::get_hash_ref_fn,
        PolicyTraits::template get_hash_slot_fn<hasher>(),
        PolicyTraits::transfer_uses_memcpy()
            ? TransferNRelocatable<sizeof(slot_type)>
            : &raw_hash_set::transfer_n_slots_fn,
        std::is_empty_v<Alloc> ? &GetRefForEmptyClass
                               : &raw_hash_set::get_char_alloc_ref_fn,
        &AllocateBackingArray<kBackingArrayAlignment, CharAlloc>,
        &DeallocateBackingArray<kBackingArrayAlignment, CharAlloc>,
        &raw_hash_set::find_new_positions_and_transfer_slots_fn};
    return value;
  }

  // Bundle together CommonFields plus other objects which might be empty.
  // CompressedTuple will ensure that sizeof is not affected by any of the empty
  // fields that occur after CommonFields.
  absl::container_internal::CompressedTuple<CommonFields, hasher, key_equal,
                                            CharAlloc>
      settings_{CommonFields::CreateDefault<SooEnabled()>(), hasher{},
                key_equal{}, CharAlloc{}};
};

// Friend access for free functions in raw_hash_set.h.
struct HashtableFreeFunctionsAccess {
  // Erases every element of `*c` that satisfies `pred`; returns the number of
  // elements erased. Handles the SOO (single inline element) case separately.
  template <class Predicate, typename Set>
  static typename Set::size_type EraseIf(Predicate& pred, Set* c) {
    if (c->empty()) {
      return 0;
    }
    if (c->is_soo()) {
      auto it = c->soo_iterator();
      if (!pred(*it)) {
        ABSL_SWISSTABLE_ASSERT(c->size() == 1 &&
                               "hash table was modified unexpectedly");
        return 0;
      }
      c->destroy(it.slot());
      c->common().set_empty_soo();
      return 1;
    }
    ABSL_ATTRIBUTE_UNUSED const size_t original_size_for_assert = c->size();
    size_t num_deleted = 0;
    using SlotType = typename Set::slot_type;
    IterateOverFullSlots(
        c->common(), sizeof(SlotType),
        [&](const ctrl_t* ctrl, void* slot_void) {
          auto* slot = static_cast<SlotType*>(slot_void);
          if (pred(Set::PolicyTraits::element(slot))) {
            c->destroy(slot);
            EraseMetaOnly(c->common(), static_cast<size_t>(ctrl - c->control()),
                          sizeof(*slot));
            ++num_deleted;
          }
        });
    // NOTE: IterateOverFullSlots allow removal of the current element, so we
    // verify the size additionally here.
    ABSL_SWISSTABLE_ASSERT(original_size_for_assert - num_deleted ==
                               c->size() &&
                           "hash table was modified unexpectedly");
    return num_deleted;
  }

  // Invokes `cb` on every element of `*c`.
  template <class Callback, typename Set>
  static void ForEach(Callback& cb, Set* c) {
    if (c->empty()) {
      return;
    }
    if (c->is_soo()) {
      cb(*c->soo_iterator());
      return;
    }
    using SlotType = typename Set::slot_type;
    // Preserve the constness of elements as seen through the set's iterator.
    using ElementTypeWithConstness = decltype(*c->begin());
    IterateOverFullSlots(
        c->common(), sizeof(SlotType), [&cb](const ctrl_t*, void* slot) {
          ElementTypeWithConstness& element =
              Set::PolicyTraits::element(static_cast<SlotType*>(slot));
          cb(element);
        });
  }
};

// Erases all elements that satisfy the predicate `pred` from the container `c`.
template <typename P, typename H, typename E, typename A, typename Predicate>
typename raw_hash_set<P, H, E, A>::size_type EraseIf(
    Predicate& pred, raw_hash_set<P, H, E, A>* c) {
  return HashtableFreeFunctionsAccess::EraseIf(pred, c);
}

// Calls `cb` for all elements in the container `c`.
template <typename P, typename H, typename E, typename A, typename Callback>
void ForEach(Callback& cb, raw_hash_set<P, H, E, A>* c) {
  return HashtableFreeFunctionsAccess::ForEach(cb, c);
}
template <typename P, typename H, typename E, typename A, typename Callback>
void ForEach(Callback& cb, const raw_hash_set<P, H, E, A>* c) {
  return HashtableFreeFunctionsAccess::ForEach(cb, c);
}

namespace hashtable_debug_internal {
template <typename Set>
struct HashtableDebugAccess<Set, absl::void_t<typename Set::raw_hash_set>> {
  using Traits = typename Set::PolicyTraits;
  using Slot = typename Traits::slot_type;

  // Counts the probes performed while looking up `key`; returns 0 for a table
  // in SOO mode.
  static size_t GetNumProbes(const Set& set,
                             const typename Set::key_type& key) {
    if (set.is_soo()) return 0;
    size_t num_probes = 0;
    const size_t hash = set.hash_ref()(key);
    auto seq = probe(set.common(), hash);
    const h2_t h2 = H2(hash);
    const ctrl_t* ctrl = set.control();
    while (true) {
      container_internal::Group g{ctrl + seq.offset()};
      for (uint32_t i : g.Match(h2)) {
        if (Traits::apply(
                typename Set::template EqualElement<typename Set::key_type>{
                    key, set.eq_ref()},
                Traits::element(set.slot_array() + seq.offset(i))))
          return num_probes;
        ++num_probes;
      }
      if (g.MaskEmpty()) return num_probes;
      seq.next();
      ++num_probes;
    }
  }

  // Heap bytes attributable to `c`: the backing array (when not SOO) plus the
  // per-element space reported by the policy.
  static size_t AllocatedByteSize(const Set& c) {
    size_t capacity = c.capacity();
    if (capacity == 0) return 0;
    size_t m =
        c.is_soo() ? 0 : c.common().alloc_size(sizeof(Slot), alignof(Slot));

    size_t per_slot = Traits::space_used(static_cast<const Slot*>(nullptr));
    if (per_slot != ~size_t{}) {
      m += per_slot * c.size();
    } else {
      // The policy cannot report a uniform per-slot size; ask each element.
      for (auto it = c.begin(); it != c.end(); ++it) {
        m += Traits::space_used(it.slot());
      }
    }
    return m;
  }
};

}  // namespace hashtable_debug_internal

// Extern template instantiations reduce binary size and linker input size.
// Function definition is in raw_hash_set.cc.
extern template void GrowFullSooTableToNextCapacity<0, false>(
    CommonFields&, const PolicyFunctions&, size_t);
extern template void GrowFullSooTableToNextCapacity<1, true>(
    CommonFields&, const PolicyFunctions&, size_t);
extern template void GrowFullSooTableToNextCapacity<4, true>(
    CommonFields&, const PolicyFunctions&, size_t);
extern template void GrowFullSooTableToNextCapacity<8, true>(
    CommonFields&, const PolicyFunctions&, size_t);
#if UINTPTR_MAX == UINT64_MAX
extern template void GrowFullSooTableToNextCapacity<16, true>(
    CommonFields&, const PolicyFunctions&, size_t);
#endif

}  // namespace container_internal
ABSL_NAMESPACE_END
}  // namespace absl

#undef ABSL_SWISSTABLE_ENABLE_GENERATIONS
#undef ABSL_SWISSTABLE_IGNORE_UNINITIALIZED
#undef ABSL_SWISSTABLE_IGNORE_UNINITIALIZED_RETURN
#undef ABSL_SWISSTABLE_ASSERT

#endif  // ABSL_CONTAINER_INTERNAL_RAW_HASH_SET_H_