node_hash_set.h (22195B)
1 // Copyright 2018 The Abseil Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // https://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 // 15 // ----------------------------------------------------------------------------- 16 // File: node_hash_set.h 17 // ----------------------------------------------------------------------------- 18 // 19 // An `absl::node_hash_set<T>` is an unordered associative container designed to 20 // be a more efficient replacement for `std::unordered_set`. Like 21 // `unordered_set`, search, insertion, and deletion of set elements can be done 22 // as an `O(1)` operation. However, `node_hash_set` (and other unordered 23 // associative containers known as the collection of Abseil "Swiss tables") 24 // contain other optimizations that result in both memory and computation 25 // advantages. 26 // 27 // In most cases, your default choice for a hash table should be a map of type 28 // `flat_hash_map` or a set of type `flat_hash_set`. However, if you need 29 // pointer stability, a `node_hash_set` should be your preferred choice. As 30 // well, if you are migrating your code from using `std::unordered_set`, a 31 // `node_hash_set` should be an easy migration. Consider migrating to 32 // `node_hash_set` and perhaps converting to a more efficient `flat_hash_set` 33 // upon further review. 34 // 35 // `node_hash_set` is not exception-safe. 36 37 #ifndef ABSL_CONTAINER_NODE_HASH_SET_H_ 38 #define ABSL_CONTAINER_NODE_HASH_SET_H_ 39 40 #include <cstddef> 41 #include <memory> 42 #include <type_traits> 43 44 #include "absl/algorithm/container.h" 45 #include "absl/base/attributes.h" 46 #include "absl/container/hash_container_defaults.h" 47 #include "absl/container/internal/container_memory.h" 48 #include "absl/container/internal/node_slot_policy.h" 49 #include "absl/container/internal/raw_hash_set.h" // IWYU pragma: export 50 #include "absl/memory/memory.h" 51 #include "absl/meta/type_traits.h" 52 53 namespace absl { 54 ABSL_NAMESPACE_BEGIN 55 namespace container_internal { 56 template <typename T> 57 struct NodeHashSetPolicy; 58 } // namespace container_internal 59 60 // ----------------------------------------------------------------------------- 61 // absl::node_hash_set 62 // ----------------------------------------------------------------------------- 63 // 64 // An `absl::node_hash_set<T>` is an unordered associative container which 65 // has been optimized for both speed and memory footprint in most common use 66 // cases. Its interface is similar to that of `std::unordered_set<T>` with the 67 // following notable differences: 68 // 69 // * Supports heterogeneous lookup, through `find()`, `operator[]()` and 70 // `insert()`, provided that the set is provided a compatible heterogeneous 71 // hashing function and equality operator. See below for details. 72 // * Contains a `capacity()` member function indicating the number of element 73 // slots (open, deleted, and empty) within the hash set. 74 // * Returns `void` from the `erase(iterator)` overload. 75 // 76 // By default, `node_hash_set` uses the `absl::Hash` hashing framework. 77 // All fundamental and Abseil types that support the `absl::Hash` framework have 78 // a compatible equality operator for comparing insertions into `node_hash_set`. 79 // If your type is not yet supported by the `absl::Hash` framework, see 80 // absl/hash/hash.h for information on extending Abseil hashing to user-defined 81 // types. 82 // 83 // Using `absl::node_hash_set` at interface boundaries in dynamically loaded 84 // libraries (e.g. .dll, .so) is unsupported due to way `absl::Hash` values may 85 // be randomized across dynamically loaded libraries. 86 // 87 // To achieve heterogeneous lookup for custom types either `Hash` and `Eq` type 88 // parameters can be used or `T` should have public inner types 89 // `absl_container_hash` and (optionally) `absl_container_eq`. In either case, 90 // `typename Hash::is_transparent` and `typename Eq::is_transparent` should be 91 // well-formed. Both types are basically functors: 92 // * `Hash` should support `size_t operator()(U val) const` that returns a hash 93 // for the given `val`. 94 // * `Eq` should support `bool operator()(U lhs, V rhs) const` that returns true 95 // if `lhs` is equal to `rhs`. 96 // 97 // In most cases `T` needs only to provide the `absl_container_hash`. In this 98 // case `std::equal_to<void>` will be used instead of `eq` part. 99 // 100 // PERFORMANCE WARNING: Erasure & sparsity can negatively affect performance: 101 // * Iteration takes O(capacity) time, not O(size). 102 // * erase() slows down begin() and ++iterator. 103 // * Capacity only shrinks on rehash() or clear() -- not on erase(). 104 // 105 // Example: 106 // 107 // // Create a node hash set of three strings 108 // absl::node_hash_set<std::string> ducks = 109 // {"huey", "dewey", "louie"}; 110 // 111 // // Insert a new element into the node hash set 112 // ducks.insert("donald"); 113 // 114 // // Force a rehash of the node hash set 115 // ducks.rehash(0); 116 // 117 // // See if "dewey" is present 118 // if (ducks.contains("dewey")) { 119 // std::cout << "We found dewey!" << std::endl; 120 // } 121 template <class T, class Hash = DefaultHashContainerHash<T>, 122 class Eq = DefaultHashContainerEq<T>, class Alloc = std::allocator<T>> 123 class ABSL_ATTRIBUTE_OWNER node_hash_set 124 : public absl::container_internal::raw_hash_set< 125 absl::container_internal::NodeHashSetPolicy<T>, Hash, Eq, Alloc> { 126 using Base = typename node_hash_set::raw_hash_set; 127 128 public: 129 // Constructors and Assignment Operators 130 // 131 // A node_hash_set supports the same overload set as `std::unordered_set` 132 // for construction and assignment: 133 // 134 // * Default constructor 135 // 136 // // No allocation for the table's elements is made. 137 // absl::node_hash_set<std::string> set1; 138 // 139 // * Initializer List constructor 140 // 141 // absl::node_hash_set<std::string> set2 = 142 // {{"huey"}, {"dewey"}, {"louie"}}; 143 // 144 // * Copy constructor 145 // 146 // absl::node_hash_set<std::string> set3(set2); 147 // 148 // * Copy assignment operator 149 // 150 // // Hash functor and Comparator are copied as well 151 // absl::node_hash_set<std::string> set4; 152 // set4 = set3; 153 // 154 // * Move constructor 155 // 156 // // Move is guaranteed efficient 157 // absl::node_hash_set<std::string> set5(std::move(set4)); 158 // 159 // * Move assignment operator 160 // 161 // // May be efficient if allocators are compatible 162 // absl::node_hash_set<std::string> set6; 163 // set6 = std::move(set5); 164 // 165 // * Range constructor 166 // 167 // std::vector<std::string> v = {"a", "b"}; 168 // absl::node_hash_set<std::string> set7(v.begin(), v.end()); 169 node_hash_set() {} 170 using Base::Base; 171 172 // node_hash_set::begin() 173 // 174 // Returns an iterator to the beginning of the `node_hash_set`. 175 using Base::begin; 176 177 // node_hash_set::cbegin() 178 // 179 // Returns a const iterator to the beginning of the `node_hash_set`. 180 using Base::cbegin; 181 182 // node_hash_set::cend() 183 // 184 // Returns a const iterator to the end of the `node_hash_set`. 185 using Base::cend; 186 187 // node_hash_set::end() 188 // 189 // Returns an iterator to the end of the `node_hash_set`. 190 using Base::end; 191 192 // node_hash_set::capacity() 193 // 194 // Returns the number of element slots (assigned, deleted, and empty) 195 // available within the `node_hash_set`. 196 // 197 // NOTE: this member function is particular to `absl::node_hash_set` and is 198 // not provided in the `std::unordered_set` API. 199 using Base::capacity; 200 201 // node_hash_set::empty() 202 // 203 // Returns whether or not the `node_hash_set` is empty. 204 using Base::empty; 205 206 // node_hash_set::max_size() 207 // 208 // Returns the largest theoretical possible number of elements within a 209 // `node_hash_set` under current memory constraints. This value can be thought 210 // of the largest value of `std::distance(begin(), end())` for a 211 // `node_hash_set<T>`. 212 using Base::max_size; 213 214 // node_hash_set::size() 215 // 216 // Returns the number of elements currently within the `node_hash_set`. 217 using Base::size; 218 219 // node_hash_set::clear() 220 // 221 // Removes all elements from the `node_hash_set`. Invalidates any references, 222 // pointers, or iterators referring to contained elements. 223 // 224 // NOTE: this operation may shrink the underlying buffer. To avoid shrinking 225 // the underlying buffer call `erase(begin(), end())`. 226 using Base::clear; 227 228 // node_hash_set::erase() 229 // 230 // Erases elements within the `node_hash_set`. Erasing does not trigger a 231 // rehash. Overloads are listed below. 232 // 233 // void erase(const_iterator pos): 234 // 235 // Erases the element at `position` of the `node_hash_set`, returning 236 // `void`. 237 // 238 // NOTE: Returning `void` in this case is different than that of STL 239 // containers in general and `std::unordered_map` in particular (which 240 // return an iterator to the element following the erased element). If that 241 // iterator is needed, simply post increment the iterator: 242 // 243 // map.erase(it++); 244 // 245 // 246 // iterator erase(const_iterator first, const_iterator last): 247 // 248 // Erases the elements in the open interval [`first`, `last`), returning an 249 // iterator pointing to `last`. The special case of calling 250 // `erase(begin(), end())` resets the reserved growth such that if 251 // `reserve(N)` has previously been called and there has been no intervening 252 // call to `clear()`, then after calling `erase(begin(), end())`, it is safe 253 // to assume that inserting N elements will not cause a rehash. 254 // 255 // size_type erase(const key_type& key): 256 // 257 // Erases the element with the matching key, if it exists, returning the 258 // number of elements erased (0 or 1). 259 using Base::erase; 260 261 // node_hash_set::insert() 262 // 263 // Inserts an element of the specified value into the `node_hash_set`, 264 // returning an iterator pointing to the newly inserted element, provided that 265 // an element with the given key does not already exist. If rehashing occurs 266 // due to the insertion, all iterators are invalidated. Overloads are listed 267 // below. 268 // 269 // std::pair<iterator,bool> insert(const T& value): 270 // 271 // Inserts a value into the `node_hash_set`. Returns a pair consisting of an 272 // iterator to the inserted element (or to the element that prevented the 273 // insertion) and a bool denoting whether the insertion took place. 274 // 275 // std::pair<iterator,bool> insert(T&& value): 276 // 277 // Inserts a moveable value into the `node_hash_set`. Returns a pair 278 // consisting of an iterator to the inserted element (or to the element that 279 // prevented the insertion) and a bool denoting whether the insertion took 280 // place. 281 // 282 // iterator insert(const_iterator hint, const T& value): 283 // iterator insert(const_iterator hint, T&& value): 284 // 285 // Inserts a value, using the position of `hint` as a non-binding suggestion 286 // for where to begin the insertion search. Returns an iterator to the 287 // inserted element, or to the existing element that prevented the 288 // insertion. 289 // 290 // void insert(InputIterator first, InputIterator last): 291 // 292 // Inserts a range of values [`first`, `last`). 293 // 294 // NOTE: Although the STL does not specify which element may be inserted if 295 // multiple keys compare equivalently, for `node_hash_set` we guarantee the 296 // first match is inserted. 297 // 298 // void insert(std::initializer_list<T> ilist): 299 // 300 // Inserts the elements within the initializer list `ilist`. 301 // 302 // NOTE: Although the STL does not specify which element may be inserted if 303 // multiple keys compare equivalently within the initializer list, for 304 // `node_hash_set` we guarantee the first match is inserted. 305 using Base::insert; 306 307 // node_hash_set::emplace() 308 // 309 // Inserts an element of the specified value by constructing it in-place 310 // within the `node_hash_set`, provided that no element with the given key 311 // already exists. 312 // 313 // The element may be constructed even if there already is an element with the 314 // key in the container, in which case the newly constructed element will be 315 // destroyed immediately. 316 // 317 // If rehashing occurs due to the insertion, all iterators are invalidated. 318 using Base::emplace; 319 320 // node_hash_set::emplace_hint() 321 // 322 // Inserts an element of the specified value by constructing it in-place 323 // within the `node_hash_set`, using the position of `hint` as a non-binding 324 // suggestion for where to begin the insertion search, and only inserts 325 // provided that no element with the given key already exists. 326 // 327 // The element may be constructed even if there already is an element with the 328 // key in the container, in which case the newly constructed element will be 329 // destroyed immediately. 330 // 331 // If rehashing occurs due to the insertion, all iterators are invalidated. 332 using Base::emplace_hint; 333 334 // node_hash_set::extract() 335 // 336 // Extracts the indicated element, erasing it in the process, and returns it 337 // as a C++17-compatible node handle. Overloads are listed below. 338 // 339 // node_type extract(const_iterator position): 340 // 341 // Extracts the element at the indicated position and returns a node handle 342 // owning that extracted data. 343 // 344 // node_type extract(const key_type& x): 345 // 346 // Extracts the element with the key matching the passed key value and 347 // returns a node handle owning that extracted data. If the `node_hash_set` 348 // does not contain an element with a matching key, this function returns an 349 // empty node handle. 350 using Base::extract; 351 352 // node_hash_set::merge() 353 // 354 // Extracts elements from a given `source` node hash set into this 355 // `node_hash_set`. If the destination `node_hash_set` already contains an 356 // element with an equivalent key, that element is not extracted. 357 using Base::merge; 358 359 // node_hash_set::swap(node_hash_set& other) 360 // 361 // Exchanges the contents of this `node_hash_set` with those of the `other` 362 // node hash set. 363 // 364 // All iterators and references on the `node_hash_set` remain valid, excepting 365 // for the past-the-end iterator, which is invalidated. 366 // 367 // `swap()` requires that the node hash set's hashing and key equivalence 368 // functions be Swappable, and are exchanged using unqualified calls to 369 // non-member `swap()`. If the set's allocator has 370 // `std::allocator_traits<allocator_type>::propagate_on_container_swap::value` 371 // set to `true`, the allocators are also exchanged using an unqualified call 372 // to non-member `swap()`; otherwise, the allocators are not swapped. 373 using Base::swap; 374 375 // node_hash_set::rehash(count) 376 // 377 // Rehashes the `node_hash_set`, setting the number of slots to be at least 378 // the passed value. If the new number of slots increases the load factor more 379 // than the current maximum load factor 380 // (`count` < `size()` / `max_load_factor()`), then the new number of slots 381 // will be at least `size()` / `max_load_factor()`. 382 // 383 // To force a rehash, pass rehash(0). 384 // 385 // NOTE: unlike behavior in `std::unordered_set`, references are also 386 // invalidated upon a `rehash()`. 387 using Base::rehash; 388 389 // node_hash_set::reserve(count) 390 // 391 // Sets the number of slots in the `node_hash_set` to the number needed to 392 // accommodate at least `count` total elements without exceeding the current 393 // maximum load factor, and may rehash the container if needed. 394 using Base::reserve; 395 396 // node_hash_set::contains() 397 // 398 // Determines whether an element comparing equal to the given `key` exists 399 // within the `node_hash_set`, returning `true` if so or `false` otherwise. 400 using Base::contains; 401 402 // node_hash_set::count(const Key& key) const 403 // 404 // Returns the number of elements comparing equal to the given `key` within 405 // the `node_hash_set`. note that this function will return either `1` or `0` 406 // since duplicate elements are not allowed within a `node_hash_set`. 407 using Base::count; 408 409 // node_hash_set::equal_range() 410 // 411 // Returns a closed range [first, last], defined by a `std::pair` of two 412 // iterators, containing all elements with the passed key in the 413 // `node_hash_set`. 414 using Base::equal_range; 415 416 // node_hash_set::find() 417 // 418 // Finds an element with the passed `key` within the `node_hash_set`. 419 using Base::find; 420 421 // node_hash_set::bucket_count() 422 // 423 // Returns the number of "buckets" within the `node_hash_set`. Note that 424 // because a node hash set contains all elements within its internal storage, 425 // this value simply equals the current capacity of the `node_hash_set`. 426 using Base::bucket_count; 427 428 // node_hash_set::load_factor() 429 // 430 // Returns the current load factor of the `node_hash_set` (the average number 431 // of slots occupied with a value within the hash set). 432 using Base::load_factor; 433 434 // node_hash_set::max_load_factor() 435 // 436 // Manages the maximum load factor of the `node_hash_set`. Overloads are 437 // listed below. 438 // 439 // float node_hash_set::max_load_factor() 440 // 441 // Returns the current maximum load factor of the `node_hash_set`. 442 // 443 // void node_hash_set::max_load_factor(float ml) 444 // 445 // Sets the maximum load factor of the `node_hash_set` to the passed value. 446 // 447 // NOTE: This overload is provided only for API compatibility with the STL; 448 // `node_hash_set` will ignore any set load factor and manage its rehashing 449 // internally as an implementation detail. 450 using Base::max_load_factor; 451 452 // node_hash_set::get_allocator() 453 // 454 // Returns the allocator function associated with this `node_hash_set`. 455 using Base::get_allocator; 456 457 // node_hash_set::hash_function() 458 // 459 // Returns the hashing function used to hash the keys within this 460 // `node_hash_set`. 461 using Base::hash_function; 462 463 // node_hash_set::key_eq() 464 // 465 // Returns the function used for comparing keys equality. 466 using Base::key_eq; 467 }; 468 469 // erase_if(node_hash_set<>, Pred) 470 // 471 // Erases all elements that satisfy the predicate `pred` from the container `c`. 472 // Returns the number of erased elements. 473 template <typename T, typename H, typename E, typename A, typename Predicate> 474 typename node_hash_set<T, H, E, A>::size_type erase_if( 475 node_hash_set<T, H, E, A>& c, Predicate pred) { 476 return container_internal::EraseIf(pred, &c); 477 } 478 479 // swap(node_hash_set<>, node_hash_set<>) 480 // 481 // Swaps the contents of two `node_hash_set` containers. 482 // 483 // NOTE: we need to define this function template in order for 484 // `flat_hash_set::swap` to be called instead of `std::swap`. Even though we 485 // have `swap(raw_hash_set&, raw_hash_set&)` defined, that function requires a 486 // derived-to-base conversion, whereas `std::swap` is a function template so 487 // `std::swap` will be preferred by compiler. 488 template <typename T, typename H, typename E, typename A> 489 void swap(node_hash_set<T, H, E, A>& x, 490 node_hash_set<T, H, E, A>& y) noexcept(noexcept(x.swap(y))) { 491 return x.swap(y); 492 } 493 494 namespace container_internal { 495 496 // c_for_each_fast(node_hash_set<>, Function) 497 // 498 // Container-based version of the <algorithm> `std::for_each()` function to 499 // apply a function to a container's elements. 500 // There is no guarantees on the order of the function calls. 501 // Erasure and/or insertion of elements in the function is not allowed. 502 template <typename T, typename H, typename E, typename A, typename Function> 503 decay_t<Function> c_for_each_fast(const node_hash_set<T, H, E, A>& c, 504 Function&& f) { 505 container_internal::ForEach(f, &c); 506 return f; 507 } 508 template <typename T, typename H, typename E, typename A, typename Function> 509 decay_t<Function> c_for_each_fast(node_hash_set<T, H, E, A>& c, Function&& f) { 510 container_internal::ForEach(f, &c); 511 return f; 512 } 513 template <typename T, typename H, typename E, typename A, typename Function> 514 decay_t<Function> c_for_each_fast(node_hash_set<T, H, E, A>&& c, Function&& f) { 515 container_internal::ForEach(f, &c); 516 return f; 517 } 518 519 } // namespace container_internal 520 521 namespace container_internal { 522 523 template <class T> 524 struct NodeHashSetPolicy 525 : absl::container_internal::node_slot_policy<T&, NodeHashSetPolicy<T>> { 526 using key_type = T; 527 using init_type = T; 528 using constant_iterators = std::true_type; 529 530 template <class Allocator, class... Args> 531 static T* new_element(Allocator* alloc, Args&&... args) { 532 using ValueAlloc = 533 typename absl::allocator_traits<Allocator>::template rebind_alloc<T>; 534 ValueAlloc value_alloc(*alloc); 535 T* res = absl::allocator_traits<ValueAlloc>::allocate(value_alloc, 1); 536 absl::allocator_traits<ValueAlloc>::construct(value_alloc, res, 537 std::forward<Args>(args)...); 538 return res; 539 } 540 541 template <class Allocator> 542 static void delete_element(Allocator* alloc, T* elem) { 543 using ValueAlloc = 544 typename absl::allocator_traits<Allocator>::template rebind_alloc<T>; 545 ValueAlloc value_alloc(*alloc); 546 absl::allocator_traits<ValueAlloc>::destroy(value_alloc, elem); 547 absl::allocator_traits<ValueAlloc>::deallocate(value_alloc, elem, 1); 548 } 549 550 template <class F, class... Args> 551 static decltype(absl::container_internal::DecomposeValue( 552 std::declval<F>(), std::declval<Args>()...)) 553 apply(F&& f, Args&&... args) { 554 return absl::container_internal::DecomposeValue( 555 std::forward<F>(f), std::forward<Args>(args)...); 556 } 557 558 static size_t element_space_used(const T*) { return sizeof(T); } 559 560 template <class Hash> 561 static constexpr HashSlotFn get_hash_slot_fn() { 562 return &TypeErasedDerefAndApplyToSlotFn<Hash, T>; 563 } 564 }; 565 } // namespace container_internal 566 567 namespace container_algorithm_internal { 568 569 // Specialization of trait in absl/algorithm/container.h 570 template <class Key, class Hash, class KeyEqual, class Allocator> 571 struct IsUnorderedContainer<absl::node_hash_set<Key, Hash, KeyEqual, Allocator>> 572 : std::true_type {}; 573 574 } // namespace container_algorithm_internal 575 ABSL_NAMESPACE_END 576 } // namespace absl 577 578 #endif // ABSL_CONTAINER_NODE_HASH_SET_H_