cord.h (65385B)
1 // Copyright 2020 The Abseil Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // https://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 // 15 // ----------------------------------------------------------------------------- 16 // File: cord.h 17 // ----------------------------------------------------------------------------- 18 // 19 // This file defines the `absl::Cord` data structure and operations on that data 20 // structure. A Cord is a string-like sequence of characters optimized for 21 // specific use cases. Unlike a `std::string`, which stores an array of 22 // contiguous characters, Cord data is stored in a structure consisting of 23 // separate, reference-counted "chunks." 24 // 25 // Because a Cord consists of these chunks, data can be added to or removed from 26 // a Cord during its lifetime. Chunks may also be shared between Cords. Unlike a 27 // `std::string`, a Cord can therefore accommodate data that changes over its 28 // lifetime, though it's not quite "mutable"; it can change only in the 29 // attachment, detachment, or rearrangement of chunks of its constituent data. 30 // 31 // A Cord provides some benefit over `std::string` under the following (albeit 32 // narrow) circumstances: 33 // 34 // * Cord data is designed to grow and shrink over a Cord's lifetime. Cord 35 // provides efficient insertions and deletions at the start and end of the 36 // character sequences, avoiding copies in those cases. Static data should 37 // generally be stored as strings. 38 // * External memory consisting of string-like data can be directly added to 39 // a Cord without requiring copies or allocations. 40 // * Cord data may be shared and copied cheaply. Cord provides a copy-on-write 41 // implementation and cheap sub-Cord operations. Copying a Cord is an O(1) 42 // operation. 43 // 44 // As a consequence to the above, Cord data is generally large. Small data 45 // should generally use strings, as construction of a Cord requires some 46 // overhead. Small Cords (<= 15 bytes) are represented inline, but most small 47 // Cords are expected to grow over their lifetimes. 48 // 49 // Note that because a Cord is made up of separate chunked data, random access 50 // to character data within a Cord is slower than within a `std::string`. 51 // 52 // Thread Safety 53 // 54 // Cord has the same thread-safety properties as many other types like 55 // std::string, std::vector<>, int, etc -- it is thread-compatible. In 56 // particular, if threads do not call non-const methods, then it is safe to call 57 // const methods without synchronization. Copying a Cord produces a new instance 58 // that can be used concurrently with the original in arbitrary ways. 59 60 #ifndef ABSL_STRINGS_CORD_H_ 61 #define ABSL_STRINGS_CORD_H_ 62 63 #include <algorithm> 64 #include <cassert> 65 #include <cstddef> 66 #include <cstdint> 67 #include <cstring> 68 #include <iosfwd> 69 #include <iterator> 70 #include <string> 71 #include <type_traits> 72 #include <utility> 73 74 #include "absl/base/attributes.h" 75 #include "absl/base/config.h" 76 #include "absl/base/internal/endian.h" 77 #include "absl/base/macros.h" 78 #include "absl/base/nullability.h" 79 #include "absl/base/optimization.h" 80 #include "absl/crc/internal/crc_cord_state.h" 81 #include "absl/functional/function_ref.h" 82 #include "absl/meta/type_traits.h" 83 #include "absl/strings/cord_analysis.h" 84 #include "absl/strings/cord_buffer.h" 85 #include "absl/strings/internal/cord_data_edge.h" 86 #include "absl/strings/internal/cord_internal.h" 87 #include "absl/strings/internal/cord_rep_btree.h" 88 #include "absl/strings/internal/cord_rep_btree_reader.h" 89 #include "absl/strings/internal/cord_rep_crc.h" 90 #include "absl/strings/internal/cord_rep_flat.h" 91 #include "absl/strings/internal/cordz_info.h" 92 #include "absl/strings/internal/cordz_update_scope.h" 93 #include "absl/strings/internal/cordz_update_tracker.h" 94 #include "absl/strings/internal/string_constant.h" 95 #include "absl/strings/string_view.h" 96 #include "absl/types/compare.h" 97 #include "absl/types/optional.h" 98 99 namespace absl { 100 ABSL_NAMESPACE_BEGIN 101 class Cord; 102 class CordTestPeer; 103 template <typename Releaser> 104 Cord MakeCordFromExternal(absl::string_view, Releaser&&); 105 void CopyCordToString(const Cord& src, absl::Nonnull<std::string*> dst); 106 void AppendCordToString(const Cord& src, absl::Nonnull<std::string*> dst); 107 108 // Cord memory accounting modes 109 enum class CordMemoryAccounting { 110 // Counts the *approximate* number of bytes held in full or in part by this 111 // Cord (which may not remain the same between invocations). Cords that share 112 // memory could each be "charged" independently for the same shared memory. 113 // See also comment on `kTotalMorePrecise` on internally shared memory. 114 kTotal, 115 116 // Counts the *approximate* number of bytes held in full or in part by this 117 // Cord for the distinct memory held by this cord. This option is similar 118 // to `kTotal`, except that if the cord has multiple references to the same 119 // memory, that memory is only counted once. 120 // 121 // For example: 122 // absl::Cord cord; 123 // cord.Append(some_other_cord); 124 // cord.Append(some_other_cord); 125 // // Counts `some_other_cord` twice: 126 // cord.EstimatedMemoryUsage(kTotal); 127 // // Counts `some_other_cord` once: 128 // cord.EstimatedMemoryUsage(kTotalMorePrecise); 129 // 130 // The `kTotalMorePrecise` number is more expensive to compute as it requires 131 // deduplicating all memory references. Applications should prefer to use 132 // `kFairShare` or `kTotal` unless they really need a more precise estimate 133 // on "how much memory is potentially held / kept alive by this cord?" 134 kTotalMorePrecise, 135 136 // Counts the *approximate* number of bytes held in full or in part by this 137 // Cord weighted by the sharing ratio of that data. For example, if some data 138 // edge is shared by 4 different Cords, then each cord is attributed 1/4th of 139 // the total memory usage as a 'fair share' of the total memory usage. 140 kFairShare, 141 }; 142 143 // Cord 144 // 145 // A Cord is a sequence of characters, designed to be more efficient than a 146 // `std::string` in certain circumstances: namely, large string data that needs 147 // to change over its lifetime or shared, especially when such data is shared 148 // across API boundaries. 149 // 150 // A Cord stores its character data in a structure that allows efficient prepend 151 // and append operations. This makes a Cord useful for large string data sent 152 // over in a wire format that may need to be prepended or appended at some point 153 // during the data exchange (e.g. HTTP, protocol buffers). For example, a 154 // Cord is useful for storing an HTTP request, and prepending an HTTP header to 155 // such a request. 156 // 157 // Cords should not be used for storing general string data, however. They 158 // require overhead to construct and are slower than strings for random access. 159 // 160 // The Cord API provides the following common API operations: 161 // 162 // * Create or assign Cords out of existing string data, memory, or other Cords 163 // * Append and prepend data to an existing Cord 164 // * Create new Sub-Cords from existing Cord data 165 // * Swap Cord data and compare Cord equality 166 // * Write out Cord data by constructing a `std::string` 167 // 168 // Additionally, the API provides iterator utilities to iterate through Cord 169 // data via chunks or character bytes. 170 // 171 class Cord { 172 private: 173 template <typename T> 174 using EnableIfString = 175 absl::enable_if_t<std::is_same<T, std::string>::value, int>; 176 177 public: 178 // Cord::Cord() Constructors. 179 180 // Creates an empty Cord. 181 constexpr Cord() noexcept; 182 183 // Creates a Cord from an existing Cord. Cord is copyable and efficiently 184 // movable. The moved-from state is valid but unspecified. 185 Cord(const Cord& src); 186 Cord(Cord&& src) noexcept; 187 Cord& operator=(const Cord& x); 188 Cord& operator=(Cord&& x) noexcept; 189 190 // Creates a Cord from a `src` string. This constructor is marked explicit to 191 // prevent implicit Cord constructions from arguments convertible to an 192 // `absl::string_view`. 193 explicit Cord(absl::string_view src); 194 Cord& operator=(absl::string_view src); 195 196 // Creates a Cord from a `std::string&&` rvalue. These constructors are 197 // templated to avoid ambiguities for types that are convertible to both 198 // `absl::string_view` and `std::string`, such as `const char*`. 199 template <typename T, EnableIfString<T> = 0> 200 explicit Cord(T&& src); 201 template <typename T, EnableIfString<T> = 0> 202 Cord& operator=(T&& src); 203 204 // Cord::~Cord() 205 // 206 // Destructs the Cord. 207 ~Cord() { 208 if (contents_.is_tree()) DestroyCordSlow(); 209 } 210 211 // MakeCordFromExternal() 212 // 213 // Creates a Cord that takes ownership of external string memory. The 214 // contents of `data` are not copied to the Cord; instead, the external 215 // memory is added to the Cord and reference-counted. This data may not be 216 // changed for the life of the Cord, though it may be prepended or appended 217 // to. 218 // 219 // `MakeCordFromExternal()` takes a callable "releaser" that is invoked when 220 // the reference count for `data` reaches zero. As noted above, this data must 221 // remain live until the releaser is invoked. The callable releaser also must: 222 // 223 // * be move constructible 224 // * support `void operator()(absl::string_view) const` or `void operator()` 225 // 226 // Example: 227 // 228 // Cord MakeCord(BlockPool* pool) { 229 // Block* block = pool->NewBlock(); 230 // FillBlock(block); 231 // return absl::MakeCordFromExternal( 232 // block->ToStringView(), 233 // [pool, block](absl::string_view v) { 234 // pool->FreeBlock(block, v); 235 // }); 236 // } 237 // 238 // WARNING: Because a Cord can be reference-counted, it's likely a bug if your 239 // releaser doesn't do anything. For example, consider the following: 240 // 241 // void Foo(const char* buffer, int len) { 242 // auto c = absl::MakeCordFromExternal(absl::string_view(buffer, len), 243 // [](absl::string_view) {}); 244 // 245 // // BUG: If Bar() copies its cord for any reason, including keeping a 246 // // substring of it, the lifetime of buffer might be extended beyond 247 // // when Foo() returns. 248 // Bar(c); 249 // } 250 template <typename Releaser> 251 friend Cord MakeCordFromExternal(absl::string_view data, Releaser&& releaser); 252 253 // Cord::Clear() 254 // 255 // Releases the Cord data. Any nodes that share data with other Cords, if 256 // applicable, will have their reference counts reduced by 1. 257 ABSL_ATTRIBUTE_REINITIALIZES void Clear(); 258 259 // Cord::Append() 260 // 261 // Appends data to the Cord, which may come from another Cord or other string 262 // data. 263 void Append(const Cord& src); 264 void Append(Cord&& src); 265 void Append(absl::string_view src); 266 template <typename T, EnableIfString<T> = 0> 267 void Append(T&& src); 268 269 // Appends `buffer` to this cord, unless `buffer` has a zero length in which 270 // case this method has no effect on this cord instance. 271 // This method is guaranteed to consume `buffer`. 272 void Append(CordBuffer buffer); 273 274 // Returns a CordBuffer, re-using potential existing capacity in this cord. 275 // 276 // Cord instances may have additional unused capacity in the last (or first) 277 // nodes of the underlying tree to facilitate amortized growth. This method 278 // allows applications to explicitly use this spare capacity if available, 279 // or create a new CordBuffer instance otherwise. 280 // If this cord has a final non-shared node with at least `min_capacity` 281 // available, then this method will return that buffer including its data 282 // contents. I.e.; the returned buffer will have a non-zero length, and 283 // a capacity of at least `buffer.length + min_capacity`. Otherwise, this 284 // method will return `CordBuffer::CreateWithDefaultLimit(capacity)`. 285 // 286 // Below an example of using GetAppendBuffer. Notice that in this example we 287 // use `GetAppendBuffer()` only on the first iteration. As we know nothing 288 // about any initial extra capacity in `cord`, we may be able to use the extra 289 // capacity. But as we add new buffers with fully utilized contents after that 290 // we avoid calling `GetAppendBuffer()` on subsequent iterations: while this 291 // works fine, it results in an unnecessary inspection of cord contents: 292 // 293 // void AppendRandomDataToCord(absl::Cord &cord, size_t n) { 294 // bool first = true; 295 // while (n > 0) { 296 // CordBuffer buffer = first ? cord.GetAppendBuffer(n) 297 // : CordBuffer::CreateWithDefaultLimit(n); 298 // absl::Span<char> data = buffer.available_up_to(n); 299 // FillRandomValues(data.data(), data.size()); 300 // buffer.IncreaseLengthBy(data.size()); 301 // cord.Append(std::move(buffer)); 302 // n -= data.size(); 303 // first = false; 304 // } 305 // } 306 CordBuffer GetAppendBuffer(size_t capacity, size_t min_capacity = 16); 307 308 // Returns a CordBuffer, re-using potential existing capacity in this cord. 309 // 310 // This function is identical to `GetAppendBuffer`, except that in the case 311 // where a new `CordBuffer` is allocated, it is allocated using the provided 312 // custom limit instead of the default limit. `GetAppendBuffer` will default 313 // to `CordBuffer::CreateWithDefaultLimit(capacity)` whereas this method 314 // will default to `CordBuffer::CreateWithCustomLimit(block_size, capacity)`. 315 // This method is equivalent to `GetAppendBuffer` if `block_size` is zero. 316 // See the documentation for `CreateWithCustomLimit` for more details on the 317 // restrictions and legal values for `block_size`. 318 CordBuffer GetCustomAppendBuffer(size_t block_size, size_t capacity, 319 size_t min_capacity = 16); 320 321 // Cord::Prepend() 322 // 323 // Prepends data to the Cord, which may come from another Cord or other string 324 // data. 325 void Prepend(const Cord& src); 326 void Prepend(absl::string_view src); 327 template <typename T, EnableIfString<T> = 0> 328 void Prepend(T&& src); 329 330 // Prepends `buffer` to this cord, unless `buffer` has a zero length in which 331 // case this method has no effect on this cord instance. 332 // This method is guaranteed to consume `buffer`. 333 void Prepend(CordBuffer buffer); 334 335 // Cord::RemovePrefix() 336 // 337 // Removes the first `n` bytes of a Cord. 338 void RemovePrefix(size_t n); 339 void RemoveSuffix(size_t n); 340 341 // Cord::Subcord() 342 // 343 // Returns a new Cord representing the subrange [pos, pos + new_size) of 344 // *this. If pos >= size(), the result is empty(). If 345 // (pos + new_size) >= size(), the result is the subrange [pos, size()). 346 Cord Subcord(size_t pos, size_t new_size) const; 347 348 // Cord::swap() 349 // 350 // Swaps the contents of the Cord with `other`. 351 void swap(Cord& other) noexcept; 352 353 // swap() 354 // 355 // Swaps the contents of two Cords. 356 friend void swap(Cord& x, Cord& y) noexcept { x.swap(y); } 357 358 // Cord::size() 359 // 360 // Returns the size of the Cord. 361 size_t size() const; 362 363 // Cord::empty() 364 // 365 // Determines whether the given Cord is empty, returning `true` if so. 366 bool empty() const; 367 368 // Cord::EstimatedMemoryUsage() 369 // 370 // Returns the *approximate* number of bytes held by this cord. 371 // See CordMemoryAccounting for more information on the accounting method. 372 size_t EstimatedMemoryUsage(CordMemoryAccounting accounting_method = 373 CordMemoryAccounting::kTotal) const; 374 375 // Cord::Compare() 376 // 377 // Compares 'this' Cord with rhs. This function and its relatives treat Cords 378 // as sequences of unsigned bytes. The comparison is a straightforward 379 // lexicographic comparison. `Cord::Compare()` returns values as follows: 380 // 381 // -1 'this' Cord is smaller 382 // 0 two Cords are equal 383 // 1 'this' Cord is larger 384 int Compare(absl::string_view rhs) const; 385 int Compare(const Cord& rhs) const; 386 387 // Cord::StartsWith() 388 // 389 // Determines whether the Cord starts with the passed string data `rhs`. 390 bool StartsWith(const Cord& rhs) const; 391 bool StartsWith(absl::string_view rhs) const; 392 393 // Cord::EndsWith() 394 // 395 // Determines whether the Cord ends with the passed string data `rhs`. 396 bool EndsWith(absl::string_view rhs) const; 397 bool EndsWith(const Cord& rhs) const; 398 399 // Cord::Contains() 400 // 401 // Determines whether the Cord contains the passed string data `rhs`. 402 bool Contains(absl::string_view rhs) const; 403 bool Contains(const Cord& rhs) const; 404 405 // Cord::operator std::string() 406 // 407 // Converts a Cord into a `std::string()`. This operator is marked explicit to 408 // prevent unintended Cord usage in functions that take a string. 409 explicit operator std::string() const; 410 411 // CopyCordToString() 412 // 413 // Copies the contents of a `src` Cord into a `*dst` string. 414 // 415 // This function optimizes the case of reusing the destination string since it 416 // can reuse previously allocated capacity. However, this function does not 417 // guarantee that pointers previously returned by `dst->data()` remain valid 418 // even if `*dst` had enough capacity to hold `src`. If `*dst` is a new 419 // object, prefer to simply use the conversion operator to `std::string`. 420 friend void CopyCordToString(const Cord& src, 421 absl::Nonnull<std::string*> dst); 422 423 // AppendCordToString() 424 // 425 // Appends the contents of a `src` Cord to a `*dst` string. 426 // 427 // This function optimizes the case of appending to a non-empty destination 428 // string. If `*dst` already has capacity to store the contents of the cord, 429 // this function does not invalidate pointers previously returned by 430 // `dst->data()`. If `*dst` is a new object, prefer to simply use the 431 // conversion operator to `std::string`. 432 friend void AppendCordToString(const Cord& src, 433 absl::Nonnull<std::string*> dst); 434 435 class CharIterator; 436 437 //---------------------------------------------------------------------------- 438 // Cord::ChunkIterator 439 //---------------------------------------------------------------------------- 440 // 441 // A `Cord::ChunkIterator` allows iteration over the constituent chunks of its 442 // Cord. Such iteration allows you to perform non-const operations on the data 443 // of a Cord without modifying it. 444 // 445 // Generally, you do not instantiate a `Cord::ChunkIterator` directly; 446 // instead, you create one implicitly through use of the `Cord::Chunks()` 447 // member function. 448 // 449 // The `Cord::ChunkIterator` has the following properties: 450 // 451 // * The iterator is invalidated after any non-const operation on the 452 // Cord object over which it iterates. 453 // * The `string_view` returned by dereferencing a valid, non-`end()` 454 // iterator is guaranteed to be non-empty. 455 // * Two `ChunkIterator` objects can be compared equal if and only if they 456 // remain valid and iterate over the same Cord. 457 // * The iterator in this case is a proxy iterator; the `string_view` 458 // returned by the iterator does not live inside the Cord, and its 459 // lifetime is limited to the lifetime of the iterator itself. To help 460 // prevent lifetime issues, `ChunkIterator::reference` is not a true 461 // reference type and is equivalent to `value_type`. 462 // * The iterator keeps state that can grow for Cords that contain many 463 // nodes and are imbalanced due to sharing. Prefer to pass this type by 464 // const reference instead of by value. 465 class ChunkIterator { 466 public: 467 using iterator_category = std::input_iterator_tag; 468 using value_type = absl::string_view; 469 using difference_type = ptrdiff_t; 470 using pointer = absl::Nonnull<const value_type*>; 471 using reference = value_type; 472 473 ChunkIterator() = default; 474 475 ChunkIterator& operator++(); 476 ChunkIterator operator++(int); 477 bool operator==(const ChunkIterator& other) const; 478 bool operator!=(const ChunkIterator& other) const; 479 reference operator*() const; 480 pointer operator->() const; 481 482 friend class Cord; 483 friend class CharIterator; 484 485 private: 486 using CordRep = absl::cord_internal::CordRep; 487 using CordRepBtree = absl::cord_internal::CordRepBtree; 488 using CordRepBtreeReader = absl::cord_internal::CordRepBtreeReader; 489 490 // Constructs a `begin()` iterator from `tree`. 491 explicit ChunkIterator(absl::Nonnull<cord_internal::CordRep*> tree); 492 493 // Constructs a `begin()` iterator from `cord`. 494 explicit ChunkIterator(absl::Nonnull<const Cord*> cord); 495 496 // Initializes this instance from a tree. Invoked by constructors. 497 void InitTree(absl::Nonnull<cord_internal::CordRep*> tree); 498 499 // Removes `n` bytes from `current_chunk_`. Expects `n` to be smaller than 500 // `current_chunk_.size()`. 501 void RemoveChunkPrefix(size_t n); 502 Cord AdvanceAndReadBytes(size_t n); 503 void AdvanceBytes(size_t n); 504 505 // Btree specific operator++ 506 ChunkIterator& AdvanceBtree(); 507 void AdvanceBytesBtree(size_t n); 508 509 // A view into bytes of the current `CordRep`. It may only be a view to a 510 // suffix of bytes if this is being used by `CharIterator`. 511 absl::string_view current_chunk_; 512 // The current leaf, or `nullptr` if the iterator points to short data. 513 // If the current chunk is a substring node, current_leaf_ points to the 514 // underlying flat or external node. 515 absl::Nullable<absl::cord_internal::CordRep*> current_leaf_ = nullptr; 516 // The number of bytes left in the `Cord` over which we are iterating. 517 size_t bytes_remaining_ = 0; 518 519 // Cord reader for cord btrees. Empty if not traversing a btree. 520 CordRepBtreeReader btree_reader_; 521 }; 522 523 // Cord::chunk_begin() 524 // 525 // Returns an iterator to the first chunk of the `Cord`. 526 // 527 // Generally, prefer using `Cord::Chunks()` within a range-based for loop for 528 // iterating over the chunks of a Cord. This method may be useful for getting 529 // a `ChunkIterator` where range-based for-loops are not useful. 530 // 531 // Example: 532 // 533 // absl::Cord::ChunkIterator FindAsChunk(const absl::Cord& c, 534 // absl::string_view s) { 535 // return std::find(c.chunk_begin(), c.chunk_end(), s); 536 // } 537 ChunkIterator chunk_begin() const ABSL_ATTRIBUTE_LIFETIME_BOUND; 538 539 // Cord::chunk_end() 540 // 541 // Returns an iterator one increment past the last chunk of the `Cord`. 542 // 543 // Generally, prefer using `Cord::Chunks()` within a range-based for loop for 544 // iterating over the chunks of a Cord. This method may be useful for getting 545 // a `ChunkIterator` where range-based for-loops may not be available. 546 ChunkIterator chunk_end() const ABSL_ATTRIBUTE_LIFETIME_BOUND; 547 548 //---------------------------------------------------------------------------- 549 // Cord::ChunkRange 550 //---------------------------------------------------------------------------- 551 // 552 // `ChunkRange` is a helper class for iterating over the chunks of the `Cord`, 553 // producing an iterator which can be used within a range-based for loop. 554 // Construction of a `ChunkRange` will return an iterator pointing to the 555 // first chunk of the Cord. Generally, do not construct a `ChunkRange` 556 // directly; instead, prefer to use the `Cord::Chunks()` method. 557 // 558 // Implementation note: `ChunkRange` is simply a convenience wrapper over 559 // `Cord::chunk_begin()` and `Cord::chunk_end()`. 560 class ChunkRange { 561 public: 562 // Fulfill minimum c++ container requirements [container.requirements] 563 // These (partial) container type definitions allow ChunkRange to be used 564 // in various utilities expecting a subset of [container.requirements]. 565 // For example, the below enables using `::testing::ElementsAre(...)` 566 using value_type = absl::string_view; 567 using reference = value_type&; 568 using const_reference = const value_type&; 569 using iterator = ChunkIterator; 570 using const_iterator = ChunkIterator; 571 572 explicit ChunkRange(absl::Nonnull<const Cord*> cord) : cord_(cord) {} 573 574 ChunkIterator begin() const; 575 ChunkIterator end() const; 576 577 private: 578 absl::Nonnull<const Cord*> cord_; 579 }; 580 581 // Cord::Chunks() 582 // 583 // Returns a `Cord::ChunkRange` for iterating over the chunks of a `Cord` with 584 // a range-based for-loop. For most iteration tasks on a Cord, use 585 // `Cord::Chunks()` to retrieve this iterator. 586 // 587 // Example: 588 // 589 // void ProcessChunks(const Cord& cord) { 590 // for (absl::string_view chunk : cord.Chunks()) { ... } 591 // } 592 // 593 // Note that the ordinary caveats of temporary lifetime extension apply: 594 // 595 // void Process() { 596 // for (absl::string_view chunk : CordFactory().Chunks()) { 597 // // The temporary Cord returned by CordFactory has been destroyed! 598 // } 599 // } 600 ChunkRange Chunks() const ABSL_ATTRIBUTE_LIFETIME_BOUND; 601 602 //---------------------------------------------------------------------------- 603 // Cord::CharIterator 604 //---------------------------------------------------------------------------- 605 // 606 // A `Cord::CharIterator` allows iteration over the constituent characters of 607 // a `Cord`. 608 // 609 // Generally, you do not instantiate a `Cord::CharIterator` directly; instead, 610 // you create one implicitly through use of the `Cord::Chars()` member 611 // function. 612 // 613 // A `Cord::CharIterator` has the following properties: 614 // 615 // * The iterator is invalidated after any non-const operation on the 616 // Cord object over which it iterates. 617 // * Two `CharIterator` objects can be compared equal if and only if they 618 // remain valid and iterate over the same Cord. 619 // * The iterator keeps state that can grow for Cords that contain many 620 // nodes and are imbalanced due to sharing. Prefer to pass this type by 621 // const reference instead of by value. 622 // * This type cannot act as a forward iterator because a `Cord` can reuse 623 // sections of memory. This fact violates the requirement for forward 624 // iterators to compare equal if dereferencing them returns the same 625 // object. 626 class CharIterator { 627 public: 628 using iterator_category = std::input_iterator_tag; 629 using value_type = char; 630 using difference_type = ptrdiff_t; 631 using pointer = absl::Nonnull<const char*>; 632 using reference = const char&; 633 634 CharIterator() = default; 635 636 CharIterator& operator++(); 637 CharIterator operator++(int); 638 bool operator==(const CharIterator& other) const; 639 bool operator!=(const CharIterator& other) const; 640 reference operator*() const; 641 642 friend Cord; 643 644 private: 645 explicit CharIterator(absl::Nonnull<const Cord*> cord) 646 : chunk_iterator_(cord) {} 647 648 ChunkIterator chunk_iterator_; 649 }; 650 651 // Cord::AdvanceAndRead() 652 // 653 // Advances the `Cord::CharIterator` by `n_bytes` and returns the bytes 654 // advanced as a separate `Cord`. `n_bytes` must be less than or equal to the 655 // number of bytes within the Cord; otherwise, behavior is undefined. It is 656 // valid to pass `char_end()` and `0`. 657 static Cord AdvanceAndRead(absl::Nonnull<CharIterator*> it, size_t n_bytes); 658 659 // Cord::Advance() 660 // 661 // Advances the `Cord::CharIterator` by `n_bytes`. `n_bytes` must be less than 662 // or equal to the number of bytes remaining within the Cord; otherwise, 663 // behavior is undefined. It is valid to pass `char_end()` and `0`. 664 static void Advance(absl::Nonnull<CharIterator*> it, size_t n_bytes); 665 666 // Cord::ChunkRemaining() 667 // 668 // Returns the longest contiguous view starting at the iterator's position. 669 // 670 // `it` must be dereferenceable. 671 static absl::string_view ChunkRemaining(const CharIterator& it); 672 673 // Cord::char_begin() 674 // 675 // Returns an iterator to the first character of the `Cord`. 676 // 677 // Generally, prefer using `Cord::Chars()` within a range-based for loop for 678 // iterating over the chunks of a Cord. This method may be useful for getting 679 // a `CharIterator` where range-based for-loops may not be available. 680 CharIterator char_begin() const ABSL_ATTRIBUTE_LIFETIME_BOUND; 681 682 // Cord::char_end() 683 // 684 // Returns an iterator to one past the last character of the `Cord`. 685 // 686 // Generally, prefer using `Cord::Chars()` within a range-based for loop for 687 // iterating over the chunks of a Cord. This method may be useful for getting 688 // a `CharIterator` where range-based for-loops are not useful. 689 CharIterator char_end() const ABSL_ATTRIBUTE_LIFETIME_BOUND; 690 691 // Cord::CharRange 692 // 693 // `CharRange` is a helper class for iterating over the characters of a 694 // producing an iterator which can be used within a range-based for loop. 695 // Construction of a `CharRange` will return an iterator pointing to the first 696 // character of the Cord. Generally, do not construct a `CharRange` directly; 697 // instead, prefer to use the `Cord::Chars()` method shown below. 698 // 699 // Implementation note: `CharRange` is simply a convenience wrapper over 700 // `Cord::char_begin()` and `Cord::char_end()`. 701 class CharRange { 702 public: 703 // Fulfill minimum c++ container requirements [container.requirements] 704 // These (partial) container type definitions allow CharRange to be used 705 // in various utilities expecting a subset of [container.requirements]. 706 // For example, the below enables using `::testing::ElementsAre(...)` 707 using value_type = char; 708 using reference = value_type&; 709 using const_reference = const value_type&; 710 using iterator = CharIterator; 711 using const_iterator = CharIterator; 712 713 explicit CharRange(absl::Nonnull<const Cord*> cord) : cord_(cord) {} 714 715 CharIterator begin() const; 716 CharIterator end() const; 717 718 private: 719 absl::Nonnull<const Cord*> cord_; 720 }; 721 722 // Cord::Chars() 723 // 724 // Returns a `Cord::CharRange` for iterating over the characters of a `Cord` 725 // with a range-based for-loop. For most character-based iteration tasks on a 726 // Cord, use `Cord::Chars()` to retrieve this iterator. 727 // 728 // Example: 729 // 730 // void ProcessCord(const Cord& cord) { 731 // for (char c : cord.Chars()) { ... } 732 // } 733 // 734 // Note that the ordinary caveats of temporary lifetime extension apply: 735 // 736 // void Process() { 737 // for (char c : CordFactory().Chars()) { 738 // // The temporary Cord returned by CordFactory has been destroyed! 739 // } 740 // } 741 CharRange Chars() const ABSL_ATTRIBUTE_LIFETIME_BOUND; 742 743 // Cord::operator[] 744 // 745 // Gets the "i"th character of the Cord and returns it, provided that 746 // 0 <= i < Cord.size(). 747 // 748 // NOTE: This routine is reasonably efficient. It is roughly 749 // logarithmic based on the number of chunks that make up the cord. Still, 750 // if you need to iterate over the contents of a cord, you should 751 // use a CharIterator/ChunkIterator rather than call operator[] or Get() 752 // repeatedly in a loop. 753 char operator[](size_t i) const; 754 755 // Cord::TryFlat() 756 // 757 // If this cord's representation is a single flat array, returns a 758 // string_view referencing that array. Otherwise returns nullopt. 759 absl::optional<absl::string_view> TryFlat() const 760 ABSL_ATTRIBUTE_LIFETIME_BOUND; 761 762 // Cord::Flatten() 763 // 764 // Flattens the cord into a single array and returns a view of the data. 765 // 766 // If the cord was already flat, the contents are not modified. 767 absl::string_view Flatten() ABSL_ATTRIBUTE_LIFETIME_BOUND; 768 769 // Cord::Find() 770 // 771 // Returns an iterator to the first occurrence of the substring `needle`. 772 // 773 // If the substring `needle` does not occur, `Cord::char_end()` is returned. 774 CharIterator Find(absl::string_view needle) const; 775 CharIterator Find(const absl::Cord& needle) const; 776 777 // Supports absl::Cord as a sink object for absl::Format(). 778 friend void AbslFormatFlush(absl::Nonnull<absl::Cord*> cord, 779 absl::string_view part) { 780 cord->Append(part); 781 } 782 783 // Support automatic stringification with absl::StrCat and absl::StrFormat. 784 template <typename Sink> 785 friend void AbslStringify(Sink& sink, const absl::Cord& cord) { 786 for (absl::string_view chunk : cord.Chunks()) { 787 sink.Append(chunk); 788 } 789 } 790 791 // Cord::SetExpectedChecksum() 792 // 793 // Stores a checksum value with this non-empty cord instance, for later 794 // retrieval. 795 // 796 // The expected checksum is a number stored out-of-band, alongside the data. 797 // It is preserved across copies and assignments, but any mutations to a cord 798 // will cause it to lose its expected checksum. 799 // 800 // The expected checksum is not part of a Cord's value, and does not affect 801 // operations such as equality or hashing. 802 // 803 // This field is intended to store a CRC32C checksum for later validation, to 804 // help support end-to-end checksum workflows. However, the Cord API itself 805 // does no CRC validation, and assigns no meaning to this number. 806 // 807 // This call has no effect if this cord is empty. 808 void SetExpectedChecksum(uint32_t crc); 809 810 // Returns this cord's expected checksum, if it has one. Otherwise, returns 811 // nullopt. 812 absl::optional<uint32_t> ExpectedChecksum() const; 813 814 template <typename H> 815 friend H AbslHashValue(H hash_state, const absl::Cord& c) { 816 absl::optional<absl::string_view> maybe_flat = c.TryFlat(); 817 if (maybe_flat.has_value()) { 818 return H::combine(std::move(hash_state), *maybe_flat); 819 } 820 return c.HashFragmented(std::move(hash_state)); 821 } 822 823 // Create a Cord with the contents of StringConstant<T>::value. 824 // No allocations will be done and no data will be copied. 825 // This is an INTERNAL API and subject to change or removal. This API can only 826 // be used by spelling absl::strings_internal::MakeStringConstant, which is 827 // also an internal API. 828 template <typename T> 829 // NOLINTNEXTLINE(google-explicit-constructor) 830 constexpr Cord(strings_internal::StringConstant<T>); 831 832 private: 833 using CordRep = absl::cord_internal::CordRep; 834 using CordRepFlat = absl::cord_internal::CordRepFlat; 835 using CordzInfo = cord_internal::CordzInfo; 836 using CordzUpdateScope = cord_internal::CordzUpdateScope; 837 using CordzUpdateTracker = cord_internal::CordzUpdateTracker; 838 using InlineData = cord_internal::InlineData; 839 using MethodIdentifier = CordzUpdateTracker::MethodIdentifier; 840 841 // Creates a cord instance with `method` representing the originating 842 // public API call causing the cord to be created. 843 explicit Cord(absl::string_view src, MethodIdentifier method); 844 845 friend class CordTestPeer; 846 friend bool operator==(const Cord& lhs, const Cord& rhs); 847 friend bool operator==(const Cord& lhs, absl::string_view rhs); 848 849 #ifdef __cpp_impl_three_way_comparison 850 851 // Cords support comparison with other Cords and string_views via operator< 852 // and others; here we provide a wrapper for the C++20 three-way comparison 853 // <=> operator. 854 855 static inline std::strong_ordering ConvertCompareResultToStrongOrdering( 856 int c) { 857 if (c == 0) { 858 return std::strong_ordering::equal; 859 } else if (c < 0) { 860 return std::strong_ordering::less; 861 } else { 862 return std::strong_ordering::greater; 863 } 864 } 865 866 friend inline std::strong_ordering operator<=>(const Cord& x, const Cord& y) { 867 return ConvertCompareResultToStrongOrdering(x.Compare(y)); 868 } 869 870 friend inline std::strong_ordering operator<=>(const Cord& lhs, 871 absl::string_view rhs) { 872 return ConvertCompareResultToStrongOrdering(lhs.Compare(rhs)); 873 } 874 875 friend inline std::strong_ordering operator<=>(absl::string_view lhs, 876 const Cord& rhs) { 877 return ConvertCompareResultToStrongOrdering(-rhs.Compare(lhs)); 878 } 879 #endif 880 881 friend absl::Nullable<const CordzInfo*> GetCordzInfoForTesting( 882 const Cord& cord); 883 884 // Calls the provided function once for each cord chunk, in order. Unlike 885 // Chunks(), this API will not allocate memory. 886 void ForEachChunk(absl::FunctionRef<void(absl::string_view)>) const; 887 888 // Allocates new contiguous storage for the contents of the cord. This is 889 // called by Flatten() when the cord was not already flat. 890 absl::string_view FlattenSlowPath(); 891 892 // Actual cord contents are hidden inside the following simple 893 // class so that we can isolate the bulk of cord.cc from changes 894 // to the representation. 895 // 896 // InlineRep holds either a tree pointer, or an array of kMaxInline bytes. 897 class InlineRep { 898 public: 899 static constexpr unsigned char kMaxInline = cord_internal::kMaxInline; 900 static_assert(kMaxInline >= sizeof(absl::cord_internal::CordRep*), ""); 901 902 constexpr InlineRep() : data_() {} 903 explicit InlineRep(InlineData::DefaultInitType init) : data_(init) {} 904 InlineRep(const InlineRep& src); 905 InlineRep(InlineRep&& src); 906 InlineRep& operator=(const InlineRep& src); 907 InlineRep& operator=(InlineRep&& src) noexcept; 908 909 explicit constexpr InlineRep(absl::string_view sv, 910 absl::Nullable<CordRep*> rep); 911 912 void Swap(absl::Nonnull<InlineRep*> rhs); 913 size_t size() const; 914 // Returns nullptr if holding pointer 915 absl::Nullable<const char*> data() const; 916 // Discards pointer, if any 917 void set_data(absl::Nonnull<const char*> data, size_t n); 918 absl::Nonnull<char*> set_data(size_t n); // Write data to the result 919 // Returns nullptr if holding bytes 920 absl::Nullable<absl::cord_internal::CordRep*> tree() const; 921 absl::Nonnull<absl::cord_internal::CordRep*> as_tree() const; 922 absl::Nonnull<const char*> as_chars() const; 923 // Returns non-null iff was holding a pointer 924 absl::Nullable<absl::cord_internal::CordRep*> clear(); 925 // Converts to pointer if necessary. 926 void reduce_size(size_t n); // REQUIRES: holding data 927 void remove_prefix(size_t n); // REQUIRES: holding data 928 void AppendArray(absl::string_view src, MethodIdentifier method); 929 absl::string_view FindFlatStartPiece() const; 930 931 // Creates a CordRepFlat instance from the current inlined data with `extra' 932 // bytes of desired additional capacity. 933 absl::Nonnull<CordRepFlat*> MakeFlatWithExtraCapacity(size_t extra); 934 935 // Sets the tree value for this instance. `rep` must not be null. 936 // Requires the current instance to hold a tree, and a lock to be held on 937 // any CordzInfo referenced by this instance. The latter is enforced through 938 // the CordzUpdateScope argument. If the current instance is sampled, then 939 // the CordzInfo instance is updated to reference the new `rep` value. 940 void SetTree(absl::Nonnull<CordRep*> rep, const CordzUpdateScope& scope); 941 942 // Identical to SetTree(), except that `rep` is allowed to be null, in 943 // which case the current instance is reset to an empty value. 944 void SetTreeOrEmpty(absl::Nullable<CordRep*> rep, 945 const CordzUpdateScope& scope); 946 947 // Sets the tree value for this instance, and randomly samples this cord. 948 // This function disregards existing contents in `data_`, and should be 949 // called when a Cord is 'promoted' from an 'uninitialized' or 'inlined' 950 // value to a non-inlined (tree / ring) value. 951 void EmplaceTree(absl::Nonnull<CordRep*> rep, MethodIdentifier method); 952 953 // Identical to EmplaceTree, except that it copies the parent stack from 954 // the provided `parent` data if the parent is sampled. 955 void EmplaceTree(absl::Nonnull<CordRep*> rep, const InlineData& parent, 956 MethodIdentifier method); 957 958 // Commits the change of a newly created, or updated `rep` root value into 959 // this cord. `old_rep` indicates the old (inlined or tree) value of the 960 // cord, and determines if the commit invokes SetTree() or EmplaceTree(). 961 void CommitTree(absl::Nullable<const CordRep*> old_rep, 962 absl::Nonnull<CordRep*> rep, const CordzUpdateScope& scope, 963 MethodIdentifier method); 964 965 void AppendTreeToInlined(absl::Nonnull<CordRep*> tree, 966 MethodIdentifier method); 967 void AppendTreeToTree(absl::Nonnull<CordRep*> tree, 968 MethodIdentifier method); 969 void AppendTree(absl::Nonnull<CordRep*> tree, MethodIdentifier method); 970 void PrependTreeToInlined(absl::Nonnull<CordRep*> tree, 971 MethodIdentifier method); 972 void PrependTreeToTree(absl::Nonnull<CordRep*> tree, 973 MethodIdentifier method); 974 void PrependTree(absl::Nonnull<CordRep*> tree, MethodIdentifier method); 975 976 bool IsSame(const InlineRep& other) const { return data_ == other.data_; } 977 978 // Copies the inline contents into `dst`. Assumes the cord is not empty. 979 void CopyTo(absl::Nonnull<std::string*> dst) const { 980 data_.CopyInlineToString(dst); 981 } 982 983 // Copies the inline contents into `dst`. Assumes the cord is not empty. 984 void CopyToArray(absl::Nonnull<char*> dst) const; 985 986 bool is_tree() const { return data_.is_tree(); } 987 988 // Returns true if the Cord is being profiled by cordz. 989 bool is_profiled() const { return data_.is_tree() && data_.is_profiled(); } 990 991 // Returns the available inlined capacity, or 0 if is_tree() == true. 992 size_t remaining_inline_capacity() const { 993 return data_.is_tree() ? 0 : kMaxInline - data_.inline_size(); 994 } 995 996 // Returns the profiled CordzInfo, or nullptr if not sampled. 997 absl::Nullable<absl::cord_internal::CordzInfo*> cordz_info() const { 998 return data_.cordz_info(); 999 } 1000 1001 // Sets the profiled CordzInfo. 1002 void set_cordz_info(absl::Nonnull<cord_internal::CordzInfo*> cordz_info) { 1003 assert(cordz_info != nullptr); 1004 data_.set_cordz_info(cordz_info); 1005 } 1006 1007 // Resets the current cordz_info to null / empty. 1008 void clear_cordz_info() { data_.clear_cordz_info(); } 1009 1010 private: 1011 friend class Cord; 1012 1013 void AssignSlow(const InlineRep& src); 1014 // Unrefs the tree and stops profiling. 1015 void UnrefTree(); 1016 1017 void ResetToEmpty() { data_ = {}; } 1018 1019 void set_inline_size(size_t size) { data_.set_inline_size(size); } 1020 size_t inline_size() const { return data_.inline_size(); } 1021 1022 // Empty cords that carry a checksum have a CordRepCrc node with a null 1023 // child node. The code can avoid lots of special cases where it would 1024 // otherwise transition from tree to inline storage if we just remove the 1025 // CordRepCrc node before mutations. Must never be called inside a 1026 // CordzUpdateScope since it untracks the cordz info. 1027 void MaybeRemoveEmptyCrcNode(); 1028 1029 cord_internal::InlineData data_; 1030 }; 1031 InlineRep contents_; 1032 1033 // Helper for GetFlat() and TryFlat(). 1034 static bool GetFlatAux(absl::Nonnull<absl::cord_internal::CordRep*> rep, 1035 absl::Nonnull<absl::string_view*> fragment); 1036 1037 // Helper for ForEachChunk(). 1038 static void ForEachChunkAux( 1039 absl::Nonnull<absl::cord_internal::CordRep*> rep, 1040 absl::FunctionRef<void(absl::string_view)> callback); 1041 1042 // The destructor for non-empty Cords. 1043 void DestroyCordSlow(); 1044 1045 // Out-of-line implementation of slower parts of logic. 1046 void CopyToArraySlowPath(absl::Nonnull<char*> dst) const; 1047 int CompareSlowPath(absl::string_view rhs, size_t compared_size, 1048 size_t size_to_compare) const; 1049 int CompareSlowPath(const Cord& rhs, size_t compared_size, 1050 size_t size_to_compare) const; 1051 bool EqualsImpl(absl::string_view rhs, size_t size_to_compare) const; 1052 bool EqualsImpl(const Cord& rhs, size_t size_to_compare) const; 1053 int CompareImpl(const Cord& rhs) const; 1054 1055 template <typename ResultType, typename RHS> 1056 friend ResultType GenericCompare(const Cord& lhs, const RHS& rhs, 1057 size_t size_to_compare); 1058 static absl::string_view GetFirstChunk(const Cord& c); 1059 static absl::string_view GetFirstChunk(absl::string_view sv); 1060 1061 // Returns a new reference to contents_.tree(), or steals an existing 1062 // reference if called on an rvalue. 1063 absl::Nonnull<absl::cord_internal::CordRep*> TakeRep() const&; 1064 absl::Nonnull<absl::cord_internal::CordRep*> TakeRep() &&; 1065 1066 // Helper for Append(). 1067 template <typename C> 1068 void AppendImpl(C&& src); 1069 1070 // Appends / Prepends `src` to this instance, using precise sizing. 1071 // This method does explicitly not attempt to use any spare capacity 1072 // in any pending last added private owned flat. 1073 // Requires `src` to be <= kMaxFlatLength. 1074 void AppendPrecise(absl::string_view src, MethodIdentifier method); 1075 void PrependPrecise(absl::string_view src, MethodIdentifier method); 1076 1077 CordBuffer GetAppendBufferSlowPath(size_t block_size, size_t capacity, 1078 size_t min_capacity); 1079 1080 // Prepends the provided data to this instance. `method` contains the public 1081 // API method for this action which is tracked for Cordz sampling purposes. 1082 void PrependArray(absl::string_view src, MethodIdentifier method); 1083 1084 // Assigns the value in 'src' to this instance, 'stealing' its contents. 1085 // Requires src.length() > kMaxBytesToCopy. 1086 Cord& AssignLargeString(std::string&& src); 1087 1088 // Helper for AbslHashValue(). 1089 template <typename H> 1090 H HashFragmented(H hash_state) const { 1091 typename H::AbslInternalPiecewiseCombiner combiner; 1092 ForEachChunk([&combiner, &hash_state](absl::string_view chunk) { 1093 hash_state = combiner.add_buffer(std::move(hash_state), chunk.data(), 1094 chunk.size()); 1095 }); 1096 return H::combine(combiner.finalize(std::move(hash_state)), size()); 1097 } 1098 1099 friend class CrcCord; 1100 void SetCrcCordState(crc_internal::CrcCordState state); 1101 absl::Nullable<const crc_internal::CrcCordState*> MaybeGetCrcCordState() 1102 const; 1103 1104 CharIterator FindImpl(CharIterator it, absl::string_view needle) const; 1105 1106 void CopyToArrayImpl(absl::Nonnull<char*> dst) const; 1107 }; 1108 1109 ABSL_NAMESPACE_END 1110 } // namespace absl 1111 1112 namespace absl { 1113 ABSL_NAMESPACE_BEGIN 1114 1115 // allow a Cord to be logged 1116 extern std::ostream& operator<<(std::ostream& out, const Cord& cord); 1117 1118 // ------------------------------------------------------------------ 1119 // Internal details follow. Clients should ignore. 1120 1121 namespace cord_internal { 1122 1123 // Does non-template-specific `CordRepExternal` initialization. 1124 // Requires `data` to be non-empty. 1125 void InitializeCordRepExternal(absl::string_view data, 1126 absl::Nonnull<CordRepExternal*> rep); 1127 1128 // Creates a new `CordRep` that owns `data` and `releaser` and returns a pointer 1129 // to it. Requires `data` to be non-empty. 1130 template <typename Releaser> 1131 // NOLINTNEXTLINE - suppress clang-tidy raw pointer return. 1132 absl::Nonnull<CordRep*> NewExternalRep(absl::string_view data, 1133 Releaser&& releaser) { 1134 assert(!data.empty()); 1135 using ReleaserType = absl::decay_t<Releaser>; 1136 CordRepExternal* rep = new CordRepExternalImpl<ReleaserType>( 1137 std::forward<Releaser>(releaser), 0); 1138 InitializeCordRepExternal(data, rep); 1139 return rep; 1140 } 1141 1142 // Overload for function reference types that dispatches using a function 1143 // pointer because there are no `alignof()` or `sizeof()` a function reference. 1144 // NOLINTNEXTLINE - suppress clang-tidy raw pointer return. 1145 inline absl::Nonnull<CordRep*> NewExternalRep( 1146 absl::string_view data, void (&releaser)(absl::string_view)) { 1147 return NewExternalRep(data, &releaser); 1148 } 1149 1150 } // namespace cord_internal 1151 1152 template <typename Releaser> 1153 Cord MakeCordFromExternal(absl::string_view data, Releaser&& releaser) { 1154 Cord cord; 1155 if (ABSL_PREDICT_TRUE(!data.empty())) { 1156 cord.contents_.EmplaceTree(::absl::cord_internal::NewExternalRep( 1157 data, std::forward<Releaser>(releaser)), 1158 Cord::MethodIdentifier::kMakeCordFromExternal); 1159 } else { 1160 using ReleaserType = absl::decay_t<Releaser>; 1161 cord_internal::InvokeReleaser( 1162 cord_internal::Rank1{}, ReleaserType(std::forward<Releaser>(releaser)), 1163 data); 1164 } 1165 return cord; 1166 } 1167 1168 constexpr Cord::InlineRep::InlineRep(absl::string_view sv, 1169 absl::Nullable<CordRep*> rep) 1170 : data_(sv, rep) {} 1171 1172 inline Cord::InlineRep::InlineRep(const Cord::InlineRep& src) 1173 : data_(InlineData::kDefaultInit) { 1174 if (CordRep* tree = src.tree()) { 1175 EmplaceTree(CordRep::Ref(tree), src.data_, 1176 CordzUpdateTracker::kConstructorCord); 1177 } else { 1178 data_ = src.data_; 1179 } 1180 } 1181 1182 inline Cord::InlineRep::InlineRep(Cord::InlineRep&& src) : data_(src.data_) { 1183 src.ResetToEmpty(); 1184 } 1185 1186 inline Cord::InlineRep& Cord::InlineRep::operator=(const Cord::InlineRep& src) { 1187 if (this == &src) { 1188 return *this; 1189 } 1190 if (!is_tree() && !src.is_tree()) { 1191 data_ = src.data_; 1192 return *this; 1193 } 1194 AssignSlow(src); 1195 return *this; 1196 } 1197 1198 inline Cord::InlineRep& Cord::InlineRep::operator=( 1199 Cord::InlineRep&& src) noexcept { 1200 if (is_tree()) { 1201 UnrefTree(); 1202 } 1203 data_ = src.data_; 1204 src.ResetToEmpty(); 1205 return *this; 1206 } 1207 1208 inline void Cord::InlineRep::Swap(absl::Nonnull<Cord::InlineRep*> rhs) { 1209 if (rhs == this) { 1210 return; 1211 } 1212 using std::swap; 1213 swap(data_, rhs->data_); 1214 } 1215 1216 inline absl::Nullable<const char*> Cord::InlineRep::data() const { 1217 return is_tree() ? nullptr : data_.as_chars(); 1218 } 1219 1220 inline absl::Nonnull<const char*> Cord::InlineRep::as_chars() const { 1221 assert(!data_.is_tree()); 1222 return data_.as_chars(); 1223 } 1224 1225 inline absl::Nonnull<absl::cord_internal::CordRep*> Cord::InlineRep::as_tree() 1226 const { 1227 assert(data_.is_tree()); 1228 return data_.as_tree(); 1229 } 1230 1231 inline absl::Nullable<absl::cord_internal::CordRep*> Cord::InlineRep::tree() 1232 const { 1233 if (is_tree()) { 1234 return as_tree(); 1235 } else { 1236 return nullptr; 1237 } 1238 } 1239 1240 inline size_t Cord::InlineRep::size() const { 1241 return is_tree() ? as_tree()->length : inline_size(); 1242 } 1243 1244 inline absl::Nonnull<cord_internal::CordRepFlat*> 1245 Cord::InlineRep::MakeFlatWithExtraCapacity(size_t extra) { 1246 static_assert(cord_internal::kMinFlatLength >= sizeof(data_), ""); 1247 size_t len = data_.inline_size(); 1248 auto* result = CordRepFlat::New(len + extra); 1249 result->length = len; 1250 data_.copy_max_inline_to(result->Data()); 1251 return result; 1252 } 1253 1254 inline void Cord::InlineRep::EmplaceTree(absl::Nonnull<CordRep*> rep, 1255 MethodIdentifier method) { 1256 assert(rep); 1257 data_.make_tree(rep); 1258 CordzInfo::MaybeTrackCord(data_, method); 1259 } 1260 1261 inline void Cord::InlineRep::EmplaceTree(absl::Nonnull<CordRep*> rep, 1262 const InlineData& parent, 1263 MethodIdentifier method) { 1264 data_.make_tree(rep); 1265 CordzInfo::MaybeTrackCord(data_, parent, method); 1266 } 1267 1268 inline void Cord::InlineRep::SetTree(absl::Nonnull<CordRep*> rep, 1269 const CordzUpdateScope& scope) { 1270 assert(rep); 1271 assert(data_.is_tree()); 1272 data_.set_tree(rep); 1273 scope.SetCordRep(rep); 1274 } 1275 1276 inline void Cord::InlineRep::SetTreeOrEmpty(absl::Nullable<CordRep*> rep, 1277 const CordzUpdateScope& scope) { 1278 assert(data_.is_tree()); 1279 if (rep) { 1280 data_.set_tree(rep); 1281 } else { 1282 data_ = {}; 1283 } 1284 scope.SetCordRep(rep); 1285 } 1286 1287 inline void Cord::InlineRep::CommitTree(absl::Nullable<const CordRep*> old_rep, 1288 absl::Nonnull<CordRep*> rep, 1289 const CordzUpdateScope& scope, 1290 MethodIdentifier method) { 1291 if (old_rep) { 1292 SetTree(rep, scope); 1293 } else { 1294 EmplaceTree(rep, method); 1295 } 1296 } 1297 1298 inline absl::Nullable<absl::cord_internal::CordRep*> Cord::InlineRep::clear() { 1299 if (is_tree()) { 1300 CordzInfo::MaybeUntrackCord(cordz_info()); 1301 } 1302 absl::cord_internal::CordRep* result = tree(); 1303 ResetToEmpty(); 1304 return result; 1305 } 1306 1307 inline void Cord::InlineRep::CopyToArray(absl::Nonnull<char*> dst) const { 1308 assert(!is_tree()); 1309 size_t n = inline_size(); 1310 assert(n != 0); 1311 cord_internal::SmallMemmove(dst, data_.as_chars(), n); 1312 } 1313 1314 inline void Cord::InlineRep::MaybeRemoveEmptyCrcNode() { 1315 CordRep* rep = tree(); 1316 if (rep == nullptr || ABSL_PREDICT_TRUE(rep->length > 0)) { 1317 return; 1318 } 1319 assert(rep->IsCrc()); 1320 assert(rep->crc()->child == nullptr); 1321 CordzInfo::MaybeUntrackCord(cordz_info()); 1322 CordRep::Unref(rep); 1323 ResetToEmpty(); 1324 } 1325 1326 constexpr inline Cord::Cord() noexcept {} 1327 1328 inline Cord::Cord(absl::string_view src) 1329 : Cord(src, CordzUpdateTracker::kConstructorString) {} 1330 1331 template <typename T> 1332 constexpr Cord::Cord(strings_internal::StringConstant<T>) 1333 : contents_(strings_internal::StringConstant<T>::value, 1334 strings_internal::StringConstant<T>::value.size() <= 1335 cord_internal::kMaxInline 1336 ? nullptr 1337 : &cord_internal::ConstInitExternalStorage< 1338 strings_internal::StringConstant<T>>::value) {} 1339 1340 inline Cord& Cord::operator=(const Cord& x) { 1341 contents_ = x.contents_; 1342 return *this; 1343 } 1344 1345 template <typename T, Cord::EnableIfString<T>> 1346 Cord& Cord::operator=(T&& src) { 1347 if (src.size() <= cord_internal::kMaxBytesToCopy) { 1348 return operator=(absl::string_view(src)); 1349 } else { 1350 return AssignLargeString(std::forward<T>(src)); 1351 } 1352 } 1353 1354 inline Cord::Cord(const Cord& src) : contents_(src.contents_) {} 1355 1356 inline Cord::Cord(Cord&& src) noexcept : contents_(std::move(src.contents_)) {} 1357 1358 inline void Cord::swap(Cord& other) noexcept { 1359 contents_.Swap(&other.contents_); 1360 } 1361 1362 inline Cord& Cord::operator=(Cord&& x) noexcept { 1363 contents_ = std::move(x.contents_); 1364 return *this; 1365 } 1366 1367 extern template Cord::Cord(std::string&& src); 1368 1369 inline size_t Cord::size() const { 1370 // Length is 1st field in str.rep_ 1371 return contents_.size(); 1372 } 1373 1374 inline bool Cord::empty() const { return size() == 0; } 1375 1376 inline size_t Cord::EstimatedMemoryUsage( 1377 CordMemoryAccounting accounting_method) const { 1378 size_t result = sizeof(Cord); 1379 if (const absl::cord_internal::CordRep* rep = contents_.tree()) { 1380 switch (accounting_method) { 1381 case CordMemoryAccounting::kFairShare: 1382 result += cord_internal::GetEstimatedFairShareMemoryUsage(rep); 1383 break; 1384 case CordMemoryAccounting::kTotalMorePrecise: 1385 result += cord_internal::GetMorePreciseMemoryUsage(rep); 1386 break; 1387 case CordMemoryAccounting::kTotal: 1388 result += cord_internal::GetEstimatedMemoryUsage(rep); 1389 break; 1390 } 1391 } 1392 return result; 1393 } 1394 1395 inline absl::optional<absl::string_view> Cord::TryFlat() const 1396 ABSL_ATTRIBUTE_LIFETIME_BOUND { 1397 absl::cord_internal::CordRep* rep = contents_.tree(); 1398 if (rep == nullptr) { 1399 return absl::string_view(contents_.data(), contents_.size()); 1400 } 1401 absl::string_view fragment; 1402 if (GetFlatAux(rep, &fragment)) { 1403 return fragment; 1404 } 1405 return absl::nullopt; 1406 } 1407 1408 inline absl::string_view Cord::Flatten() ABSL_ATTRIBUTE_LIFETIME_BOUND { 1409 absl::cord_internal::CordRep* rep = contents_.tree(); 1410 if (rep == nullptr) { 1411 return absl::string_view(contents_.data(), contents_.size()); 1412 } else { 1413 absl::string_view already_flat_contents; 1414 if (GetFlatAux(rep, &already_flat_contents)) { 1415 return already_flat_contents; 1416 } 1417 } 1418 return FlattenSlowPath(); 1419 } 1420 1421 inline void Cord::Append(absl::string_view src) { 1422 contents_.AppendArray(src, CordzUpdateTracker::kAppendString); 1423 } 1424 1425 inline void Cord::Prepend(absl::string_view src) { 1426 PrependArray(src, CordzUpdateTracker::kPrependString); 1427 } 1428 1429 inline void Cord::Append(CordBuffer buffer) { 1430 if (ABSL_PREDICT_FALSE(buffer.length() == 0)) return; 1431 contents_.MaybeRemoveEmptyCrcNode(); 1432 absl::string_view short_value; 1433 if (CordRep* rep = buffer.ConsumeValue(short_value)) { 1434 contents_.AppendTree(rep, CordzUpdateTracker::kAppendCordBuffer); 1435 } else { 1436 AppendPrecise(short_value, CordzUpdateTracker::kAppendCordBuffer); 1437 } 1438 } 1439 1440 inline void Cord::Prepend(CordBuffer buffer) { 1441 if (ABSL_PREDICT_FALSE(buffer.length() == 0)) return; 1442 contents_.MaybeRemoveEmptyCrcNode(); 1443 absl::string_view short_value; 1444 if (CordRep* rep = buffer.ConsumeValue(short_value)) { 1445 contents_.PrependTree(rep, CordzUpdateTracker::kPrependCordBuffer); 1446 } else { 1447 PrependPrecise(short_value, CordzUpdateTracker::kPrependCordBuffer); 1448 } 1449 } 1450 1451 inline CordBuffer Cord::GetAppendBuffer(size_t capacity, size_t min_capacity) { 1452 if (empty()) return CordBuffer::CreateWithDefaultLimit(capacity); 1453 return GetAppendBufferSlowPath(0, capacity, min_capacity); 1454 } 1455 1456 inline CordBuffer Cord::GetCustomAppendBuffer(size_t block_size, 1457 size_t capacity, 1458 size_t min_capacity) { 1459 if (empty()) { 1460 return block_size ? CordBuffer::CreateWithCustomLimit(block_size, capacity) 1461 : CordBuffer::CreateWithDefaultLimit(capacity); 1462 } 1463 return GetAppendBufferSlowPath(block_size, capacity, min_capacity); 1464 } 1465 1466 extern template void Cord::Append(std::string&& src); 1467 extern template void Cord::Prepend(std::string&& src); 1468 1469 inline int Cord::Compare(const Cord& rhs) const { 1470 if (!contents_.is_tree() && !rhs.contents_.is_tree()) { 1471 return contents_.data_.Compare(rhs.contents_.data_); 1472 } 1473 1474 return CompareImpl(rhs); 1475 } 1476 1477 // Does 'this' cord start/end with rhs 1478 inline bool Cord::StartsWith(const Cord& rhs) const { 1479 if (contents_.IsSame(rhs.contents_)) return true; 1480 size_t rhs_size = rhs.size(); 1481 if (size() < rhs_size) return false; 1482 return EqualsImpl(rhs, rhs_size); 1483 } 1484 1485 inline bool Cord::StartsWith(absl::string_view rhs) const { 1486 size_t rhs_size = rhs.size(); 1487 if (size() < rhs_size) return false; 1488 return EqualsImpl(rhs, rhs_size); 1489 } 1490 1491 inline void Cord::CopyToArrayImpl(absl::Nonnull<char*> dst) const { 1492 if (!contents_.is_tree()) { 1493 if (!empty()) contents_.CopyToArray(dst); 1494 } else { 1495 CopyToArraySlowPath(dst); 1496 } 1497 } 1498 1499 inline void Cord::ChunkIterator::InitTree( 1500 absl::Nonnull<cord_internal::CordRep*> tree) { 1501 tree = cord_internal::SkipCrcNode(tree); 1502 if (tree->tag == cord_internal::BTREE) { 1503 current_chunk_ = btree_reader_.Init(tree->btree()); 1504 } else { 1505 current_leaf_ = tree; 1506 current_chunk_ = cord_internal::EdgeData(tree); 1507 } 1508 } 1509 1510 inline Cord::ChunkIterator::ChunkIterator( 1511 absl::Nonnull<cord_internal::CordRep*> tree) { 1512 bytes_remaining_ = tree->length; 1513 InitTree(tree); 1514 } 1515 1516 inline Cord::ChunkIterator::ChunkIterator(absl::Nonnull<const Cord*> cord) { 1517 if (CordRep* tree = cord->contents_.tree()) { 1518 bytes_remaining_ = tree->length; 1519 if (ABSL_PREDICT_TRUE(bytes_remaining_ != 0)) { 1520 InitTree(tree); 1521 } else { 1522 current_chunk_ = {}; 1523 } 1524 } else { 1525 bytes_remaining_ = cord->contents_.inline_size(); 1526 current_chunk_ = {cord->contents_.data(), bytes_remaining_}; 1527 } 1528 } 1529 1530 inline Cord::ChunkIterator& Cord::ChunkIterator::AdvanceBtree() { 1531 current_chunk_ = btree_reader_.Next(); 1532 return *this; 1533 } 1534 1535 inline void Cord::ChunkIterator::AdvanceBytesBtree(size_t n) { 1536 assert(n >= current_chunk_.size()); 1537 bytes_remaining_ -= n; 1538 if (bytes_remaining_) { 1539 if (n == current_chunk_.size()) { 1540 current_chunk_ = btree_reader_.Next(); 1541 } else { 1542 size_t offset = btree_reader_.length() - bytes_remaining_; 1543 current_chunk_ = btree_reader_.Seek(offset); 1544 } 1545 } else { 1546 current_chunk_ = {}; 1547 } 1548 } 1549 1550 inline Cord::ChunkIterator& Cord::ChunkIterator::operator++() { 1551 ABSL_HARDENING_ASSERT(bytes_remaining_ > 0 && 1552 "Attempted to iterate past `end()`"); 1553 assert(bytes_remaining_ >= current_chunk_.size()); 1554 bytes_remaining_ -= current_chunk_.size(); 1555 if (bytes_remaining_ > 0) { 1556 if (btree_reader_) { 1557 return AdvanceBtree(); 1558 } else { 1559 assert(!current_chunk_.empty()); // Called on invalid iterator. 1560 } 1561 current_chunk_ = {}; 1562 } 1563 return *this; 1564 } 1565 1566 inline Cord::ChunkIterator Cord::ChunkIterator::operator++(int) { 1567 ChunkIterator tmp(*this); 1568 operator++(); 1569 return tmp; 1570 } 1571 1572 inline bool Cord::ChunkIterator::operator==(const ChunkIterator& other) const { 1573 return bytes_remaining_ == other.bytes_remaining_; 1574 } 1575 1576 inline bool Cord::ChunkIterator::operator!=(const ChunkIterator& other) const { 1577 return !(*this == other); 1578 } 1579 1580 inline Cord::ChunkIterator::reference Cord::ChunkIterator::operator*() const { 1581 ABSL_HARDENING_ASSERT(bytes_remaining_ != 0); 1582 return current_chunk_; 1583 } 1584 1585 inline Cord::ChunkIterator::pointer Cord::ChunkIterator::operator->() const { 1586 ABSL_HARDENING_ASSERT(bytes_remaining_ != 0); 1587 return ¤t_chunk_; 1588 } 1589 1590 inline void Cord::ChunkIterator::RemoveChunkPrefix(size_t n) { 1591 assert(n < current_chunk_.size()); 1592 current_chunk_.remove_prefix(n); 1593 bytes_remaining_ -= n; 1594 } 1595 1596 inline void Cord::ChunkIterator::AdvanceBytes(size_t n) { 1597 assert(bytes_remaining_ >= n); 1598 if (ABSL_PREDICT_TRUE(n < current_chunk_.size())) { 1599 RemoveChunkPrefix(n); 1600 } else if (n != 0) { 1601 if (btree_reader_) { 1602 AdvanceBytesBtree(n); 1603 } else { 1604 bytes_remaining_ = 0; 1605 } 1606 } 1607 } 1608 1609 inline Cord::ChunkIterator Cord::chunk_begin() const { 1610 return ChunkIterator(this); 1611 } 1612 1613 inline Cord::ChunkIterator Cord::chunk_end() const { return ChunkIterator(); } 1614 1615 inline Cord::ChunkIterator Cord::ChunkRange::begin() const { 1616 return cord_->chunk_begin(); 1617 } 1618 1619 inline Cord::ChunkIterator Cord::ChunkRange::end() const { 1620 return cord_->chunk_end(); 1621 } 1622 1623 inline Cord::ChunkRange Cord::Chunks() const { return ChunkRange(this); } 1624 1625 inline Cord::CharIterator& Cord::CharIterator::operator++() { 1626 if (ABSL_PREDICT_TRUE(chunk_iterator_->size() > 1)) { 1627 chunk_iterator_.RemoveChunkPrefix(1); 1628 } else { 1629 ++chunk_iterator_; 1630 } 1631 return *this; 1632 } 1633 1634 inline Cord::CharIterator Cord::CharIterator::operator++(int) { 1635 CharIterator tmp(*this); 1636 operator++(); 1637 return tmp; 1638 } 1639 1640 inline bool Cord::CharIterator::operator==(const CharIterator& other) const { 1641 return chunk_iterator_ == other.chunk_iterator_; 1642 } 1643 1644 inline bool Cord::CharIterator::operator!=(const CharIterator& other) const { 1645 return !(*this == other); 1646 } 1647 1648 inline Cord::CharIterator::reference Cord::CharIterator::operator*() const { 1649 return *chunk_iterator_->data(); 1650 } 1651 1652 inline Cord Cord::AdvanceAndRead(absl::Nonnull<CharIterator*> it, 1653 size_t n_bytes) { 1654 assert(it != nullptr); 1655 return it->chunk_iterator_.AdvanceAndReadBytes(n_bytes); 1656 } 1657 1658 inline void Cord::Advance(absl::Nonnull<CharIterator*> it, size_t n_bytes) { 1659 assert(it != nullptr); 1660 it->chunk_iterator_.AdvanceBytes(n_bytes); 1661 } 1662 1663 inline absl::string_view Cord::ChunkRemaining(const CharIterator& it) { 1664 return *it.chunk_iterator_; 1665 } 1666 1667 inline Cord::CharIterator Cord::char_begin() const { 1668 return CharIterator(this); 1669 } 1670 1671 inline Cord::CharIterator Cord::char_end() const { return CharIterator(); } 1672 1673 inline Cord::CharIterator Cord::CharRange::begin() const { 1674 return cord_->char_begin(); 1675 } 1676 1677 inline Cord::CharIterator Cord::CharRange::end() const { 1678 return cord_->char_end(); 1679 } 1680 1681 inline Cord::CharRange Cord::Chars() const { return CharRange(this); } 1682 1683 inline void Cord::ForEachChunk( 1684 absl::FunctionRef<void(absl::string_view)> callback) const { 1685 absl::cord_internal::CordRep* rep = contents_.tree(); 1686 if (rep == nullptr) { 1687 callback(absl::string_view(contents_.data(), contents_.size())); 1688 } else { 1689 ForEachChunkAux(rep, callback); 1690 } 1691 } 1692 1693 // Nonmember Cord-to-Cord relational operators. 1694 inline bool operator==(const Cord& lhs, const Cord& rhs) { 1695 if (lhs.contents_.IsSame(rhs.contents_)) return true; 1696 size_t rhs_size = rhs.size(); 1697 if (lhs.size() != rhs_size) return false; 1698 return lhs.EqualsImpl(rhs, rhs_size); 1699 } 1700 1701 inline bool operator!=(const Cord& x, const Cord& y) { return !(x == y); } 1702 inline bool operator<(const Cord& x, const Cord& y) { return x.Compare(y) < 0; } 1703 inline bool operator>(const Cord& x, const Cord& y) { return x.Compare(y) > 0; } 1704 inline bool operator<=(const Cord& x, const Cord& y) { 1705 return x.Compare(y) <= 0; 1706 } 1707 inline bool operator>=(const Cord& x, const Cord& y) { 1708 return x.Compare(y) >= 0; 1709 } 1710 1711 // Nonmember Cord-to-absl::string_view relational operators. 1712 // 1713 // Due to implicit conversions, these also enable comparisons of Cord with 1714 // std::string and const char*. 1715 inline bool operator==(const Cord& lhs, absl::string_view rhs) { 1716 size_t lhs_size = lhs.size(); 1717 size_t rhs_size = rhs.size(); 1718 if (lhs_size != rhs_size) return false; 1719 return lhs.EqualsImpl(rhs, rhs_size); 1720 } 1721 1722 inline bool operator==(absl::string_view x, const Cord& y) { return y == x; } 1723 inline bool operator!=(const Cord& x, absl::string_view y) { return !(x == y); } 1724 inline bool operator!=(absl::string_view x, const Cord& y) { return !(x == y); } 1725 inline bool operator<(const Cord& x, absl::string_view y) { 1726 return x.Compare(y) < 0; 1727 } 1728 inline bool operator<(absl::string_view x, const Cord& y) { 1729 return y.Compare(x) > 0; 1730 } 1731 inline bool operator>(const Cord& x, absl::string_view y) { return y < x; } 1732 inline bool operator>(absl::string_view x, const Cord& y) { return y < x; } 1733 inline bool operator<=(const Cord& x, absl::string_view y) { return !(y < x); } 1734 inline bool operator<=(absl::string_view x, const Cord& y) { return !(y < x); } 1735 inline bool operator>=(const Cord& x, absl::string_view y) { return !(x < y); } 1736 inline bool operator>=(absl::string_view x, const Cord& y) { return !(x < y); } 1737 1738 // Some internals exposed to test code. 1739 namespace strings_internal { 1740 class CordTestAccess { 1741 public: 1742 static size_t FlatOverhead(); 1743 static size_t MaxFlatLength(); 1744 static size_t SizeofCordRepExternal(); 1745 static size_t SizeofCordRepSubstring(); 1746 static size_t FlatTagToLength(uint8_t tag); 1747 static uint8_t LengthToTag(size_t s); 1748 }; 1749 } // namespace strings_internal 1750 ABSL_NAMESPACE_END 1751 } // namespace absl 1752 1753 #endif // ABSL_STRINGS_CORD_H_