cord_internal.h (33441B)
1 // Copyright 2021 The Abseil Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // https://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_ 16 #define ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_ 17 18 #include <atomic> 19 #include <cassert> 20 #include <cstddef> 21 #include <cstdint> 22 #include <cstring> 23 #include <string> 24 25 #include "absl/base/attributes.h" 26 #include "absl/base/config.h" 27 #include "absl/base/internal/endian.h" 28 #include "absl/base/macros.h" 29 #include "absl/base/nullability.h" 30 #include "absl/base/optimization.h" 31 #include "absl/container/internal/compressed_tuple.h" 32 #include "absl/container/internal/container_memory.h" 33 #include "absl/strings/string_view.h" 34 35 // We can only add poisoning if we can detect consteval executions. 36 #if defined(ABSL_HAVE_CONSTANT_EVALUATED) && \ 37 (defined(ABSL_HAVE_ADDRESS_SANITIZER) || \ 38 defined(ABSL_HAVE_MEMORY_SANITIZER)) 39 #define ABSL_INTERNAL_CORD_HAVE_SANITIZER 1 40 #endif 41 42 #define ABSL_CORD_INTERNAL_NO_SANITIZE \ 43 ABSL_ATTRIBUTE_NO_SANITIZE_ADDRESS ABSL_ATTRIBUTE_NO_SANITIZE_MEMORY 44 45 namespace absl { 46 ABSL_NAMESPACE_BEGIN 47 namespace cord_internal { 48 49 // The overhead of a vtable is too much for Cord, so we roll our own subclasses 50 // using only a single byte to differentiate classes from each other - the "tag" 51 // byte. Define the subclasses first so we can provide downcasting helper 52 // functions in the base class. 
struct CordRep;
struct CordRepConcat;
struct CordRepExternal;
struct CordRepFlat;
struct CordRepSubstring;
struct CordRepCrc;
class CordRepBtree;

class CordzInfo;

// Default feature enable states for cord ring buffers
enum CordFeatureDefaults { kCordShallowSubcordsDefault = false };

extern std::atomic<bool> shallow_subcords_enabled;

// Updates the global "shallow subcords" feature toggle. Relaxed ordering is
// sufficient here: this is a best-effort feature flag, and readers only need
// to eventually observe the new value.
inline void enable_shallow_subcords(bool enable) {
  shallow_subcords_enabled.store(enable, std::memory_order_relaxed);
}

enum Constants {
  // The inlined size to use with absl::InlinedVector.
  //
  // Note: The InlinedVectors in this file (and in cord.h) do not need to use
  // the same value for their inlined size. The fact that they do is historical.
  // It may be desirable for each to use a different inlined size optimized for
  // that InlinedVector's usage.
  //
  // TODO(jgm): Benchmark to see if there's a more optimal value than 47 for
  // the inlined vector size (47 exists for backward compatibility).
  kInlinedVectorSize = 47,

  // Prefer copying blocks of at most this size, otherwise reference count.
  kMaxBytesToCopy = 511
};

// Emits a fatal error "Unexpected node type: xyz" and aborts the program.
[[noreturn]] void LogFatalNodeType(CordRep* rep);

// Fast implementation of memmove for up to 15 bytes. This implementation is
// safe for overlapping regions. If nullify_tail is true, the destination is
// padded with '\0' up to 15 bytes.
template <bool nullify_tail = false>
inline void SmallMemmove(char* dst, const char* src, size_t n) {
  if (n >= 8) {
    assert(n <= 15);
    // Load both (possibly overlapping) halves into locals before any store so
    // that overlapping src/dst ranges are handled correctly, then write them
    // back as two 8-byte stores covering [0, n).
    uint64_t buf1;
    uint64_t buf2;
    memcpy(&buf1, src, 8);
    memcpy(&buf2, src + n - 8, 8);
    if (nullify_tail) {
      // Zero bytes [7, 15); the stores below overwrite [0, n) afterwards,
      // leaving [n, 15) zeroed.
      memset(dst + 7, 0, 8);
    }
    // GCC 12 has a false-positive -Wstringop-overflow warning here.
#if ABSL_INTERNAL_HAVE_MIN_GNUC_VERSION(12, 0)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wstringop-overflow"
#endif
    memcpy(dst, &buf1, 8);
    memcpy(dst + n - 8, &buf2, 8);
#if ABSL_INTERNAL_HAVE_MIN_GNUC_VERSION(12, 0)
#pragma GCC diagnostic pop
#endif
  } else if (n >= 4) {
    uint32_t buf1;
    uint32_t buf2;
    memcpy(&buf1, src, 4);
    memcpy(&buf2, src + n - 4, 4);
    if (nullify_tail) {
      memset(dst + 4, 0, 4);
      memset(dst + 7, 0, 8);
    }
    memcpy(dst, &buf1, 4);
    memcpy(dst + n - 4, &buf2, 4);
  } else {
    if (n != 0) {
      // Branch-free copy of 1..3 bytes: for n == 1 all three stores hit
      // dst[0]; for n == 2 they hit {0, 1}; for n == 3 they hit {0, 1, 2}.
      dst[0] = src[0];
      dst[n / 2] = src[n / 2];
      dst[n - 1] = src[n - 1];
    }
    if (nullify_tail) {
      memset(dst + 7, 0, 8);
      memset(dst + n, 0, 8);
    }
  }
}

// Compact class for tracking the reference count and state flags for CordRep
// instances. Data is stored in an atomic int32_t for compactness and speed.
class RefcountAndFlags {
 public:
  constexpr RefcountAndFlags() : count_{kRefIncrement} {}
  struct Immortal {};
  explicit constexpr RefcountAndFlags(Immortal) : count_(kImmortalFlag) {}

  // Increments the reference count. Imposes no memory ordering.
  inline void Increment() {
    count_.fetch_add(kRefIncrement, std::memory_order_relaxed);
  }

  // Asserts that the current refcount is greater than 0. If the refcount is
  // greater than 1, decrements the reference count.
  //
  // Returns false if there are no references outstanding; true otherwise.
  // Inserts barriers to ensure that state written before this method returns
  // false will be visible to a thread that just observed this method returning
  // false. Always returns false when the immortal bit is set.
  inline bool Decrement() {
    // Fast path: an acquire load first avoids the more expensive read-modify-
    // write when this is the sole reference (the common "destroy" case).
    int32_t refcount = count_.load(std::memory_order_acquire);
    assert(refcount > 0 || refcount & kImmortalFlag);
    return refcount != kRefIncrement &&
           count_.fetch_sub(kRefIncrement, std::memory_order_acq_rel) !=
               kRefIncrement;
  }

  // Same as Decrement but expect that refcount is greater than 1.
  inline bool DecrementExpectHighRefcount() {
    int32_t refcount =
        count_.fetch_sub(kRefIncrement, std::memory_order_acq_rel);
    assert(refcount > 0 || refcount & kImmortalFlag);
    return refcount != kRefIncrement;
  }

  // Returns the current reference count using acquire semantics.
  inline size_t Get() const {
    return static_cast<size_t>(count_.load(std::memory_order_acquire) >>
                               kNumFlags);
  }

  // Returns whether the atomic integer is 1.
  // If the reference count is used in the conventional way, a
  // reference count of 1 implies that the current thread owns the
  // reference and no other thread shares it.
  // This call performs the test for a reference count of one, and
  // performs the memory barrier needed for the owning thread
  // to act on the object, knowing that it has exclusive access to the
  // object. Always returns false when the immortal bit is set.
  inline bool IsOne() {
    return count_.load(std::memory_order_acquire) == kRefIncrement;
  }

  bool IsImmortal() const {
    return (count_.load(std::memory_order_relaxed) & kImmortalFlag) != 0;
  }

 private:
  // We reserve the bottom bit for flag.
  // kImmortalBit indicates that this entity should never be collected; it is
  // used for the StringConstant constructor to avoid collecting immutable
  // constant cords.
  enum Flags {
    kNumFlags = 1,

    kImmortalFlag = 0x1,
    kRefIncrement = (1 << kNumFlags),
  };

  std::atomic<int32_t> count_;
};

// Various representations that we allow
enum CordRepKind {
  UNUSED_0 = 0,
  SUBSTRING = 1,
  CRC = 2,
  BTREE = 3,
  UNUSED_4 = 4,
  EXTERNAL = 5,

  // We have different tags for different sized flat arrays,
  // starting with FLAT, and limited to MAX_FLAT_TAG. The below values map to an
  // allocated range of 32 bytes to 256 KB. The current granularity is:
  //   - 8 byte granularity for flat sizes in [32 - 512]
  //   - 64 byte granularity for flat sizes in (512 - 8KiB]
  //   - 4KiB byte granularity for flat sizes in (8KiB, 256 KiB]
  // If a new tag is needed in the future, then 'FLAT' and 'MAX_FLAT_TAG' should
  // be adjusted as well as the Tag <---> Size mapping logic so that FLAT still
  // represents the minimum flat allocation size. (32 bytes as of now).
  FLAT = 6,
  MAX_FLAT_TAG = 248
};

// There are various locations where we want to check if some rep is a 'plain'
// data edge, i.e. an external or flat rep. By having FLAT == EXTERNAL + 1, we
// can perform this check in a single branch as 'tag >= EXTERNAL'
// Note that we can leave this optimization to the compiler. The compiler will
// DTRT when it sees a condition like `tag == EXTERNAL || tag >= FLAT`.
static_assert(FLAT == EXTERNAL + 1, "EXTERNAL and FLAT not consecutive");

struct CordRep {
  // Result from an `extract edge` operation. Contains the (possibly changed)
  // tree node as well as the extracted edge, or {tree, nullptr} if no edge
  // could be extracted.
  // On success, the returned `tree` value is null if `extracted` was the only
  // data edge inside the tree, a data edge if there were only two data edges in
  // the tree, or the (possibly new / smaller) remaining tree with the extracted
  // data edge removed.
  struct ExtractResult {
    CordRep* tree;
    CordRep* extracted;
  };

  CordRep() = default;
  constexpr CordRep(RefcountAndFlags::Immortal immortal, size_t l)
      : length(l), refcount(immortal), tag(EXTERNAL), storage{} {}

  // The following three fields have to be less than 32 bytes since
  // that is the smallest supported flat node size. Some code optimizations rely
  // on the specific layout of these fields. Notably: the non-trivial field
  // `refcount` being preceded by `length`, and being tailed by POD data
  // members only.
  // LINT.IfChange
  size_t length;
  RefcountAndFlags refcount;
  // If tag < FLAT, it represents CordRepKind and indicates the type of node.
  // Otherwise, the node type is CordRepFlat and the tag is the encoded size.
  uint8_t tag;

  // `storage` provides two main purposes:
  // - the starting point for FlatCordRep.Data() [flexible-array-member]
  // - 3 bytes of additional storage for use by derived classes.
  // The latter is used by CordrepConcat and CordRepBtree. CordRepConcat stores
  // a 'depth' value in storage[0], and the (future) CordRepBtree class stores
  // `height`, `begin` and `end` in the 3 entries. Otherwise we would need to
  // allocate room for these in the derived class, as not all compilers reuse
  // padding space from the base class (clang and gcc do, MSVC does not, etc)
  uint8_t storage[3];
  // LINT.ThenChange(cord_rep_btree.h:copy_raw)

  // Returns true if this instance's tag matches the requested type.
  constexpr bool IsSubstring() const { return tag == SUBSTRING; }
  constexpr bool IsCrc() const { return tag == CRC; }
  constexpr bool IsExternal() const { return tag == EXTERNAL; }
  constexpr bool IsFlat() const { return tag >= FLAT; }
  constexpr bool IsBtree() const { return tag == BTREE; }

  // Checked downcast helpers; each asserts the corresponding Is*() predicate.
  inline CordRepSubstring* substring();
  inline const CordRepSubstring* substring() const;
  inline CordRepCrc* crc();
  inline const CordRepCrc* crc() const;
  inline CordRepExternal* external();
  inline const CordRepExternal* external() const;
  inline CordRepFlat* flat();
  inline const CordRepFlat* flat() const;
  inline CordRepBtree* btree();
  inline const CordRepBtree* btree() const;

  // --------------------------------------------------------------------
  // Memory management

  // Destroys the provided `rep`.
  static void Destroy(CordRep* rep);

  // Increments the reference count of `rep`.
  // Requires `rep` to be a non-null pointer value.
  static inline CordRep* Ref(CordRep* rep);

  // Decrements the reference count of `rep`. Destroys rep if count reaches
  // zero. Requires `rep` to be a non-null pointer value.
  static inline void Unref(CordRep* rep);
};

struct CordRepSubstring : public CordRep {
  size_t start;  // Starting offset of substring in child
  CordRep* child;

  // Creates a substring on `child`, adopting a reference on `child`.
  // Requires `child` to be either a flat or external node, and `pos` and `n` to
  // form a non-empty partial sub range of `'child`, i.e.:
  // `n > 0 && n < length && n + pos <= length`
  static inline CordRepSubstring* Create(CordRep* child, size_t pos, size_t n);

  // Creates a substring of `rep`. Does not adopt a reference on `rep`.
  // Requires `IsDataEdge(rep) && n > 0 && pos + n <= rep->length`.
  // If `n == rep->length` then this method returns `CordRep::Ref(rep)`
  // If `rep` is a substring of a flat or external node, then this method will
  // return a new substring of that flat or external node with `pos` adjusted
  // with the original `start` position.
  static inline CordRep* Substring(CordRep* rep, size_t pos, size_t n);
};

// Type for function pointer that will invoke the releaser function and also
// delete the `CordRepExternalImpl` corresponding to the passed in
// `CordRepExternal`.
using ExternalReleaserInvoker = void (*)(CordRepExternal*);

// External CordReps are allocated together with a type erased releaser. The
// releaser is stored in the memory directly following the CordRepExternal.
struct CordRepExternal : public CordRep {
  CordRepExternal() = default;
  explicit constexpr CordRepExternal(absl::string_view str)
      : CordRep(RefcountAndFlags::Immortal{}, str.size()),
        base(str.data()),
        releaser_invoker(nullptr) {}

  const char* base;
  // Pointer to function that knows how to call and destroy the releaser.
  ExternalReleaserInvoker releaser_invoker;

  // Deletes (releases) the external rep.
  // Requires rep != nullptr and rep->IsExternal()
  static void Delete(CordRep* rep);
};

// Use go/ranked-overloads for dispatching.
struct Rank0 {};
struct Rank1 : Rank0 {};

// Invokes a releaser taking the released data as `absl::string_view`.
// Preferred overload (Rank1) when the releaser is invocable with the data.
template <typename Releaser,
          typename = ::std::invoke_result_t<Releaser, absl::string_view>>
void InvokeReleaser(Rank1, Releaser&& releaser, absl::string_view data) {
  ::std::invoke(std::forward<Releaser>(releaser), data);
}

// Fallback overload (Rank0) for releasers taking no arguments.
template <typename Releaser, typename = ::std::invoke_result_t<Releaser>>
void InvokeReleaser(Rank0, Releaser&& releaser, absl::string_view) {
  ::std::invoke(std::forward<Releaser>(releaser));
}

// We use CompressedTuple so that we can benefit from EBCO.
template <typename Releaser>
struct CordRepExternalImpl
    : public CordRepExternal,
      public ::absl::container_internal::CompressedTuple<Releaser> {
  // The extra int arg is so that we can avoid interfering with copy/move
  // constructors while still benefitting from perfect forwarding.
  template <typename T>
  CordRepExternalImpl(T&& releaser, int)
      : CordRepExternalImpl::CompressedTuple(std::forward<T>(releaser)) {
    this->releaser_invoker = &Release;
  }

  ~CordRepExternalImpl() {
    InvokeReleaser(Rank1{}, std::move(this->template get<0>()),
                   absl::string_view(base, length));
  }

  // Type-erased deleter installed in `releaser_invoker`; runs the releaser
  // (via the destructor above) and frees the combined allocation.
  static void Release(CordRepExternal* rep) {
    delete static_cast<CordRepExternalImpl*>(rep);
  }
};

inline CordRepSubstring* CordRepSubstring::Create(CordRep* child, size_t pos,
                                                  size_t n) {
  assert(child != nullptr);
  assert(n > 0);
  assert(n < child->length);
  assert(pos < child->length);
  assert(n <= child->length - pos);

  // Move to strategical places inside the Cord logic and make this an assert.
  if (ABSL_PREDICT_FALSE(!(child->IsExternal() || child->IsFlat()))) {
    LogFatalNodeType(child);
  }

  CordRepSubstring* rep = new CordRepSubstring();
  rep->length = n;
  rep->tag = SUBSTRING;
  rep->start = pos;
  rep->child = child;
  return rep;
}

inline CordRep* CordRepSubstring::Substring(CordRep* rep, size_t pos,
                                            size_t n) {
  assert(rep != nullptr);
  assert(n != 0);
  assert(pos < rep->length);
  assert(n <= rep->length - pos);
  // Taking the full range is a plain Ref: no new node needed.
  if (n == rep->length) return CordRep::Ref(rep);
  // Flatten substring-of-substring: point directly at the underlying
  // flat/external node with an adjusted offset.
  if (rep->IsSubstring()) {
    pos += rep->substring()->start;
    rep = rep->substring()->child;
  }
  CordRepSubstring* substr = new CordRepSubstring();
  substr->length = n;
  substr->tag = SUBSTRING;
  substr->start = pos;
  substr->child = CordRep::Ref(rep);
  return substr;
}

inline void CordRepExternal::Delete(CordRep* rep) {
  assert(rep != nullptr && rep->IsExternal());
  auto* rep_external = static_cast<CordRepExternal*>(rep);
  assert(rep_external->releaser_invoker != nullptr);
  rep_external->releaser_invoker(rep_external);
}

// Provides constant-initialized CordRepExternal storage for string constants;
// `Str::value` must be a constexpr absl::string_view.
template <typename Str>
struct ConstInitExternalStorage {
  ABSL_CONST_INIT static CordRepExternal value;
};

template <typename Str>
ABSL_CONST_INIT CordRepExternal
    ConstInitExternalStorage<Str>::value(Str::value);

enum {
  kMaxInline = 15,
};

// Returns data[pos], or '\0' if `pos` is out of range. Used to constexpr-fill
// the inline representation from a (possibly shorter) string_view.
constexpr char GetOrNull(absl::string_view data, size_t pos) {
  return pos < data.size() ? data[pos] : '\0';
}

// We store cordz_info as 64 bit pointer value in little endian format. This
// guarantees that the least significant byte of cordz_info matches the first
// byte of the inline data representation in `data`, which holds the inlined
// size or the 'is_tree' bit.
using cordz_info_t = int64_t;

// Assert that the `cordz_info` pointer value perfectly overlaps the last half
// of `data` and can hold a pointer value.
static_assert(sizeof(cordz_info_t) * 2 == kMaxInline + 1, "");
static_assert(sizeof(cordz_info_t) >= sizeof(intptr_t), "");

// LittleEndianByte() creates a little endian representation of 'value', i.e.:
// a little endian value where the first byte in the host's representation
// holds 'value`, with all other bytes being 0.
static constexpr cordz_info_t LittleEndianByte(unsigned char value) {
#if defined(ABSL_IS_BIG_ENDIAN)
  return static_cast<cordz_info_t>(value) << ((sizeof(cordz_info_t) - 1) * 8);
#else
  return value;
#endif
}

class InlineData {
 public:
  // DefaultInitType forces the use of the default initialization constructor.
  enum DefaultInitType { kDefaultInit };

  // kNullCordzInfo holds the little endian representation of intptr_t(1)
  // This is the 'null' / initial value of 'cordz_info'. The null value
  // is specifically big endian 1 as with 64-bit pointers, the last
  // byte of cordz_info overlaps with the last byte holding the tag.
  static constexpr cordz_info_t kNullCordzInfo = LittleEndianByte(1);

  // kTagOffset contains the offset of the control byte / tag. This constant is
  // intended mostly for debugging purposes: do not remove this constant as it
  // is actively inspected and used by gdb pretty printing code.
  static constexpr size_t kTagOffset = 0;

  // Implement `~InlineData()` conditionally: we only need this destructor to
  // unpoison poisoned instances under *SAN, and it will only compile correctly
  // if the current compiler supports `absl::is_constant_evaluated()`.
#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER
  ~InlineData() noexcept { unpoison(); }
#endif

  constexpr InlineData() noexcept { poison_this(); }

  explicit InlineData(DefaultInitType) noexcept : rep_(kDefaultInit) {
    poison_this();
  }

  explicit InlineData(CordRep* rep) noexcept : rep_(rep) {
    ABSL_ASSERT(rep != nullptr);
  }

  // Explicit constexpr constructor to create a constexpr InlineData
  // value. Creates an inlined SSO value if `rep` is null, otherwise
  // creates a tree instance value.
  constexpr InlineData(absl::string_view sv, CordRep* rep) noexcept
      : rep_(rep ? Rep(rep) : Rep(sv)) {
    poison();
  }

  constexpr InlineData(const InlineData& rhs) noexcept;
  InlineData& operator=(const InlineData& rhs) noexcept;
  friend void swap(InlineData& lhs, InlineData& rhs) noexcept;

  friend bool operator==(const InlineData& lhs, const InlineData& rhs) {
#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER
    // Compare unpoisoned copies so memcmp never reads poisoned bytes.
    const Rep l = lhs.rep_.SanitizerSafeCopy();
    const Rep r = rhs.rep_.SanitizerSafeCopy();
    return memcmp(&l, &r, sizeof(l)) == 0;
#else
    return memcmp(&lhs, &rhs, sizeof(lhs)) == 0;
#endif
  }
  friend bool operator!=(const InlineData& lhs, const InlineData& rhs) {
    return !operator==(lhs, rhs);
  }

  // Poisons the unused inlined SSO data if the current instance
  // is inlined, else un-poisons the entire instance.
  constexpr void poison();

  // Un-poisons this instance.
  constexpr void unpoison();

  // Poisons the current instance. This is used on default initialization.
  constexpr void poison_this();

  // Returns true if the current instance is empty.
  // The 'empty value' is an inlined data value of zero length.
  bool is_empty() const { return rep_.tag() == 0; }

  // Returns true if the current instance holds a tree value.
  bool is_tree() const { return (rep_.tag() & 1) != 0; }

  // Returns true if the current instance holds a cordz_info value.
  // Requires the current instance to hold a tree value.
  bool is_profiled() const {
    assert(is_tree());
    return rep_.cordz_info() != kNullCordzInfo;
  }

  // Returns true if either of the provided instances hold a cordz_info value.
  // This method is more efficient than the equivalent `data1.is_profiled() ||
  // data2.is_profiled()`. Requires both arguments to hold a tree.
  static bool is_either_profiled(const InlineData& data1,
                                 const InlineData& data2) {
    assert(data1.is_tree() && data2.is_tree());
    return (data1.rep_.cordz_info() | data2.rep_.cordz_info()) !=
           kNullCordzInfo;
  }

  // Returns the cordz_info sampling instance for this instance, or nullptr
  // if the current instance is not sampled and does not have CordzInfo data.
  // Requires the current instance to hold a tree value.
  CordzInfo* cordz_info() const {
    assert(is_tree());
    intptr_t info = static_cast<intptr_t>(absl::little_endian::ToHost64(
        static_cast<uint64_t>(rep_.cordz_info())));
    // The low bit is the tag bit stolen by set_cordz_info(); strip it off.
    assert(info & 1);
    return reinterpret_cast<CordzInfo*>(info - 1);
  }

  // Sets the current cordz_info sampling instance for this instance, or nullptr
  // if the current instance is not sampled and does not have CordzInfo data.
  // Requires the current instance to hold a tree value.
  void set_cordz_info(CordzInfo* cordz_info) {
    assert(is_tree());
    uintptr_t info = reinterpret_cast<uintptr_t>(cordz_info) | 1;
    rep_.set_cordz_info(
        static_cast<cordz_info_t>(absl::little_endian::FromHost64(info)));
  }

  // Resets the current cordz_info to null / empty.
  void clear_cordz_info() {
    assert(is_tree());
    rep_.set_cordz_info(kNullCordzInfo);
  }

  // Returns a read only pointer to the character data inside this instance.
  // Requires the current instance to hold inline data.
  const char* as_chars() const {
    assert(!is_tree());
    return rep_.as_chars();
  }

  // Returns a mutable pointer to the character data inside this instance.
  // Should be used for 'write only' operations setting an inlined value.
  // Applications can set the value of inlined data either before or after
  // setting the inlined size, i.e., both of the below are valid:
  //
  //   // Set inlined data and inline size
  //   memcpy(data_.as_chars(), data, size);
  //   data_.set_inline_size(size);
  //
  //   // Set inlined size and inline data
  //   data_.set_inline_size(size);
  //   memcpy(data_.as_chars(), data, size);
  //
  // It's an error to read from the returned pointer without a preceding write
  // if the current instance does not hold inline data, i.e.: is_tree() == true.
  char* as_chars() { return rep_.as_chars(); }

  // Returns the tree value of this value.
  // Requires the current instance to hold a tree value.
  CordRep* as_tree() const {
    assert(is_tree());
    return rep_.tree();
  }

  void set_inline_data(const char* data, size_t n) {
    ABSL_ASSERT(n <= kMaxInline);
    unpoison();
    rep_.set_tag(static_cast<int8_t>(n << 1));
    SmallMemmove<true>(rep_.as_chars(), data, n);
    poison();
  }

  void CopyInlineToString(absl::Nonnull<std::string*> dst) const {
    assert(!is_tree());
    // As Cord can store only 15 bytes it is smaller than std::string's
    // small string optimization buffer size. Therefore we will always trigger
    // the fast assign short path.
    //
    // Copying with a size equal to the maximum allows more efficient, wider
    // stores to be used and no branching.
    dst->assign(rep_.SanitizerSafeCopy().as_chars(), kMaxInline);
    // After the copy we then change the size and put in a 0 byte.
    dst->erase(inline_size());
  }

  void copy_max_inline_to(char* dst) const {
    assert(!is_tree());
    memcpy(dst, rep_.SanitizerSafeCopy().as_chars(), kMaxInline);
  }

  // Initialize this instance to holding the tree value `rep`,
  // initializing the cordz_info to null, i.e.: 'not profiled'.
  void make_tree(CordRep* rep) {
    unpoison();
    rep_.make_tree(rep);
  }

  // Set the tree value of this instance to 'rep`.
  // Requires the current instance to already hold a tree value.
  // Does not affect the value of cordz_info.
  void set_tree(CordRep* rep) {
    assert(is_tree());
    rep_.set_tree(rep);
  }

  // Returns the size of the inlined character data inside this instance.
  // Requires the current instance to hold inline data.
  size_t inline_size() const { return rep_.inline_size(); }

  // Sets the size of the inlined character data inside this instance.
  // Requires `size` to be <= kMaxInline.
  // See the documentation on 'as_chars()' for more information and examples.
  void set_inline_size(size_t size) {
    unpoison();
    rep_.set_inline_size(size);
    poison();
  }

  // Compares 'this' inlined data  with rhs. The comparison is a straightforward
  // lexicographic comparison. `Compare()` returns values as follows:
  //
  //   -1  'this' InlineData instance is smaller
  //    0  the InlineData instances are equal
  //    1  'this' InlineData instance larger
  int Compare(const InlineData& rhs) const {
    return Compare(rep_.SanitizerSafeCopy(), rhs.rep_.SanitizerSafeCopy());
  }

 private:
  struct Rep {
    // See cordz_info_t for forced alignment and size of `cordz_info` details.
    struct AsTree {
      explicit constexpr AsTree(absl::cord_internal::CordRep* tree)
          : rep(tree) {}
      cordz_info_t cordz_info = kNullCordzInfo;
      absl::cord_internal::CordRep* rep;
    };

    explicit Rep(DefaultInitType) {}
    constexpr Rep() : data{0} {}
    constexpr Rep(const Rep&) = default;
    constexpr Rep& operator=(const Rep&) = default;

    explicit constexpr Rep(CordRep* rep) : as_tree(rep) {}

    // Constexpr inline-value constructor: byte 0 holds `size << 1` (tag bit 0
    // clear == inline), bytes 1..15 hold the (zero-padded) character data.
    explicit constexpr Rep(absl::string_view chars)
        : data{static_cast<char>((chars.size() << 1)),
               GetOrNull(chars, 0),
               GetOrNull(chars, 1),
               GetOrNull(chars, 2),
               GetOrNull(chars, 3),
               GetOrNull(chars, 4),
               GetOrNull(chars, 5),
               GetOrNull(chars, 6),
               GetOrNull(chars, 7),
               GetOrNull(chars, 8),
               GetOrNull(chars, 9),
               GetOrNull(chars, 10),
               GetOrNull(chars, 11),
               GetOrNull(chars, 12),
               GetOrNull(chars, 13),
               GetOrNull(chars, 14)} {}

#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER
    // Break compiler optimization for cases when value is allocated on the
    // stack. Compiler assumes that the the variable is fully accessible
    // regardless of our poisoning.
    // Missing report: https://github.com/llvm/llvm-project/issues/100640
    const Rep* self() const {
      const Rep* volatile ptr = this;
      return ptr;
    }
    Rep* self() {
      Rep* volatile ptr = this;
      return ptr;
    }
#else
    constexpr const Rep* self() const { return this; }
    constexpr Rep* self() { return this; }
#endif

    // Disable sanitizer as we must always be able to read `tag`.
    ABSL_CORD_INTERNAL_NO_SANITIZE
    int8_t tag() const { return reinterpret_cast<const int8_t*>(this)[0]; }
    void set_tag(int8_t rhs) { reinterpret_cast<int8_t*>(self())[0] = rhs; }

    char* as_chars() { return self()->data + 1; }
    const char* as_chars() const { return self()->data + 1; }

    bool is_tree() const { return (self()->tag() & 1) != 0; }

    size_t inline_size() const {
      ABSL_ASSERT(!self()->is_tree());
      return static_cast<size_t>(self()->tag()) >> 1;
    }

    void set_inline_size(size_t size) {
      ABSL_ASSERT(size <= kMaxInline);
      self()->set_tag(static_cast<int8_t>(size << 1));
    }

    CordRep* tree() const { return self()->as_tree.rep; }
    void set_tree(CordRep* rhs) { self()->as_tree.rep = rhs; }

    cordz_info_t cordz_info() const { return self()->as_tree.cordz_info; }
    void set_cordz_info(cordz_info_t rhs) { self()->as_tree.cordz_info = rhs; }

    void make_tree(CordRep* tree) {
      self()->as_tree.rep = tree;
      self()->as_tree.cordz_info = kNullCordzInfo;
    }

#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER
    // Returns a copy that is safe to read in full, even if part of `data`
    // is currently poisoned (only the valid inline bytes are copied).
    constexpr Rep SanitizerSafeCopy() const {
      if (!absl::is_constant_evaluated()) {
        Rep res;
        if (is_tree()) {
          res = *this;
        } else {
          res.set_tag(tag());
          memcpy(res.as_chars(), as_chars(), inline_size());
        }
        return res;
      } else {
        return *this;
      }
    }
#else
    constexpr const Rep& SanitizerSafeCopy() const { return *this; }
#endif

    // If the data has length <= kMaxInline, we store it in `data`, and
    // store the size in the first char of `data` shifted left + 1.
    // Else we store it in a tree and store a pointer to that tree in
    // `as_tree.rep` with a tagged pointer to make `tag() & 1` non zero.
    union {
      char data[kMaxInline + 1];
      AsTree as_tree;
    };

    // TODO(b/145829486): see swap(InlineData, InlineData) for more info.
    inline void SwapValue(Rep rhs, Rep& refrhs) {
      memcpy(&refrhs, this, sizeof(*this));
      memcpy(this, &rhs, sizeof(*this));
    }
  };

  // Private implementation of `Compare()`
  static inline int Compare(const Rep& lhs, const Rep& rhs) {
    // Compare the 15 data bytes as two overlapping 8-byte words
    // (bytes [1,9) and [8,16) of the rep); ties fall back to the sizes.
    uint64_t x, y;
    memcpy(&x, lhs.as_chars(), sizeof(x));
    memcpy(&y, rhs.as_chars(), sizeof(y));
    if (x == y) {
      memcpy(&x, lhs.as_chars() + 7, sizeof(x));
      memcpy(&y, rhs.as_chars() + 7, sizeof(y));
      if (x == y) {
        if (lhs.inline_size() == rhs.inline_size()) return 0;
        return lhs.inline_size() < rhs.inline_size() ? -1 : 1;
      }
    }
    // Big-endian conversion makes integer order match lexicographic order.
    x = absl::big_endian::FromHost64(x);
    y = absl::big_endian::FromHost64(y);
    return x < y ? -1 : 1;
  }

  Rep rep_;
};

static_assert(sizeof(InlineData) == kMaxInline + 1, "");

#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER

constexpr InlineData::InlineData(const InlineData& rhs) noexcept
    : rep_(rhs.rep_.SanitizerSafeCopy()) {
  poison();
}

inline InlineData& InlineData::operator=(const InlineData& rhs) noexcept {
  unpoison();
  rep_ = rhs.rep_.SanitizerSafeCopy();
  poison();
  return *this;
}

constexpr void InlineData::poison_this() {
  if (!absl::is_constant_evaluated()) {
    container_internal::SanitizerPoisonObject(this);
  }
}

constexpr void InlineData::unpoison() {
  if (!absl::is_constant_evaluated()) {
    container_internal::SanitizerUnpoisonObject(this);
  }
}

constexpr void InlineData::poison() {
  if (!absl::is_constant_evaluated()) {
    if (is_tree()) {
      container_internal::SanitizerUnpoisonObject(this);
    } else if (const size_t size = inline_size()) {
      if (size < kMaxInline) {
        const char* end = rep_.as_chars() + size;
        container_internal::SanitizerPoisonMemoryRegion(end, kMaxInline - size);
      }
    } else {
      container_internal::SanitizerPoisonObject(this);
    }
  }
}

#else  // ABSL_INTERNAL_CORD_HAVE_SANITIZER

constexpr InlineData::InlineData(const InlineData&) noexcept = default;
inline InlineData& InlineData::operator=(const InlineData&) noexcept = default;

constexpr void InlineData::poison_this() {}
constexpr void InlineData::unpoison() {}
constexpr void InlineData::poison() {}

#endif  // ABSL_INTERNAL_CORD_HAVE_SANITIZER

inline CordRepSubstring* CordRep::substring() {
  assert(IsSubstring());
  return static_cast<CordRepSubstring*>(this);
}

inline const CordRepSubstring* CordRep::substring() const {
  assert(IsSubstring());
  return static_cast<const CordRepSubstring*>(this);
}

inline CordRepExternal* CordRep::external() {
  assert(IsExternal());
  return static_cast<CordRepExternal*>(this);
}

inline const CordRepExternal* CordRep::external() const {
  assert(IsExternal());
  return static_cast<const CordRepExternal*>(this);
}

inline CordRep* CordRep::Ref(CordRep* rep) {
  // ABSL_ASSUME is a workaround for
  // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105585
  ABSL_ASSUME(rep != nullptr);
  rep->refcount.Increment();
  return rep;
}

inline void CordRep::Unref(CordRep* rep) {
  assert(rep != nullptr);
  // Expect refcount to be 0. Avoiding the cost of an atomic decrement should
  // typically outweigh the cost of an extra branch checking for ref == 1.
  if (ABSL_PREDICT_FALSE(!rep->refcount.DecrementExpectHighRefcount())) {
    Destroy(rep);
  }
}

inline void swap(InlineData& lhs, InlineData& rhs) noexcept {
  lhs.unpoison();
  rhs.unpoison();
  // TODO(b/145829486): `std::swap(lhs.rep_, rhs.rep_)` results in bad codegen
  // on clang, spilling the temporary swap value on the stack. Since `Rep` is
  // trivial, we can make clang DTRT by calling a hand-rolled `SwapValue` where
  // we pass `rhs` both by value (register allocated) and by reference. The IR
  // then folds and inlines correctly into an optimized swap without spill.
  lhs.rep_.SwapValue(rhs.rep_, rhs.rep_);
  rhs.poison();
  lhs.poison();
}

}  // namespace cord_internal

ABSL_NAMESPACE_END
}  // namespace absl
#endif  // ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_