tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

cord_internal.h (33441B)


      1 // Copyright 2021 The Abseil Authors.
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //      https://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
     15 #ifndef ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_
     16 #define ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_
     17 
     18 #include <atomic>
     19 #include <cassert>
     20 #include <cstddef>
     21 #include <cstdint>
     22 #include <cstring>
     23 #include <string>
     24 
     25 #include "absl/base/attributes.h"
     26 #include "absl/base/config.h"
     27 #include "absl/base/internal/endian.h"
     28 #include "absl/base/macros.h"
     29 #include "absl/base/nullability.h"
     30 #include "absl/base/optimization.h"
     31 #include "absl/container/internal/compressed_tuple.h"
     32 #include "absl/container/internal/container_memory.h"
     33 #include "absl/strings/string_view.h"
     34 
     35 // We can only add poisoning if we can detect consteval executions.
     36 #if defined(ABSL_HAVE_CONSTANT_EVALUATED) && \
     37    (defined(ABSL_HAVE_ADDRESS_SANITIZER) || \
     38     defined(ABSL_HAVE_MEMORY_SANITIZER))
     39 #define ABSL_INTERNAL_CORD_HAVE_SANITIZER 1
     40 #endif
     41 
     42 #define ABSL_CORD_INTERNAL_NO_SANITIZE \
     43  ABSL_ATTRIBUTE_NO_SANITIZE_ADDRESS ABSL_ATTRIBUTE_NO_SANITIZE_MEMORY
     44 
     45 namespace absl {
     46 ABSL_NAMESPACE_BEGIN
     47 namespace cord_internal {
     48 
     49 // The overhead of a vtable is too much for Cord, so we roll our own subclasses
     50 // using only a single byte to differentiate classes from each other - the "tag"
     51 // byte.  Define the subclasses first so we can provide downcasting helper
     52 // functions in the base class.
     53 struct CordRep;
     54 struct CordRepConcat;
     55 struct CordRepExternal;
     56 struct CordRepFlat;
     57 struct CordRepSubstring;
     58 struct CordRepCrc;
     59 class CordRepBtree;
     60 
     61 class CordzInfo;
     62 
// Default feature enable states for cord ring buffers
enum CordFeatureDefaults { kCordShallowSubcordsDefault = false };

// Process-wide runtime toggle controlling whether subcords may share
// (reference) the parent tree instead of copying data.
extern std::atomic<bool> shallow_subcords_enabled;

// Enables or disables shallow subcords globally. Relaxed ordering is
// sufficient: this is an advisory tuning knob, not a synchronization point.
inline void enable_shallow_subcords(bool enable) {
  shallow_subcords_enabled.store(enable, std::memory_order_relaxed);
}
     71 
// Tuning constants shared by the Cord implementation.
enum Constants {
  // The inlined size to use with absl::InlinedVector.
  //
  // Note: The InlinedVectors in this file (and in cord.h) do not need to use
  // the same value for their inlined size. The fact that they do is historical.
  // It may be desirable for each to use a different inlined size optimized for
  // that InlinedVector's usage.
  //
  // TODO(jgm): Benchmark to see if there's a more optimal value than 47 for
  // the inlined vector size (47 exists for backward compatibility).
  kInlinedVectorSize = 47,

  // Prefer copying blocks of at most this size, otherwise reference count.
  kMaxBytesToCopy = 511
};

// Emits a fatal error "Unexpected node type: xyz" and aborts the program.
// Last-resort diagnostic used when a CordRep carries an unexpected tag.
[[noreturn]] void LogFatalNodeType(CordRep* rep);
     90 
// Fast implementation of memmove for up to 15 bytes. This implementation is
// safe for overlapping regions. If nullify_tail is true, the destination is
// padded with '\0' up to 15 bytes.
//
// Overlap safety comes from loading all source bytes into local word-sized
// buffers *before* any byte of `dst` is written, so no branch on copy
// direction is needed.
template <bool nullify_tail = false>
inline void SmallMemmove(char* dst, const char* src, size_t n) {
  if (n >= 8) {
    assert(n <= 15);
    // Two (possibly overlapping) 8-byte words cover [0, n) for n in [8, 15].
    uint64_t buf1;
    uint64_t buf2;
    memcpy(&buf1, src, 8);
    memcpy(&buf2, src + n - 8, 8);
    if (nullify_tail) {
      // Zero bytes [7, 15); the stores below rewrite [0, n), so the net
      // effect is '\0' padding over [n, 15).
      memset(dst + 7, 0, 8);
    }
    // GCC 12 has a false-positive -Wstringop-overflow warning here.
#if ABSL_INTERNAL_HAVE_MIN_GNUC_VERSION(12, 0)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wstringop-overflow"
#endif
    memcpy(dst, &buf1, 8);
    memcpy(dst + n - 8, &buf2, 8);
#if ABSL_INTERNAL_HAVE_MIN_GNUC_VERSION(12, 0)
#pragma GCC diagnostic pop
#endif
  } else if (n >= 4) {
    // Same two-word technique with 4-byte words for n in [4, 7].
    uint32_t buf1;
    uint32_t buf2;
    memcpy(&buf1, src, 4);
    memcpy(&buf2, src + n - 4, 4);
    if (nullify_tail) {
      // Together these zero bytes [4, 15); the copies below rewrite [0, n).
      memset(dst + 4, 0, 4);
      memset(dst + 7, 0, 8);
    }
    memcpy(dst, &buf1, 4);
    memcpy(dst + n - 4, &buf2, 4);
  } else {
    // n in [0, 3]: copy first, middle and last byte individually (these
    // indices may alias each other for n < 3, which is harmless).
    if (n != 0) {
      dst[0] = src[0];
      dst[n / 2] = src[n / 2];
      dst[n - 1] = src[n - 1];
    }
    if (nullify_tail) {
      // Together these two stores zero bytes [n, 15).
      memset(dst + 7, 0, 8);
      memset(dst + n, 0, 8);
    }
  }
}
    138 
    139 // Compact class for tracking the reference count and state flags for CordRep
    140 // instances.  Data is stored in an atomic int32_t for compactness and speed.
    141 class RefcountAndFlags {
    142 public:
    143  constexpr RefcountAndFlags() : count_{kRefIncrement} {}
    144  struct Immortal {};
    145  explicit constexpr RefcountAndFlags(Immortal) : count_(kImmortalFlag) {}
    146 
    147  // Increments the reference count. Imposes no memory ordering.
    148  inline void Increment() {
    149    count_.fetch_add(kRefIncrement, std::memory_order_relaxed);
    150  }
    151 
    152  // Asserts that the current refcount is greater than 0. If the refcount is
    153  // greater than 1, decrements the reference count.
    154  //
    155  // Returns false if there are no references outstanding; true otherwise.
    156  // Inserts barriers to ensure that state written before this method returns
    157  // false will be visible to a thread that just observed this method returning
    158  // false.  Always returns false when the immortal bit is set.
    159  inline bool Decrement() {
    160    int32_t refcount = count_.load(std::memory_order_acquire);
    161    assert(refcount > 0 || refcount & kImmortalFlag);
    162    return refcount != kRefIncrement &&
    163           count_.fetch_sub(kRefIncrement, std::memory_order_acq_rel) !=
    164               kRefIncrement;
    165  }
    166 
    167  // Same as Decrement but expect that refcount is greater than 1.
    168  inline bool DecrementExpectHighRefcount() {
    169    int32_t refcount =
    170        count_.fetch_sub(kRefIncrement, std::memory_order_acq_rel);
    171    assert(refcount > 0 || refcount & kImmortalFlag);
    172    return refcount != kRefIncrement;
    173  }
    174 
    175  // Returns the current reference count using acquire semantics.
    176  inline size_t Get() const {
    177    return static_cast<size_t>(count_.load(std::memory_order_acquire) >>
    178                               kNumFlags);
    179  }
    180 
    181  // Returns whether the atomic integer is 1.
    182  // If the reference count is used in the conventional way, a
    183  // reference count of 1 implies that the current thread owns the
    184  // reference and no other thread shares it.
    185  // This call performs the test for a reference count of one, and
    186  // performs the memory barrier needed for the owning thread
    187  // to act on the object, knowing that it has exclusive access to the
    188  // object. Always returns false when the immortal bit is set.
    189  inline bool IsOne() {
    190    return count_.load(std::memory_order_acquire) == kRefIncrement;
    191  }
    192 
    193  bool IsImmortal() const {
    194    return (count_.load(std::memory_order_relaxed) & kImmortalFlag) != 0;
    195  }
    196 
    197 private:
    198  // We reserve the bottom bit for flag.
    199  // kImmortalBit indicates that this entity should never be collected; it is
    200  // used for the StringConstant constructor to avoid collecting immutable
    201  // constant cords.
    202  enum Flags {
    203    kNumFlags = 1,
    204 
    205    kImmortalFlag = 0x1,
    206    kRefIncrement = (1 << kNumFlags),
    207  };
    208 
    209  std::atomic<int32_t> count_;
    210 };
    211 
// Various representations that we allow.
// A CordRep's `tag` field holds one of these values, or an encoded flat
// size when tag >= FLAT (see CordRep::tag).
enum CordRepKind {
  UNUSED_0 = 0,
  SUBSTRING = 1,
  CRC = 2,
  BTREE = 3,
  UNUSED_4 = 4,
  EXTERNAL = 5,

  // We have different tags for different sized flat arrays,
  // starting with FLAT, and limited to MAX_FLAT_TAG. The below values map to an
  // allocated range of 32 bytes to 256 KB. The current granularity is:
  // - 8 byte granularity for flat sizes in [32 - 512]
  // - 64 byte granularity for flat sizes in (512 - 8KiB]
  // - 4KiB byte granularity for flat sizes in (8KiB, 256 KiB]
  // If a new tag is needed in the future, then 'FLAT' and 'MAX_FLAT_TAG' should
  // be adjusted as well as the Tag <---> Size mapping logic so that FLAT still
  // represents the minimum flat allocation size. (32 bytes as of now).
  FLAT = 6,
  MAX_FLAT_TAG = 248
};

// There are various locations where we want to check if some rep is a 'plain'
// data edge, i.e. an external or flat rep. By having FLAT == EXTERNAL + 1, we
// can perform this check in a single branch as 'tag >= EXTERNAL'
// Note that we can leave this optimization to the compiler. The compiler will
// DTRT when it sees a condition like `tag == EXTERNAL || tag >= FLAT`.
static_assert(FLAT == EXTERNAL + 1, "EXTERNAL and FLAT not consecutive");
    240 
// Base class for all Cord tree nodes. Subclasses are distinguished by the
// `tag` byte rather than a vtable (see the file-level comment above).
struct CordRep {
  // Result from an `extract edge` operation. Contains the (possibly changed)
  // tree node as well as the extracted edge, or {tree, nullptr} if no edge
  // could be extracted.
  // On success, the returned `tree` value is null if `extracted` was the only
  // data edge inside the tree, a data edge if there were only two data edges in
  // the tree, or the (possibly new / smaller) remaining tree with the extracted
  // data edge removed.
  struct ExtractResult {
    CordRep* tree;
    CordRep* extracted;
  };

  CordRep() = default;
  // Constexpr constructor used for immortal (never collected) reps; note the
  // tag is fixed to EXTERNAL (see CordRepExternal's constexpr constructor).
  constexpr CordRep(RefcountAndFlags::Immortal immortal, size_t l)
      : length(l), refcount(immortal), tag(EXTERNAL), storage{} {}

  // The following three fields have to be less than 32 bytes since
  // that is the smallest supported flat node size. Some code optimizations rely
  // on the specific layout of these fields. Notably: the non-trivial field
  // `refcount` being preceded by `length`, and being tailed by POD data
  // members only.
  // LINT.IfChange
  size_t length;
  RefcountAndFlags refcount;
  // If tag < FLAT, it represents CordRepKind and indicates the type of node.
  // Otherwise, the node type is CordRepFlat and the tag is the encoded size.
  uint8_t tag;

  // `storage` provides two main purposes:
  // - the starting point for FlatCordRep.Data() [flexible-array-member]
  // - 3 bytes of additional storage for use by derived classes.
  // The latter is used by CordrepConcat and CordRepBtree. CordRepConcat stores
  // a 'depth' value in storage[0], and the (future) CordRepBtree class stores
  // `height`, `begin` and `end` in the 3 entries. Otherwise we would need to
  // allocate room for these in the derived class, as not all compilers reuse
  // padding space from the base class (clang and gcc do, MSVC does not, etc)
  uint8_t storage[3];
  // LINT.ThenChange(cord_rep_btree.h:copy_raw)

  // Returns true if this instance's tag matches the requested type.
  constexpr bool IsSubstring() const { return tag == SUBSTRING; }
  constexpr bool IsCrc() const { return tag == CRC; }
  constexpr bool IsExternal() const { return tag == EXTERNAL; }
  constexpr bool IsFlat() const { return tag >= FLAT; }
  constexpr bool IsBtree() const { return tag == BTREE; }

  // Downcast helpers to the concrete subclasses (defined out of line).
  // Callers are expected to check the corresponding Is*() predicate first;
  // no runtime type check is implied by the cast itself.
  inline CordRepSubstring* substring();
  inline const CordRepSubstring* substring() const;
  inline CordRepCrc* crc();
  inline const CordRepCrc* crc() const;
  inline CordRepExternal* external();
  inline const CordRepExternal* external() const;
  inline CordRepFlat* flat();
  inline const CordRepFlat* flat() const;
  inline CordRepBtree* btree();
  inline const CordRepBtree* btree() const;

  // --------------------------------------------------------------------
  // Memory management

  // Destroys the provided `rep`.
  static void Destroy(CordRep* rep);

  // Increments the reference count of `rep`.
  // Requires `rep` to be a non-null pointer value.
  static inline CordRep* Ref(CordRep* rep);

  // Decrements the reference count of `rep`. Destroys rep if count reaches
  // zero. Requires `rep` to be a non-null pointer value.
  static inline void Unref(CordRep* rep);
};
    313 
// A view over a contiguous sub range [start, start + length) of `child`.
struct CordRepSubstring : public CordRep {
  size_t start;    // Starting offset of substring in child
  CordRep* child;  // Underlying rep; this substring holds a reference to it

  // Creates a substring on `child`, adopting a reference on `child`.
  // Requires `child` to be either a flat or external node, and `pos` and `n` to
  // form a non-empty partial sub range of `'child`, i.e.:
  // `n > 0 && n < length && n + pos <= length`
  static inline CordRepSubstring* Create(CordRep* child, size_t pos, size_t n);

  // Creates a substring of `rep`. Does not adopt a reference on `rep`.
  // Requires `IsDataEdge(rep) && n > 0 && pos + n <= rep->length`.
  // If `n == rep->length` then this method returns `CordRep::Ref(rep)`
  // If `rep` is a substring of a flat or external node, then this method will
  // return a new substring of that flat or external node with `pos` adjusted
  // with the original `start` position.
  static inline CordRep* Substring(CordRep* rep, size_t pos, size_t n);
};
    332 
// Type for function pointer that will invoke the releaser function and also
// delete the `CordRepExternalImpl` corresponding to the passed in
// `CordRepExternal`.
using ExternalReleaserInvoker = void (*)(CordRepExternal*);

// External CordReps are allocated together with a type erased releaser. The
// releaser is stored in the memory directly following the CordRepExternal.
struct CordRepExternal : public CordRep {
  CordRepExternal() = default;
  // Constexpr constructor for immortal external reps backed by constant data
  // (see ConstInitExternalStorage below); such reps have no releaser.
  explicit constexpr CordRepExternal(absl::string_view str)
      : CordRep(RefcountAndFlags::Immortal{}, str.size()),
        base(str.data()),
        releaser_invoker(nullptr) {}

  // Externally owned character data; not owned (or freed) by this rep.
  const char* base;
  // Pointer to function that knows how to call and destroy the releaser.
  ExternalReleaserInvoker releaser_invoker;

  // Deletes (releases) the external rep.
  // Requires rep != nullptr and rep->IsExternal()
  static void Delete(CordRep* rep);
};
    355 
// Use go/ranked-overloads for dispatching.
struct Rank0 {};
struct Rank1 : Rank0 {};

// Invokes the releaser, passing the released data. This higher-ranked
// overload is selected when the releaser is invocable with a string_view.
template <typename Releaser,
          typename = ::std::invoke_result_t<Releaser, absl::string_view>>
void InvokeReleaser(Rank1, Releaser&& releaser, absl::string_view data) {
  ::std::invoke(std::forward<Releaser>(releaser), data);
}

// Fallback (Rank0) overload for releasers invocable without arguments.
template <typename Releaser, typename = ::std::invoke_result_t<Releaser>>
void InvokeReleaser(Rank0, Releaser&& releaser, absl::string_view) {
  ::std::invoke(std::forward<Releaser>(releaser));
}
    370 
// We use CompressedTuple so that we can benefit from EBCO.
template <typename Releaser>
struct CordRepExternalImpl
    : public CordRepExternal,
      public ::absl::container_internal::CompressedTuple<Releaser> {
  // The extra int arg is so that we can avoid interfering with copy/move
  // constructors while still benefitting from perfect forwarding.
  template <typename T>
  CordRepExternalImpl(T&& releaser, int)
      : CordRepExternalImpl::CompressedTuple(std::forward<T>(releaser)) {
    this->releaser_invoker = &Release;
  }

  // Invokes the stored releaser (moved out of the compressed tuple) with the
  // external data before the memory is reclaimed.
  ~CordRepExternalImpl() {
    InvokeReleaser(Rank1{}, std::move(this->template get<0>()),
                   absl::string_view(base, length));
  }

  // Type-erased deleter installed in `releaser_invoker`: downcasts and
  // deletes the full impl object, running the destructor above.
  static void Release(CordRepExternal* rep) {
    delete static_cast<CordRepExternalImpl*>(rep);
  }
};
    393 
    394 inline CordRepSubstring* CordRepSubstring::Create(CordRep* child, size_t pos,
    395                                                  size_t n) {
    396  assert(child != nullptr);
    397  assert(n > 0);
    398  assert(n < child->length);
    399  assert(pos < child->length);
    400  assert(n <= child->length - pos);
    401 
    402  // Move to strategical places inside the Cord logic and make this an assert.
    403  if (ABSL_PREDICT_FALSE(!(child->IsExternal() || child->IsFlat()))) {
    404    LogFatalNodeType(child);
    405  }
    406 
    407  CordRepSubstring* rep = new CordRepSubstring();
    408  rep->length = n;
    409  rep->tag = SUBSTRING;
    410  rep->start = pos;
    411  rep->child = child;
    412  return rep;
    413 }
    414 
    415 inline CordRep* CordRepSubstring::Substring(CordRep* rep, size_t pos,
    416                                            size_t n) {
    417  assert(rep != nullptr);
    418  assert(n != 0);
    419  assert(pos < rep->length);
    420  assert(n <= rep->length - pos);
    421  if (n == rep->length) return CordRep::Ref(rep);
    422  if (rep->IsSubstring()) {
    423    pos += rep->substring()->start;
    424    rep = rep->substring()->child;
    425  }
    426  CordRepSubstring* substr = new CordRepSubstring();
    427  substr->length = n;
    428  substr->tag = SUBSTRING;
    429  substr->start = pos;
    430  substr->child = CordRep::Ref(rep);
    431  return substr;
    432 }
    433 
    434 inline void CordRepExternal::Delete(CordRep* rep) {
    435  assert(rep != nullptr && rep->IsExternal());
    436  auto* rep_external = static_cast<CordRepExternal*>(rep);
    437  assert(rep_external->releaser_invoker != nullptr);
    438  rep_external->releaser_invoker(rep_external);
    439 }
    440 
// Holds a single, constant-initialized immortal external rep for a
// compile-time constant string. `Str` is a tag type exposing a static
// `value` convertible to absl::string_view (see the definition below).
template <typename Str>
struct ConstInitExternalStorage {
  ABSL_CONST_INIT static CordRepExternal value;
};

// Out-of-line definition of the static member; constant-initialized via
// CordRepExternal's constexpr constructor, so the rep is immortal and has
// no releaser.
template <typename Str>
ABSL_CONST_INIT CordRepExternal
    ConstInitExternalStorage<Str>::value(Str::value);
    449 
// Maximum number of characters a Cord can store inline (SSO) without
// allocating a tree.
enum {
  kMaxInline = 15,
};
    453 
    454 constexpr char GetOrNull(absl::string_view data, size_t pos) {
    455  return pos < data.size() ? data[pos] : '\0';
    456 }
    457 
// We store cordz_info as 64 bit pointer value in little endian format. This
// guarantees that the least significant byte of cordz_info matches the first
// byte of the inline data representation in `data`, which holds the inlined
// size or the 'is_tree' bit.
using cordz_info_t = int64_t;

// Assert that the `cordz_info` pointer value perfectly overlaps the last half
// of `data` and can hold a pointer value.
// (kMaxInline + 1 == 16 is the full inline rep size including the tag byte.)
static_assert(sizeof(cordz_info_t) * 2 == kMaxInline + 1, "");
static_assert(sizeof(cordz_info_t) >= sizeof(intptr_t), "");
    468 
// LittleEndianByte() creates a little endian representation of 'value', i.e.:
// a little endian value where the first byte in the host's representation
// holds 'value`, with all other bytes being 0.
static constexpr cordz_info_t LittleEndianByte(unsigned char value) {
#if defined(ABSL_IS_BIG_ENDIAN)
  // On a big endian host the first (lowest-address) byte is the most
  // significant one, so shift `value` into the top byte.
  return static_cast<cordz_info_t>(value) << ((sizeof(cordz_info_t) - 1) * 8);
#else
  // On a little endian host the plain integer already holds `value` in its
  // first byte.
  return value;
#endif
}
    479 
    480 class InlineData {
    481 public:
    482  // DefaultInitType forces the use of the default initialization constructor.
    483  enum DefaultInitType { kDefaultInit };
    484 
    485  // kNullCordzInfo holds the little endian representation of intptr_t(1)
    486  // This is the 'null' / initial value of 'cordz_info'. The null value
    487  // is specifically big endian 1 as with 64-bit pointers, the last
    488  // byte of cordz_info overlaps with the last byte holding the tag.
    489  static constexpr cordz_info_t kNullCordzInfo = LittleEndianByte(1);
    490 
    491  // kTagOffset contains the offset of the control byte / tag. This constant is
    492  // intended mostly for debugging purposes: do not remove this constant as it
    493  // is actively inspected and used by gdb pretty printing code.
    494  static constexpr size_t kTagOffset = 0;
    495 
    496  // Implement `~InlineData()` conditionally: we only need this destructor to
    497  // unpoison poisoned instances under *SAN, and it will only compile correctly
    498  // if the current compiler supports `absl::is_constant_evaluated()`.
    499 #ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER
    500  ~InlineData() noexcept { unpoison(); }
    501 #endif
    502 
    503  constexpr InlineData() noexcept { poison_this(); }
    504 
    505  explicit InlineData(DefaultInitType) noexcept : rep_(kDefaultInit) {
    506    poison_this();
    507  }
    508 
    509  explicit InlineData(CordRep* rep) noexcept : rep_(rep) {
    510    ABSL_ASSERT(rep != nullptr);
    511  }
    512 
    513  // Explicit constexpr constructor to create a constexpr InlineData
    514  // value. Creates an inlined SSO value if `rep` is null, otherwise
    515  // creates a tree instance value.
    516  constexpr InlineData(absl::string_view sv, CordRep* rep) noexcept
    517      : rep_(rep ? Rep(rep) : Rep(sv)) {
    518    poison();
    519  }
    520 
    521  constexpr InlineData(const InlineData& rhs) noexcept;
    522  InlineData& operator=(const InlineData& rhs) noexcept;
    523  friend void swap(InlineData& lhs, InlineData& rhs) noexcept;
    524 
    525  friend bool operator==(const InlineData& lhs, const InlineData& rhs) {
    526 #ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER
    527    const Rep l = lhs.rep_.SanitizerSafeCopy();
    528    const Rep r = rhs.rep_.SanitizerSafeCopy();
    529    return memcmp(&l, &r, sizeof(l)) == 0;
    530 #else
    531    return memcmp(&lhs, &rhs, sizeof(lhs)) == 0;
    532 #endif
    533  }
    534  friend bool operator!=(const InlineData& lhs, const InlineData& rhs) {
    535    return !operator==(lhs, rhs);
    536  }
    537 
    538  // Poisons the unused inlined SSO data if the current instance
    539  // is inlined, else un-poisons the entire instance.
    540  constexpr void poison();
    541 
    542  // Un-poisons this instance.
    543  constexpr void unpoison();
    544 
    545  // Poisons the current instance. This is used on default initialization.
    546  constexpr void poison_this();
    547 
    548  // Returns true if the current instance is empty.
    549  // The 'empty value' is an inlined data value of zero length.
    550  bool is_empty() const { return rep_.tag() == 0; }
    551 
    552  // Returns true if the current instance holds a tree value.
    553  bool is_tree() const { return (rep_.tag() & 1) != 0; }
    554 
    555  // Returns true if the current instance holds a cordz_info value.
    556  // Requires the current instance to hold a tree value.
    557  bool is_profiled() const {
    558    assert(is_tree());
    559    return rep_.cordz_info() != kNullCordzInfo;
    560  }
    561 
    562  // Returns true if either of the provided instances hold a cordz_info value.
    563  // This method is more efficient than the equivalent `data1.is_profiled() ||
    564  // data2.is_profiled()`. Requires both arguments to hold a tree.
    565  static bool is_either_profiled(const InlineData& data1,
    566                                 const InlineData& data2) {
    567    assert(data1.is_tree() && data2.is_tree());
    568    return (data1.rep_.cordz_info() | data2.rep_.cordz_info()) !=
    569           kNullCordzInfo;
    570  }
    571 
    572  // Returns the cordz_info sampling instance for this instance, or nullptr
    573  // if the current instance is not sampled and does not have CordzInfo data.
    574  // Requires the current instance to hold a tree value.
    575  CordzInfo* cordz_info() const {
    576    assert(is_tree());
    577    intptr_t info = static_cast<intptr_t>(absl::little_endian::ToHost64(
    578        static_cast<uint64_t>(rep_.cordz_info())));
    579    assert(info & 1);
    580    return reinterpret_cast<CordzInfo*>(info - 1);
    581  }
    582 
    583  // Sets the current cordz_info sampling instance for this instance, or nullptr
    584  // if the current instance is not sampled and does not have CordzInfo data.
    585  // Requires the current instance to hold a tree value.
    586  void set_cordz_info(CordzInfo* cordz_info) {
    587    assert(is_tree());
    588    uintptr_t info = reinterpret_cast<uintptr_t>(cordz_info) | 1;
    589    rep_.set_cordz_info(
    590        static_cast<cordz_info_t>(absl::little_endian::FromHost64(info)));
    591  }
    592 
    593  // Resets the current cordz_info to null / empty.
    594  void clear_cordz_info() {
    595    assert(is_tree());
    596    rep_.set_cordz_info(kNullCordzInfo);
    597  }
    598 
    599  // Returns a read only pointer to the character data inside this instance.
    600  // Requires the current instance to hold inline data.
    601  const char* as_chars() const {
    602    assert(!is_tree());
    603    return rep_.as_chars();
    604  }
    605 
    606  // Returns a mutable pointer to the character data inside this instance.
    607  // Should be used for 'write only' operations setting an inlined value.
    608  // Applications can set the value of inlined data either before or after
    609  // setting the inlined size, i.e., both of the below are valid:
    610  //
    611  //   // Set inlined data and inline size
    612  //   memcpy(data_.as_chars(), data, size);
    613  //   data_.set_inline_size(size);
    614  //
    615  //   // Set inlined size and inline data
    616  //   data_.set_inline_size(size);
    617  //   memcpy(data_.as_chars(), data, size);
    618  //
    619  // It's an error to read from the returned pointer without a preceding write
    620  // if the current instance does not hold inline data, i.e.: is_tree() == true.
    621  char* as_chars() { return rep_.as_chars(); }
    622 
    623  // Returns the tree value of this value.
    624  // Requires the current instance to hold a tree value.
    625  CordRep* as_tree() const {
    626    assert(is_tree());
    627    return rep_.tree();
    628  }
    629 
    630  void set_inline_data(const char* data, size_t n) {
    631    ABSL_ASSERT(n <= kMaxInline);
    632    unpoison();
    633    rep_.set_tag(static_cast<int8_t>(n << 1));
    634    SmallMemmove<true>(rep_.as_chars(), data, n);
    635    poison();
    636  }
    637 
    638  void CopyInlineToString(absl::Nonnull<std::string*> dst) const {
    639    assert(!is_tree());
    640    // As Cord can store only 15 bytes it is smaller than std::string's
    641    // small string optimization buffer size. Therefore we will always trigger
    642    // the fast assign short path.
    643    //
    644    // Copying with a size equal to the maximum allows more efficient, wider
    645    // stores to be used and no branching.
    646    dst->assign(rep_.SanitizerSafeCopy().as_chars(), kMaxInline);
    647    // After the copy we then change the size and put in a 0 byte.
    648    dst->erase(inline_size());
    649  }
    650 
    651  void copy_max_inline_to(char* dst) const {
    652    assert(!is_tree());
    653    memcpy(dst, rep_.SanitizerSafeCopy().as_chars(), kMaxInline);
    654  }
    655 
    656  // Initialize this instance to holding the tree value `rep`,
    657  // initializing the cordz_info to null, i.e.: 'not profiled'.
    658  void make_tree(CordRep* rep) {
    659    unpoison();
    660    rep_.make_tree(rep);
    661  }
    662 
    663  // Set the tree value of this instance to 'rep`.
    664  // Requires the current instance to already hold a tree value.
    665  // Does not affect the value of cordz_info.
    666  void set_tree(CordRep* rep) {
    667    assert(is_tree());
    668    rep_.set_tree(rep);
    669  }
    670 
    671  // Returns the size of the inlined character data inside this instance.
    672  // Requires the current instance to hold inline data.
    673  size_t inline_size() const { return rep_.inline_size(); }
    674 
    675  // Sets the size of the inlined character data inside this instance.
    676  // Requires `size` to be <= kMaxInline.
    677  // See the documentation on 'as_chars()' for more information and examples.
    678  void set_inline_size(size_t size) {
    679    unpoison();
    680    rep_.set_inline_size(size);
    681    poison();
    682  }
    683 
    684  // Compares 'this' inlined data  with rhs. The comparison is a straightforward
    685  // lexicographic comparison. `Compare()` returns values as follows:
    686  //
    687  //   -1  'this' InlineData instance is smaller
    688  //    0  the InlineData instances are equal
    689  //    1  'this' InlineData instance larger
    690  int Compare(const InlineData& rhs) const {
    691    return Compare(rep_.SanitizerSafeCopy(), rhs.rep_.SanitizerSafeCopy());
    692  }
    693 
 private:
  // Internal storage for InlineData: a (kMaxInline + 1)-byte union that holds
  // either up to kMaxInline characters inline, or a pointer to a CordRep tree
  // plus sampling info. The first byte always acts as the `tag`.
  struct Rep {
    // See cordz_info_t for forced alignment and size of `cordz_info` details.
    struct AsTree {
      explicit constexpr AsTree(absl::cord_internal::CordRep* tree)
          : rep(tree) {}
      cordz_info_t cordz_info = kNullCordzInfo;
      absl::cord_internal::CordRep* rep;
    };

    // Deliberately leaves `data` uninitialized (default-init escape hatch).
    explicit Rep(DefaultInitType) {}
    // Zero-initializes `data`; a zero tag denotes empty inline data.
    constexpr Rep() : data{0} {}
    constexpr Rep(const Rep&) = default;
    constexpr Rep& operator=(const Rep&) = default;

    explicit constexpr Rep(CordRep* rep) : as_tree(rep) {}

    // Inline-data constructor: byte 0 stores the tag (`size << 1`, low bit
    // clear), bytes 1..15 the characters (NUL-padded via GetOrNull).
    explicit constexpr Rep(absl::string_view chars)
        : data{static_cast<char>((chars.size() << 1)),
               GetOrNull(chars, 0),
               GetOrNull(chars, 1),
               GetOrNull(chars, 2),
               GetOrNull(chars, 3),
               GetOrNull(chars, 4),
               GetOrNull(chars, 5),
               GetOrNull(chars, 6),
               GetOrNull(chars, 7),
               GetOrNull(chars, 8),
               GetOrNull(chars, 9),
               GetOrNull(chars, 10),
               GetOrNull(chars, 11),
               GetOrNull(chars, 12),
               GetOrNull(chars, 13),
               GetOrNull(chars, 14)} {}

#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER
    // Break compiler optimization for cases when value is allocated on the
    // stack. Compiler assumes that the variable is fully accessible
    // regardless of our poisoning.
    // Missing report: https://github.com/llvm/llvm-project/issues/100640
    const Rep* self() const {
      const Rep* volatile ptr = this;
      return ptr;
    }
    Rep* self() {
      Rep* volatile ptr = this;
      return ptr;
    }
#else
    constexpr const Rep* self() const { return this; }
    constexpr Rep* self() { return this; }
#endif

    // Disable sanitizer as we must always be able to read `tag`.
    ABSL_CORD_INTERNAL_NO_SANITIZE
    int8_t tag() const { return reinterpret_cast<const int8_t*>(this)[0]; }
    void set_tag(int8_t rhs) { reinterpret_cast<int8_t*>(self())[0] = rhs; }

    // Pointer to the inline character payload (skips the tag byte).
    char* as_chars() { return self()->data + 1; }
    const char* as_chars() const { return self()->data + 1; }

    // The low bit of the tag distinguishes tree (1) from inline (0) storage;
    // see the comment on the union below.
    bool is_tree() const { return (self()->tag() & 1) != 0; }

    // Inline character count, recovered from the tag. Inline-only.
    size_t inline_size() const {
      ABSL_ASSERT(!self()->is_tree());
      return static_cast<size_t>(self()->tag()) >> 1;
    }

    void set_inline_size(size_t size) {
      ABSL_ASSERT(size <= kMaxInline);
      self()->set_tag(static_cast<int8_t>(size << 1));
    }

    CordRep* tree() const { return self()->as_tree.rep; }
    void set_tree(CordRep* rhs) { self()->as_tree.rep = rhs; }

    cordz_info_t cordz_info() const { return self()->as_tree.cordz_info; }
    void set_cordz_info(cordz_info_t rhs) { self()->as_tree.cordz_info = rhs; }

    // Switches to tree representation, clearing any cordz sampling info.
    void make_tree(CordRep* tree) {
      self()->as_tree.rep = tree;
      self()->as_tree.cordz_info = kNullCordzInfo;
    }

#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER
    // Returns a copy that is safe to read in full: for inline data only the
    // live `inline_size()` bytes are copied into a zero-initialized Rep, so
    // no poisoned tail bytes are ever read from `*this`.
    constexpr Rep SanitizerSafeCopy() const {
      if (!absl::is_constant_evaluated()) {
        Rep res;
        if (is_tree()) {
          res = *this;
        } else {
          res.set_tag(tag());
          memcpy(res.as_chars(), as_chars(), inline_size());
        }
        return res;
      } else {
        return *this;
      }
    }
#else
    // Without sanitizers, reading the object directly is always safe.
    constexpr const Rep& SanitizerSafeCopy() const { return *this; }
#endif

    // If the data has length <= kMaxInline, we store it in `data`, and
    // store the size in the first char of `data` shifted left by one bit
    // (low bit clear). Else we store it in a tree and store a pointer to
    // that tree in `as_tree.rep` with a tagged pointer to make `tag() & 1`
    // non zero.
    union {
      char data[kMaxInline + 1];
      AsTree as_tree;
    };

    // TODO(b/145829486): see swap(InlineData, InlineData) for more info.
    inline void SwapValue(Rep rhs, Rep& refrhs) {
      memcpy(&refrhs, this, sizeof(*this));
      memcpy(this, &rhs, sizeof(*this));
    }
  };
    812 
  // Private implementation of `Compare()`.
  // Compares the 15-byte inline character buffers lexicographically by
  // loading each as two overlapping 8-byte words: bytes [0, 8) and [7, 15)
  // past the tag byte. If all bytes compare equal, the shorter value orders
  // first. Requires both `lhs` and `rhs` to hold inline data (not trees).
  // NOTE(review): the loads may cover bytes past `inline_size()`; this
  // presumably relies on unused trailing bytes comparing equal (zeroed by
  // construction / SanitizerSafeCopy) — verify against all writers.
  static inline int Compare(const Rep& lhs, const Rep& rhs) {
    uint64_t x, y;
    memcpy(&x, lhs.as_chars(), sizeof(x));
    memcpy(&y, rhs.as_chars(), sizeof(y));
    if (x == y) {
      memcpy(&x, lhs.as_chars() + 7, sizeof(x));
      memcpy(&y, rhs.as_chars() + 7, sizeof(y));
      if (x == y) {
        if (lhs.inline_size() == rhs.inline_size()) return 0;
        return lhs.inline_size() < rhs.inline_size() ? -1 : 1;
      }
    }
    // Convert to big-endian so an integer comparison matches memcmp-style
    // (lexicographic) byte order on any host.
    x = absl::big_endian::FromHost64(x);
    y = absl::big_endian::FromHost64(y);
    return x < y ? -1 : 1;
  }

  // The single data member; InlineData is exactly one Rep.
  Rep rep_;
    832 };
    833 
    834 static_assert(sizeof(InlineData) == kMaxInline + 1, "");
    835 
    836 #ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER
    837 
// Copy constructor (sanitizer build): reads `rhs` via SanitizerSafeCopy() so
// no poisoned bytes are touched, then poisons the new instance to match its
// contents.
constexpr InlineData::InlineData(const InlineData& rhs) noexcept
    : rep_(rhs.rep_.SanitizerSafeCopy()) {
  poison();
}
    842 
// Copy assignment (sanitizer build): the whole object is rewritten, so it is
// unpoisoned first and re-poisoned for the new contents afterwards.
inline InlineData& InlineData::operator=(const InlineData& rhs) noexcept {
  unpoison();
  rep_ = rhs.rep_.SanitizerSafeCopy();
  poison();
  return *this;
}
    849 
    850 constexpr void InlineData::poison_this() {
    851  if (!absl::is_constant_evaluated()) {
    852    container_internal::SanitizerPoisonObject(this);
    853  }
    854 }
    855 
    856 constexpr void InlineData::unpoison() {
    857  if (!absl::is_constant_evaluated()) {
    858    container_internal::SanitizerUnpoisonObject(this);
    859  }
    860 }
    861 
// Re-establishes the sanitizer poisoning that matches the current contents:
//   - tree representation: the whole object must stay readable -> unpoison;
//   - partially filled inline data: poison only the unused tail bytes;
//   - full inline data (size == kMaxInline): nothing left to poison;
//   - empty inline data: poison the entire object.
constexpr void InlineData::poison() {
  if (!absl::is_constant_evaluated()) {
    if (is_tree()) {
      container_internal::SanitizerUnpoisonObject(this);
    } else if (const size_t size = inline_size()) {
      if (size < kMaxInline) {
        const char* end = rep_.as_chars() + size;
        container_internal::SanitizerPoisonMemoryRegion(end, kMaxInline - size);
      }
    } else {
      container_internal::SanitizerPoisonObject(this);
    }
  }
}
    876 
    877 #else  // ABSL_INTERNAL_CORD_HAVE_SANITIZER
    878 
// Non-sanitizer builds: copying needs no special handling and the poisoning
// hooks compile away to nothing.
constexpr InlineData::InlineData(const InlineData&) noexcept = default;
inline InlineData& InlineData::operator=(const InlineData&) noexcept = default;

constexpr void InlineData::poison_this() {}
constexpr void InlineData::unpoison() {}
constexpr void InlineData::poison() {}
    885 
    886 #endif  // ABSL_INTERNAL_CORD_HAVE_SANITIZER
    887 
    888 inline CordRepSubstring* CordRep::substring() {
    889  assert(IsSubstring());
    890  return static_cast<CordRepSubstring*>(this);
    891 }
    892 
    893 inline const CordRepSubstring* CordRep::substring() const {
    894  assert(IsSubstring());
    895  return static_cast<const CordRepSubstring*>(this);
    896 }
    897 
    898 inline CordRepExternal* CordRep::external() {
    899  assert(IsExternal());
    900  return static_cast<CordRepExternal*>(this);
    901 }
    902 
    903 inline const CordRepExternal* CordRep::external() const {
    904  assert(IsExternal());
    905  return static_cast<const CordRepExternal*>(this);
    906 }
    907 
// Increments the reference count of `rep` and returns it.
// `rep` must not be null.
inline CordRep* CordRep::Ref(CordRep* rep) {
  // ABSL_ASSUME is a workaround for
  // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105585
  ABSL_ASSUME(rep != nullptr);
  rep->refcount.Increment();
  return rep;
}
    915 
// Decrements the reference count of `rep`, destroying it when the last
// reference is dropped. `rep` must not be null.
inline void CordRep::Unref(CordRep* rep) {
  assert(rep != nullptr);
  // The common case is a decrement that does not reach zero, so
  // ABSL_PREDICT_FALSE keeps destruction off the hot path.
  // `DecrementExpectHighRefcount()` is — per its name — presumably tuned for
  // refcounts > 1; see its definition for the exact trade-off.
  if (ABSL_PREDICT_FALSE(!rep->refcount.DecrementExpectHighRefcount())) {
    Destroy(rep);
  }
}
    924 
// Swaps the contents of `lhs` and `rhs`.
inline void swap(InlineData& lhs, InlineData& rhs) noexcept {
  // Both objects must be fully accessible (unpoisoned) while their raw bytes
  // are exchanged; poisoning is restored for the swapped contents afterwards.
  lhs.unpoison();
  rhs.unpoison();
  // TODO(b/145829486): `std::swap(lhs.rep_, rhs.rep_)` results in bad codegen
  // on clang, spilling the temporary swap value on the stack. Since `Rep` is
  // trivial, we can make clang DTRT by calling a hand-rolled `SwapValue` where
  // we pass `rhs` both by value (register allocated) and by reference. The IR
  // then folds and inlines correctly into an optimized swap without spill.
  lhs.rep_.SwapValue(rhs.rep_, rhs.rep_);
  rhs.poison();
  lhs.poison();
}
    937 
    938 }  // namespace cord_internal
    939 
    940 ABSL_NAMESPACE_END
    941 }  // namespace absl
    942 #endif  // ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_