cord.cc (54045B)
// Copyright 2020 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "absl/strings/cord.h"

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iomanip>
#include <ios>
#include <iostream>
#include <limits>
#include <memory>
#include <ostream>
#include <sstream>
#include <string>
#include <utility>

#include "absl/base/attributes.h"
#include "absl/base/config.h"
#include "absl/base/internal/endian.h"
#include "absl/base/internal/raw_logging.h"
#include "absl/base/macros.h"
#include "absl/base/optimization.h"
#include "absl/base/nullability.h"
#include "absl/container/inlined_vector.h"
#include "absl/crc/crc32c.h"
#include "absl/crc/internal/crc_cord_state.h"
#include "absl/functional/function_ref.h"
#include "absl/strings/cord_buffer.h"
#include "absl/strings/escaping.h"
#include "absl/strings/internal/cord_data_edge.h"
#include "absl/strings/internal/cord_internal.h"
#include "absl/strings/internal/cord_rep_btree.h"
#include "absl/strings/internal/cord_rep_crc.h"
#include "absl/strings/internal/cord_rep_flat.h"
#include "absl/strings/internal/cordz_update_tracker.h"
#include "absl/strings/internal/resize_uninitialized.h"
#include "absl/strings/match.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/string_view.h"
#include "absl/strings/strip.h"
#include "absl/types/optional.h"
#include "absl/types/span.h"

namespace absl {
ABSL_NAMESPACE_BEGIN

using ::absl::cord_internal::CordRep;
using ::absl::cord_internal::CordRepBtree;
using ::absl::cord_internal::CordRepCrc;
using ::absl::cord_internal::CordRepExternal;
using ::absl::cord_internal::CordRepFlat;
using ::absl::cord_internal::CordRepSubstring;
using ::absl::cord_internal::CordzUpdateTracker;
using ::absl::cord_internal::InlineData;
using ::absl::cord_internal::kMaxFlatLength;
using ::absl::cord_internal::kMinFlatLength;

using ::absl::cord_internal::kInlinedVectorSize;
using ::absl::cord_internal::kMaxBytesToCopy;

// Debugging helpers, defined further down in this file.
static void DumpNode(absl::Nonnull<CordRep*> nonnull_rep, bool include_data,
                     absl::Nonnull<std::ostream*> os, int indent = 0);
static bool VerifyNode(absl::Nonnull<CordRep*> root,
                       absl::Nonnull<CordRep*> start_node);

// Asserts the integrity of `node` (when asserts are enabled) and returns
// `node` unchanged so calls can be chained inline. Accepts nullptr.
static inline absl::Nullable<CordRep*> VerifyTree(
    absl::Nullable<CordRep*> node) {
  assert(node == nullptr || VerifyNode(node, node));
  // Keep VerifyNode referenced so it does not become an unused function in
  // builds where NDEBUG compiles the assert away.
  static_cast<void>(&VerifyNode);
  return node;
}

// Allocates a flat rep with room for at least `length + alloc_hint` bytes and
// copies the first `length` bytes of `data` into it.
// The returned node has a refcount of 1.
static absl::Nonnull<CordRepFlat*> CreateFlat(absl::Nonnull<const char*> data,
                                              size_t length,
                                              size_t alloc_hint) {
  CordRepFlat* flat = CordRepFlat::New(length + alloc_hint);
  flat->length = length;
  memcpy(flat->Data(), data, length);
  return flat;
}

// Creates a new flat or Btree out of the specified array.
// The returned node has a refcount of 1.
static absl::Nonnull<CordRep*> NewBtree(absl::Nonnull<const char*> data,
                                        size_t length, size_t alloc_hint) {
  if (length <= kMaxFlatLength) {
    return CreateFlat(data, length, alloc_hint);
  }
  // Data is too large for a single flat: the first kMaxFlatLength bytes become
  // the initial leaf, and the remainder is appended to a btree built on it.
  CordRepFlat* flat = CreateFlat(data, kMaxFlatLength, 0);
  data += kMaxFlatLength;
  length -= kMaxFlatLength;
  auto* root = CordRepBtree::Create(flat);
  return CordRepBtree::Append(root, {data, length}, alloc_hint);
}

// Create a new tree out of the specified array.
// The returned node has a refcount of 1.
static absl::Nullable<CordRep*> NewTree(absl::Nullable<const char*> data,
                                        size_t length, size_t alloc_hint) {
  if (length == 0) return nullptr;
  return NewBtree(data, length, alloc_hint);
}

namespace cord_internal {

// Initializes `rep` as an EXTERNAL node referencing (not copying) `data`.
void InitializeCordRepExternal(absl::string_view data,
                               absl::Nonnull<CordRepExternal*> rep) {
  assert(!data.empty());
  rep->length = data.size();
  rep->tag = EXTERNAL;
  rep->base = data.data();
  VerifyTree(rep);
}

}  // namespace cord_internal

// Creates a CordRep from the provided string. If the string is large enough,
// and not wasteful, we move the string into an external cord rep, preserving
// the already allocated string contents.
// Requires the provided string length to be larger than `kMaxInline`.
static absl::Nonnull<CordRep*> CordRepFromString(std::string&& src) {
  assert(src.length() > cord_internal::kMaxInline);
  if (
      // String is short: copy data to avoid external block overhead.
      src.size() <= kMaxBytesToCopy ||
      // String is wasteful: copy data to avoid pinning too much unused memory.
      src.size() < src.capacity() / 2
  ) {
    return NewTree(src.data(), src.size(), 0);
  }

  // The releaser owns the string; the no-op call operator means destruction of
  // the releaser (with the string inside) is the actual release.
  struct StringReleaser {
    void operator()(absl::string_view /* data */) {}
    std::string data;
  };
  const absl::string_view original_data = src;
  auto* rep =
      static_cast<::absl::cord_internal::CordRepExternalImpl<StringReleaser>*>(
          absl::cord_internal::NewExternalRep(original_data,
                                              StringReleaser{std::move(src)}));
  // Moving src may have invalidated its data pointer, so adjust it.
  rep->base = rep->template get<0>().data.data();
  return rep;
}

// --------------------------------------------------------------------
// Cord::InlineRep functions

// Copies `n` bytes of `data` into the inline storage.
inline void Cord::InlineRep::set_data(absl::Nonnull<const char*> data,
                                      size_t n) {
  static_assert(kMaxInline == 15, "set_data is hard-coded for a length of 15");
  data_.set_inline_data(data, n);
}

// Resets this rep to an inline rep of size `n` and returns a pointer to the
// inline storage for the caller to fill in.
inline absl::Nonnull<char*> Cord::InlineRep::set_data(size_t n) {
  assert(n <= kMaxInline);
  ResetToEmpty();
  set_inline_size(n);
  return data_.as_chars();
}

// Drops the last `n` bytes of the inline data, zeroing the vacated bytes.
inline void Cord::InlineRep::reduce_size(size_t n) {
  size_t tag = inline_size();
  assert(tag <= kMaxInline);
  assert(tag >= n);
  tag -= n;
  memset(data_.as_chars() + tag, 0, n);
  set_inline_size(tag);
}

// Drops the first `n` bytes of the inline data.
inline void Cord::InlineRep::remove_prefix(size_t n) {
  cord_internal::SmallMemmove(data_.as_chars(), data_.as_chars() + n,
                              inline_size() - n);
  reduce_size(n);
}

// Returns `rep` converted into a CordRepBtree.
// Directly returns `rep` if `rep` is already a CordRepBtree.
static absl::Nonnull<CordRepBtree*> ForceBtree(CordRep* rep) {
  return rep->IsBtree()
             ? rep->btree()
             : CordRepBtree::Create(cord_internal::RemoveCrcNode(rep));
}

// Appends `tree` to this rep, which must currently be inline. Any existing
// inline data is first converted to a flat so it can precede `tree`.
void Cord::InlineRep::AppendTreeToInlined(absl::Nonnull<CordRep*> tree,
                                          MethodIdentifier method) {
  assert(!is_tree());
  if (!data_.is_empty()) {
    CordRepFlat* flat = MakeFlatWithExtraCapacity(0);
    tree = CordRepBtree::Append(CordRepBtree::Create(flat), tree);
  }
  EmplaceTree(tree, method);
}

// Appends `tree` to this rep, which must already hold a tree.
void Cord::InlineRep::AppendTreeToTree(absl::Nonnull<CordRep*> tree,
                                       MethodIdentifier method) {
  assert(is_tree());
  const CordzUpdateScope scope(data_.cordz_info(), method);
  tree = CordRepBtree::Append(ForceBtree(data_.as_tree()), tree);
  SetTree(tree, scope);
}

// Appends `tree`, dispatching on whether this rep is inline or a tree.
void Cord::InlineRep::AppendTree(absl::Nonnull<CordRep*> tree,
                                 MethodIdentifier method) {
  assert(tree != nullptr);
  assert(tree->length != 0);
  assert(!tree->IsCrc());
  if (data_.is_tree()) {
    AppendTreeToTree(tree, method);
  } else {
    AppendTreeToInlined(tree, method);
  }
}

// Prepends `tree` to this rep, which must currently be inline. Any existing
// inline data is first converted to a flat so it can follow `tree`.
void Cord::InlineRep::PrependTreeToInlined(absl::Nonnull<CordRep*> tree,
                                           MethodIdentifier method) {
  assert(!is_tree());
  if (!data_.is_empty()) {
    CordRepFlat* flat = MakeFlatWithExtraCapacity(0);
    tree = CordRepBtree::Prepend(CordRepBtree::Create(flat), tree);
  }
  EmplaceTree(tree, method);
}

// Prepends `tree` to this rep, which must already hold a tree.
void Cord::InlineRep::PrependTreeToTree(absl::Nonnull<CordRep*> tree,
                                        MethodIdentifier method) {
  assert(is_tree());
  const CordzUpdateScope scope(data_.cordz_info(), method);
  tree = CordRepBtree::Prepend(ForceBtree(data_.as_tree()), tree);
  SetTree(tree, scope);
}

// Prepends `tree`, dispatching on whether this rep is inline or a tree.
void Cord::InlineRep::PrependTree(absl::Nonnull<CordRep*> tree,
                                  MethodIdentifier method) {
  assert(tree != nullptr);
  assert(tree->length != 0);
  assert(!tree->IsCrc());
  if (data_.is_tree()) {
    PrependTreeToTree(tree, method);
  } else {
    PrependTreeToInlined(tree, method);
  }
}

// Searches for a non-full
// flat node at the rightmost leaf of the tree. If a
// suitable leaf is found, the function will update the length field for all
// nodes to account for the size increase. The append region address will be
// written to region and the actual size increase will be written to size.
static inline bool PrepareAppendRegion(
    absl::Nonnull<CordRep*> root, absl::Nonnull<absl::Nullable<char*>*> region,
    absl::Nonnull<size_t*> size, size_t max_length) {
  if (root->IsBtree() && root->refcount.IsOne()) {
    Span<char> span = root->btree()->GetAppendBuffer(max_length);
    if (!span.empty()) {
      *region = span.data();
      *size = span.size();
      return true;
    }
  }

  // Fall back: only a privately owned (refcount one) flat can be extended.
  CordRep* dst = root;
  if (!dst->IsFlat() || !dst->refcount.IsOne()) {
    *region = nullptr;
    *size = 0;
    return false;
  }

  const size_t in_use = dst->length;
  const size_t capacity = dst->flat()->Capacity();
  if (in_use == capacity) {
    *region = nullptr;
    *size = 0;
    return false;
  }

  const size_t size_increase = std::min(capacity - in_use, max_length);
  dst->length += size_increase;

  *region = dst->flat()->Data() + in_use;
  *size = size_increase;
  return true;
}

// Slow-path assignment: at least one of `src` and `this` holds a tree.
void Cord::InlineRep::AssignSlow(const Cord::InlineRep& src) {
  assert(&src != this);
  assert(is_tree() || src.is_tree());
  auto constexpr method = CordzUpdateTracker::kAssignCord;
  if (ABSL_PREDICT_TRUE(!is_tree())) {
    EmplaceTree(CordRep::Ref(src.as_tree()), src.data_, method);
    return;
  }

  CordRep* tree = as_tree();
  if (CordRep* src_tree = src.tree()) {
    // Leave any existing `cordz_info` in place, and let MaybeTrackCord()
    // decide if this cord should be (or remains to be) sampled or not.
    data_.set_tree(CordRep::Ref(src_tree));
    CordzInfo::MaybeTrackCord(data_, src.data_, method);
  } else {
    CordzInfo::MaybeUntrackCord(data_.cordz_info());
    data_ = src.data_;
  }
  // Unref the old tree only after the new value is fully installed.
  CordRep::Unref(tree);
}

// Releases this rep's tree reference (if any), first stopping any cordz
// sampling associated with it.
void Cord::InlineRep::UnrefTree() {
  if (is_tree()) {
    CordzInfo::MaybeUntrackCord(data_.cordz_info());
    CordRep::Unref(tree());
  }
}

// --------------------------------------------------------------------
// Constructors and destructors

Cord::Cord(absl::string_view src, MethodIdentifier method)
    : contents_(InlineData::kDefaultInit) {
  const size_t n = src.size();
  if (n <= InlineRep::kMaxInline) {
    // Short contents are stored directly in the inline buffer.
    contents_.set_data(src.data(), n);
  } else {
    CordRep* rep = NewTree(src.data(), n, 0);
    contents_.EmplaceTree(rep, method);
  }
}

template <typename T, Cord::EnableIfString<T>>
Cord::Cord(T&& src) : contents_(InlineData::kDefaultInit) {
  if (src.size() <= InlineRep::kMaxInline) {
    contents_.set_data(src.data(), src.size());
  } else {
    CordRep* rep = CordRepFromString(std::forward<T>(src));
    contents_.EmplaceTree(rep, CordzUpdateTracker::kConstructorString);
  }
}

template Cord::Cord(std::string&& src);

// The destruction code is separate so that the compiler can determine
// that it does not need to call the destructor on a moved-from Cord.
void Cord::DestroyCordSlow() {
  assert(contents_.is_tree());
  CordzInfo::MaybeUntrackCord(contents_.cordz_info());
  CordRep::Unref(VerifyTree(contents_.as_tree()));
}

// --------------------------------------------------------------------
// Mutators

void Cord::Clear() {
  if (CordRep* tree = contents_.clear()) {
    CordRep::Unref(tree);
  }
}

// Assigns a string large enough that it cannot be stored inline.
Cord& Cord::AssignLargeString(std::string&& src) {
  auto constexpr method = CordzUpdateTracker::kAssignString;
  assert(src.size() > kMaxBytesToCopy);
  CordRep* rep = CordRepFromString(std::move(src));
  if (CordRep* tree = contents_.tree()) {
    CordzUpdateScope scope(contents_.cordz_info(), method);
    contents_.SetTree(rep, scope);
    CordRep::Unref(tree);
  } else {
    contents_.EmplaceTree(rep, method);
  }
  return *this;
}

Cord& Cord::operator=(absl::string_view src) {
  auto constexpr method = CordzUpdateTracker::kAssignString;
  const char* data = src.data();
  size_t length = src.size();
  CordRep* tree = contents_.tree();
  if (length <= InlineRep::kMaxInline) {
    // Embed into this->contents_, which is somewhat subtle:
    // - MaybeUntrackCord must be called before Unref(tree).
    // - MaybeUntrackCord must be called before set_data() clobbers cordz_info.
    // - set_data() must be called before Unref(tree) as it may reference tree.
    if (tree != nullptr) CordzInfo::MaybeUntrackCord(contents_.cordz_info());
    contents_.set_data(data, length);
    if (tree != nullptr) CordRep::Unref(tree);
    return *this;
  }
  if (tree != nullptr) {
    CordzUpdateScope scope(contents_.cordz_info(), method);
    if (tree->IsFlat() && tree->flat()->Capacity() >= length &&
        tree->refcount.IsOne()) {
      // Copy in place if the existing FLAT node is reusable.
      memmove(tree->flat()->Data(), data, length);
      tree->length = length;
      VerifyTree(tree);
      return *this;
    }
    contents_.SetTree(NewTree(data, length, 0), scope);
    CordRep::Unref(tree);
  } else {
    contents_.EmplaceTree(NewTree(data, length, 0), method);
  }
  return *this;
}

// TODO(sanjay): Move to Cord::InlineRep section of file. For now,
// we keep it here to make diffs easier.
void Cord::InlineRep::AppendArray(absl::string_view src,
                                  MethodIdentifier method) {
  if (src.empty()) return;  // memcpy(_, nullptr, 0) is undefined.
  MaybeRemoveEmptyCrcNode();

  size_t appended = 0;
  CordRep* rep = tree();
  const CordRep* const root = rep;
  CordzUpdateScope scope(root ? cordz_info() : nullptr, method);
  if (root != nullptr) {
    // Copy as much as fits into existing private capacity at the tail.
    rep = cord_internal::RemoveCrcNode(rep);
    char* region;
    if (PrepareAppendRegion(rep, &region, &appended, src.size())) {
      memcpy(region, src.data(), appended);
    }
  } else {
    // Try to fit in the inline buffer if possible.
    size_t inline_length = inline_size();
    if (src.size() <= kMaxInline - inline_length) {
      // Append new data to embedded array
      set_inline_size(inline_length + src.size());
      memcpy(data_.as_chars() + inline_length, src.data(), src.size());
      return;
    }

    // Allocate flat to be a perfect fit on first append exceeding inlined size.
    // Subsequent growth will use amortized growth until we reach maximum flat
    // size.
    rep = CordRepFlat::New(inline_length + src.size());
    appended = std::min(src.size(), rep->flat()->Capacity() - inline_length);
    memcpy(rep->flat()->Data(), data_.as_chars(), inline_length);
    memcpy(rep->flat()->Data() + inline_length, src.data(), appended);
    rep->length = inline_length + appended;
  }

  src.remove_prefix(appended);
  if (src.empty()) {
    CommitTree(root, rep, scope, method);
    return;
  }

  // TODO(b/192061034): keep legacy 10% growth rate: consider other rates.
  rep = ForceBtree(rep);
  const size_t min_growth = std::max<size_t>(rep->length / 10, src.size());
  rep = CordRepBtree::Append(rep->btree(), src, min_growth - src.size());

  CommitTree(root, rep, scope, method);
}

// Lvalue overload: returns the tree with an extra reference taken.
inline absl::Nonnull<CordRep*> Cord::TakeRep() const& {
  return CordRep::Ref(contents_.tree());
}

// Rvalue overload: transfers the existing reference out of this cord.
inline absl::Nonnull<CordRep*> Cord::TakeRep() && {
  CordRep* rep = contents_.tree();
  contents_.clear();
  return rep;
}

// Shared implementation for Append(const Cord&) and Append(Cord&&); `C` is
// deduced as a (possibly const) lvalue or rvalue reference to Cord.
template <typename C>
inline void Cord::AppendImpl(C&& src) {
  auto constexpr method = CordzUpdateTracker::kAppendCord;

  contents_.MaybeRemoveEmptyCrcNode();
  if (src.empty()) return;

  if (empty()) {
    // Since destination is empty, we can avoid allocating a node,
    if (src.contents_.is_tree()) {
      // by taking the tree directly
      CordRep* rep =
          cord_internal::RemoveCrcNode(std::forward<C>(src).TakeRep());
      contents_.EmplaceTree(rep, method);
    } else {
      // or copying over inline data
      contents_.data_ = src.contents_.data_;
    }
    return;
  }

  // For short cords, it is faster to copy data if there is room in dst.
  const size_t src_size = src.contents_.size();
  if (src_size <= kMaxBytesToCopy) {
    CordRep* src_tree = src.contents_.tree();
    if (src_tree == nullptr) {
      // src has embedded data.
      contents_.AppendArray({src.contents_.data(), src_size}, method);
      return;
    }
    if (src_tree->IsFlat()) {
      // src tree just has one flat node.
      contents_.AppendArray({src_tree->flat()->Data(), src_size}, method);
      return;
    }
    if (&src == this) {
      // ChunkIterator below assumes that src is not modified during traversal.
      Append(Cord(src));
      return;
    }
    // TODO(mec): Should we only do this if "dst" has space?
    for (absl::string_view chunk : src.Chunks()) {
      Append(chunk);
    }
    return;
  }

  // Guaranteed to be a tree (kMaxBytesToCopy > kInlinedSize)
  CordRep* rep = cord_internal::RemoveCrcNode(std::forward<C>(src).TakeRep());
  contents_.AppendTree(rep, CordzUpdateTracker::kAppendCord);
}

// Tries to extract a flat with at least `min_capacity` spare bytes from the
// tail of `rep`. On success `extracted` is the flat and `tree` the remainder;
// on failure `tree` is the unchanged input and `extracted` is null.
static CordRep::ExtractResult ExtractAppendBuffer(absl::Nonnull<CordRep*> rep,
                                                  size_t min_capacity) {
  switch (rep->tag) {
    case cord_internal::BTREE:
      return CordRepBtree::ExtractAppendBuffer(rep->btree(), min_capacity);
    default:
      if (rep->IsFlat() && rep->refcount.IsOne() &&
          rep->flat()->Capacity() - rep->length >= min_capacity) {
        return {nullptr, rep};
      }
      return {rep, nullptr};
  }
}

// Creates a fresh CordBuffer seeded with the inline contents of `data`,
// leaving `data` empty.
static CordBuffer CreateAppendBuffer(InlineData& data, size_t block_size,
                                     size_t capacity) {
  // Watch out for overflow, people can ask for size_t::max().
  const size_t size = data.inline_size();
  const size_t max_capacity = std::numeric_limits<size_t>::max() - size;
  capacity = (std::min)(max_capacity, capacity) + size;
  CordBuffer buffer =
      block_size ? CordBuffer::CreateWithCustomLimit(block_size, capacity)
                 : CordBuffer::CreateWithDefaultLimit(capacity);
  cord_internal::SmallMemmove(buffer.data(), data.as_chars(), size);
  buffer.SetLength(size);
  data = {};
  return buffer;
}

CordBuffer Cord::GetAppendBufferSlowPath(size_t block_size, size_t capacity,
                                         size_t min_capacity) {
  auto constexpr method = CordzUpdateTracker::kGetAppendBuffer;
  CordRep* tree = contents_.tree();
  if (tree != nullptr) {
    CordzUpdateScope scope(contents_.cordz_info(), method);
    CordRep::ExtractResult result = ExtractAppendBuffer(tree, min_capacity);
    if (result.extracted != nullptr) {
      contents_.SetTreeOrEmpty(result.tree, scope);
      return CordBuffer(result.extracted->flat());
    }
    return block_size ? CordBuffer::CreateWithCustomLimit(block_size, capacity)
                      : CordBuffer::CreateWithDefaultLimit(capacity);
  }
  return CreateAppendBuffer(contents_.data_, block_size, capacity);
}

void Cord::Append(const Cord& src) { AppendImpl(src); }

void Cord::Append(Cord&& src) { AppendImpl(std::move(src)); }

template <typename T, Cord::EnableIfString<T>>
void Cord::Append(T&& src) {
  if (src.size() <= kMaxBytesToCopy) {
    Append(absl::string_view(src));
  } else {
    CordRep* rep = CordRepFromString(std::forward<T>(src));
    contents_.AppendTree(rep, CordzUpdateTracker::kAppendString);
  }
}

template void Cord::Append(std::string&& src);

void Cord::Prepend(const Cord& src) {
  contents_.MaybeRemoveEmptyCrcNode();
  if (src.empty()) return;

  CordRep* src_tree = src.contents_.tree();
  if (src_tree != nullptr) {
    CordRep::Ref(src_tree);
    contents_.PrependTree(cord_internal::RemoveCrcNode(src_tree),
                          CordzUpdateTracker::kPrependCord);
    return;
  }

  // `src` cord is inlined.
  absl::string_view src_contents(src.contents_.data(), src.contents_.size());
  return Prepend(src_contents);
}

void Cord::PrependArray(absl::string_view src, MethodIdentifier method) {
  contents_.MaybeRemoveEmptyCrcNode();
  if (src.empty()) return;  // memcpy(_, nullptr, 0) is undefined.

  if (!contents_.is_tree()) {
    size_t cur_size = contents_.inline_size();
    if (cur_size + src.size() <= InlineRep::kMaxInline) {
      // Use embedded storage.
      InlineData data;
      data.set_inline_size(cur_size + src.size());
      memcpy(data.as_chars(), src.data(), src.size());
      memcpy(data.as_chars() + src.size(), contents_.data(), cur_size);
      contents_.data_ = data;
      return;
    }
  }
  CordRep* rep = NewTree(src.data(), src.size(), 0);
  contents_.PrependTree(rep, method);
}

// Appends `src`, which is known to be non-empty and at most one flat long.
void Cord::AppendPrecise(absl::string_view src, MethodIdentifier method) {
  assert(!src.empty());
  assert(src.size() <= cord_internal::kMaxFlatLength);
  if (contents_.remaining_inline_capacity() >= src.size()) {
    const size_t inline_length = contents_.inline_size();
    contents_.set_inline_size(inline_length + src.size());
    memcpy(contents_.data_.as_chars() + inline_length, src.data(), src.size());
  } else {
    contents_.AppendTree(CordRepFlat::Create(src), method);
  }
}

// Prepends `src`, which is known to be non-empty and at most one flat long.
void Cord::PrependPrecise(absl::string_view src, MethodIdentifier method) {
  assert(!src.empty());
  assert(src.size() <= cord_internal::kMaxFlatLength);
  if (contents_.remaining_inline_capacity() >= src.size()) {
    const size_t cur_size = contents_.inline_size();
    // Build the combined inline value in a temporary to avoid overlapping
    // copies within contents_.
    InlineData data;
    data.set_inline_size(cur_size + src.size());
    memcpy(data.as_chars(), src.data(), src.size());
    memcpy(data.as_chars() + src.size(), contents_.data(), cur_size);
    contents_.data_ = data;
  } else {
    contents_.PrependTree(CordRepFlat::Create(src), method);
  }
}

template <typename T, Cord::EnableIfString<T>>
inline void Cord::Prepend(T&& src) {
  if (src.size() <= kMaxBytesToCopy) {
    Prepend(absl::string_view(src));
  } else {
    CordRep* rep = CordRepFromString(std::forward<T>(src));
    contents_.PrependTree(rep, CordzUpdateTracker::kPrependString);
  }
}

template void Cord::Prepend(std::string&& src);

void Cord::RemovePrefix(size_t n) {
  ABSL_INTERNAL_CHECK(n <= size(),
                      absl::StrCat("Requested prefix size ", n,
                                   " exceeds Cord's size ", size()));
  contents_.MaybeRemoveEmptyCrcNode();
  CordRep* tree = contents_.tree();
  if (tree == nullptr) {
    contents_.remove_prefix(n);
  } else {
    auto constexpr method = CordzUpdateTracker::kRemovePrefix;
    CordzUpdateScope scope(contents_.cordz_info(), method);
    tree = cord_internal::RemoveCrcNode(tree);
    if (n >= tree->length) {
      CordRep::Unref(tree);
      tree = nullptr;
    } else if (tree->IsBtree()) {
      CordRep* old = tree;
      tree = tree->btree()->SubTree(n, tree->length - n);
      CordRep::Unref(old);
    } else if (tree->IsSubstring() && tree->refcount.IsOne()) {
      // Privately owned substring: adjust in place.
      tree->substring()->start += n;
      tree->length -= n;
    } else {
      CordRep* rep = CordRepSubstring::Substring(tree, n, tree->length - n);
      CordRep::Unref(tree);
      tree = rep;
    }
    contents_.SetTreeOrEmpty(tree, scope);
  }
}

void Cord::RemoveSuffix(size_t n) {
  ABSL_INTERNAL_CHECK(n <= size(),
                      absl::StrCat("Requested suffix size ", n,
                                   " exceeds Cord's size ", size()));
  contents_.MaybeRemoveEmptyCrcNode();
  CordRep* tree = contents_.tree();
  if (tree == nullptr) {
    contents_.reduce_size(n);
  } else {
    auto constexpr method = CordzUpdateTracker::kRemoveSuffix;
    CordzUpdateScope scope(contents_.cordz_info(), method);
    tree = cord_internal::RemoveCrcNode(tree);
    if (n >= tree->length) {
      CordRep::Unref(tree);
      tree = nullptr;
    } else if (tree->IsBtree()) {
      tree = CordRepBtree::RemoveSuffix(tree->btree(), n);
    } else if (!tree->IsExternal() && tree->refcount.IsOne()) {
      assert(tree->IsFlat() || tree->IsSubstring());
      tree->length -= n;
    } else {
      CordRep* rep = CordRepSubstring::Substring(tree, 0, tree->length - n);
      CordRep::Unref(tree);
      tree = rep;
    }
    contents_.SetTreeOrEmpty(tree, scope);
  }
}

Cord Cord::Subcord(size_t pos, size_t new_size) const {
  Cord sub_cord;
  size_t length = size();
  // Clamp the requested range to the bounds of this cord.
  if (pos > length) pos = length;
  if (new_size > length - pos) new_size = length - pos;
  if (new_size == 0) return sub_cord;

  CordRep* tree = contents_.tree();
  if (tree == nullptr) {
    sub_cord.contents_.set_data(contents_.data() + pos, new_size);
    return sub_cord;
  }

  if (new_size <= InlineRep::kMaxInline) {
    // Result fits inline: copy the bytes chunk by chunk.
    sub_cord.contents_.set_inline_size(new_size);
    char* dest = sub_cord.contents_.data_.as_chars();
    Cord::ChunkIterator it = chunk_begin();
    it.AdvanceBytes(pos);
    size_t remaining_size = new_size;
    while (remaining_size > it->size()) {
      cord_internal::SmallMemmove(dest, it->data(), it->size());
      remaining_size -= it->size();
      dest += it->size();
      ++it;
    }
    cord_internal::SmallMemmove(dest, it->data(), remaining_size);
    return sub_cord;
  }

  tree = cord_internal::SkipCrcNode(tree);
  if (tree->IsBtree()) {
    tree = tree->btree()->SubTree(pos, new_size);
  } else {
    tree = CordRepSubstring::Substring(tree, pos, new_size);
  }
  sub_cord.contents_.EmplaceTree(tree, contents_.data_,
                                 CordzUpdateTracker::kSubCord);
  return sub_cord;
}

// --------------------------------------------------------------------
// Comparators

namespace {

// Maps a memcmp-style result onto {-1, 0, 1}.
int ClampResult(int memcmp_res) {
  return static_cast<int>(memcmp_res > 0) - static_cast<int>(memcmp_res < 0);
}

// Compares the common-length prefix of *lhs and *rhs, consuming the compared
// bytes from both views and from *size_to_compare. Returns the raw memcmp
// result (non-zero terminates the overall comparison).
int CompareChunks(absl::Nonnull<absl::string_view*> lhs,
                  absl::Nonnull<absl::string_view*> rhs,
                  absl::Nonnull<size_t*> size_to_compare) {
  size_t compared_size = std::min(lhs->size(), rhs->size());
  assert(*size_to_compare >= compared_size);
  *size_to_compare -= compared_size;

  int memcmp_res = ::memcmp(lhs->data(), rhs->data(), compared_size);
  if (memcmp_res != 0) return memcmp_res;

  lhs->remove_prefix(compared_size);
  rhs->remove_prefix(compared_size);

  return 0;
}

// This overload set computes comparison results from memcmp result. This
// interface is used inside GenericCompare below. Different implementations
// are specialized for int and bool. For int we clamp result to {-1, 0, 1}
// set. For bool we just interested in "value == 0".
template <typename ResultType>
ResultType ComputeCompareResult(int memcmp_res) {
  return ClampResult(memcmp_res);
}
template <>
bool ComputeCompareResult<bool>(int memcmp_res) {
  return memcmp_res == 0;
}

}  // namespace

// Helper routine. Locates the first flat or external chunk of the Cord without
// initializing the iterator, and returns a string_view referencing the data.
inline absl::string_view Cord::InlineRep::FindFlatStartPiece() const {
  if (!is_tree()) {
    return absl::string_view(data_.as_chars(), data_.inline_size());
  }

  CordRep* node = cord_internal::SkipCrcNode(tree());
  if (node->IsFlat()) {
    return absl::string_view(node->flat()->Data(), node->length);
  }

  if (node->IsExternal()) {
    return absl::string_view(node->external()->base, node->length);
  }

  if (node->IsBtree()) {
    // Descend along the front edge to the leftmost leaf.
    CordRepBtree* tree = node->btree();
    int height = tree->height();
    while (--height >= 0) {
      tree = tree->Edge(CordRepBtree::kFront)->btree();
    }
    return tree->Data(tree->begin());
  }

  // Get the child node if we encounter a SUBSTRING.
  size_t offset = 0;
  size_t length = node->length;
  assert(length != 0);

  if (node->IsSubstring()) {
    offset = node->substring()->start;
    node = node->substring()->child;
  }

  if (node->IsFlat()) {
    return absl::string_view(node->flat()->Data() + offset, length);
  }

  assert(node->IsExternal() && "Expect FLAT or EXTERNAL node here");

  return absl::string_view(node->external()->base + offset, length);
}

// Installs `state` as the expected-checksum CRC node wrapping this cord's
// contents, converting inline data to a tree if needed.
void Cord::SetCrcCordState(crc_internal::CrcCordState state) {
  auto constexpr method = CordzUpdateTracker::kSetExpectedChecksum;
  if (empty()) {
    contents_.MaybeRemoveEmptyCrcNode();
    CordRep* rep = CordRepCrc::New(nullptr, std::move(state));
    contents_.EmplaceTree(rep, method);
  } else if (!contents_.is_tree()) {
    CordRep* rep = contents_.MakeFlatWithExtraCapacity(0);
    rep = CordRepCrc::New(rep, std::move(state));
    contents_.EmplaceTree(rep, method);
  } else {
    const CordzUpdateScope scope(contents_.data_.cordz_info(), method);
    CordRep* rep = CordRepCrc::New(contents_.data_.as_tree(), std::move(state));
    contents_.SetTree(rep, scope);
  }
}

void Cord::SetExpectedChecksum(uint32_t crc) {
  // Construct a CrcCordState with a single chunk.
  crc_internal::CrcCordState state;
  state.mutable_rep()->prefix_crc.push_back(
      crc_internal::CrcCordState::PrefixCrc(size(), absl::crc32c_t{crc}));
  SetCrcCordState(std::move(state));
}

absl::Nullable<const crc_internal::CrcCordState*> Cord::MaybeGetCrcCordState()
    const {
  if (!contents_.is_tree() || !contents_.tree()->IsCrc()) {
    return nullptr;
  }
  return &contents_.tree()->crc()->crc_cord_state;
}

absl::optional<uint32_t> Cord::ExpectedChecksum() const {
  if (!contents_.is_tree() || !contents_.tree()->IsCrc()) {
    return absl::nullopt;
  }
  return static_cast<uint32_t>(
      contents_.tree()->crc()->crc_cord_state.Checksum());
}

inline int Cord::CompareSlowPath(absl::string_view rhs, size_t compared_size,
                                 size_t size_to_compare) const {
  // Refills `chunk` from the iterator when it runs dry; returns false when
  // the iterator itself is exhausted.
  auto advance = [](absl::Nonnull<Cord::ChunkIterator*> it,
                    absl::Nonnull<absl::string_view*> chunk) {
    if (!chunk->empty()) return true;
    ++*it;
    if (it->bytes_remaining_ == 0) return false;
    *chunk = **it;
    return true;
  };

  Cord::ChunkIterator lhs_it = chunk_begin();

  // compared_size is inside first chunk.
  absl::string_view lhs_chunk =
      (lhs_it.bytes_remaining_ != 0) ? *lhs_it : absl::string_view();
  assert(compared_size <= lhs_chunk.size());
  assert(compared_size <= rhs.size());
  lhs_chunk.remove_prefix(compared_size);
  rhs.remove_prefix(compared_size);
  size_to_compare -= compared_size;  // skip already compared size.

  while (advance(&lhs_it, &lhs_chunk) && !rhs.empty()) {
    int comparison_result = CompareChunks(&lhs_chunk, &rhs, &size_to_compare);
    if (comparison_result != 0) return comparison_result;
    if (size_to_compare == 0) return 0;
  }

  return static_cast<int>(rhs.empty()) - static_cast<int>(lhs_chunk.empty());
}

inline int Cord::CompareSlowPath(const Cord& rhs, size_t compared_size,
                                 size_t size_to_compare) const {
  // Same advance helper as the string_view overload above.
  auto advance = [](absl::Nonnull<Cord::ChunkIterator*> it,
                    absl::Nonnull<absl::string_view*> chunk) {
    if (!chunk->empty()) return true;
    ++*it;
    if (it->bytes_remaining_ == 0) return false;
    *chunk = **it;
    return true;
  };

  Cord::ChunkIterator lhs_it = chunk_begin();
  Cord::ChunkIterator rhs_it = rhs.chunk_begin();

  // compared_size is inside both first chunks.
  absl::string_view lhs_chunk =
      (lhs_it.bytes_remaining_ != 0) ? *lhs_it : absl::string_view();
  absl::string_view rhs_chunk =
      (rhs_it.bytes_remaining_ != 0) ? *rhs_it : absl::string_view();
  assert(compared_size <= lhs_chunk.size());
  assert(compared_size <= rhs_chunk.size());
  lhs_chunk.remove_prefix(compared_size);
  rhs_chunk.remove_prefix(compared_size);
  size_to_compare -= compared_size;  // skip already compared size.

  while (advance(&lhs_it, &lhs_chunk) && advance(&rhs_it, &rhs_chunk)) {
    int memcmp_res = CompareChunks(&lhs_chunk, &rhs_chunk, &size_to_compare);
    if (memcmp_res != 0) return memcmp_res;
    if (size_to_compare == 0) return 0;
  }

  return static_cast<int>(rhs_chunk.empty()) -
         static_cast<int>(lhs_chunk.empty());
}

inline absl::string_view Cord::GetFirstChunk(const Cord& c) {
  if (c.empty()) return {};
  return c.contents_.FindFlatStartPiece();
}
inline absl::string_view Cord::GetFirstChunk(absl::string_view sv) {
  return sv;
}

// Compares up to 'size_to_compare' bytes of 'lhs' with 'rhs'.
It is assumed
// that 'size_to_compare' is greater than the size of the smallest of the
// first chunks.
template <typename ResultType, typename RHS>
ResultType GenericCompare(const Cord& lhs, const RHS& rhs,
                          size_t size_to_compare) {
  absl::string_view lhs_chunk = Cord::GetFirstChunk(lhs);
  absl::string_view rhs_chunk = Cord::GetFirstChunk(rhs);

  // Fast path: compare as much as possible with one memcmp over the
  // overlapping prefix of the first chunks.
  size_t compared_size = std::min(lhs_chunk.size(), rhs_chunk.size());
  assert(size_to_compare >= compared_size);
  int memcmp_res = compared_size > 0 ? ::memcmp(lhs_chunk.data(),
                                                rhs_chunk.data(), compared_size)
                                     : 0;
  if (compared_size == size_to_compare || memcmp_res != 0) {
    return ComputeCompareResult<ResultType>(memcmp_res);
  }

  // First chunks were equal but more data remains: fall back to the
  // chunk-by-chunk slow path.
  return ComputeCompareResult<ResultType>(
      lhs.CompareSlowPath(rhs, compared_size, size_to_compare));
}

// Returns true if the first `size_to_compare` bytes of *this and `rhs` match.
bool Cord::EqualsImpl(absl::string_view rhs, size_t size_to_compare) const {
  return GenericCompare<bool>(*this, rhs, size_to_compare);
}

bool Cord::EqualsImpl(const Cord& rhs, size_t size_to_compare) const {
  return GenericCompare<bool>(*this, rhs, size_to_compare);
}

// Shared three-way comparison for Cord vs. Cord/string_view: compares the
// common prefix, then breaks ties by length.
template <typename RHS>
inline int SharedCompareImpl(const Cord& lhs, const RHS& rhs) {
  size_t lhs_size = lhs.size();
  size_t rhs_size = rhs.size();
  if (lhs_size == rhs_size) {
    return GenericCompare<int>(lhs, rhs, lhs_size);
  }
  if (lhs_size < rhs_size) {
    // lhs is shorter: an equal prefix makes lhs compare less.
    auto data_comp_res = GenericCompare<int>(lhs, rhs, lhs_size);
    return data_comp_res == 0 ? -1 : data_comp_res;
  }

  // lhs is longer: an equal prefix makes lhs compare greater.
  auto data_comp_res = GenericCompare<int>(lhs, rhs, rhs_size);
  return data_comp_res == 0 ?
+1 : data_comp_res; 1013 } 1014 1015 int Cord::Compare(absl::string_view rhs) const { 1016 return SharedCompareImpl(*this, rhs); 1017 } 1018 1019 int Cord::CompareImpl(const Cord& rhs) const { 1020 return SharedCompareImpl(*this, rhs); 1021 } 1022 1023 bool Cord::EndsWith(absl::string_view rhs) const { 1024 size_t my_size = size(); 1025 size_t rhs_size = rhs.size(); 1026 1027 if (my_size < rhs_size) return false; 1028 1029 Cord tmp(*this); 1030 tmp.RemovePrefix(my_size - rhs_size); 1031 return tmp.EqualsImpl(rhs, rhs_size); 1032 } 1033 1034 bool Cord::EndsWith(const Cord& rhs) const { 1035 size_t my_size = size(); 1036 size_t rhs_size = rhs.size(); 1037 1038 if (my_size < rhs_size) return false; 1039 1040 Cord tmp(*this); 1041 tmp.RemovePrefix(my_size - rhs_size); 1042 return tmp.EqualsImpl(rhs, rhs_size); 1043 } 1044 1045 // -------------------------------------------------------------------- 1046 // Misc. 1047 1048 Cord::operator std::string() const { 1049 std::string s; 1050 absl::CopyCordToString(*this, &s); 1051 return s; 1052 } 1053 1054 void CopyCordToString(const Cord& src, absl::Nonnull<std::string*> dst) { 1055 if (!src.contents_.is_tree()) { 1056 src.contents_.CopyTo(dst); 1057 } else { 1058 absl::strings_internal::STLStringResizeUninitialized(dst, src.size()); 1059 src.CopyToArraySlowPath(&(*dst)[0]); 1060 } 1061 } 1062 1063 void AppendCordToString(const Cord& src, absl::Nonnull<std::string*> dst) { 1064 const size_t cur_dst_size = dst->size(); 1065 const size_t new_dst_size = cur_dst_size + src.size(); 1066 absl::strings_internal::STLStringResizeUninitializedAmortized(dst, 1067 new_dst_size); 1068 char* append_ptr = &(*dst)[cur_dst_size]; 1069 src.CopyToArrayImpl(append_ptr); 1070 } 1071 1072 void Cord::CopyToArraySlowPath(absl::Nonnull<char*> dst) const { 1073 assert(contents_.is_tree()); 1074 absl::string_view fragment; 1075 if (GetFlatAux(contents_.tree(), &fragment) && !fragment.empty()) { 1076 memcpy(dst, fragment.data(), fragment.size()); 1077 
    return;
  }
  // Not a single contiguous buffer: copy chunk by chunk.
  for (absl::string_view chunk : Chunks()) {
    memcpy(dst, chunk.data(), chunk.size());
    dst += chunk.size();
  }
}

// Reads `n` bytes starting at this iterator's position into a new Cord and
// advances the iterator past them.
Cord Cord::ChunkIterator::AdvanceAndReadBytes(size_t n) {
  ABSL_HARDENING_ASSERT(bytes_remaining_ >= n &&
                        "Attempted to iterate past `end()`");
  Cord subcord;
  auto constexpr method = CordzUpdateTracker::kCordReader;

  if (n <= InlineRep::kMaxInline) {
    // Range to read fits in inline data. Flatten it.
    char* data = subcord.contents_.set_data(n);
    // Copy whole chunks while the remaining request spans past the current
    // chunk, advancing the iterator as we consume each chunk.
    while (n > current_chunk_.size()) {
      memcpy(data, current_chunk_.data(), current_chunk_.size());
      data += current_chunk_.size();
      n -= current_chunk_.size();
      ++*this;
    }
    memcpy(data, current_chunk_.data(), n);
    if (n < current_chunk_.size()) {
      RemoveChunkPrefix(n);
    } else if (n > 0) {
      ++*this;
    }
    return subcord;
  }

  if (btree_reader_) {
    size_t chunk_size = current_chunk_.size();
    if (n <= chunk_size && n <= kMaxBytesToCopy) {
      // Small read contained within the current chunk: copy the bytes.
      subcord = Cord(current_chunk_.substr(0, n), method);
      if (n < chunk_size) {
        current_chunk_.remove_prefix(n);
      } else {
        current_chunk_ = btree_reader_.Next();
      }
    } else {
      // Larger read: delegate to the btree reader, which returns the new tree
      // in `rep` and the chunk following the read range.
      CordRep* rep;
      current_chunk_ = btree_reader_.Read(n, chunk_size, rep);
      subcord.contents_.EmplaceTree(rep, method);
    }
    bytes_remaining_ -= n;
    return subcord;
  }

  // Short circuit if reading the entire data edge.
  assert(current_leaf_ != nullptr);
  if (n == current_leaf_->length) {
    bytes_remaining_ = 0;
    current_chunk_ = {};
    CordRep* tree = CordRep::Ref(current_leaf_);
    subcord.contents_.EmplaceTree(VerifyTree(tree), method);
    return subcord;
  }

  // From this point on, we need a partial substring node.
  // Get pointer to the underlying flat or external data payload and
  // compute data pointer and offset into current flat or external.
  CordRep* payload = current_leaf_->IsSubstring()
                         ? current_leaf_->substring()->child
                         : current_leaf_;
  const char* data = payload->IsExternal() ? payload->external()->base
                                           : payload->flat()->Data();
  const size_t offset = static_cast<size_t>(current_chunk_.data() - data);

  auto* tree = CordRepSubstring::Substring(payload, offset, n);
  subcord.contents_.EmplaceTree(VerifyTree(tree), method);
  bytes_remaining_ -= n;
  current_chunk_.remove_prefix(n);
  return subcord;
}

// Returns the byte at index `i` (0-based). `i` must be < size().
char Cord::operator[](size_t i) const {
  ABSL_HARDENING_ASSERT(i < size());
  size_t offset = i;
  const CordRep* rep = contents_.tree();
  if (rep == nullptr) {
    // Inline representation: index directly into the inline buffer.
    return contents_.data()[i];
  }
  rep = cord_internal::SkipCrcNode(rep);
  // Walk down the tree, translating `offset` at each level.
  while (true) {
    assert(rep != nullptr);
    assert(offset < rep->length);
    if (rep->IsFlat()) {
      // Get the "i"th character directly from the flat array.
      return rep->flat()->Data()[offset];
    } else if (rep->IsBtree()) {
      return rep->btree()->GetCharacter(offset);
    } else if (rep->IsExternal()) {
      // Get the "i"th character from the external array.
      return rep->external()->base[offset];
    } else {
      // This must be a substring node, so bypass it to get to the child.
      assert(rep->IsSubstring());
      offset += rep->substring()->start;
      rep = rep->substring()->child;
    }
  }
}

namespace {

// Tests whether the sequence of chunks beginning at `position` starts with
// `needle`.
//
// REQUIRES: remaining `absl::Cord` starting at `position` is greater than or
// equal to `needle.size()`.
bool IsSubstringInCordAt(absl::Cord::CharIterator position,
                         absl::string_view needle) {
  auto haystack_chunk = absl::Cord::ChunkRemaining(position);
  while (true) {
    // Precondition is that `absl::Cord::ChunkRemaining(position)` is not
    // empty.
This assert will trigger if that is not true.
    assert(!haystack_chunk.empty());
    auto min_length = std::min(haystack_chunk.size(), needle.size());
    // Strip the matched prefix from `needle`; mismatch means no substring
    // starts at `position`.
    if (!absl::ConsumePrefix(&needle, haystack_chunk.substr(0, min_length))) {
      return false;
    }
    if (needle.empty()) {
      return true;
    }
    absl::Cord::Advance(&position, min_length);
    haystack_chunk = absl::Cord::ChunkRemaining(position);
  }
}

}  // namespace

// A few options how this could be implemented:
// (a) Flatten the Cord and find, i.e.
//       haystack.Flatten().find(needle)
//     For large 'haystack' (where Cord makes sense to be used), this copies
//     the whole 'haystack' and can be slow.
// (b) Use std::search, i.e.
//       std::search(haystack.char_begin(), haystack.char_end(),
//                   needle.begin(), needle.end())
//     This avoids the copy, but compares one byte at a time, and branches a
//     lot every time it has to advance. It is also not possible to use
//     std::search as is, because CharIterator is only an input iterator, not a
//     forward iterator.
// (c) Use string_view::find in each fragment, and specifically handle fragment
//     boundaries.
//
// This currently implements option (b).
absl::Cord::CharIterator absl::Cord::FindImpl(CharIterator it,
                                              absl::string_view needle) const {
  // Ensure preconditions are met by callers first.

  // Needle must not be empty.
  assert(!needle.empty());
  // Haystack must be at least as large as needle.
  assert(it.chunk_iterator_.bytes_remaining_ >= needle.size());

  // Cord is a sequence of chunks. To find `needle` we go chunk by chunk
  // looking for the first char of needle, up until we have advanced `N`
  // defined as `haystack.size() - needle.size()`. If we find the first char
  // of needle at `P` and `P` is less than `N`, we then call
  // `IsSubstringInCordAt` to see if this is the needle.
If not, we advance to `P + 1` and try again.
  while (it.chunk_iterator_.bytes_remaining_ >= needle.size()) {
    auto haystack_chunk = Cord::ChunkRemaining(it);
    assert(!haystack_chunk.empty());
    // Look for the first char of `needle` in the current chunk.
    auto idx = haystack_chunk.find(needle.front());
    if (idx == absl::string_view::npos) {
      // No potential match in this chunk, advance past it.
      Cord::Advance(&it, haystack_chunk.size());
      continue;
    }
    // We found the start of a potential match in the chunk. Advance the
    // iterator and haystack chunk to the match position.
    Cord::Advance(&it, idx);
    // Check if there is enough haystack remaining to actually have a match.
    if (it.chunk_iterator_.bytes_remaining_ < needle.size()) {
      break;
    }
    // Check if this is `needle`.
    if (IsSubstringInCordAt(it, needle)) {
      return it;
    }
    // No match, increment the iterator for the next attempt.
    Cord::Advance(&it, 1);
  }
  // If we got here, we did not find `needle`.
  return char_end();
}

// Returns an iterator to the first occurrence of `needle` in this cord, or
// char_end() if absent. An empty needle matches at char_begin().
absl::Cord::CharIterator absl::Cord::Find(absl::string_view needle) const {
  if (needle.empty()) {
    return char_begin();
  }
  if (needle.size() > size()) {
    return char_end();
  }
  if (needle.size() == size()) {
    // Equal sizes: the needle matches only if the contents are equal.
    return *this == needle ? char_begin() : char_end();
  }
  return FindImpl(char_begin(), needle);
}

namespace {

// Tests whether the sequence of chunks beginning at `haystack` starts with the
// sequence of chunks beginning at `needle_begin` and extending to `needle_end`.
//
// REQUIRES: remaining `absl::Cord` starting at `position` is greater than or
// equal to `needle_end - needle_begin` and `advance`.
bool IsSubcordInCordAt(absl::Cord::CharIterator haystack,
                       absl::Cord::CharIterator needle_begin,
                       absl::Cord::CharIterator needle_end) {
  while (needle_begin != needle_end) {
    auto haystack_chunk = absl::Cord::ChunkRemaining(haystack);
    assert(!haystack_chunk.empty());
    auto needle_chunk = absl::Cord::ChunkRemaining(needle_begin);
    // Compare the overlapping portion of the two current chunks, then step
    // both iterators past it.
    auto min_length = std::min(haystack_chunk.size(), needle_chunk.size());
    if (haystack_chunk.substr(0, min_length) !=
        needle_chunk.substr(0, min_length)) {
      return false;
    }
    absl::Cord::Advance(&haystack, min_length);
    absl::Cord::Advance(&needle_begin, min_length);
  }
  return true;
}

// Tests whether the sequence of chunks beginning at `position` starts with the
// cord `needle`.
//
// REQUIRES: remaining `absl::Cord` starting at `position` is greater than or
// equal to `needle.size()`.
bool IsSubcordInCordAt(absl::Cord::CharIterator position,
                       const absl::Cord& needle) {
  return IsSubcordInCordAt(position, needle.char_begin(), needle.char_end());
}

}  // namespace

// Returns an iterator to the first occurrence of the cord `needle`, or
// char_end() if absent. An empty needle matches at char_begin().
absl::Cord::CharIterator absl::Cord::Find(const absl::Cord& needle) const {
  if (needle.empty()) {
    return char_begin();
  }
  const auto needle_size = needle.size();
  if (needle_size > size()) {
    return char_end();
  }
  if (needle_size == size()) {
    return *this == needle ? char_begin() : char_end();
  }
  // Strategy: search for needle's first chunk with FindImpl, then verify the
  // remaining chunks at each candidate position.
  const auto needle_chunk = Cord::ChunkRemaining(needle.char_begin());
  auto haystack_it = char_begin();
  while (true) {
    haystack_it = FindImpl(haystack_it, needle_chunk);
    if (haystack_it == char_end() ||
        haystack_it.chunk_iterator_.bytes_remaining_ < needle_size) {
      break;
    }
    // We found the first chunk of `needle` at `haystack_it` but not the entire
    // subcord. Advance past the first chunk and check for the remainder.
    auto haystack_advanced_it = haystack_it;
    auto needle_it = needle.char_begin();
    Cord::Advance(&haystack_advanced_it, needle_chunk.size());
    Cord::Advance(&needle_it, needle_chunk.size());
    if (IsSubcordInCordAt(haystack_advanced_it, needle_it, needle.char_end())) {
      return haystack_it;
    }
    Cord::Advance(&haystack_it, 1);
    if (haystack_it.chunk_iterator_.bytes_remaining_ < needle_size) {
      break;
    }
    if (haystack_it.chunk_iterator_.bytes_remaining_ == needle_size) {
      // Special case, if there is exactly `needle_size` bytes remaining, the
      // subcord is either at `haystack_it` or not at all.
      if (IsSubcordInCordAt(haystack_it, needle)) {
        return haystack_it;
      }
      break;
    }
  }
  return char_end();
}

// Returns true if `rhs` occurs anywhere in this cord. The empty string is
// contained in every cord.
bool Cord::Contains(absl::string_view rhs) const {
  return rhs.empty() || Find(rhs) != char_end();
}

bool Cord::Contains(const absl::Cord& rhs) const {
  return rhs.empty() || Find(rhs) != char_end();
}

// Flattens a tree-backed cord into a single contiguous buffer, replaces this
// cord's tree with the new representation, and returns a view of the data.
absl::string_view Cord::FlattenSlowPath() {
  assert(contents_.is_tree());
  size_t total_size = size();
  CordRep* new_rep;
  char* new_buffer;

  // Try to put the contents into a new flat rep. If they won't fit in the
  // biggest possible flat node, use an external rep instead.
  if (total_size <= kMaxFlatLength) {
    new_rep = CordRepFlat::New(total_size);
    new_rep->length = total_size;
    new_buffer = new_rep->flat()->Data();
    CopyToArraySlowPath(new_buffer);
  } else {
    // Too big for a flat node: copy into a heap buffer owned by an external
    // rep whose releaser deallocates it.
    new_buffer = std::allocator<char>().allocate(total_size);
    CopyToArraySlowPath(new_buffer);
    new_rep = absl::cord_internal::NewExternalRep(
        absl::string_view(new_buffer, total_size), [](absl::string_view s) {
          std::allocator<char>().deallocate(const_cast<char*>(s.data()),
                                            s.size());
        });
  }
  CordzUpdateScope scope(contents_.cordz_info(), CordzUpdateTracker::kFlatten);
  CordRep::Unref(contents_.as_tree());
  contents_.SetTree(new_rep, scope);
  return absl::string_view(new_buffer, total_size);
}

// If the data under `rep` is a single contiguous buffer, stores a view of it
// in `*fragment` and returns true; otherwise returns false.
/* static */ bool Cord::GetFlatAux(absl::Nonnull<CordRep*> rep,
                                   absl::Nonnull<absl::string_view*> fragment) {
  assert(rep != nullptr);
  if (rep->length == 0) {
    *fragment = absl::string_view();
    return true;
  }
  rep = cord_internal::SkipCrcNode(rep);
  if (rep->IsFlat()) {
    *fragment = absl::string_view(rep->flat()->Data(), rep->length);
    return true;
  } else if (rep->IsExternal()) {
    *fragment = absl::string_view(rep->external()->base, rep->length);
    return true;
  } else if (rep->IsBtree()) {
    return rep->btree()->IsFlat(fragment);
  } else if (rep->IsSubstring()) {
    // A substring of a flat/external/btree child may itself be contiguous.
    CordRep* child = rep->substring()->child;
    if (child->IsFlat()) {
      *fragment = absl::string_view(
          child->flat()->Data() + rep->substring()->start, rep->length);
      return true;
    } else if (child->IsExternal()) {
      *fragment = absl::string_view(
          child->external()->base + rep->substring()->start, rep->length);
      return true;
    } else if (child->IsBtree()) {
      return child->btree()->IsFlat(rep->substring()->start, rep->length,
                                    fragment);
    }
  }
  return false;
}

/* static */ void Cord::ForEachChunkAux(
    absl::Nonnull<absl::cord_internal::CordRep*> rep,
    absl::FunctionRef<void(absl::string_view)> callback) {
  assert(rep != nullptr);
  if (rep->length == 0) return;
  rep = cord_internal::SkipCrcNode(rep);

  if (rep->IsBtree()) {
    // Btree: visit every chunk via a chunk iterator.
    ChunkIterator it(rep), end;
    while (it != end) {
      callback(*it);
      ++it;
    }
    return;
  }

  // This is a leaf node, so invoke our callback.
  // NOTE(review): `rep` was already stripped of any CRC node above, so this
  // second SkipCrcNode looks like a no-op — confirm before simplifying.
  absl::cord_internal::CordRep* current_node = cord_internal::SkipCrcNode(rep);
  absl::string_view chunk;
  bool success = GetFlatAux(current_node, &chunk);
  assert(success);
  if (success) {
    callback(chunk);
  }
}

// Writes a human-readable description of the tree rooted at `nonnull_rep` to
// `*os`, one node per line, indented by depth. When `include_data` is true,
// node addresses and escaped data contents are printed as well.
static void DumpNode(absl::Nonnull<CordRep*> nonnull_rep, bool include_data,
                     absl::Nonnull<std::ostream*> os, int indent) {
  CordRep* rep = nonnull_rep;
  const int kIndentStep = 1;
  for (;;) {
    *os << std::setw(3) << (rep == nullptr ? 0 : rep->refcount.Get());
    *os << " " << std::setw(7) << (rep == nullptr ?
0 : rep->length);
    *os << " [";
    if (include_data) *os << static_cast<void*>(rep);
    *os << "]";
    *os << " " << std::setw(indent) << "";
    bool leaf = false;
    if (rep == nullptr) {
      *os << "NULL\n";
      leaf = true;
    } else if (rep->IsCrc()) {
      // CRC and substring nodes have a single child: descend one level.
      *os << "CRC crc=" << rep->crc()->crc_cord_state.Checksum() << "\n";
      indent += kIndentStep;
      rep = rep->crc()->child;
    } else if (rep->IsSubstring()) {
      *os << "SUBSTRING @ " << rep->substring()->start << "\n";
      indent += kIndentStep;
      rep = rep->substring()->child;
    } else {  // Leaf or ring
      leaf = true;
      if (rep->IsExternal()) {
        *os << "EXTERNAL [";
        if (include_data)
          *os << absl::CEscape(
              absl::string_view(rep->external()->base, rep->length));
        *os << "]\n";
      } else if (rep->IsFlat()) {
        *os << "FLAT cap=" << rep->flat()->Capacity() << " [";
        if (include_data)
          *os << absl::CEscape(
              absl::string_view(rep->flat()->Data(), rep->length));
        *os << "]\n";
      } else {
        CordRepBtree::Dump(rep, /*label=*/"", include_data, *os);
      }
    }
    if (leaf) {
      break;
    }
  }
}

// Builds an error message naming `node` and dumping the tree rooted at
// `root`, for use in the consistency checks below.
static std::string ReportError(absl::Nonnull<CordRep*> root,
                               absl::Nonnull<CordRep*> node) {
  std::ostringstream buf;
  buf << "Error at node " << node << " in:";
  DumpNode(root, true, &buf);
  return buf.str();
}

// Verifies structural invariants of the (sub)tree rooted at `start_node`
// (non-zero lengths, CRC placement, substring bounds); CHECK-fails with a
// tree dump on violation, returns true otherwise.
static bool VerifyNode(absl::Nonnull<CordRep*> root,
                       absl::Nonnull<CordRep*> start_node) {
  absl::InlinedVector<absl::Nonnull<CordRep*>, 2> worklist;
  worklist.push_back(start_node);
  do {
    CordRep* node = worklist.back();
    worklist.pop_back();

    ABSL_INTERNAL_CHECK(node != nullptr, ReportError(root, node));
    if (node != root) {
      // Only the root may have zero length or be a CRC node.
      ABSL_INTERNAL_CHECK(node->length != 0, ReportError(root, node));
      ABSL_INTERNAL_CHECK(!node->IsCrc(), ReportError(root, node));
    }

    if (node->IsFlat()) {
      // A flat's logical length must fit in its allocated capacity.
      ABSL_INTERNAL_CHECK(node->length <= node->flat()->Capacity(),
                          ReportError(root, node));
    } else if (node->IsExternal()) {
      ABSL_INTERNAL_CHECK(node->external()->base != nullptr,
                          ReportError(root, node));
    } else if (node->IsSubstring()) {
      // The substring range [start, start + length) must lie inside the
      // child's data.
      ABSL_INTERNAL_CHECK(
          node->substring()->start < node->substring()->child->length,
          ReportError(root, node));
      ABSL_INTERNAL_CHECK(node->substring()->start + node->length <=
                              node->substring()->child->length,
                          ReportError(root, node));
    } else if (node->IsCrc()) {
      ABSL_INTERNAL_CHECK(
          node->crc()->child != nullptr || node->crc()->length == 0,
          ReportError(root, node));
      if (node->crc()->child != nullptr) {
        ABSL_INTERNAL_CHECK(node->crc()->length == node->crc()->child->length,
                            ReportError(root, node));
        worklist.push_back(node->crc()->child);
      }
    }
  } while (!worklist.empty());
  return true;
}

// Streams the cord's contents chunk by chunk, without flattening.
std::ostream& operator<<(std::ostream& out, const Cord& cord) {
  for (absl::string_view chunk : cord.Chunks()) {
    out.write(chunk.data(), static_cast<std::streamsize>(chunk.size()));
  }
  return out;
}

namespace strings_internal {
// Test-only accessors exposing cord_internal constants and type sizes.
size_t CordTestAccess::FlatOverhead() { return cord_internal::kFlatOverhead; }
size_t CordTestAccess::MaxFlatLength() { return cord_internal::kMaxFlatLength; }
size_t CordTestAccess::FlatTagToLength(uint8_t tag) {
  return cord_internal::TagToLength(tag);
}
uint8_t CordTestAccess::LengthToTag(size_t s) {
  ABSL_INTERNAL_CHECK(s <= kMaxFlatLength, absl::StrCat("Invalid length ", s));
  return cord_internal::AllocatedSizeToTag(s + cord_internal::kFlatOverhead);
}
size_t CordTestAccess::SizeofCordRepExternal() {
  return sizeof(CordRepExternal);
}
size_t CordTestAccess::SizeofCordRepSubstring() {
  return sizeof(CordRepSubstring);
}
}  // namespace strings_internal
ABSL_NAMESPACE_END
}  // namespace
absl