TextDirectiveUtil.h (27447B)
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* vim:set ts=2 sw=2 sts=2 et cindent: */ 3 /* This Source Code Form is subject to the terms of the Mozilla Public 4 * License, v. 2.0. If a copy of the MPL was not distributed with this 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 6 7 #ifndef DOM_TEXTDIRECTIVEUTIL_H_ 8 #define DOM_TEXTDIRECTIVEUTIL_H_ 9 10 #include "mozilla/Logging.h" 11 #include "mozilla/RangeBoundary.h" 12 #include "mozilla/RefPtr.h" 13 #include "mozilla/StaticPrefs_dom.h" 14 #include "mozilla/TimeStamp.h" 15 #include "mozilla/dom/AbstractRange.h" 16 #include "mozilla/dom/Text.h" 17 #include "mozilla/intl/WordBreaker.h" 18 #include "nsStringFwd.h" 19 20 class nsIURI; 21 class nsINode; 22 class nsFind; 23 class nsRange; 24 struct TextDirective; 25 26 namespace mozilla::dom { 27 28 extern LazyLogModule gFragmentDirectiveLog; 29 #define TEXT_FRAGMENT_LOG_FN(msg, func, ...) \ 30 MOZ_LOG_FMT(gFragmentDirectiveLog, LogLevel::Debug, "{}(): " msg, func, \ 31 ##__VA_ARGS__) 32 33 // Shortcut macro for logging, which includes the current function name. 34 // To customize (eg. if in a lambda), use `TEXT_FRAGMENT_LOG_FN`. 35 #define TEXT_FRAGMENT_LOG(msg, ...) \ 36 TEXT_FRAGMENT_LOG_FN(msg, __FUNCTION__, ##__VA_ARGS__) 37 38 enum class TextScanDirection { Left = -1, Right = 1 }; 39 40 class TextDirectiveUtil final { 41 public: 42 MOZ_ALWAYS_INLINE static bool ShouldLog() { 43 return MOZ_LOG_TEST(gFragmentDirectiveLog, LogLevel::Debug); 44 } 45 46 static Result<nsString, ErrorResult> RangeContentAsString( 47 AbstractRange* aRange); 48 49 /** 50 * @brief Return true if `aNode` is a visible Text node. 51 * 52 * A node is a visible text node if it is a Text node, the computed value of 53 * its parent element's visibility property is visible, and it is being 54 * rendered. 55 * 56 * see https://wicg.github.io/scroll-to-text-fragment/#visible-text-node 57 */ 58 static bool NodeIsVisibleTextNode(const nsINode& aNode); 59 60 /** 61 * @brief Finds the search query in the given search range. 62 * 63 * This function parametrizes the `nsFind` instance. 64 */ 65 static RefPtr<nsRange> FindStringInRange(nsFind* aFinder, 66 const RangeBoundary& aSearchStart, 67 const RangeBoundary& aSearchEnd, 68 const nsAString& aQuery, 69 bool aWordStartBounded, 70 bool aWordEndBounded); 71 72 /** 73 * @brief Tests if there is whitespace at the given position. 74 * 75 * This algorithm tests for whitespaces and ` ` at `aPos`. 76 * It returns true if whitespace was found. 77 * 78 * This function assumes the reading direction is "right". If trying to check 79 * for whitespace to the left, the caller must adjust the offset. 80 * 81 */ 82 static bool IsWhitespaceAtPosition(const Text* aText, uint32_t aPos); 83 84 /** 85 * @brief Determine if `aNode` should be considered when traversing the DOM. 86 * 87 * A node is "search invisible" if it is an element in the HTML namespace and 88 * 1. The computed value of its `display` property is `none` 89 * 2. It serializes as void 90 * 3. It is one of the following types: 91 * - HTMLIFrameElement 92 * - HTMLImageElement 93 * - HTMLMeterElement 94 * - HTMLObjectElement 95 * - HTMLProgressElement 96 * - HTMLStyleElement 97 * - HTMLScriptElement 98 * - HTMLVideoElement 99 * - HTMLAudioElement 100 * 4. It is a `select` element whose `multiple` content attribute is absent 101 * 102 * see https://wicg.github.io/scroll-to-text-fragment/#search-invisible 103 */ 104 static bool NodeIsSearchInvisible(nsINode& aNode); 105 106 /** 107 * @brief Returns true if `aNode` has block-level display. 108 * A node has block-level display if it is an element and the computed value 109 * of its display property is any of 110 * - block 111 * - table 112 * - flow-root 113 * - grid 114 * - flex 115 * - list-item 116 * 117 * See https://wicg.github.io/scroll-to-text-fragment/#has-block-level-display 118 */ 119 static bool NodeHasBlockLevelDisplay(nsINode& aNode); 120 /** 121 * @brief Get the Block Ancestor For `aNode`. 122 * 123 * see https://wicg.github.io/scroll-to-text-fragment/#nearest-block-ancestor 124 */ 125 static nsINode* GetBlockAncestorForNode(nsINode* aNode); 126 127 /** 128 * @brief Returns true if `aNode` is part of a non-searchable subtree. 129 * 130 * A node is part of a non-searchable subtree if it is or has a 131 * shadow-including ancestor that is search invisible. 132 * 133 * see https://wicg.github.io/scroll-to-text-fragment/#non-searchable-subtree 134 */ 135 static bool NodeIsPartOfNonSearchableSubTree(nsINode& aNode); 136 137 /** Advances the start of `aRange` to the next non-whitespace position. 138 * The function follows this section of the spec: 139 * https://wicg.github.io/scroll-to-text-fragment/#next-non-whitespace-position 140 */ 141 static void AdvanceStartToNextNonWhitespacePosition(nsRange& aRange); 142 143 /** 144 * @brief Returns a point moved by one character or unicode surrogate pair. 145 */ 146 static RangeBoundary MoveToNextBoundaryPoint(const RangeBoundary& aPoint); 147 148 template <TextScanDirection direction> 149 static RangeBoundary FindNextBlockBoundary( 150 const RangeBoundary& aRangeBoundary); 151 152 template <TextScanDirection direction> 153 static Maybe<RangeBoundary> FindBlockBoundaryInRange( 154 const AbstractRange& aRange); 155 156 /** 157 * @brief Find the next non-whitespace point in given `direction`. 158 * 159 * This algorithm jumps across block boundaries. 160 * 161 * @param aPoint Start point 162 * @return New boundary point which points at the next non-whitespace text in 163 * `direction`. If no non-whitespace content exists in `direction`, 164 * return the original boundary point. 165 */ 166 template <TextScanDirection direction> 167 static RangeBoundary FindNextNonWhitespacePosition( 168 const RangeBoundary& aPoint); 169 170 /** 171 * @brief Creates a new RangeBoundary at the nearest word boundary. 172 * 173 * Word boundaries are determined using `intl::WordBreaker::FindWord()`. 174 * This algorithm can find word boundaries across node boundaries and stops at 175 * a block boundary. 176 * 177 * @param aRangeBoundary[in] The range boundary that should be moved. 178 * Must be set and valid. 179 * @param direction[in] The direction into which to move. 180 * @return A new `RangeBoundary` which is moved to the nearest word boundary. 181 */ 182 template <TextScanDirection direction> 183 static RangeBoundary FindWordBoundary(const RangeBoundary& aRangeBoundary); 184 185 /** 186 * @brief Compares the common substring between a reference string and a text 187 * node in the given direction. 188 * 189 * This algorithm returns the common substring across same-block visible text 190 * nodes, starting at `aBoundaryPoint`. Whitespace is compressed. 191 */ 192 template <TextScanDirection direction> 193 static uint32_t ComputeCommonSubstringLength( 194 const nsAString& aReferenceString, const RangeBoundary& aBoundaryPoint); 195 196 /** 197 * @brief Creates a list of all word boundary distances to the base of the 198 * string (beginning for left-to-right, end for right-to-left). 199 * 200 * Word boundaries are determined by iterating the string and checking for 201 * word boundaries using the `intl::WordBreaker` algorithm. 202 * 203 * If direction is `Left`, word begin positions are used, and the distances 204 * are based off the end of the string. Otherwise, the word end positions are 205 * used, and the distances are based off the begin of the string. 206 * The returned array is always sorted and contains monotonically increasing 207 * values. 208 * 209 * This function is guaranteed to return at least one word boundary distance, 210 * the last element always being the length of the string. 211 */ 212 template <TextScanDirection direction> 213 static nsTArray<uint32_t> ComputeWordBoundaryDistances( 214 const nsAString& aString); 215 216 /** 217 * @brief Returns true if the word between `aWordBegin` and `aWordEnd` is 218 * just whitespace or punctuation. 219 * @param aString The string to check. Must not be empty. 220 * @param aWordBegin The start index of the word. 221 * @param aWordEnd The end index of the word. 222 * @return true if the word is just whitespace or punctuation, false 223 * otherwise. 224 */ 225 static bool WordIsJustWhitespaceOrPunctuation(const nsAString& aString, 226 uint32_t aWordBegin, 227 uint32_t aWordEnd); 228 229 /** 230 * @brief Finds the position of the beginning of the second word (in 231 * `direction`), then removes everything up to that position from 232 * `aString` and `aWordDistances`. 233 * 234 * This function modifies both `aString` and `aWordDistances`. 235 * It expects `aString` to be non-empty, and to contain at least two words, 236 * as indicated by `aWordDistances` containing at least two elements. 237 * 238 * @tparam direction Either left-to-right or right-to-left. 239 * @param aString The string to modify. Must not be empty. 240 * @param aWordDistances The array of word boundary distances. The distances 241 * are always sorted and contain monotonically 242 * increasing values. For LTR, the distances are based 243 * off the beginning of the string. For RTL, the 244 * distances are based off the end of the string. Must 245 * contain at least two elements. 246 * @return The length of the first word including whitespace and 247 * punctuation up to the beginning of the second word. 248 */ 249 template <TextScanDirection direction> 250 static uint32_t RemoveFirstWordFromStringAndDistanceArray( 251 nsAString& aString, nsTArray<uint32_t>& aWordDistances); 252 }; 253 254 class TimeoutWatchdog final { 255 public: 256 NS_INLINE_DECL_REFCOUNTING(TimeoutWatchdog); 257 TimeoutWatchdog() 258 : mStartTime(TimeStamp::Now()), 259 mDuration(TimeDuration::FromSeconds( 260 StaticPrefs:: 261 dom_text_fragments_create_text_fragment_timeout_seconds())) {} 262 bool IsDone() const { return TimeStamp::Now() - mStartTime > mDuration; } 263 264 private: 265 ~TimeoutWatchdog() = default; 266 TimeStamp mStartTime; 267 TimeDuration mDuration; 268 }; 269 270 /** 271 * @brief Iterator for visible text nodes with the same block ancestor. 272 * 273 * Allows to be used in range-based iteration. Returns the next visible text 274 * node (as defined by `TextDirectiveUtil::NodeIsVisibleTextNode()` and 275 * `TextDirectiveUtil::NodeIsPartOfNonSearchableSubTree()`) in the given 276 * direction. 277 * 278 * @tparam direction Either left-to-right or right-to-left. 279 */ 280 template <TextScanDirection direction> 281 class SameBlockVisibleTextNodeIterator final { 282 public: 283 explicit SameBlockVisibleTextNodeIterator(nsINode& aStart) 284 : mCurrent(&aStart), 285 mBlockAncestor(TextDirectiveUtil::GetBlockAncestorForNode(mCurrent)) { 286 while (mCurrent->HasChildNodes()) { 287 nsINode* child = direction == TextScanDirection::Left 288 ? mCurrent->GetLastChild() 289 : mCurrent->GetFirstChild(); 290 if (TextDirectiveUtil::GetBlockAncestorForNode(child) != mBlockAncestor) { 291 break; 292 } 293 mCurrent = child; 294 } 295 } 296 297 SameBlockVisibleTextNodeIterator& begin() { return *this; } 298 299 std::nullptr_t end() { return nullptr; } 300 301 bool operator!=(std::nullptr_t) const { return !!mCurrent; } 302 303 void operator++() { 304 while (mCurrent) { 305 mCurrent = direction == TextScanDirection::Left ? mCurrent->GetPrevNode() 306 : mCurrent->GetNextNode(); 307 if (!mCurrent) { 308 return; 309 } 310 if (TextDirectiveUtil::GetBlockAncestorForNode(mCurrent) != 311 mBlockAncestor) { 312 mCurrent = nullptr; 313 return; 314 } 315 if (TextDirectiveUtil::NodeIsVisibleTextNode(*mCurrent) && 316 !TextDirectiveUtil::NodeIsPartOfNonSearchableSubTree(*mCurrent)) { 317 break; 318 } 319 } 320 MOZ_ASSERT_IF(mCurrent, mCurrent->IsText()); 321 } 322 323 Text* operator*() { return Text::FromNodeOrNull(mCurrent); } 324 325 private: 326 nsINode* mCurrent = nullptr; 327 nsINode* mBlockAncestor = nullptr; 328 }; 329 330 template <TextScanDirection direction> 331 /*static*/ RangeBoundary TextDirectiveUtil::FindNextBlockBoundary( 332 const RangeBoundary& aRangeBoundary) { 333 MOZ_ASSERT(aRangeBoundary.IsSetAndValid()); 334 nsINode* current = aRangeBoundary.GetContainer(); 335 uint32_t offset = 336 direction == TextScanDirection::Left ? 0u : current->Length(); 337 for (auto* node : SameBlockVisibleTextNodeIterator<direction>(*current)) { 338 if (!node) { 339 continue; 340 } 341 current = node; 342 offset = direction == TextScanDirection::Left ? 0u : current->Length(); 343 } 344 return {current, offset}; 345 } 346 347 template <TextScanDirection direction> 348 /* static */ Maybe<RangeBoundary> TextDirectiveUtil::FindBlockBoundaryInRange( 349 const AbstractRange& aRange) { 350 if (aRange.Collapsed()) { 351 return Nothing{}; 352 } 353 354 RangeBoundary boundary = FindNextBlockBoundary<direction>( 355 direction == TextScanDirection::Left ? aRange.EndRef() 356 : aRange.StartRef()); 357 358 Maybe<int32_t> compare = 359 direction == TextScanDirection::Left 360 ? nsContentUtils::ComparePoints(aRange.StartRef(), boundary) 361 : nsContentUtils::ComparePoints(boundary, aRange.EndRef()); 362 if (compare && *compare == -1) { 363 // *compare == -1 means that the found boundary is after the range start 364 // when looking left, and before the range end when looking right. 365 // This means that there is a block boundary within the range. 366 return Some(boundary); 367 } 368 369 return Nothing{}; 370 } 371 372 template <TextScanDirection direction> 373 /* static */ RangeBoundary TextDirectiveUtil::FindNextNonWhitespacePosition( 374 const RangeBoundary& aPoint) { 375 MOZ_ASSERT(aPoint.IsSetAndValid()); 376 nsINode* node = aPoint.GetChildAtOffset(); 377 uint32_t offset = 378 direction == TextScanDirection::Left && node ? node->Length() : 0; 379 if (!node) { 380 node = aPoint.GetContainer(); 381 offset = 382 *aPoint.Offset(RangeBoundary::OffsetFilter::kValidOrInvalidOffsets); 383 } 384 while (node->HasChildNodes()) { 385 if constexpr (direction == TextScanDirection::Left) { 386 node = node->GetLastChild(); 387 MOZ_ASSERT(node); 388 offset = node->Length(); 389 } else { 390 node = node->GetFirstChild(); 391 offset = 0; 392 } 393 } 394 395 while (node) { 396 const bool nodeIsInvisible = 397 !TextDirectiveUtil::NodeIsVisibleTextNode(*node) || 398 TextDirectiveUtil::NodeIsPartOfNonSearchableSubTree(*node); 399 const bool offsetIsAtEnd = 400 (direction == TextScanDirection::Left && offset == 0) || 401 (direction == TextScanDirection::Right && offset == node->Length()); 402 if (nodeIsInvisible || offsetIsAtEnd) { 403 if constexpr (direction == TextScanDirection::Left) { 404 node = node->GetPrevNode(); 405 if (node) { 406 offset = node->Length(); 407 } 408 } else { 409 node = node->GetNextNode(); 410 offset = 0; 411 } 412 continue; 413 } 414 const Text* text = Text::FromNode(node); 415 MOZ_ASSERT(text); 416 417 if (!TextDirectiveUtil::IsWhitespaceAtPosition( 418 text, direction == TextScanDirection::Left ? offset - 1 : offset)) { 419 return {node, offset}; 420 } 421 offset += int(direction); 422 } 423 424 // If there seems to be no non-whitespace text in the document in 425 // `direction`, it's safest to return the original point. 426 return aPoint; 427 } 428 429 template <TextScanDirection direction> 430 /*static*/ RangeBoundary TextDirectiveUtil::FindWordBoundary( 431 const RangeBoundary& aRangeBoundary) { 432 MOZ_ASSERT(aRangeBoundary.IsSetAndValid()); 433 nsINode* node = aRangeBoundary.GetContainer(); 434 uint32_t offset = *aRangeBoundary.Offset( 435 RangeBoundary::OffsetFilter::kValidOrInvalidOffsets); 436 437 // Collect text content into this buffer. 438 // The following algorithm pulls in the next text node if required 439 // (if the next word boundary would be at the beginning/end of the text node) 440 nsString textBuffer; 441 for (Text* textNode : SameBlockVisibleTextNodeIterator<direction>(*node)) { 442 if (!textNode || textNode->Length() == 0) { 443 continue; 444 } 445 nsString data; 446 textNode->GetWholeText(data); 447 const uint32_t bufferLength = textBuffer.Length(); 448 if constexpr (direction == TextScanDirection::Left) { 449 textBuffer.Insert(data, 0); 450 } else { 451 textBuffer.Append(data); 452 } 453 if (bufferLength) { 454 auto newOffset = 455 direction == TextScanDirection::Left ? textNode->Length() - 1 : 0u; 456 if (nsContentUtils::IsHTMLWhitespace(data.CharAt(newOffset)) || 457 mozilla::IsPunctuationForWordSelect(data.CharAt(newOffset))) { 458 break; 459 } 460 offset = newOffset; 461 } else { 462 offset = std::max(std::min(offset, textNode->Length() - 1), 0u); 463 } 464 if constexpr (direction == TextScanDirection::Right) { 465 // if not at the beginning of a word, go left by one character. 466 // Otherwise, if offset is already at the end of the word, the word 467 // breaker will match the whitespace or the next word. 468 if (offset && 469 !(nsContentUtils::IsHTMLWhitespace(data.CharAt(offset - 1)) || 470 mozilla::IsPunctuationForWordSelect(data.CharAt(offset - 1)))) { 471 --offset; 472 } 473 } else { 474 if (offset && 475 (nsContentUtils::IsHTMLWhitespace(data.CharAt(offset)) || 476 mozilla::IsPunctuationForWordSelect(data.CharAt(offset)))) { 477 --offset; 478 } 479 } 480 const uint32_t pos = 481 direction == TextScanDirection::Left ? offset : bufferLength + offset; 482 const auto [wordStart, wordEnd] = 483 intl::WordBreaker::FindWord(textBuffer, pos); 484 offset = direction == TextScanDirection::Left ? wordStart 485 : wordEnd - bufferLength; 486 node = textNode; 487 if (offset && offset < textNode->Length()) { 488 break; 489 } 490 } 491 return {node, offset}; 492 } 493 494 template <TextScanDirection direction> 495 void LogCommonSubstringLengths(const char* aFunc, 496 const nsAString& aReferenceString, 497 const nsTArray<nsString>& aTextContentPieces, 498 uint32_t aCommonLength) { 499 if (!TextDirectiveUtil::ShouldLog()) { 500 return; 501 } 502 nsString concatenatedTextContents; 503 for (const auto& textContent : aTextContentPieces) { 504 concatenatedTextContents.Append(textContent); 505 } 506 // the algorithm expects `aReferenceString` to be whitespace-compressed, 507 // and ignores leading whitespace when looking at the DOM nodes. So, 508 // whitespace needs to be compressed here as well. 509 concatenatedTextContents.CompressWhitespace(); 510 const uint32_t maxLength = 511 std::max(aReferenceString.Length(), concatenatedTextContents.Length()); 512 TEXT_FRAGMENT_LOG_FN("Direction: {}.", aFunc, 513 direction == TextScanDirection::Left ? "left" : "right"); 514 515 if constexpr (direction == TextScanDirection::Left) { 516 TEXT_FRAGMENT_LOG_FN("Ref: {:>{}}", aFunc, 517 NS_ConvertUTF16toUTF8(aReferenceString), maxLength); 518 TEXT_FRAGMENT_LOG_FN("Other: {:>{}}", aFunc, 519 NS_ConvertUTF16toUTF8(concatenatedTextContents), 520 maxLength); 521 TEXT_FRAGMENT_LOG_FN( 522 "Common: {:>{}} ({} chars)", aFunc, 523 NS_ConvertUTF16toUTF8(Substring(aReferenceString, aCommonLength)), 524 maxLength, aCommonLength); 525 } else { 526 TEXT_FRAGMENT_LOG_FN("Ref: {:<{}}", aFunc, 527 NS_ConvertUTF16toUTF8(aReferenceString), maxLength); 528 TEXT_FRAGMENT_LOG_FN("Other: {:<{}}", aFunc, 529 NS_ConvertUTF16toUTF8(concatenatedTextContents), 530 maxLength); 531 TEXT_FRAGMENT_LOG_FN( 532 "Common: {:<{}} ({} chars)", aFunc, 533 NS_ConvertUTF16toUTF8(Substring(aReferenceString, 0, aCommonLength)), 534 maxLength, aCommonLength); 535 } 536 } 537 538 template <TextScanDirection direction> 539 /*static*/ nsTArray<uint32_t> TextDirectiveUtil::ComputeWordBoundaryDistances( 540 const nsAString& aString) { 541 AutoTArray<uint32_t, 32> wordBoundaryDistances; 542 uint32_t pos = 543 direction == TextScanDirection::Left ? aString.Length() - 1 : 0; 544 545 // This loop relies on underflowing `pos` when going left as stop condition. 546 while (pos < aString.Length()) { 547 auto [wordBegin, wordEnd] = intl::WordBreaker::FindWord(aString, pos); 548 pos = direction == TextScanDirection::Left ? wordBegin - 1 : wordEnd + 1; 549 if (WordIsJustWhitespaceOrPunctuation(aString, wordBegin, wordEnd)) { 550 // The WordBreaker algorithm breaks at punctuation, so that "foo bar. baz" 551 // would be split into four words: [foo, bar, ., baz]. 552 // To avoid this, we skip words which are just whitespace or punctuation 553 // and add the punctuation to the previous word, so that the above example 554 // would yield three words: [foo, bar., baz]. 555 continue; 556 } 557 558 wordBoundaryDistances.AppendElement(direction == TextScanDirection::Left 559 ? aString.Length() - wordBegin 560 : wordEnd); 561 } 562 if (wordBoundaryDistances.IsEmpty() || 563 wordBoundaryDistances.LastElement() != aString.Length()) { 564 wordBoundaryDistances.AppendElement(aString.Length()); 565 } 566 return std::move(wordBoundaryDistances); 567 } 568 569 template <TextScanDirection direction> 570 /*static*/ uint32_t TextDirectiveUtil::ComputeCommonSubstringLength( 571 const nsAString& aReferenceString, const RangeBoundary& aBoundaryPoint) { 572 MOZ_ASSERT(aBoundaryPoint.IsSetAndValid()); 573 if (aReferenceString.IsEmpty()) { 574 TEXT_FRAGMENT_LOG("Reference string is empty."); 575 return 0; 576 } 577 578 MOZ_ASSERT(!nsContentUtils::IsHTMLWhitespace(aReferenceString.First())); 579 MOZ_ASSERT(!nsContentUtils::IsHTMLWhitespace(aReferenceString.Last())); 580 uint32_t referenceStringPosition = 581 direction == TextScanDirection::Left ? aReferenceString.Length() - 1 : 0; 582 583 bool foundMismatch = false; 584 585 // `aReferenceString` is expected to have its whitespace compressed. 586 // The raw text from the DOM nodes does not have compressed whitespace. 587 // Therefore, the algorithm needs to skip multiple whitespace characters. 588 // Setting this flag to true initially makes this algorithm tolerant to 589 // preceding whitespace in the DOM nodes and the reference string. 590 bool isInWhitespace = true; 591 nsTArray<nsString> textContentForLogging; 592 for (Text* text : SameBlockVisibleTextNodeIterator<direction>( 593 *aBoundaryPoint.GetContainer())) { 594 if (!text || text->Length() == 0) { 595 continue; 596 } 597 uint32_t offset = 598 direction == TextScanDirection::Left ? text->Length() - 1 : 0; 599 if (text == aBoundaryPoint.GetContainer()) { 600 offset = *aBoundaryPoint.Offset( 601 RangeBoundary::OffsetFilter::kValidOrInvalidOffsets); 602 if (offset && direction == TextScanDirection::Left) { 603 // when looking left, the offset is _behind_ the actual char. 604 // Therefore, the value is decremented, and incremented when returning. 605 --offset; 606 } 607 } 608 if (TextDirectiveUtil::ShouldLog()) { 609 nsString textContent; 610 text->GetWholeText(textContent); 611 if constexpr (direction == TextScanDirection::Left) { 612 if (offset) { 613 textContent = Substring(textContent, 0, offset + 1); 614 } else { 615 textContent.Truncate(); 616 } 617 } else { 618 textContent = Substring(textContent, offset); 619 } 620 textContentForLogging.AppendElement(std::move(textContent)); 621 } 622 const CharacterDataBuffer* characterDataBuffer = 623 text->GetCharacterDataBuffer(); 624 MOZ_DIAGNOSTIC_ASSERT(characterDataBuffer); 625 const uint32_t textLength = characterDataBuffer->GetLength(); 626 while (offset < textLength && 627 referenceStringPosition < aReferenceString.Length()) { 628 char16_t ch = characterDataBuffer->CharAt(offset); 629 char16_t refCh = aReferenceString.CharAt(referenceStringPosition); 630 const bool chIsWhitespace = nsContentUtils::IsHTMLWhitespace(ch); 631 const bool refChIsWhitespace = nsContentUtils::IsHTMLWhitespace(refCh); 632 if (chIsWhitespace) { 633 if (refChIsWhitespace) { 634 offset += int(direction); 635 referenceStringPosition += int(direction); 636 isInWhitespace = true; 637 continue; 638 } 639 if (isInWhitespace) { 640 offset += int(direction); 641 continue; 642 } 643 } 644 isInWhitespace = false; 645 if (refCh == ToFoldedCase(ch)) { 646 offset += int(direction); 647 referenceStringPosition += int(direction); 648 continue; 649 } 650 foundMismatch = true; 651 break; 652 } 653 if (foundMismatch) { 654 break; 655 } 656 } 657 uint32_t commonLength = 0; 658 if constexpr (direction == TextScanDirection::Left) { 659 ++referenceStringPosition; 660 commonLength = aReferenceString.Length() - referenceStringPosition; 661 if (TextDirectiveUtil::ShouldLog()) { 662 textContentForLogging.Reverse(); 663 } 664 } else { 665 commonLength = referenceStringPosition; 666 } 667 LogCommonSubstringLengths<direction>(__FUNCTION__, aReferenceString, 668 textContentForLogging, commonLength); 669 return commonLength; 670 } 671 672 template <TextScanDirection direction> 673 /*static*/ uint32_t 674 TextDirectiveUtil::RemoveFirstWordFromStringAndDistanceArray( 675 nsAString& aString, nsTArray<uint32_t>& aWordDistances) { 676 MOZ_DIAGNOSTIC_ASSERT(!aString.IsEmpty()); 677 MOZ_DIAGNOSTIC_ASSERT(aWordDistances.Length() > 1); 678 auto lengthOfFirstWordPlusWhitespaceAndPunctuation = aWordDistances[0]; 679 auto chIsWhitespaceOrPunctuation = [&](uint32_t distance) { 680 const char16_t ch = aString.CharAt(direction == TextScanDirection::Right 681 ? distance 682 : aString.Length() - distance - 1); 683 return nsContentUtils::IsHTMLWhitespace(ch) || 684 mozilla::IsPunctuationForWordSelect(ch); 685 }; 686 while (lengthOfFirstWordPlusWhitespaceAndPunctuation < aString.Length() && 687 chIsWhitespaceOrPunctuation( 688 lengthOfFirstWordPlusWhitespaceAndPunctuation)) { 689 ++lengthOfFirstWordPlusWhitespaceAndPunctuation; 690 } 691 if (lengthOfFirstWordPlusWhitespaceAndPunctuation == aString.Length()) { 692 // In this case the string only contains whitespace or punctuation after the 693 // first word. 694 aWordDistances.Clear(); 695 return lengthOfFirstWordPlusWhitespaceAndPunctuation; 696 } 697 // Adjust all distances to be relative to the new start position. 698 // In the case that the loop above jumps over punctuation which is actually 699 // considered to be a word, the distance underflows (or becomes zero). 700 // These obsolete distances are then removed. 701 for (auto& wordDistance : aWordDistances) { 702 wordDistance -= lengthOfFirstWordPlusWhitespaceAndPunctuation; 703 } 704 aWordDistances.RemoveElementsBy([&aString](uint32_t distance) { 705 return distance == 0 || distance > aString.Length(); 706 }); 707 if constexpr (direction == TextScanDirection::Right) { 708 aString = Substring(aString, lengthOfFirstWordPlusWhitespaceAndPunctuation); 709 } else { 710 aString = Substring( 711 aString, 0, 712 aString.Length() - lengthOfFirstWordPlusWhitespaceAndPunctuation); 713 } 714 return lengthOfFirstWordPlusWhitespaceAndPunctuation; 715 } 716 } // namespace mozilla::dom 717 718 #endif