[ tor-browser ].git.dasho

TextDirectiveUtil.h (27447B)
      1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
      2 /* vim:set ts=2 sw=2 sts=2 et cindent: */
      3 /* This Source Code Form is subject to the terms of the Mozilla Public
      4 * License, v. 2.0. If a copy of the MPL was not distributed with this
      5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      6 
      7 #ifndef DOM_TEXTDIRECTIVEUTIL_H_
      8 #define DOM_TEXTDIRECTIVEUTIL_H_
      9 
     10 #include "mozilla/Logging.h"
     11 #include "mozilla/RangeBoundary.h"
     12 #include "mozilla/RefPtr.h"
     13 #include "mozilla/StaticPrefs_dom.h"
     14 #include "mozilla/TimeStamp.h"
     15 #include "mozilla/dom/AbstractRange.h"
     16 #include "mozilla/dom/Text.h"
     17 #include "mozilla/intl/WordBreaker.h"
     18 #include "nsStringFwd.h"
     19 
     20 class nsIURI;
     21 class nsINode;
     22 class nsFind;
     23 class nsRange;
     24 struct TextDirective;
     25 
     26 namespace mozilla::dom {
     27 
// Log module used by the text fragment logging macros below. Only declared
// here; the definition lives in another translation unit.
extern LazyLogModule gFragmentDirectiveLog;
// Logs `msg` (a format string literal) at Debug level to
// `gFragmentDirectiveLog`. The message is prefixed with "<func>(): "; any
// further arguments are passed on as format arguments.
#define TEXT_FRAGMENT_LOG_FN(msg, func, ...)                              \
  MOZ_LOG_FMT(gFragmentDirectiveLog, LogLevel::Debug, "{}(): " msg, func, \
              ##__VA_ARGS__)

// Shortcut macro for logging, which includes the current function name.
// To customize (eg. if in a lambda), use `TEXT_FRAGMENT_LOG_FN`.
#define TEXT_FRAGMENT_LOG(msg, ...) \
  TEXT_FRAGMENT_LOG_FN(msg, __FUNCTION__, ##__VA_ARGS__)

// Direction in which text is scanned. The underlying values (-1 / +1) are
// used directly as offset increments by the scanning algorithms in this file
// (`offset += int(direction)`).
enum class TextScanDirection { Left = -1, Right = 1 };
     39 
/**
 * @brief Collection of static helper algorithms for text directives
 *        (scroll-to-text-fragment).
 *
 * All members are static. The templated members are parametrized by the
 * `TextScanDirection` in which they walk the text.
 */
class TextDirectiveUtil final {
 public:
  /**
   * @brief Returns true if Debug-level logging is enabled for
   *        `gFragmentDirectiveLog`.
   *
   * Can be used to skip work that is only needed for log output.
   */
  MOZ_ALWAYS_INLINE static bool ShouldLog() {
    return MOZ_LOG_TEST(gFragmentDirectiveLog, LogLevel::Debug);
  }

  /**
   * @brief Returns the content of `aRange` as a string, or an `ErrorResult`.
   *
   * NOTE(review): the implementation is not part of this header; which
   * failures are reported and how the content is serialized should be
   * confirmed against the definition.
   */
  static Result<nsString, ErrorResult> RangeContentAsString(
      AbstractRange* aRange);

  /**
   * @brief Return true if `aNode` is a visible Text node.
   *
   * A node is a visible text node if it is a Text node, the computed value of
   * its parent element's visibility property is visible, and it is being
   * rendered.
   *
   * see https://wicg.github.io/scroll-to-text-fragment/#visible-text-node
   */
  static bool NodeIsVisibleTextNode(const nsINode& aNode);

  /**
   * @brief Finds the search query in the given search range.
   *
   * This function parametrizes the `nsFind` instance.
   */
  static RefPtr<nsRange> FindStringInRange(nsFind* aFinder,
                                           const RangeBoundary& aSearchStart,
                                           const RangeBoundary& aSearchEnd,
                                           const nsAString& aQuery,
                                           bool aWordStartBounded,
                                           bool aWordEndBounded);

  /**
   * @brief Tests if there is whitespace at the given position.
   *
   * This algorithm tests for whitespaces and `&nbsp;` at `aPos`.
   * It returns true if whitespace was found.
   *
   * This function assumes the reading direction is "right". If trying to check
   * for whitespace to the left, the caller must adjust the offset.
   */
  static bool IsWhitespaceAtPosition(const Text* aText, uint32_t aPos);

  /**
   * @brief Determine if `aNode` should be considered when traversing the DOM.
   *
   * A node is "search invisible" if it is an element in the HTML namespace and
   *  1. The computed value of its `display` property is `none`
   *  2. It serializes as void
   *  3. It is one of the following types:
   *    - HTMLIFrameElement
   *    - HTMLImageElement
   *    - HTMLMeterElement
   *    - HTMLObjectElement
   *    - HTMLProgressElement
   *    - HTMLStyleElement
   *    - HTMLScriptElement
   *    - HTMLVideoElement
   *    - HTMLAudioElement
   *  4. It is a `select` element whose `multiple` content attribute is absent
   *
   * see https://wicg.github.io/scroll-to-text-fragment/#search-invisible
   */
  static bool NodeIsSearchInvisible(nsINode& aNode);

  /**
   * @brief Returns true if `aNode` has block-level display.
   * A node has block-level display if it is an element and the computed value
   * of its display property is any of
   *  - block
   *  - table
   *  - flow-root
   *  - grid
   *  - flex
   *  - list-item
   *
   * See https://wicg.github.io/scroll-to-text-fragment/#has-block-level-display
   */
  static bool NodeHasBlockLevelDisplay(nsINode& aNode);
  /**
   * @brief Get the Block Ancestor For `aNode`.
   *
   * see https://wicg.github.io/scroll-to-text-fragment/#nearest-block-ancestor
   */
  static nsINode* GetBlockAncestorForNode(nsINode* aNode);

  /**
   * @brief Returns true if `aNode` is part of a non-searchable subtree.
   *
   * A node is part of a non-searchable subtree if it is or has a
   * shadow-including ancestor that is search invisible.
   *
   * see https://wicg.github.io/scroll-to-text-fragment/#non-searchable-subtree
   */
  static bool NodeIsPartOfNonSearchableSubTree(nsINode& aNode);

  /** Advances the start of `aRange` to the next non-whitespace position.
   * The function follows this section of the spec:
   * https://wicg.github.io/scroll-to-text-fragment/#next-non-whitespace-position
   */
  static void AdvanceStartToNextNonWhitespacePosition(nsRange& aRange);

  /**
   * @brief Returns a point moved by one character or unicode surrogate pair.
   */
  static RangeBoundary MoveToNextBoundaryPoint(const RangeBoundary& aPoint);

  /**
   * @brief Finds the end of the current block in `direction`.
   *
   * Walks the visible text nodes that share the block ancestor of the
   * container of `aRangeBoundary` and returns a boundary at the far edge of
   * the last one found: offset 0 when looking left, the node's length when
   * looking right. If no such text node is found, the returned boundary is at
   * that edge of the container of `aRangeBoundary` itself.
   *
   * @tparam direction      The direction in which to look.
   * @param aRangeBoundary  Start point. Must be set and valid.
   */
  template <TextScanDirection direction>
  static RangeBoundary FindNextBlockBoundary(
      const RangeBoundary& aRangeBoundary);

  /**
   * @brief Finds a block boundary which lies inside of `aRange`.
   *
   * The block boundary is searched with `FindNextBlockBoundary()`, starting
   * from the range end when looking left and from the range start when
   * looking right.
   *
   * @tparam direction The direction in which to look.
   * @return The block boundary if it is located strictly inside of `aRange`,
   *         `Nothing` if `aRange` is collapsed or contains no block boundary.
   */
  template <TextScanDirection direction>
  static Maybe<RangeBoundary> FindBlockBoundaryInRange(
      const AbstractRange& aRange);

  /**
   * @brief Find the next non-whitespace point in given `direction`.
   *
   * This algorithm jumps across block boundaries.
   *
   * @param aPoint Start point
   * @return New boundary point which points at the next non-whitespace text in
   *         `direction`. If no non-whitespace content exists in `direction`,
   *         return the original boundary point.
   */
  template <TextScanDirection direction>
  static RangeBoundary FindNextNonWhitespacePosition(
      const RangeBoundary& aPoint);

  /**
   * @brief Creates a new RangeBoundary at the nearest word boundary.
   *
   * Word boundaries are determined using `intl::WordBreaker::FindWord()`.
   * This algorithm can find word boundaries across node boundaries and stops at
   * a block boundary.
   *
   * @tparam direction         The direction into which to move.
   * @param aRangeBoundary[in] The range boundary that should be moved.
   *                           Must be set and valid.
   * @return A new `RangeBoundary` which is moved to the nearest word boundary.
   */
  template <TextScanDirection direction>
  static RangeBoundary FindWordBoundary(const RangeBoundary& aRangeBoundary);

  /**
   * @brief Compares the common substring between a reference string and a text
   *        node in the given direction.
   *
   * This algorithm returns the common substring across same-block visible text
   * nodes, starting at `aBoundaryPoint`. Whitespace is compressed.
   */
  template <TextScanDirection direction>
  static uint32_t ComputeCommonSubstringLength(
      const nsAString& aReferenceString, const RangeBoundary& aBoundaryPoint);

  /**
   * @brief Creates a list of all word boundary distances to the base of the
   *        string (beginning for left-to-right, end for right-to-left).
   *
   * Word boundaries are determined by iterating the string and checking for
   * word boundaries using the `intl::WordBreaker` algorithm.
   *
   * If direction is `Left`, word begin positions are used, and the distances
   * are based off the end of the string. Otherwise, the word end positions are
   * used, and the distances are based off the begin of the string.
   * The returned array is always sorted and contains monotonically increasing
   * values.
   *
   * This function is guaranteed to return at least one word boundary distance,
   * the last element always being the length of the string.
   */
  template <TextScanDirection direction>
  static nsTArray<uint32_t> ComputeWordBoundaryDistances(
      const nsAString& aString);

  /**
   * @brief Returns true if the word between `aWordBegin` and `aWordEnd` is
   *        just whitespace or punctuation.
   * @param aString The string to check. Must not be empty.
   * @param aWordBegin The start index of the word.
   * @param aWordEnd The end index of the word.
   * @return true if the word is just whitespace or punctuation, false
   * otherwise.
   */
  static bool WordIsJustWhitespaceOrPunctuation(const nsAString& aString,
                                                uint32_t aWordBegin,
                                                uint32_t aWordEnd);

  /**
   * @brief Finds the position of the beginning of the second word (in
   *        `direction`), then removes everything up to that position from
   *       `aString` and `aWordDistances`.
   *
   * This function modifies both `aString` and `aWordDistances`.
   * It expects `aString` to be non-empty, and to contain at least two words,
   * as indicated by `aWordDistances` containing at least two elements.
   *
   * @tparam direction Either left-to-right or right-to-left.
   * @param aString        The string to modify. Must not be empty.
   * @param aWordDistances The array of word boundary distances. The distances
   *                       are always sorted and contain monotonically
   *                       increasing values. For LTR, the distances are based
   *                       off the beginning of the string. For RTL, the
   *                       distances are based off the end of the string. Must
   *                       contain at least two elements.
   * @return The length of the first word including whitespace and
   *         punctuation up to the beginning of the second word.
   */
  template <TextScanDirection direction>
  static uint32_t RemoveFirstWordFromStringAndDistanceArray(
      nsAString& aString, nsTArray<uint32_t>& aWordDistances);
};
    253 
    254 class TimeoutWatchdog final {
    255 public:
    256  NS_INLINE_DECL_REFCOUNTING(TimeoutWatchdog);
    257  TimeoutWatchdog()
    258      : mStartTime(TimeStamp::Now()),
    259        mDuration(TimeDuration::FromSeconds(
    260            StaticPrefs::
    261                dom_text_fragments_create_text_fragment_timeout_seconds())) {}
    262  bool IsDone() const { return TimeStamp::Now() - mStartTime > mDuration; }
    263 
    264 private:
    265  ~TimeoutWatchdog() = default;
    266  TimeStamp mStartTime;
    267  TimeDuration mDuration;
    268 };
    269 
    270 /**
    271 * @brief Iterator for visible text nodes with the same block ancestor.
    272 *
    273 * Allows to be used in range-based iteration. Returns the next visible text
    274 * node (as defined by `TextDirectiveUtil::NodeIsVisibleTextNode()` and
    275 * `TextDirectiveUtil::NodeIsPartOfNonSearchableSubTree()`) in the given
    276 * direction.
    277 *
    278 * @tparam direction Either left-to-right or right-to-left.
    279 */
    280 template <TextScanDirection direction>
    281 class SameBlockVisibleTextNodeIterator final {
    282 public:
    283  explicit SameBlockVisibleTextNodeIterator(nsINode& aStart)
    284      : mCurrent(&aStart),
    285        mBlockAncestor(TextDirectiveUtil::GetBlockAncestorForNode(mCurrent)) {
    286    while (mCurrent->HasChildNodes()) {
    287      nsINode* child = direction == TextScanDirection::Left
    288                           ? mCurrent->GetLastChild()
    289                           : mCurrent->GetFirstChild();
    290      if (TextDirectiveUtil::GetBlockAncestorForNode(child) != mBlockAncestor) {
    291        break;
    292      }
    293      mCurrent = child;
    294    }
    295  }
    296 
    297  SameBlockVisibleTextNodeIterator& begin() { return *this; }
    298 
    299  std::nullptr_t end() { return nullptr; }
    300 
    301  bool operator!=(std::nullptr_t) const { return !!mCurrent; }
    302 
    303  void operator++() {
    304    while (mCurrent) {
    305      mCurrent = direction == TextScanDirection::Left ? mCurrent->GetPrevNode()
    306                                                      : mCurrent->GetNextNode();
    307      if (!mCurrent) {
    308        return;
    309      }
    310      if (TextDirectiveUtil::GetBlockAncestorForNode(mCurrent) !=
    311          mBlockAncestor) {
    312        mCurrent = nullptr;
    313        return;
    314      }
    315      if (TextDirectiveUtil::NodeIsVisibleTextNode(*mCurrent) &&
    316          !TextDirectiveUtil::NodeIsPartOfNonSearchableSubTree(*mCurrent)) {
    317        break;
    318      }
    319    }
    320    MOZ_ASSERT_IF(mCurrent, mCurrent->IsText());
    321  }
    322 
    323  Text* operator*() { return Text::FromNodeOrNull(mCurrent); }
    324 
    325 private:
    326  nsINode* mCurrent = nullptr;
    327  nsINode* mBlockAncestor = nullptr;
    328 };
    329 
    330 template <TextScanDirection direction>
    331 /*static*/ RangeBoundary TextDirectiveUtil::FindNextBlockBoundary(
    332    const RangeBoundary& aRangeBoundary) {
    333  MOZ_ASSERT(aRangeBoundary.IsSetAndValid());
    334  nsINode* current = aRangeBoundary.GetContainer();
    335  uint32_t offset =
    336      direction == TextScanDirection::Left ? 0u : current->Length();
    337  for (auto* node : SameBlockVisibleTextNodeIterator<direction>(*current)) {
    338    if (!node) {
    339      continue;
    340    }
    341    current = node;
    342    offset = direction == TextScanDirection::Left ? 0u : current->Length();
    343  }
    344  return {current, offset};
    345 }
    346 
    347 template <TextScanDirection direction>
    348 /* static */ Maybe<RangeBoundary> TextDirectiveUtil::FindBlockBoundaryInRange(
    349    const AbstractRange& aRange) {
    350  if (aRange.Collapsed()) {
    351    return Nothing{};
    352  }
    353 
    354  RangeBoundary boundary = FindNextBlockBoundary<direction>(
    355      direction == TextScanDirection::Left ? aRange.EndRef()
    356                                           : aRange.StartRef());
    357 
    358  Maybe<int32_t> compare =
    359      direction == TextScanDirection::Left
    360          ? nsContentUtils::ComparePoints(aRange.StartRef(), boundary)
    361          : nsContentUtils::ComparePoints(boundary, aRange.EndRef());
    362  if (compare && *compare == -1) {
    363    // *compare == -1 means that the found boundary is after the range start
    364    // when looking left, and before the range end when looking right.
    365    // This means that there is a block boundary within the range.
    366    return Some(boundary);
    367  }
    368 
    369  return Nothing{};
    370 }
    371 
    372 template <TextScanDirection direction>
    373 /* static */ RangeBoundary TextDirectiveUtil::FindNextNonWhitespacePosition(
    374    const RangeBoundary& aPoint) {
    375  MOZ_ASSERT(aPoint.IsSetAndValid());
    376  nsINode* node = aPoint.GetChildAtOffset();
    377  uint32_t offset =
    378      direction == TextScanDirection::Left && node ? node->Length() : 0;
    379  if (!node) {
    380    node = aPoint.GetContainer();
    381    offset =
    382        *aPoint.Offset(RangeBoundary::OffsetFilter::kValidOrInvalidOffsets);
    383  }
    384  while (node->HasChildNodes()) {
    385    if constexpr (direction == TextScanDirection::Left) {
    386      node = node->GetLastChild();
    387      MOZ_ASSERT(node);
    388      offset = node->Length();
    389    } else {
    390      node = node->GetFirstChild();
    391      offset = 0;
    392    }
    393  }
    394 
    395  while (node) {
    396    const bool nodeIsInvisible =
    397        !TextDirectiveUtil::NodeIsVisibleTextNode(*node) ||
    398        TextDirectiveUtil::NodeIsPartOfNonSearchableSubTree(*node);
    399    const bool offsetIsAtEnd =
    400        (direction == TextScanDirection::Left && offset == 0) ||
    401        (direction == TextScanDirection::Right && offset == node->Length());
    402    if (nodeIsInvisible || offsetIsAtEnd) {
    403      if constexpr (direction == TextScanDirection::Left) {
    404        node = node->GetPrevNode();
    405        if (node) {
    406          offset = node->Length();
    407        }
    408      } else {
    409        node = node->GetNextNode();
    410        offset = 0;
    411      }
    412      continue;
    413    }
    414    const Text* text = Text::FromNode(node);
    415    MOZ_ASSERT(text);
    416 
    417    if (!TextDirectiveUtil::IsWhitespaceAtPosition(
    418            text, direction == TextScanDirection::Left ? offset - 1 : offset)) {
    419      return {node, offset};
    420    }
    421    offset += int(direction);
    422  }
    423 
    424  // If there seems to be no non-whitespace text in the document in
    425  // `direction`, it's safest to return the original point.
    426  return aPoint;
    427 }
    428 
/**
 * Moves `aRangeBoundary` to the nearest word boundary in `direction`.
 *
 * The text of the same-block visible text nodes is collected step by step
 * into one buffer, so that `intl::WordBreaker::FindWord()` can detect words
 * which span several text nodes. The returned boundary is expressed as text
 * node plus offset into that node.
 */
template <TextScanDirection direction>
/*static*/ RangeBoundary TextDirectiveUtil::FindWordBoundary(
    const RangeBoundary& aRangeBoundary) {
  MOZ_ASSERT(aRangeBoundary.IsSetAndValid());
  nsINode* node = aRangeBoundary.GetContainer();
  uint32_t offset = *aRangeBoundary.Offset(
      RangeBoundary::OffsetFilter::kValidOrInvalidOffsets);

  // Collect text content into this buffer.
  // The following algorithm pulls in the next text node if required
  // (if the next word boundary would be at the beginning/end of the text node)
  nsString textBuffer;
  for (Text* textNode : SameBlockVisibleTextNodeIterator<direction>(*node)) {
    // The first element of the iteration may not be a text node (nullptr).
    // Empty text nodes do not contribute anything.
    if (!textNode || textNode->Length() == 0) {
      continue;
    }
    nsString data;
    // NOTE(review): the offsets below are bounded by `textNode->Length()`, but
    // are used as indices into `data` as returned by `GetWholeText()`.
    // Confirm that both always have the same length here.
    textNode->GetWholeText(data);
    // Length of the buffer _before_ the text of this node is added. A non-zero
    // value means that text of a previous node has been collected already.
    const uint32_t bufferLength = textBuffer.Length();
    if constexpr (direction == TextScanDirection::Left) {
      textBuffer.Insert(data, 0);
    } else {
      textBuffer.Append(data);
    }
    if (bufferLength) {
      // This is a follow-up node. It is entered at the edge which is adjacent
      // to the previous node: its last character when looking left, its first
      // character when looking right.
      auto newOffset =
          direction == TextScanDirection::Left ? textNode->Length() - 1 : 0u;
      // If that character is whitespace or punctuation, the word ended at the
      // node boundary, and the result found in the previous node is final.
      if (nsContentUtils::IsHTMLWhitespace(data.CharAt(newOffset)) ||
          mozilla::IsPunctuationForWordSelect(data.CharAt(newOffset))) {
        break;
      }
      offset = newOffset;
    } else {
      // First node with content: make sure that the offset of the given
      // boundary is a valid character index in this node.
      offset = std::max(std::min(offset, textNode->Length() - 1), 0u);
    }
    if constexpr (direction == TextScanDirection::Right) {
      // if not at the beginning of a word, go left by one character.
      // Otherwise, if offset is already at the end of the word, the word
      // breaker will match the whitespace or the next word.
      if (offset &&
          !(nsContentUtils::IsHTMLWhitespace(data.CharAt(offset - 1)) ||
            mozilla::IsPunctuationForWordSelect(data.CharAt(offset - 1)))) {
        --offset;
      }
    } else {
      // Looking left: if the character at the offset is whitespace or
      // punctuation, go left by one character.
      if (offset &&
          (nsContentUtils::IsHTMLWhitespace(data.CharAt(offset)) ||
           mozilla::IsPunctuationForWordSelect(data.CharAt(offset)))) {
        --offset;
      }
    }
    // Translate the node offset into a position in the buffer. When looking
    // left, the text of this node is at the beginning of the buffer. When
    // looking right, it is located behind the previously collected text.
    const uint32_t pos =
        direction == TextScanDirection::Left ? offset : bufferLength + offset;
    const auto [wordStart, wordEnd] =
        intl::WordBreaker::FindWord(textBuffer, pos);
    // Translate the found word boundary back into an offset into this node.
    offset = direction == TextScanDirection::Left ? wordStart
                                                  : wordEnd - bufferLength;
    node = textNode;
    // Stop if the word boundary is located strictly inside of this node.
    // Otherwise (offset is 0 or at/behind the end of the node) continue with
    // the next node, because the word could continue there.
    if (offset && offset < textNode->Length()) {
      break;
    }
  }
  return {node, offset};
}
    493 
    494 template <TextScanDirection direction>
    495 void LogCommonSubstringLengths(const char* aFunc,
    496                               const nsAString& aReferenceString,
    497                               const nsTArray<nsString>& aTextContentPieces,
    498                               uint32_t aCommonLength) {
    499  if (!TextDirectiveUtil::ShouldLog()) {
    500    return;
    501  }
    502  nsString concatenatedTextContents;
    503  for (const auto& textContent : aTextContentPieces) {
    504    concatenatedTextContents.Append(textContent);
    505  }
    506  // the algorithm expects `aReferenceString` to be whitespace-compressed,
    507  // and ignores leading whitespace when looking at the DOM nodes. So,
    508  // whitespace needs to be compressed here as well.
    509  concatenatedTextContents.CompressWhitespace();
    510  const uint32_t maxLength =
    511      std::max(aReferenceString.Length(), concatenatedTextContents.Length());
    512  TEXT_FRAGMENT_LOG_FN("Direction: {}.", aFunc,
    513                       direction == TextScanDirection::Left ? "left" : "right");
    514 
    515  if constexpr (direction == TextScanDirection::Left) {
    516    TEXT_FRAGMENT_LOG_FN("Ref:    {:>{}}", aFunc,
    517                         NS_ConvertUTF16toUTF8(aReferenceString), maxLength);
    518    TEXT_FRAGMENT_LOG_FN("Other:  {:>{}}", aFunc,
    519                         NS_ConvertUTF16toUTF8(concatenatedTextContents),
    520                         maxLength);
    521    TEXT_FRAGMENT_LOG_FN(
    522        "Common: {:>{}} ({} chars)", aFunc,
    523        NS_ConvertUTF16toUTF8(Substring(aReferenceString, aCommonLength)),
    524        maxLength, aCommonLength);
    525  } else {
    526    TEXT_FRAGMENT_LOG_FN("Ref:    {:<{}}", aFunc,
    527                         NS_ConvertUTF16toUTF8(aReferenceString), maxLength);
    528    TEXT_FRAGMENT_LOG_FN("Other:  {:<{}}", aFunc,
    529                         NS_ConvertUTF16toUTF8(concatenatedTextContents),
    530                         maxLength);
    531    TEXT_FRAGMENT_LOG_FN(
    532        "Common: {:<{}} ({} chars)", aFunc,
    533        NS_ConvertUTF16toUTF8(Substring(aReferenceString, 0, aCommonLength)),
    534        maxLength, aCommonLength);
    535  }
    536 }
    537 
    538 template <TextScanDirection direction>
    539 /*static*/ nsTArray<uint32_t> TextDirectiveUtil::ComputeWordBoundaryDistances(
    540    const nsAString& aString) {
    541  AutoTArray<uint32_t, 32> wordBoundaryDistances;
    542  uint32_t pos =
    543      direction == TextScanDirection::Left ? aString.Length() - 1 : 0;
    544 
    545  // This loop relies on underflowing `pos` when going left as stop condition.
    546  while (pos < aString.Length()) {
    547    auto [wordBegin, wordEnd] = intl::WordBreaker::FindWord(aString, pos);
    548    pos = direction == TextScanDirection::Left ? wordBegin - 1 : wordEnd + 1;
    549    if (WordIsJustWhitespaceOrPunctuation(aString, wordBegin, wordEnd)) {
    550      // The WordBreaker algorithm breaks at punctuation, so that "foo bar. baz"
    551      // would be split into four words: [foo, bar, ., baz].
    552      // To avoid this, we skip words which are just whitespace or punctuation
    553      // and add the punctuation to the previous word, so that the above example
    554      // would yield three words: [foo, bar., baz].
    555      continue;
    556    }
    557 
    558    wordBoundaryDistances.AppendElement(direction == TextScanDirection::Left
    559                                            ? aString.Length() - wordBegin
    560                                            : wordEnd);
    561  }
    562  if (wordBoundaryDistances.IsEmpty() ||
    563      wordBoundaryDistances.LastElement() != aString.Length()) {
    564    wordBoundaryDistances.AppendElement(aString.Length());
    565  }
    566  return std::move(wordBoundaryDistances);
    567 }
    568 
/**
 * Computes how many characters of `aReferenceString` match the text which is
 * located at `aBoundaryPoint` in `direction`.
 *
 * When looking right, the beginning of `aReferenceString` is compared with
 * the text behind the boundary point. When looking left, the end of
 * `aReferenceString` is compared with the text in front of it. The text is
 * taken from the same-block visible text nodes. Runs of whitespace in the DOM
 * text match a single whitespace in the reference string, and the DOM
 * characters are case-folded before comparing.
 *
 * The returned length counts characters of `aReferenceString`.
 */
template <TextScanDirection direction>
/*static*/ uint32_t TextDirectiveUtil::ComputeCommonSubstringLength(
    const nsAString& aReferenceString, const RangeBoundary& aBoundaryPoint) {
  MOZ_ASSERT(aBoundaryPoint.IsSetAndValid());
  if (aReferenceString.IsEmpty()) {
    TEXT_FRAGMENT_LOG("Reference string is empty.");
    return 0;
  }

  MOZ_ASSERT(!nsContentUtils::IsHTMLWhitespace(aReferenceString.First()));
  MOZ_ASSERT(!nsContentUtils::IsHTMLWhitespace(aReferenceString.Last()));
  // Index of the next character of the reference string to compare. Starts at
  // the last character when looking left, otherwise at the first one.
  uint32_t referenceStringPosition =
      direction == TextScanDirection::Left ? aReferenceString.Length() - 1 : 0;

  bool foundMismatch = false;

  // `aReferenceString` is expected to have its whitespace compressed.
  // The raw text from the DOM nodes does not have compressed whitespace.
  // Therefore, the algorithm needs to skip multiple whitespace characters.
  // Setting this flag to true initially makes this algorithm tolerant to
  // preceding whitespace in the DOM nodes and the reference string.
  bool isInWhitespace = true;
  // Only filled if logging is enabled.
  nsTArray<nsString> textContentForLogging;
  for (Text* text : SameBlockVisibleTextNodeIterator<direction>(
           *aBoundaryPoint.GetContainer())) {
    // The first element of the iteration may not be a text node (nullptr).
    if (!text || text->Length() == 0) {
      continue;
    }
    // By default, a text node is entered at its near edge: the last character
    // when looking left, the first character when looking right.
    uint32_t offset =
        direction == TextScanDirection::Left ? text->Length() - 1 : 0;
    if (text == aBoundaryPoint.GetContainer()) {
      // The node of the boundary point is entered at the offset of the point.
      offset = *aBoundaryPoint.Offset(
          RangeBoundary::OffsetFilter::kValidOrInvalidOffsets);
      // NOTE(review): when looking left with a boundary offset of 0, `offset`
      // stays 0, and the comparison loop below still reads the character at
      // index 0, although there is no text in front of the boundary point in
      // this node. The logging code below treats `offset == 0` as "no text",
      // also for a boundary offset of 1. Confirm which one is intended.
      if (offset && direction == TextScanDirection::Left) {
        // when looking left, the offset is _behind_ the actual char.
        // Therefore, the value is decremented, and incremented when returning.
        --offset;
      }
    }
    if (TextDirectiveUtil::ShouldLog()) {
      // Remember the part of the text content which is looked at, for logging.
      nsString textContent;
      text->GetWholeText(textContent);
      if constexpr (direction == TextScanDirection::Left) {
        if (offset) {
          textContent = Substring(textContent, 0, offset + 1);
        } else {
          textContent.Truncate();
        }
      } else {
        textContent = Substring(textContent, offset);
      }
      textContentForLogging.AppendElement(std::move(textContent));
    }
    const CharacterDataBuffer* characterDataBuffer =
        text->GetCharacterDataBuffer();
    MOZ_DIAGNOSTIC_ASSERT(characterDataBuffer);
    const uint32_t textLength = characterDataBuffer->GetLength();
    // Both positions are unsigned. When looking left, moving in front of
    // index 0 wraps around to a large value, which fails the `<` checks and
    // therefore ends the loop as well.
    while (offset < textLength &&
           referenceStringPosition < aReferenceString.Length()) {
      char16_t ch = characterDataBuffer->CharAt(offset);
      char16_t refCh = aReferenceString.CharAt(referenceStringPosition);
      const bool chIsWhitespace = nsContentUtils::IsHTMLWhitespace(ch);
      const bool refChIsWhitespace = nsContentUtils::IsHTMLWhitespace(refCh);
      if (chIsWhitespace) {
        if (refChIsWhitespace) {
          // Whitespace on both sides: consume one character of each.
          offset += int(direction);
          referenceStringPosition += int(direction);
          isInWhitespace = true;
          continue;
        }
        if (isInWhitespace) {
          // Additional whitespace in the DOM text: only advance the DOM text.
          offset += int(direction);
          continue;
        }
      }
      isInWhitespace = false;
      // Only the DOM character is case-folded here.
      // NOTE(review): this presumably expects `aReferenceString` to be
      // case-folded by the caller - confirm.
      if (refCh == ToFoldedCase(ch)) {
        offset += int(direction);
        referenceStringPosition += int(direction);
        continue;
      }
      foundMismatch = true;
      break;
    }
    if (foundMismatch) {
      break;
    }
  }
  uint32_t commonLength = 0;
  if constexpr (direction == TextScanDirection::Left) {
    // `referenceStringPosition` is the index of the first character which did
    // _not_ match (wrapped around if the whole string matched). Incrementing
    // it yields the index of the first matching character.
    ++referenceStringPosition;
    commonLength = aReferenceString.Length() - referenceStringPosition;
    if (TextDirectiveUtil::ShouldLog()) {
      // The text contents were collected right-to-left, bring them back into
      // document order.
      textContentForLogging.Reverse();
    }
  } else {
    commonLength = referenceStringPosition;
  }
  LogCommonSubstringLengths<direction>(__FUNCTION__, aReferenceString,
                                       textContentForLogging, commonLength);
  return commonLength;
}
    671 
    672 template <TextScanDirection direction>
    673 /*static*/ uint32_t
    674 TextDirectiveUtil::RemoveFirstWordFromStringAndDistanceArray(
    675    nsAString& aString, nsTArray<uint32_t>& aWordDistances) {
    676  MOZ_DIAGNOSTIC_ASSERT(!aString.IsEmpty());
    677  MOZ_DIAGNOSTIC_ASSERT(aWordDistances.Length() > 1);
    678  auto lengthOfFirstWordPlusWhitespaceAndPunctuation = aWordDistances[0];
    679  auto chIsWhitespaceOrPunctuation = [&](uint32_t distance) {
    680    const char16_t ch = aString.CharAt(direction == TextScanDirection::Right
    681                                           ? distance
    682                                           : aString.Length() - distance - 1);
    683    return nsContentUtils::IsHTMLWhitespace(ch) ||
    684           mozilla::IsPunctuationForWordSelect(ch);
    685  };
    686  while (lengthOfFirstWordPlusWhitespaceAndPunctuation < aString.Length() &&
    687         chIsWhitespaceOrPunctuation(
    688             lengthOfFirstWordPlusWhitespaceAndPunctuation)) {
    689    ++lengthOfFirstWordPlusWhitespaceAndPunctuation;
    690  }
    691  if (lengthOfFirstWordPlusWhitespaceAndPunctuation == aString.Length()) {
    692    // In this case the string only contains whitespace or punctuation after the
    693    // first word.
    694    aWordDistances.Clear();
    695    return lengthOfFirstWordPlusWhitespaceAndPunctuation;
    696  }
    697  // Adjust all distances to be relative to the new start position.
    698  // In the case that the loop above jumps over punctuation which is actually
    699  // considered to be a word, the distance underflows (or becomes zero).
    700  // These obsolete distances are then removed.
    701  for (auto& wordDistance : aWordDistances) {
    702    wordDistance -= lengthOfFirstWordPlusWhitespaceAndPunctuation;
    703  }
    704  aWordDistances.RemoveElementsBy([&aString](uint32_t distance) {
    705    return distance == 0 || distance > aString.Length();
    706  });
    707  if constexpr (direction == TextScanDirection::Right) {
    708    aString = Substring(aString, lengthOfFirstWordPlusWhitespaceAndPunctuation);
    709  } else {
    710    aString = Substring(
    711        aString, 0,
    712        aString.Length() - lengthOfFirstWordPlusWhitespaceAndPunctuation);
    713  }
    714  return lengthOfFirstWordPlusWhitespaceAndPunctuation;
    715 }
    716 }  // namespace mozilla::dom
    717 
    718 #endif
	tor-browser The Tor Browser
	git clone https://git.dasho.dev/tor-browser.git
	Log \| Files \| Refs \| README \| LICENSE