tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

TextDirectiveFinder.cpp (20221B)


      1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
      2 /* vim:set ts=2 sw=2 sts=2 et cindent: */
      3 /* This Source Code Form is subject to the terms of the Mozilla Public
      4 * License, v. 2.0. If a copy of the MPL was not distributed with this
      5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      6 #include "TextDirectiveFinder.h"
      7 
      8 #include "Document.h"
      9 #include "TextDirectiveUtil.h"
     10 #include "fragmentdirectives_ffi_generated.h"
     11 #include "mozilla/CycleCollectedUniquePtr.h"
     12 #include "mozilla/ToString.h"
     13 #include "mozilla/glean/DomMetrics.h"
     14 #include "nsFind.h"
     15 #include "nsRange.h"
     16 
     17 namespace mozilla::dom {
     18 
     19 TextDirectiveFinder::TextDirectiveFinder(
     20    Document* aDocument, nsTArray<TextDirective>&& aTextDirectives)
     21    : mDocument(WrapNotNull(aDocument)),
     22      mUninvokedTextDirectives(std::move(aTextDirectives)) {}
     23 
     24 TextDirectiveFinder::~TextDirectiveFinder() {
     25  if (mFoundDirectiveCount) {
     26    glean::dom_textfragment::find_directives.AccumulateRawDuration(
     27        mFindTextDirectivesDuration);
     28 
     29    TEXT_FRAGMENT_LOG("Found {} directives in {}ms", mFoundDirectiveCount,
     30                      mFindTextDirectivesDuration.ToMilliseconds());
     31  }
     32  if (HasUninvokedDirectives()) {
     33    mDocument->SetUseCounter(eUseCounter_custom_InvalidTextDirectives);
     34  }
     35 }
     36 
     37 void TextDirectiveFinder::Traverse(
     38    nsCycleCollectionTraversalCallback& aCallback) {
     39  CycleCollectionNoteChild(aCallback, mDocument.get().get(),
     40                           "TextDirectiveFinder::mDocument", aCallback.Flags());
     41 }
     42 
     43 bool TextDirectiveFinder::HasUninvokedDirectives() const {
     44  return !mUninvokedTextDirectives.IsEmpty();
     45 }
     46 
     47 nsTArray<RefPtr<nsRange>> TextDirectiveFinder::FindTextDirectivesInDocument() {
     48  if (mUninvokedTextDirectives.IsEmpty()) {
     49    return {};
     50  }
     51 
     52  const TimeStamp start = TimeStamp::Now();
     53 
     54  auto uri = TextDirectiveUtil::ShouldLog() && mDocument->GetDocumentURI()
     55                 ? mDocument->GetDocumentURI()->GetSpecOrDefault()
     56                 : nsCString();
     57  TEXT_FRAGMENT_LOG("Trying to find text directives in document '{}'.", uri);
     58  mDocument->FlushPendingNotifications(FlushType::Layout);
     59  // https://wicg.github.io/scroll-to-text-fragment/#invoke-text-directives
     60  // To invoke text directives, given as input a list of text directives text
     61  // directives and a Document document, run these steps:
     62  // 1. Let ranges be a list of ranges, initially empty.
     63  nsTArray<RefPtr<nsRange>> textDirectiveRanges(
     64      mUninvokedTextDirectives.Length());
     65 
     66  // Additionally (not mentioned in the spec), remove all text directives from
     67  // the input list to keep only the ones that are not found.
     68  // This code runs repeatedly during a page load, so it is possible that the
     69  // match for a text directive has not been parsed yet.
     70  nsTArray<TextDirective> uninvokedTextDirectives(
     71      mUninvokedTextDirectives.Length());
     72 
     73  // 2. For each text directive directive of text directives:
     74  for (TextDirective& textDirective : mUninvokedTextDirectives) {
     75    // 2.1 If the result of running find a range from a text directive given
     76    //     directive and document is non-null, then append it to ranges.
     77    if (RefPtr<nsRange> range = FindRangeForTextDirective(textDirective)) {
     78      textDirectiveRanges.AppendElement(range);
     79      TEXT_FRAGMENT_LOG("Found text directive '{}'",
     80                        ToString(textDirective).c_str());
     81      if (RefPtr startNode = range->GetStartContainer()) {
     82        startNode->QueueAncestorRevealingAlgorithm();
     83      }
     84    } else {
     85      uninvokedTextDirectives.AppendElement(std::move(textDirective));
     86    }
     87  }
     88  if (TextDirectiveUtil::ShouldLog()) {
     89    if (uninvokedTextDirectives.Length() == mUninvokedTextDirectives.Length()) {
     90      TEXT_FRAGMENT_LOG("Did not find any of the {} uninvoked text directives.",
     91                        mUninvokedTextDirectives.Length());
     92    } else {
     93      TEXT_FRAGMENT_LOG(
     94          "Found {} of {} text directives in the document.",
     95          mUninvokedTextDirectives.Length() - uninvokedTextDirectives.Length(),
     96          mUninvokedTextDirectives.Length());
     97    }
     98    if (uninvokedTextDirectives.IsEmpty()) {
     99      TEXT_FRAGMENT_LOG("No uninvoked text directives left.");
    100    } else {
    101      TEXT_FRAGMENT_LOG("There are {} uninvoked text directives left:",
    102                        uninvokedTextDirectives.Length());
    103      for (size_t index = 0; index < uninvokedTextDirectives.Length();
    104           ++index) {
    105        TEXT_FRAGMENT_LOG(" [{}]: {}", index,
    106                          ToString(uninvokedTextDirectives[index]).c_str());
    107      }
    108    }
    109  }
    110  mUninvokedTextDirectives = std::move(uninvokedTextDirectives);
    111 
    112  mFindTextDirectivesDuration += TimeStamp::Now() - start;
    113  mFoundDirectiveCount += static_cast<int64_t>(textDirectiveRanges.Length());
    114 
    115  // 3. Return ranges.
    116  return textDirectiveRanges;
    117 }
    118 
    119 RefPtr<nsRange> TextDirectiveFinder::FindRangeForTextDirective(
    120    const TextDirective& aTextDirective) {
    121  // This method follows this spec algorithm and applies some changes:
    122  // https://wicg.github.io/scroll-to-text-fragment/#find-a-range-from-a-text-directive
    123  TEXT_FRAGMENT_LOG("Find range for text directive '{}'.",
    124                    ToString(aTextDirective).c_str());
    125  // 1. Let searchRange be a range with start (document, 0) and end (document,
    126  // document’s length)
    127  ErrorResult rv;
    128  RefPtr<nsRange> searchRange =
    129      nsRange::Create(mDocument, 0, mDocument, mDocument->Length(), rv);
    130  if (rv.Failed()) {
    131    return nullptr;
    132  }
    133 
    134  nsContentUtils::NodeIndexCache nodeIndexCache;
    135  RefPtr<nsFind> finder = new nsFind();
    136  finder->SetNodeIndexCache(&nodeIndexCache);
    137 
    138  // 2. While searchRange is not collapsed:
    139  while (!searchRange->Collapsed()) {
    140    // 2.1. Let potentialMatch be null.
    141    RefPtr<nsRange> potentialMatch;
    142    // 2.2. If parsedValues’s prefix is not null:
    143    if (!aTextDirective.prefix.IsEmpty()) {
    144      // 2.2.1. Let prefixMatch be the the result of running the find a string
    145      // in range steps with query parsedValues’s prefix, searchRange
    146      // searchRange, wordStartBounded true and wordEndBounded false.
    147      RefPtr<nsRange> prefixMatch = TextDirectiveUtil::FindStringInRange(
    148          finder, searchRange->StartRef(), searchRange->EndRef(),
    149          aTextDirective.prefix, true, false);
    150      // 2.2.2. If prefixMatch is null, return null.
    151      if (!prefixMatch) {
    152        TEXT_FRAGMENT_LOG(
    153            "Did not find prefix '{}'. The text directive does not exist "
    154            "in the document.",
    155            NS_ConvertUTF16toUTF8(aTextDirective.prefix));
    156        return nullptr;
    157      }
    158      TEXT_FRAGMENT_LOG("Did find prefix '{}'.",
    159                        NS_ConvertUTF16toUTF8(aTextDirective.prefix));
    160 
    161      // 2.2.3. Set searchRange’s start to the first boundary point after
    162      // prefixMatch’s start
    163      MOZ_DIAGNOSTIC_ASSERT(prefixMatch->GetStartContainer()->IsText());
    164      const RangeBoundary boundaryPoint =
    165          TextDirectiveUtil::MoveToNextBoundaryPoint(prefixMatch->StartRef());
    166      if (!boundaryPoint.IsSetAndValid()) {
    167        return nullptr;
    168      }
    169      searchRange->SetStart(boundaryPoint.AsRaw(), rv);
    170      if (rv.Failed()) {
    171        return nullptr;
    172      }
    173 
    174      // 2.2.4. Let matchRange be a range whose start is prefixMatch’s end and
    175      // end is searchRange’s end.
    176      // Note:
    177      // The spec is very inefficient. The start text must _immediately_ follow
    178      // after the end of the prefix. Therefore, it would be a huge waste to
    179      // search until the end of the document. Since the following `start`
    180      // attribute can't go across a block boundary, it is sufficient to do a
    181      // search until the next block boundary.
    182      RefPtr<nsRange> matchRange = nsRange::Create(
    183          prefixMatch->GetEndContainer(), prefixMatch->EndOffset(),
    184          searchRange->GetEndContainer(), searchRange->EndOffset(), rv);
    185      if (rv.Failed()) {
    186        return nullptr;
    187      }
    188      // 2.2.5. Advance matchRange’s start to the next non-whitespace position.
    189      TextDirectiveUtil::AdvanceStartToNextNonWhitespacePosition(*matchRange);
    190      // 2.2.6. If matchRange is collapsed return null.
    191      // (This can happen if prefixMatch’s end or its subsequent non-whitespace
    192      // position is at the end of the document.)
    193      if (matchRange->Collapsed()) {
    194        return nullptr;
    195      }
    196      // 2.2.7. Assert: matchRange’s start node is a Text node.
    197      // (matchRange’s start now points to the next non-whitespace text data
    198      // following a matched prefix.)
    199      MOZ_ASSERT(matchRange->GetStartContainer()->IsText());
    200      // Set `matchRange`s end to the next block boundary.
    201      auto nextBlockBoundary =
    202          TextDirectiveUtil::FindNextBlockBoundary<TextScanDirection::Right>(
    203              matchRange->StartRef());
    204 
    205      matchRange->SetEnd(nextBlockBoundary.AsRaw(), IgnoreErrors());
    206 
    207      // 2.2.8. Let mustEndAtWordBoundary be true if parsedValues’s end is
    208      // non-null or parsedValues’s suffix is null, false otherwise.
    209      const bool mustEndAtWordBoundary =
    210          !aTextDirective.end.IsEmpty() || aTextDirective.suffix.IsEmpty();
    211      // 2.2.9. Set potentialMatch to the result of running the find a string in
    212      // range steps with query parsedValues’s start, searchRange matchRange,
    213      // wordStartBounded false, and wordEndBounded mustEndAtWordBoundary.
    214      potentialMatch = TextDirectiveUtil::FindStringInRange(
    215          finder, matchRange->StartRef(), matchRange->EndRef(),
    216          aTextDirective.start, false, mustEndAtWordBoundary);
    217      // 2.2.10. If potentialMatch is null, return null.
    218      // Note: Because the search range for start only goes to the next block
    219      // boundary, this statement is wrong. If potentialMatch is null, the loop
    220      // needs to be restarted.
    221      if (!potentialMatch) {
    222        TEXT_FRAGMENT_LOG(
    223            "Did not find start '{}' in the sub range of the end of `prefix` "
    224            "and the next block boundary. Restarting outer loop.",
    225            NS_ConvertUTF16toUTF8(aTextDirective.start));
    226        continue;
    227      }
    228      // 2.2.11. If potentialMatch’s start is not matchRange’s start, then
    229      // continue.
    230      // (In this case, we found a prefix but it was followed by something other
    231      // than a matching text so we’ll continue searching for the next instance
    232      // of prefix.)
    233      if (potentialMatch->StartRef() != matchRange->StartRef()) {
    234        TEXT_FRAGMENT_LOG(
    235            "The prefix is not directly followed by the start element. "
    236            "Restarting outer loop.");
    237        continue;
    238      }
    239      TEXT_FRAGMENT_LOG("Did find start '{}'.",
    240                        NS_ConvertUTF16toUTF8(aTextDirective.start));
    241    }
    242    // 2.3. Otherwise:
    243    else {
    244      // 2.3.1. Let mustEndAtWordBoundary be true if parsedValues’s end is
    245      // non-null or parsedValues’s suffix is null, false otherwise.
    246      const bool mustEndAtWordBoundary =
    247          !aTextDirective.end.IsEmpty() || aTextDirective.suffix.IsEmpty();
    248      // 2.3.2. Set potentialMatch to the result of running the find a string in
    249      // range steps with query parsedValues’s start, searchRange searchRange,
    250      // wordStartBounded true, and wordEndBounded mustEndAtWordBoundary.
    251      potentialMatch = TextDirectiveUtil::FindStringInRange(
    252          finder, searchRange->StartRef(), searchRange->EndRef(),
    253          aTextDirective.start, true, mustEndAtWordBoundary);
    254      // 2.3.3. If potentialMatch is null, return null.
    255      if (!potentialMatch) {
    256        TEXT_FRAGMENT_LOG(
    257            "Did not find start '{}'. The text directive does not exist "
    258            "in the document.",
    259            NS_ConvertUTF16toUTF8(aTextDirective.start));
    260        return nullptr;
    261      }
    262      if (potentialMatch && aTextDirective.end.IsEmpty() &&
    263          aTextDirective.suffix.IsEmpty()) {
    264        return potentialMatch;
    265      }
    266      // 2.3.4. Set searchRange’s start to the first boundary point after
    267      // potentialMatch’s start
    268      MOZ_DIAGNOSTIC_ASSERT(potentialMatch->GetStartContainer()->IsText());
    269      const RangeBoundary newRangeBoundary =
    270          TextDirectiveUtil::MoveToNextBoundaryPoint(
    271              potentialMatch->StartRef());
    272 
    273      if (!newRangeBoundary.IsSetAndValid()) {
    274        return nullptr;
    275      }
    276      searchRange->SetStart(newRangeBoundary.AsRaw(), rv);
    277      if (rv.Failed()) {
    278        return nullptr;
    279      }
    280    }
    281    // 2.4. Let rangeEndSearchRange be a range whose start is potentialMatch’s
    282    // end and whose end is searchRange’s end.
    283    RefPtr<nsRange> rangeEndSearchRange = nsRange::Create(
    284        potentialMatch->GetEndContainer(), potentialMatch->EndOffset(),
    285        searchRange->GetEndContainer(), searchRange->EndOffset(), rv);
    286    if (rv.Failed()) {
    287      return nullptr;
    288    }
    289    // 2.5. While rangeEndSearchRange is not collapsed:
    290    while (!rangeEndSearchRange->Collapsed()) {
    291      // 2.5.1. If parsedValues’s end item is non-null, then:
    292      if (!aTextDirective.end.IsEmpty()) {
    293        // 2.5.1.1. Let mustEndAtWordBoundary be true if parsedValues’s suffix
    294        // is null, false otherwise.
    295        const bool mustEndAtWordBoundary = aTextDirective.suffix.IsEmpty();
    296        // 2.5.1.2. Let endMatch be the result of running the find a string in
    297        // range steps with query parsedValues’s end, searchRange
    298        // rangeEndSearchRange, wordStartBounded true, and wordEndBounded
    299        // mustEndAtWordBoundary.
    300        RefPtr<nsRange> endMatch = TextDirectiveUtil::FindStringInRange(
    301            finder, rangeEndSearchRange->StartRef(),
    302            rangeEndSearchRange->EndRef(), aTextDirective.end, true,
    303            mustEndAtWordBoundary);
    304        // 2.5.1.3. If endMatch is null then return null.
    305        if (!endMatch) {
    306          TEXT_FRAGMENT_LOG(
    307              "Did not find end '{}'. The text directive does not exist "
    308              "in the document.",
    309              NS_ConvertUTF16toUTF8(aTextDirective.end));
    310          return nullptr;
    311        }
    312        // 2.5.1.4. Set potentialMatch’s end to endMatch’s end.
    313        potentialMatch->SetEnd(endMatch->GetEndContainer(),
    314                               endMatch->EndOffset());
    315      }
    316      // 2.5.2. Assert: potentialMatch is non-null, not collapsed and represents
    317      // a range exactly containing an instance of matching text.
    318      MOZ_ASSERT(potentialMatch && !potentialMatch->Collapsed());
    319 
    320      // 2.5.3. If parsedValues’s suffix is null, return potentialMatch.
    321      if (aTextDirective.suffix.IsEmpty()) {
    322        TEXT_FRAGMENT_LOG("Did find a match.");
    323        return potentialMatch;
    324      }
    325      // 2.5.4. Let suffixRange be a range with start equal to potentialMatch’s
    326      // end and end equal to searchRange’s end.
    327      // Note: Again, this is highly inefficient. It's perfectly fine to only
    328      // search up to the next block boundary.
    329      RefPtr<nsRange> suffixRange = nsRange::Create(
    330          potentialMatch->GetEndContainer(), potentialMatch->EndOffset(),
    331          searchRange->GetEndContainer(), searchRange->EndOffset(), rv);
    332      if (rv.Failed()) {
    333        return nullptr;
    334      }
    335      // 2.5.5. Advance suffixRange's start to the next non-whitespace position.
    336      TextDirectiveUtil::AdvanceStartToNextNonWhitespacePosition(*suffixRange);
    337      auto nextBlockBoundary =
    338          TextDirectiveUtil::FindNextBlockBoundary<TextScanDirection::Right>(
    339              suffixRange->StartRef());
    340      suffixRange->SetEnd(nextBlockBoundary.AsRaw(), IgnoreErrors());
    341 
    342      // 2.5.6. Let suffixMatch be result of running the find a string in range
    343      // steps with query parsedValue's suffix, searchRange suffixRange,
    344      // wordStartBounded false, and wordEndBounded true.
    345      RefPtr<nsRange> suffixMatch = TextDirectiveUtil::FindStringInRange(
    346          finder, suffixRange->StartRef(), suffixRange->EndRef(),
    347          aTextDirective.suffix, false, true);
    348      // 2.5.7. If suffixMatch is null, return null.
    349      // (If the suffix doesn't appear in the remaining text of the document,
    350      // there's no possible way to make a match.)
    351      // 2.5.8. If suffixMatch's start is suffixRange's start, return
    352      // potentialMatch.
    353      // 2.5.9. If parsedValue's end item is null then break;
    354      // (If this is an exact match and the suffix doesn’t match, start
    355      // searching for the next range start by breaking out of this loop without
    356      // rangeEndSearchRange being collapsed. If we’re looking for a range
    357      // match, we’ll continue iterating this inner loop since the range start
    358      // will already be correct.)
    359      // 2.5.10. Set rangeEndSearchRange's start to potentialMatch's end.
    360      // (Otherwise, it is possible that we found the correct range start, but
    361      // not the correct range end. Continue the inner loop to keep searching
    362      // for another matching instance of rangeEnd.)
    363      // Note: the steps above are not correct anymore because of restricting
    364      // the suffix find to a sub range.
    365      // Therefore, the code looks different, but _essentially_ does the same as
    366      // what's described in the spec steps.
    367      rangeEndSearchRange->SetStart(potentialMatch->GetEndContainer(),
    368                                    potentialMatch->EndOffset());
    369      if (!suffixMatch) {
    370        if (aTextDirective.end.IsEmpty()) {
    371          TEXT_FRAGMENT_LOG(
    372              "Did not find suffix in the sub range of the end of `start` and "
    373              "the next block boundary. Restarting outer loop.");
    374          break;
    375        }
    376        TEXT_FRAGMENT_LOG(
    377            "Did not find suffix in the sub range of the end of `end` and the "
    378            "next block boundary. Discarding this `end` candidate and "
    379            "continuing inner loop.");
    380        continue;
    381      }
    382      if (suffixMatch->GetStartContainer() ==
    383              suffixRange->GetStartContainer() &&
    384          suffixMatch->StartOffset() == suffixRange->StartOffset()) {
    385        TEXT_FRAGMENT_LOG("Did find a match.");
    386        return potentialMatch;
    387      }
    388      if (aTextDirective.end.IsEmpty()) {
    389        TEXT_FRAGMENT_LOG(
    390            "Did find suffix in the sub range of end of `start` to the end of "
    391            "the next block boundary, but not at the start. Restarting outer "
    392            "loop.");
    393        break;
    394      }
    395      TEXT_FRAGMENT_LOG(
    396          "Did find `suffix` in the sub range of end of `end` to the end of "
    397          "the current block, but not at the start. Restarting inner loop.");
    398    }
    399    // 2.6. If rangeEndSearchRange is collapsed then:
    400    if (rangeEndSearchRange->Collapsed()) {
    401      // 2.6.1. Assert parsedValue's end item is non-null.
    402      // (This can only happen for range matches due to the break for exact
    403      // matches in step 9 of the above loop. If we couldn’t find a valid
    404      // rangeEnd+suffix pair anywhere in the doc then there’s no possible way
    405      // to make a match.)
    406      // ----
    407      // XXX(:jjaschke): Not too sure about this. If a text directive is only
    408      // defined by a (prefix +) start element, and the start element happens to
    409      // be at the end of the document, `rangeEndSearchRange` could be
    410      // collapsed. Therefore, the loop in section 2.5 does not run. Also,
    411      // if there would be either an `end` and/or a `suffix`, this would assert
    412      // instead of returning `nullptr`, indicating that there's no match.
    413      // Instead, the following would make the algorithm more safe:
    414      // if there is no end or suffix, the potential match is actually a match,
    415      // so return it. Otherwise, the text directive can't be in the document,
    416      // therefore return nullptr.
    417      if (aTextDirective.end.IsEmpty() && aTextDirective.suffix.IsEmpty()) {
    418        TEXT_FRAGMENT_LOG(
    419            "rangeEndSearchRange was collapsed, no end or suffix "
    420            "present. Returning a match");
    421        return potentialMatch;
    422      }
    423      TEXT_FRAGMENT_LOG(
    424          "rangeEndSearchRange was collapsed, there is an end or "
    425          "suffix. There can't be a match.");
    426      return nullptr;
    427    }
    428  }
    429  // 3. Return null.
    430  TEXT_FRAGMENT_LOG("Did not find a match.");
    431  return nullptr;
    432 }
    433 
    434 }  // namespace mozilla::dom