TextDirectiveFinder.cpp (20221B)
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* vim:set ts=2 sw=2 sts=2 et cindent: */ 3 /* This Source Code Form is subject to the terms of the Mozilla Public 4 * License, v. 2.0. If a copy of the MPL was not distributed with this 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 6 #include "TextDirectiveFinder.h" 7 8 #include "Document.h" 9 #include "TextDirectiveUtil.h" 10 #include "fragmentdirectives_ffi_generated.h" 11 #include "mozilla/CycleCollectedUniquePtr.h" 12 #include "mozilla/ToString.h" 13 #include "mozilla/glean/DomMetrics.h" 14 #include "nsFind.h" 15 #include "nsRange.h" 16 17 namespace mozilla::dom { 18 19 TextDirectiveFinder::TextDirectiveFinder( 20 Document* aDocument, nsTArray<TextDirective>&& aTextDirectives) 21 : mDocument(WrapNotNull(aDocument)), 22 mUninvokedTextDirectives(std::move(aTextDirectives)) {} 23 24 TextDirectiveFinder::~TextDirectiveFinder() { 25 if (mFoundDirectiveCount) { 26 glean::dom_textfragment::find_directives.AccumulateRawDuration( 27 mFindTextDirectivesDuration); 28 29 TEXT_FRAGMENT_LOG("Found {} directives in {}ms", mFoundDirectiveCount, 30 mFindTextDirectivesDuration.ToMilliseconds()); 31 } 32 if (HasUninvokedDirectives()) { 33 mDocument->SetUseCounter(eUseCounter_custom_InvalidTextDirectives); 34 } 35 } 36 37 void TextDirectiveFinder::Traverse( 38 nsCycleCollectionTraversalCallback& aCallback) { 39 CycleCollectionNoteChild(aCallback, mDocument.get().get(), 40 "TextDirectiveFinder::mDocument", aCallback.Flags()); 41 } 42 43 bool TextDirectiveFinder::HasUninvokedDirectives() const { 44 return !mUninvokedTextDirectives.IsEmpty(); 45 } 46 47 nsTArray<RefPtr<nsRange>> TextDirectiveFinder::FindTextDirectivesInDocument() { 48 if (mUninvokedTextDirectives.IsEmpty()) { 49 return {}; 50 } 51 52 const TimeStamp start = TimeStamp::Now(); 53 54 auto uri = TextDirectiveUtil::ShouldLog() && mDocument->GetDocumentURI() 55 ? mDocument->GetDocumentURI()->GetSpecOrDefault() 56 : nsCString(); 57 TEXT_FRAGMENT_LOG("Trying to find text directives in document '{}'.", uri); 58 mDocument->FlushPendingNotifications(FlushType::Layout); 59 // https://wicg.github.io/scroll-to-text-fragment/#invoke-text-directives 60 // To invoke text directives, given as input a list of text directives text 61 // directives and a Document document, run these steps: 62 // 1. Let ranges be a list of ranges, initially empty. 63 nsTArray<RefPtr<nsRange>> textDirectiveRanges( 64 mUninvokedTextDirectives.Length()); 65 66 // Additionally (not mentioned in the spec), remove all text directives from 67 // the input list to keep only the ones that are not found. 68 // This code runs repeatedly during a page load, so it is possible that the 69 // match for a text directive has not been parsed yet. 70 nsTArray<TextDirective> uninvokedTextDirectives( 71 mUninvokedTextDirectives.Length()); 72 73 // 2. For each text directive directive of text directives: 74 for (TextDirective& textDirective : mUninvokedTextDirectives) { 75 // 2.1 If the result of running find a range from a text directive given 76 // directive and document is non-null, then append it to ranges. 77 if (RefPtr<nsRange> range = FindRangeForTextDirective(textDirective)) { 78 textDirectiveRanges.AppendElement(range); 79 TEXT_FRAGMENT_LOG("Found text directive '{}'", 80 ToString(textDirective).c_str()); 81 if (RefPtr startNode = range->GetStartContainer()) { 82 startNode->QueueAncestorRevealingAlgorithm(); 83 } 84 } else { 85 uninvokedTextDirectives.AppendElement(std::move(textDirective)); 86 } 87 } 88 if (TextDirectiveUtil::ShouldLog()) { 89 if (uninvokedTextDirectives.Length() == mUninvokedTextDirectives.Length()) { 90 TEXT_FRAGMENT_LOG("Did not find any of the {} uninvoked text directives.", 91 mUninvokedTextDirectives.Length()); 92 } else { 93 TEXT_FRAGMENT_LOG( 94 "Found {} of {} text directives in the document.", 95 mUninvokedTextDirectives.Length() - uninvokedTextDirectives.Length(), 96 mUninvokedTextDirectives.Length()); 97 } 98 if (uninvokedTextDirectives.IsEmpty()) { 99 TEXT_FRAGMENT_LOG("No uninvoked text directives left."); 100 } else { 101 TEXT_FRAGMENT_LOG("There are {} uninvoked text directives left:", 102 uninvokedTextDirectives.Length()); 103 for (size_t index = 0; index < uninvokedTextDirectives.Length(); 104 ++index) { 105 TEXT_FRAGMENT_LOG(" [{}]: {}", index, 106 ToString(uninvokedTextDirectives[index]).c_str()); 107 } 108 } 109 } 110 mUninvokedTextDirectives = std::move(uninvokedTextDirectives); 111 112 mFindTextDirectivesDuration += TimeStamp::Now() - start; 113 mFoundDirectiveCount += static_cast<int64_t>(textDirectiveRanges.Length()); 114 115 // 3. Return ranges. 116 return textDirectiveRanges; 117 } 118 119 RefPtr<nsRange> TextDirectiveFinder::FindRangeForTextDirective( 120 const TextDirective& aTextDirective) { 121 // This method follows this spec algorithm and applies some changes: 122 // https://wicg.github.io/scroll-to-text-fragment/#find-a-range-from-a-text-directive 123 TEXT_FRAGMENT_LOG("Find range for text directive '{}'.", 124 ToString(aTextDirective).c_str()); 125 // 1. Let searchRange be a range with start (document, 0) and end (document, 126 // document’s length) 127 ErrorResult rv; 128 RefPtr<nsRange> searchRange = 129 nsRange::Create(mDocument, 0, mDocument, mDocument->Length(), rv); 130 if (rv.Failed()) { 131 return nullptr; 132 } 133 134 nsContentUtils::NodeIndexCache nodeIndexCache; 135 RefPtr<nsFind> finder = new nsFind(); 136 finder->SetNodeIndexCache(&nodeIndexCache); 137 138 // 2. While searchRange is not collapsed: 139 while (!searchRange->Collapsed()) { 140 // 2.1. Let potentialMatch be null. 141 RefPtr<nsRange> potentialMatch; 142 // 2.2. If parsedValues’s prefix is not null: 143 if (!aTextDirective.prefix.IsEmpty()) { 144 // 2.2.1. Let prefixMatch be the the result of running the find a string 145 // in range steps with query parsedValues’s prefix, searchRange 146 // searchRange, wordStartBounded true and wordEndBounded false. 147 RefPtr<nsRange> prefixMatch = TextDirectiveUtil::FindStringInRange( 148 finder, searchRange->StartRef(), searchRange->EndRef(), 149 aTextDirective.prefix, true, false); 150 // 2.2.2. If prefixMatch is null, return null. 151 if (!prefixMatch) { 152 TEXT_FRAGMENT_LOG( 153 "Did not find prefix '{}'. The text directive does not exist " 154 "in the document.", 155 NS_ConvertUTF16toUTF8(aTextDirective.prefix)); 156 return nullptr; 157 } 158 TEXT_FRAGMENT_LOG("Did find prefix '{}'.", 159 NS_ConvertUTF16toUTF8(aTextDirective.prefix)); 160 161 // 2.2.3. Set searchRange’s start to the first boundary point after 162 // prefixMatch’s start 163 MOZ_DIAGNOSTIC_ASSERT(prefixMatch->GetStartContainer()->IsText()); 164 const RangeBoundary boundaryPoint = 165 TextDirectiveUtil::MoveToNextBoundaryPoint(prefixMatch->StartRef()); 166 if (!boundaryPoint.IsSetAndValid()) { 167 return nullptr; 168 } 169 searchRange->SetStart(boundaryPoint.AsRaw(), rv); 170 if (rv.Failed()) { 171 return nullptr; 172 } 173 174 // 2.2.4. Let matchRange be a range whose start is prefixMatch’s end and 175 // end is searchRange’s end. 176 // Note: 177 // The spec is very inefficient. The start text must _immediately_ follow 178 // after the end of the prefix. Therefore, it would be a huge waste to 179 // search until the end of the document. Since the following `start` 180 // attribute can't go across a block boundary, it is sufficient to do a 181 // search until the next block boundary. 182 RefPtr<nsRange> matchRange = nsRange::Create( 183 prefixMatch->GetEndContainer(), prefixMatch->EndOffset(), 184 searchRange->GetEndContainer(), searchRange->EndOffset(), rv); 185 if (rv.Failed()) { 186 return nullptr; 187 } 188 // 2.2.5. Advance matchRange’s start to the next non-whitespace position. 189 TextDirectiveUtil::AdvanceStartToNextNonWhitespacePosition(*matchRange); 190 // 2.2.6. If matchRange is collapsed return null. 191 // (This can happen if prefixMatch’s end or its subsequent non-whitespace 192 // position is at the end of the document.) 193 if (matchRange->Collapsed()) { 194 return nullptr; 195 } 196 // 2.2.7. Assert: matchRange’s start node is a Text node. 197 // (matchRange’s start now points to the next non-whitespace text data 198 // following a matched prefix.) 199 MOZ_ASSERT(matchRange->GetStartContainer()->IsText()); 200 // Set `matchRange`s end to the next block boundary. 201 auto nextBlockBoundary = 202 TextDirectiveUtil::FindNextBlockBoundary<TextScanDirection::Right>( 203 matchRange->StartRef()); 204 205 matchRange->SetEnd(nextBlockBoundary.AsRaw(), IgnoreErrors()); 206 207 // 2.2.8. Let mustEndAtWordBoundary be true if parsedValues’s end is 208 // non-null or parsedValues’s suffix is null, false otherwise. 209 const bool mustEndAtWordBoundary = 210 !aTextDirective.end.IsEmpty() || aTextDirective.suffix.IsEmpty(); 211 // 2.2.9. Set potentialMatch to the result of running the find a string in 212 // range steps with query parsedValues’s start, searchRange matchRange, 213 // wordStartBounded false, and wordEndBounded mustEndAtWordBoundary. 214 potentialMatch = TextDirectiveUtil::FindStringInRange( 215 finder, matchRange->StartRef(), matchRange->EndRef(), 216 aTextDirective.start, false, mustEndAtWordBoundary); 217 // 2.2.10. If potentialMatch is null, return null. 218 // Note: Because the search range for start only goes to the next block 219 // boundary, this statement is wrong. If potentialMatch is null, the loop 220 // needs to be restarted. 221 if (!potentialMatch) { 222 TEXT_FRAGMENT_LOG( 223 "Did not find start '{}' in the sub range of the end of `prefix` " 224 "and the next block boundary. Restarting outer loop.", 225 NS_ConvertUTF16toUTF8(aTextDirective.start)); 226 continue; 227 } 228 // 2.2.11. If potentialMatch’s start is not matchRange’s start, then 229 // continue. 230 // (In this case, we found a prefix but it was followed by something other 231 // than a matching text so we’ll continue searching for the next instance 232 // of prefix.) 233 if (potentialMatch->StartRef() != matchRange->StartRef()) { 234 TEXT_FRAGMENT_LOG( 235 "The prefix is not directly followed by the start element. " 236 "Restarting outer loop."); 237 continue; 238 } 239 TEXT_FRAGMENT_LOG("Did find start '{}'.", 240 NS_ConvertUTF16toUTF8(aTextDirective.start)); 241 } 242 // 2.3. Otherwise: 243 else { 244 // 2.3.1. Let mustEndAtWordBoundary be true if parsedValues’s end is 245 // non-null or parsedValues’s suffix is null, false otherwise. 246 const bool mustEndAtWordBoundary = 247 !aTextDirective.end.IsEmpty() || aTextDirective.suffix.IsEmpty(); 248 // 2.3.2. Set potentialMatch to the result of running the find a string in 249 // range steps with query parsedValues’s start, searchRange searchRange, 250 // wordStartBounded true, and wordEndBounded mustEndAtWordBoundary. 251 potentialMatch = TextDirectiveUtil::FindStringInRange( 252 finder, searchRange->StartRef(), searchRange->EndRef(), 253 aTextDirective.start, true, mustEndAtWordBoundary); 254 // 2.3.3. If potentialMatch is null, return null. 255 if (!potentialMatch) { 256 TEXT_FRAGMENT_LOG( 257 "Did not find start '{}'. The text directive does not exist " 258 "in the document.", 259 NS_ConvertUTF16toUTF8(aTextDirective.start)); 260 return nullptr; 261 } 262 if (potentialMatch && aTextDirective.end.IsEmpty() && 263 aTextDirective.suffix.IsEmpty()) { 264 return potentialMatch; 265 } 266 // 2.3.4. Set searchRange’s start to the first boundary point after 267 // potentialMatch’s start 268 MOZ_DIAGNOSTIC_ASSERT(potentialMatch->GetStartContainer()->IsText()); 269 const RangeBoundary newRangeBoundary = 270 TextDirectiveUtil::MoveToNextBoundaryPoint( 271 potentialMatch->StartRef()); 272 273 if (!newRangeBoundary.IsSetAndValid()) { 274 return nullptr; 275 } 276 searchRange->SetStart(newRangeBoundary.AsRaw(), rv); 277 if (rv.Failed()) { 278 return nullptr; 279 } 280 } 281 // 2.4. Let rangeEndSearchRange be a range whose start is potentialMatch’s 282 // end and whose end is searchRange’s end. 283 RefPtr<nsRange> rangeEndSearchRange = nsRange::Create( 284 potentialMatch->GetEndContainer(), potentialMatch->EndOffset(), 285 searchRange->GetEndContainer(), searchRange->EndOffset(), rv); 286 if (rv.Failed()) { 287 return nullptr; 288 } 289 // 2.5. While rangeEndSearchRange is not collapsed: 290 while (!rangeEndSearchRange->Collapsed()) { 291 // 2.5.1. If parsedValues’s end item is non-null, then: 292 if (!aTextDirective.end.IsEmpty()) { 293 // 2.5.1.1. Let mustEndAtWordBoundary be true if parsedValues’s suffix 294 // is null, false otherwise. 295 const bool mustEndAtWordBoundary = aTextDirective.suffix.IsEmpty(); 296 // 2.5.1.2. Let endMatch be the result of running the find a string in 297 // range steps with query parsedValues’s end, searchRange 298 // rangeEndSearchRange, wordStartBounded true, and wordEndBounded 299 // mustEndAtWordBoundary. 300 RefPtr<nsRange> endMatch = TextDirectiveUtil::FindStringInRange( 301 finder, rangeEndSearchRange->StartRef(), 302 rangeEndSearchRange->EndRef(), aTextDirective.end, true, 303 mustEndAtWordBoundary); 304 // 2.5.1.3. If endMatch is null then return null. 305 if (!endMatch) { 306 TEXT_FRAGMENT_LOG( 307 "Did not find end '{}'. The text directive does not exist " 308 "in the document.", 309 NS_ConvertUTF16toUTF8(aTextDirective.end)); 310 return nullptr; 311 } 312 // 2.5.1.4. Set potentialMatch’s end to endMatch’s end. 313 potentialMatch->SetEnd(endMatch->GetEndContainer(), 314 endMatch->EndOffset()); 315 } 316 // 2.5.2. Assert: potentialMatch is non-null, not collapsed and represents 317 // a range exactly containing an instance of matching text. 318 MOZ_ASSERT(potentialMatch && !potentialMatch->Collapsed()); 319 320 // 2.5.3. If parsedValues’s suffix is null, return potentialMatch. 321 if (aTextDirective.suffix.IsEmpty()) { 322 TEXT_FRAGMENT_LOG("Did find a match."); 323 return potentialMatch; 324 } 325 // 2.5.4. Let suffixRange be a range with start equal to potentialMatch’s 326 // end and end equal to searchRange’s end. 327 // Note: Again, this is highly inefficient. It's perfectly fine to only 328 // search up to the next block boundary. 329 RefPtr<nsRange> suffixRange = nsRange::Create( 330 potentialMatch->GetEndContainer(), potentialMatch->EndOffset(), 331 searchRange->GetEndContainer(), searchRange->EndOffset(), rv); 332 if (rv.Failed()) { 333 return nullptr; 334 } 335 // 2.5.5. Advance suffixRange's start to the next non-whitespace position. 336 TextDirectiveUtil::AdvanceStartToNextNonWhitespacePosition(*suffixRange); 337 auto nextBlockBoundary = 338 TextDirectiveUtil::FindNextBlockBoundary<TextScanDirection::Right>( 339 suffixRange->StartRef()); 340 suffixRange->SetEnd(nextBlockBoundary.AsRaw(), IgnoreErrors()); 341 342 // 2.5.6. Let suffixMatch be result of running the find a string in range 343 // steps with query parsedValue's suffix, searchRange suffixRange, 344 // wordStartBounded false, and wordEndBounded true. 345 RefPtr<nsRange> suffixMatch = TextDirectiveUtil::FindStringInRange( 346 finder, suffixRange->StartRef(), suffixRange->EndRef(), 347 aTextDirective.suffix, false, true); 348 // 2.5.7. If suffixMatch is null, return null. 349 // (If the suffix doesn't appear in the remaining text of the document, 350 // there's no possible way to make a match.) 351 // 2.5.8. If suffixMatch's start is suffixRange's start, return 352 // potentialMatch. 353 // 2.5.9. If parsedValue's end item is null then break; 354 // (If this is an exact match and the suffix doesn’t match, start 355 // searching for the next range start by breaking out of this loop without 356 // rangeEndSearchRange being collapsed. If we’re looking for a range 357 // match, we’ll continue iterating this inner loop since the range start 358 // will already be correct.) 359 // 2.5.10. Set rangeEndSearchRange's start to potentialMatch's end. 360 // (Otherwise, it is possible that we found the correct range start, but 361 // not the correct range end. Continue the inner loop to keep searching 362 // for another matching instance of rangeEnd.) 363 // Note: the steps above are not correct anymore because of restricting 364 // the suffix find to a sub range. 365 // Therefore, the code looks different, but _essentially_ does the same as 366 // what's described in the spec steps. 367 rangeEndSearchRange->SetStart(potentialMatch->GetEndContainer(), 368 potentialMatch->EndOffset()); 369 if (!suffixMatch) { 370 if (aTextDirective.end.IsEmpty()) { 371 TEXT_FRAGMENT_LOG( 372 "Did not find suffix in the sub range of the end of `start` and " 373 "the next block boundary. Restarting outer loop."); 374 break; 375 } 376 TEXT_FRAGMENT_LOG( 377 "Did not find suffix in the sub range of the end of `end` and the " 378 "next block boundary. Discarding this `end` candidate and " 379 "continuing inner loop."); 380 continue; 381 } 382 if (suffixMatch->GetStartContainer() == 383 suffixRange->GetStartContainer() && 384 suffixMatch->StartOffset() == suffixRange->StartOffset()) { 385 TEXT_FRAGMENT_LOG("Did find a match."); 386 return potentialMatch; 387 } 388 if (aTextDirective.end.IsEmpty()) { 389 TEXT_FRAGMENT_LOG( 390 "Did find suffix in the sub range of end of `start` to the end of " 391 "the next block boundary, but not at the start. Restarting outer " 392 "loop."); 393 break; 394 } 395 TEXT_FRAGMENT_LOG( 396 "Did find `suffix` in the sub range of end of `end` to the end of " 397 "the current block, but not at the start. Restarting inner loop."); 398 } 399 // 2.6. If rangeEndSearchRange is collapsed then: 400 if (rangeEndSearchRange->Collapsed()) { 401 // 2.6.1. Assert parsedValue's end item is non-null. 402 // (This can only happen for range matches due to the break for exact 403 // matches in step 9 of the above loop. If we couldn’t find a valid 404 // rangeEnd+suffix pair anywhere in the doc then there’s no possible way 405 // to make a match.) 406 // ---- 407 // XXX(:jjaschke): Not too sure about this. If a text directive is only 408 // defined by a (prefix +) start element, and the start element happens to 409 // be at the end of the document, `rangeEndSearchRange` could be 410 // collapsed. Therefore, the loop in section 2.5 does not run. Also, 411 // if there would be either an `end` and/or a `suffix`, this would assert 412 // instead of returning `nullptr`, indicating that there's no match. 413 // Instead, the following would make the algorithm more safe: 414 // if there is no end or suffix, the potential match is actually a match, 415 // so return it. Otherwise, the text directive can't be in the document, 416 // therefore return nullptr. 417 if (aTextDirective.end.IsEmpty() && aTextDirective.suffix.IsEmpty()) { 418 TEXT_FRAGMENT_LOG( 419 "rangeEndSearchRange was collapsed, no end or suffix " 420 "present. Returning a match"); 421 return potentialMatch; 422 } 423 TEXT_FRAGMENT_LOG( 424 "rangeEndSearchRange was collapsed, there is an end or " 425 "suffix. There can't be a match."); 426 return nullptr; 427 } 428 } 429 // 3. Return null. 430 TEXT_FRAGMENT_LOG("Did not find a match."); 431 return nullptr; 432 } 433 434 } // namespace mozilla::dom