TextDirectiveCreator.h (12574B)
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* vim:set ts=2 sw=2 sts=2 et cindent: */ 3 /* This Source Code Form is subject to the terms of the Mozilla Public 4 * License, v. 2.0. If a copy of the MPL was not distributed with this 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 6 7 #ifndef DOM_TEXTDIRECTIVECREATOR_H_ 8 #define DOM_TEXTDIRECTIVECREATOR_H_ 9 10 #include <tuple> 11 12 #include "RangeBoundary.h" 13 #include "TextDirectiveUtil.h" 14 #include "mozilla/RefPtr.h" 15 #include "mozilla/Result.h" 16 #include "mozilla/dom/fragmentdirectives_ffi_generated.h" 17 #include "nsStringFwd.h" 18 19 class nsRange; 20 21 namespace mozilla { 22 class ErrorResult; 23 } 24 25 namespace mozilla::dom { 26 class Document; 27 /** 28 * @brief Helper class to create a text directive string from a given `Range`. 29 * 30 * The class provides a public static creator function which encapsulates all 31 * necessary logic. 32 * This class serves as a base class that defines the main algorithm, and is 33 * subclassed twice for exact and range-based matching. 34 */ 35 class TextDirectiveCreator { 36 public: 37 /** 38 * @brief Static creator function. Takes a `Range` and creates a text 39 * directive string, if possible. 40 * 41 * @param aDocument The document in which `aInputRange` lives. 42 * @param aInputRange The input range. This range will not be modified. 43 * @param aWatchdog A watchdog to ensure the operation does not run 44 * longer than the predefined timeout. 45 * 46 * @return Returns a percent-encoded text directive string on success, an 47 * empty string if it's not possible to create a text fragment for the 48 * given range, or an error code. 49 */ 50 static Result<nsCString, ErrorResult> CreateTextDirectiveFromRange( 51 Document* aDocument, AbstractRange* aInputRange, 52 const TimeoutWatchdog* aWatchdog); 53 54 virtual ~TextDirectiveCreator(); 55 56 protected: 57 TextDirectiveCreator(Document* aDocument, AbstractRange* aRange, 58 const TimeoutWatchdog* aWatchdog); 59 60 /** 61 * @brief Ensures the boundary points of the range point to word boundaries. 62 * 63 * This function always returns a new range. 64 */ 65 static Result<RefPtr<AbstractRange>, ErrorResult> ExtendRangeToWordBoundaries( 66 AbstractRange* aRange); 67 68 /** 69 * @brief Determines whether exact or range-based matching should be used. 70 * 71 * This function searches for a block boundary in `aRange`, which requires 72 * range-based matching. If there is no block boundary, but the range content 73 * is longer than a threshold, range-based matching is used as well. 74 * This threshold is defined by the pref 75 * `dom.text_fragments.create_text_fragment.exact_match_max_length`. 76 * 77 */ 78 static Result<bool, ErrorResult> MustUseRangeBasedMatching( 79 AbstractRange* aRange); 80 81 /** 82 * @brief Creates an instance either for exact or range-based matching. 83 */ 84 static Result<UniquePtr<TextDirectiveCreator>, ErrorResult> CreateInstance( 85 Document* aDocument, AbstractRange* aRange, 86 const TimeoutWatchdog* aWatchdog); 87 88 /** 89 * @brief Collects text content surrounding the target range. 90 * 91 * The context terms are then stored both in normal and fold case form. 92 * 93 * Returns false if the algorithm cannot continue, for example if the text 94 * directive must use range-based matching because of its length, but the 95 * target range only consists of one word. 96 */ 97 virtual Result<bool, ErrorResult> CollectContextTerms() = 0; 98 99 /** 100 * @brief Common helper which collects the prefix term of the target range. 101 */ 102 Result<Ok, ErrorResult> CollectPrefixContextTerm(); 103 104 /** 105 * @brief Common helper which collects the suffix term of the target range. 106 */ 107 Result<Ok, ErrorResult> CollectSuffixContextTerm(); 108 109 /** 110 * @brief Collect the word begin / word end distances for the context terms. 111 * 112 * For start (for range-based matching) and suffix terms, the search direction 113 * is left-to-right. Therefore, the distances are based off the beginning of 114 * the context terms and use the word end boundary. 115 * 116 * For prefix and end (for range-based matching), the search direction is 117 * right-to-left. Therefore, the distances are based off the end of the 118 * context terms and use the word start boundary. 119 * 120 * The distances are always sorted, so that the first entry points to the 121 * nearest word boundary in search direction. 122 */ 123 virtual void CollectContextTermWordBoundaryDistances() = 0; 124 125 /** 126 * @brief Searches the document for other occurrences of the target range and 127 * converts the results into a comparable format. 128 * 129 * This method searches the partial document from the beginning up to the 130 * target range for occurrences of the target range content. 131 * This needs to be done differently based on whether matching is exact or 132 * range-based. For exact matching, the whole text content of the target range 133 * is searched for. For range-based matching, two search runs are required: 134 * One for the minimal `start` term (ie., the first word), which ends at the 135 * beginning of the target range. And one for the minimal `end` term (ie., the 136 * last word), which starts at the beginning of the target range and ends 137 * _before_ its end. 138 * The resulting lists of matching ranges do not exclude the target range. 139 */ 140 virtual Result<Ok, ErrorResult> FindAllMatchingCandidates() = 0; 141 142 /** 143 * @brief Find all occurrences of `aSearchQuery` in the partial document. 144 * 145 * This method uses `nsFind` to perform a case-insensitive search for 146 * `aSearchQuery` in the partial document from `aSearchStart` to `aSearchEnd`. 147 * 148 * @return List of `Range`s which have the case-insensitive-same content as 149 * `aSearchQuery`. 150 */ 151 Result<nsTArray<RefPtr<AbstractRange>>, ErrorResult> FindAllMatchingRanges( 152 const nsString& aSearchQuery, const RangeBoundary& aSearchStart, 153 const RangeBoundary& aSearchEnd); 154 155 /** 156 * @brief Creates the shortest possible text directive. 157 * 158 * @return A percent-encoded string containing a text directive. Returns empty 159 * string in cases where it's not possible to create a text directive. 160 */ 161 Result<nsCString, ErrorResult> CreateTextDirective(); 162 163 /** 164 * @brief Creates unique substring length arrays which are extended to the 165 * nearest word boundary. 166 */ 167 static std::tuple<nsTArray<uint32_t>, nsTArray<uint32_t>> 168 ExtendSubstringLengthsToWordBoundaries( 169 const nsTArray<std::tuple<uint32_t, uint32_t>>& aExactSubstringLengths, 170 const Span<const uint32_t>& aFirstWordPositions, 171 const Span<const uint32_t>& aSecondWordPositions); 172 173 /** 174 * @brief Test all combinations to identify the shortest text directive. 175 */ 176 virtual Maybe<TextDirective> FindShortestCombination() const = 0; 177 178 /** 179 * @brief Perform a brute-force optimization run to find the shortest 180 * combination of a combination of two context terms. 181 * 182 * Each combination of the extended values is compared against all exact 183 * values. It is only considered valid if at least one value is longer than 184 * the exact lengths. 185 * 186 * @param aExactWordLengths Array of tuples containing the exact 187 * common sub string lengths of this 188 * combination. 189 * @param aFirstExtendedToWordBoundaries All valid substring lengths for the 190 * first context term, extended to its 191 * next word boundary in reading 192 * direction. 193 * @param aSecondExtendedToWordBoundaries All valid substring lengths for the 194 * second context term, extended to its 195 * next word boundary in reading 196 * direction. 197 * @return A tuple of sub string lengths extended to word boundaries, which is 198 * the shortest allowed combination to eliminate all matches. 199 * Returns `Nothing` if it's not possible to eliminate all matches. 200 */ 201 static Maybe<std::tuple<uint32_t, uint32_t>> CheckAllCombinations( 202 const nsTArray<std::tuple<uint32_t, uint32_t>>& aExactWordLengths, 203 const nsTArray<uint32_t>& aFirstExtendedToWordBoundaries, 204 const nsTArray<uint32_t>& aSecondExtendedToWordBoundaries); 205 206 // The maximum length of the context terms around the target range. 207 // If a context term is longer, it will be truncated to this length. 208 // If -- which seems highly unlikely -- there is another match for the target 209 // range which happens to have a context term with the same content, it will 210 // essentially be ignored: 211 // The common length would be equal to this value, also the maximum common 212 // length extended to the word boundary. Therefore, the algorithm could not 213 // find a candidate which exceeds this length, therefore ignoring it 214 // altogether. 215 // 216 // If -- even more unlikely -- this condition would happen for _every_ context 217 // term, the algorithm would determine that it cannot create a text 218 // directive for the target range because it would be ambiguous. 219 static constexpr uint32_t kMaxContextTermLength = 1024; 220 221 nsString mPrefixContent; 222 nsString mPrefixFoldCaseContent; 223 nsTArray<uint32_t> mPrefixWordBeginDistances; 224 225 nsString mStartContent; 226 227 nsString mSuffixContent; 228 nsString mSuffixFoldCaseContent; 229 nsTArray<uint32_t> mSuffixWordEndDistances; 230 231 NotNull<RefPtr<Document>> mDocument; 232 NotNull<RefPtr<AbstractRange>> mRange; 233 234 NotNull<RefPtr<nsFind>> mFinder; 235 236 /** 237 * The watchdog ensures that the algorithm exits after a defined time 238 * duration, to ensure that the main thread is not blocked for too long. 239 * 240 * The duration is defined by the pref 241 * `dom.text_fragments.create_text_fragment.timeout`. 242 */ 243 RefPtr<const TimeoutWatchdog> mWatchdog; 244 245 nsContentUtils::NodeIndexCache mNodeIndexCache; 246 }; 247 248 /** 249 * @brief Creator class which creates a range-based text directive. 250 * 251 */ 252 class RangeBasedTextDirectiveCreator : public TextDirectiveCreator { 253 private: 254 using TextDirectiveCreator::TextDirectiveCreator; 255 256 Result<bool, ErrorResult> CollectContextTerms() override; 257 258 void CollectContextTermWordBoundaryDistances() override; 259 260 Result<Ok, ErrorResult> FindAllMatchingCandidates() override; 261 262 void FindStartMatchCommonSubstringLengths( 263 const nsTArray<RefPtr<AbstractRange>>& aMatchRanges); 264 265 void FindEndMatchCommonSubstringLengths( 266 const nsTArray<RefPtr<AbstractRange>>& aMatchRanges); 267 268 Maybe<TextDirective> FindShortestCombination() const override; 269 270 nsString mEndContent; 271 // The fold case contents for start and end terms don't include the first/last 272 // word of the start and end terms, because they are only used for finding the 273 // common lengths for other matches. 274 nsString mStartFoldCaseContent; 275 nsString mEndFoldCaseContent; 276 277 // These values are only passed into nsFind, therefore fold case is not 278 // required. 279 nsString mFirstWordOfStartContent; 280 nsString mLastWordOfEndContent; 281 282 // The lengths of the first/last word of the start and end terms, including 283 // whitespace to the next word. 284 // Therefore, these values are equal to 285 // `m[Start|End]Content.Length() - m[Start|End]FoldCaseContent.Length()`. 286 uint32_t mStartFirstWordLengthIncludingWhitespace = 0; 287 uint32_t mEndLastWordLengthIncludingWhitespace = 0; 288 289 // The distances are bound to the Fold Case Content strings, which do not 290 // include the first/last word of the start and end terms. 291 nsTArray<uint32_t> mStartWordEndDistances; 292 nsTArray<uint32_t> mEndWordBeginDistances; 293 294 nsTArray<std::tuple<uint32_t, uint32_t>> mStartMatchCommonSubstringLengths; 295 nsTArray<std::tuple<uint32_t, uint32_t>> mEndMatchCommonSubstringLengths; 296 }; 297 298 /** 299 * @brief Creator class which creates an exact match text directive. 300 * 301 */ 302 class ExactMatchTextDirectiveCreator : public TextDirectiveCreator { 303 private: 304 using TextDirectiveCreator::TextDirectiveCreator; 305 306 Result<bool, ErrorResult> CollectContextTerms() override; 307 308 void CollectContextTermWordBoundaryDistances() override; 309 310 Result<Ok, ErrorResult> FindAllMatchingCandidates() override; 311 312 void FindCommonSubstringLengths( 313 const nsTArray<RefPtr<AbstractRange>>& aMatchRanges); 314 315 Maybe<TextDirective> FindShortestCombination() const override; 316 317 nsTArray<std::tuple<uint32_t, uint32_t>> mCommonSubstringLengths; 318 }; 319 } // namespace mozilla::dom 320 #endif