CharacterDataBuffer.h (23388B)
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */ 3 /* This Source Code Form is subject to the terms of the Mozilla Public 4 * License, v. 2.0. If a copy of the MPL was not distributed with this 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 6 7 /* 8 * A class which represents a buffer of text (eg inside a text 9 * node); if only codepoints below 256 are used, the text is stored as 10 * a char*; otherwise the text is stored as a char16_t* 11 */ 12 13 #ifndef mozilla_dom_CharacterDataBuffer_h 14 #define mozilla_dom_CharacterDataBuffer_h 15 16 #include "mozilla/Attributes.h" 17 #include "mozilla/EnumSet.h" 18 #include "mozilla/MemoryReporting.h" 19 #include "mozilla/StringBuffer.h" 20 #include "nsCharTraits.h" 21 #include "nsISupportsImpl.h" 22 #include "nsReadableUtils.h" 23 #include "nsString.h" 24 25 // XXX should this normalize the code to keep a \u0000 at the end? 26 27 namespace mozilla::dom { 28 /** 29 * A buffer of text. If mIs2b is 1 then the m2b pointer is valid 30 * otherwise the m1b pointer is valid. If m1b is used then each byte 31 * of data represents a single ucs2 character with the high byte being 32 * zero. 33 * 34 * This class does not have a virtual destructor therefore it is not 35 * meant to be subclassed. 36 */ 37 class CharacterDataBuffer final { 38 private: 39 constexpr static unsigned char kFormFeed = '\f'; 40 constexpr static unsigned char kNewLine = '\n'; 41 constexpr static unsigned char kCarriageReturn = '\r'; 42 constexpr static unsigned char kTab = '\t'; 43 constexpr static unsigned char kSpace = ' '; 44 constexpr static unsigned char kNBSP = 0xA0; 45 46 public: 47 static nsresult Init(); 48 static void Shutdown(); 49 50 /** 51 * Default constructor. Initialize the buffer to be empty. 52 */ 53 CharacterDataBuffer() : m1b(nullptr), mAllBits(0) { 54 MOZ_COUNT_CTOR(CharacterDataBuffer); 55 NS_ASSERTION(sizeof(FragmentBits) == 4, "Bad field packing!"); 56 } 57 58 ~CharacterDataBuffer(); 59 60 /** 61 * Change the contents of this buffer to be a copy of the 62 * the argument buffer, or to "" if unable to allocate enough memory. 63 */ 64 CharacterDataBuffer& operator=(const CharacterDataBuffer& aOther); 65 66 /** 67 * Return true if this buffer is represented by char16_t data 68 */ 69 bool Is2b() const { return mState.mIs2b; } 70 71 /** 72 * Return true if this buffer contains Bidi text 73 * For performance reasons this flag is only set if explicitely requested (by 74 * setting the aUpdateBidi argument on SetTo or Append to true). 75 */ 76 bool IsBidi() const { return mState.mIsBidi; } 77 78 /** 79 * Get a pointer to constant char16_t data. 80 */ 81 const char16_t* Get2b() const { 82 MOZ_ASSERT(Is2b(), "not 2b text"); 83 return static_cast<char16_t*>(m2b->Data()); 84 } 85 86 /** 87 * Get a pointer to constant char data. 88 * NOTE: CharacterDataBuffer treat the 1b buffer as an array of unsigned 89 * chars. Therefore, Get1b() is not good one for looking for a character 90 * between 0x80 91 * - 0xFF in the buffer. 92 */ 93 const char* Get1b() const { 94 NS_ASSERTION(!Is2b(), "not 1b text"); 95 return (const char*)m1b; 96 } 97 /** 98 * Get a pointer to constant unsigned char data. 99 */ 100 const unsigned char* GetUnsigned1b() const { 101 NS_ASSERTION(!Is2b(), "not 1b text"); 102 return (const unsigned char*)m1b; 103 } 104 105 /** 106 * Get the length of the buffer. The length is the number of logical 107 * characters, not the number of bytes to store the characters. 108 */ 109 uint32_t GetLength() const { return mState.mLength; } 110 111 #define NS_MAX_CHARACTER_DATA_BUFFER_LENGTH (static_cast<uint32_t>(0x1FFFFFFF)) 112 113 bool CanGrowBy(size_t n) const { 114 return n < (1 << 29) && mState.mLength + n < (1 << 29); 115 } 116 117 /** 118 * Change the contents of this buffer to be a copy of the given 119 * buffer. If aUpdateBidi is true, contents of the buffer will be scanned, 120 * and mState.mIsBidi will be turned on if it includes any Bidi characters. 121 * If aForce2b is true, aBuffer will be stored as char16_t as is. Then, 122 * you can access the value faster but may waste memory if all characters 123 * are less than U+0100. 124 */ 125 bool SetTo(const char16_t* aBuffer, uint32_t aLength, bool aUpdateBidi, 126 bool aForce2b); 127 128 bool SetTo(const nsString& aString, bool aUpdateBidi, bool aForce2b) { 129 if (MOZ_UNLIKELY(aString.Length() > NS_MAX_CHARACTER_DATA_BUFFER_LENGTH)) { 130 return false; 131 } 132 ReleaseBuffer(); 133 if (aForce2b && !aUpdateBidi) { 134 if (mozilla::StringBuffer* buffer = aString.GetStringBuffer()) { 135 NS_ADDREF(m2b = buffer); 136 mState.mInHeap = true; 137 mState.mIs2b = true; 138 mState.mLength = aString.Length(); 139 return true; 140 } 141 } 142 143 return SetTo(aString.get(), aString.Length(), aUpdateBidi, aForce2b); 144 } 145 146 /** 147 * Append aData to the end of this buffer. If aUpdateBidi is true, contents 148 * of the buffer will be scanned, and mState.mIsBidi will be turned on if 149 * it includes any Bidi characters. 150 * If aForce2b is true, the string will be stored as char16_t as is. Then, 151 * you can access the value faster but may waste memory if all characters 152 * are less than U+0100. 153 */ 154 bool Append(const char16_t* aBuffer, uint32_t aLength, bool aUpdateBidi, 155 bool aForce2b); 156 157 /** 158 * Append the contents of this data buffer to aString 159 */ 160 void AppendTo(nsAString& aString) const { 161 if (!AppendTo(aString, mozilla::fallible)) { 162 aString.AllocFailed(aString.Length() + GetLength()); 163 } 164 } 165 166 /** 167 * Append the contents of this data buffer to aString 168 * @return false if an out of memory condition is detected, true otherwise 169 */ 170 [[nodiscard]] bool AppendTo(nsAString& aString, 171 const mozilla::fallible_t& aFallible) const { 172 if (mState.mIs2b) { 173 if (aString.IsEmpty()) { 174 aString.Assign(m2b, mState.mLength); 175 return true; 176 } 177 return aString.Append(Get2b(), mState.mLength, aFallible); 178 } 179 return AppendASCIItoUTF16(Substring(m1b, mState.mLength), aString, 180 aFallible); 181 } 182 183 /** 184 * Append a substring of the contents of this data buffer to aString. 185 * @param aOffset where to start the substring in this data buffer 186 * @param aLength the length of the substring 187 */ 188 void AppendTo(nsAString& aString, uint32_t aOffset, uint32_t aLength) const { 189 if (!AppendTo(aString, aOffset, aLength, mozilla::fallible)) { 190 aString.AllocFailed(aString.Length() + aLength); 191 } 192 } 193 194 /** 195 * Append a substring of the contents of this data buffer to aString. 196 * @param aString the string in which to append 197 * @param aOffset where to start the substring in this data buffer 198 * @param aLength the length of the substring 199 * @return false if an out of memory condition is detected, true otherwise 200 */ 201 [[nodiscard]] bool AppendTo(nsAString& aString, uint32_t aOffset, 202 uint32_t aLength, 203 const mozilla::fallible_t& aFallible) const { 204 if (mState.mIs2b) { 205 bool ok = aString.Append(Get2b() + aOffset, aLength, aFallible); 206 if (!ok) { 207 return false; 208 } 209 210 return true; 211 } else { 212 return AppendASCIItoUTF16(Substring(m1b + aOffset, aLength), aString, 213 aFallible); 214 } 215 } 216 217 /** 218 * Make a copy of the fragments contents starting at offset for 219 * count characters. The offset and count will be adjusted to 220 * lie within the fragments data. The fragments data is converted if 221 * necessary. 222 */ 223 void CopyTo(char16_t* aDest, uint32_t aOffset, uint32_t aCount); 224 225 /** 226 * Return the character in the data buffer at the given 227 * index. This always returns a char16_t. 228 */ 229 [[nodiscard]] char16_t CharAt(uint32_t aIndex) const { 230 MOZ_ASSERT(aIndex < mState.mLength, "bad index"); 231 return mState.mIs2b ? Get2b()[aIndex] 232 : static_cast<unsigned char>(m1b[aIndex]); 233 } 234 [[nodiscard]] char16_t SafeCharAt(uint32_t aIndex) const { 235 return MOZ_LIKELY(aIndex < mState.mLength) ? CharAt(aIndex) 236 : static_cast<char16_t>(0); 237 } 238 239 /** 240 * Return the first char, but if you're not sure whether this is empty, you 241 * should use SafeFirstChar() instead. 242 */ 243 [[nodiscard]] char16_t FirstChar() const { 244 MOZ_ASSERT(mState.mLength); 245 return CharAt(0u); 246 } 247 [[nodiscard]] char16_t SafeFirstChar() const { 248 return MOZ_LIKELY(mState.mLength) ? FirstChar() : static_cast<char16_t>(0); 249 } 250 251 /** 252 * Return the last char, but if you're not sure whether this is empty, you 253 * should use SafeLastChar() instead. 254 */ 255 [[nodiscard]] char16_t LastChar() const { 256 MOZ_ASSERT(mState.mLength); 257 return CharAt(mState.mLength - 1); 258 } 259 [[nodiscard]] char16_t SafeLastChar() const { 260 return MOZ_LIKELY(mState.mLength) ? LastChar() : static_cast<char16_t>(0); 261 } 262 263 /** 264 * IsHighSurrogateFollowedByLowSurrogateAt() returns true if character at 265 * aIndex is high surrogate and it's followed by low surrogate. 266 */ 267 inline bool IsHighSurrogateFollowedByLowSurrogateAt(uint32_t aIndex) const { 268 MOZ_ASSERT(aIndex < mState.mLength); 269 if (!mState.mIs2b || aIndex + 1 >= mState.mLength) { 270 return false; 271 } 272 return NS_IS_SURROGATE_PAIR(Get2b()[aIndex], Get2b()[aIndex + 1]); 273 } 274 275 /** 276 * IsLowSurrogateFollowingHighSurrogateAt() returns true if character at 277 * aIndex is low surrogate and it follows high surrogate. 278 */ 279 inline bool IsLowSurrogateFollowingHighSurrogateAt(uint32_t aIndex) const { 280 MOZ_ASSERT(aIndex < mState.mLength); 281 if (!mState.mIs2b || !aIndex) { 282 return false; 283 } 284 return NS_IS_SURROGATE_PAIR(Get2b()[aIndex - 1], Get2b()[aIndex]); 285 } 286 287 /** 288 * ScalarValueAt() returns a Unicode scalar value at aIndex. If the character 289 * at aIndex is a high surrogate followed by low surrogate, returns character 290 * code for the pair. If the index is low surrogate, or a high surrogate but 291 * not in a pair, returns 0. 292 */ 293 inline char32_t ScalarValueAt(uint32_t aIndex) const { 294 MOZ_ASSERT(aIndex < mState.mLength); 295 if (!mState.mIs2b) { 296 return static_cast<unsigned char>(m1b[aIndex]); 297 } 298 char16_t ch = Get2b()[aIndex]; 299 if (!IS_SURROGATE(ch)) { 300 return ch; 301 } 302 if (aIndex + 1 < mState.mLength && NS_IS_HIGH_SURROGATE(ch)) { 303 char16_t nextCh = Get2b()[aIndex + 1]; 304 if (NS_IS_LOW_SURROGATE(nextCh)) { 305 return SURROGATE_TO_UCS4(ch, nextCh); 306 } 307 } 308 return 0; 309 } 310 311 void SetBidi(bool aBidi) { mState.mIsBidi = aBidi; } 312 313 struct FragmentBits { 314 // uint32_t to ensure that the values are unsigned, because we 315 // want 0/1, not 0/-1! 316 // Making these bool causes Windows to not actually pack them, 317 // which causes crashes because we assume this structure is no more than 318 // 32 bits! 319 uint32_t mInHeap : 1; 320 uint32_t mIs2b : 1; 321 uint32_t mIsBidi : 1; 322 // Note that when you change the bits of mLength, you also need to change 323 // NS_MAX_CHARACTER_DATA_BUFFER_LENGTH. 324 uint32_t mLength : 29; 325 }; 326 327 size_t SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) const; 328 329 /** 330 * Check whether the text in this buffer is the same as the text in the 331 * other buffer. 332 */ 333 [[nodiscard]] bool BufferEquals(const CharacterDataBuffer& aOther) const; 334 335 // FYI: FragmentBits::mLength is only 29 bits. Therefore, UINT32_MAX won't 336 // be valid offset in the data. 337 constexpr static uint32_t kNotFound = UINT32_MAX; 338 339 [[nodiscard]] uint32_t FindChar(char16_t aChar, uint32_t aOffset = 0) const { 340 if (aOffset >= GetLength()) { 341 return kNotFound; 342 } 343 if (Is2b()) { 344 const char16_t* end = Get2b() + GetLength(); 345 for (const char16_t* ch = Get2b() + aOffset; ch != end; ch++) { 346 if (*ch == aChar) { 347 return ch - Get2b(); 348 } 349 } 350 return kNotFound; 351 } 352 if (aChar > 0xFF) { 353 return kNotFound; 354 } 355 const unsigned char* end = GetUnsigned1b() + GetLength(); 356 for (const unsigned char* ch = GetUnsigned1b() + aOffset; ch != end; ch++) { 357 if (*ch == aChar) { 358 return ch - GetUnsigned1b(); 359 } 360 } 361 return kNotFound; 362 } 363 364 [[nodiscard]] uint32_t RFindChar(char16_t aChar, 365 uint32_t aOffset = UINT32_MAX) const { 366 const uint32_t length = GetLength(); 367 if (!length) { 368 return kNotFound; 369 } 370 aOffset = std::min(length - 1u, aOffset); 371 if (Is2b()) { 372 const char16_t* end = Get2b() - 1; 373 for (const char16_t* ch = Get2b() + aOffset; ch != end; ch--) { 374 if (*ch == aChar) { 375 return ch - Get2b(); 376 } 377 } 378 return kNotFound; 379 } 380 if (aChar > 0xFF) { 381 return kNotFound; 382 } 383 const unsigned char* end = GetUnsigned1b() - 1; 384 for (const unsigned char* ch = GetUnsigned1b() + aOffset; ch != end; ch--) { 385 if (*ch == aChar) { 386 return ch - GetUnsigned1b(); 387 } 388 } 389 return kNotFound; 390 } 391 392 enum class WhitespaceOption { 393 // If set, new lines (\n, U+000A) are treated as significant. 394 NewLineIsSignificant, 395 // If set, NBSPs ( , U+00A0) are treated as collapsible whitespaces. 396 // This option is useful to scan previous or next visible character from 397 // middle of a whitespace sequence because our editor makes consecutive 398 // whitespaces visible with converting collapsible whitespaces to pairs of 399 // and ASCII whitespace. 400 TreatNBSPAsCollapsible, 401 // If set, form feeds (\f, U+000C) are treated as significant. 402 // Be aware, form feed is defined as a whitespace by the HTML spec, but is 403 // not defined as so by the CSS spec. Therefore, it won't be rendered if it 404 // appears in a whitespace sequence surrounded by block boundaries like 405 // `data:text/html,%0C<div>%0Cabc%0C</div>%0C`. However, it'll be rendered 406 // as a character if it appears if surrounded by visible contents like 407 // `data:text/html,<div>abc %0C def</div>`. 408 FormFeedIsSignificant, 409 }; 410 using WhitespaceOptions = mozilla::EnumSet<WhitespaceOption>; 411 412 private: 413 // Helper class to check whether the character is a non-whitespace or not. 414 // This avoids to call EnumSet<WhitespaceOption>::contains() a lot. 415 class MOZ_STACK_CLASS AutoWhitespaceChecker final { 416 public: 417 explicit AutoWhitespaceChecker(const WhitespaceOptions& aOptions) 418 : mNBSPIsSignificant( 419 !aOptions.contains(WhitespaceOption::TreatNBSPAsCollapsible)), 420 mFormFeedIsSignificant( 421 aOptions.contains(WhitespaceOption::FormFeedIsSignificant)), 422 mNewLineIsSignificant( 423 aOptions.contains(WhitespaceOption::NewLineIsSignificant)) {} 424 425 [[nodiscard]] bool IsNonWhitespace(char16_t aChar) const { 426 switch (aChar) { 427 case kNBSP: 428 return mNBSPIsSignificant; 429 case kFormFeed: 430 return mFormFeedIsSignificant; 431 case kNewLine: 432 return mNewLineIsSignificant; 433 case kSpace: 434 case kTab: 435 case kCarriageReturn: 436 return false; 437 default: 438 return true; 439 } 440 } 441 442 private: 443 const bool mNBSPIsSignificant; 444 const bool mFormFeedIsSignificant; 445 const bool mNewLineIsSignificant; 446 }; 447 448 public: 449 /** 450 * Return the first non-whitespace character index. 451 * 452 * @param aOptions Set options to change which character should not be treated 453 * as a whitespace. 454 * @param aOffset Start offset, so, the result will equal or greater than 455 * aOffset if a char found. 456 */ 457 [[nodiscard]] uint32_t FindNonWhitespaceChar( 458 const WhitespaceOptions& aOptions = {}, uint32_t aOffset = 0) const { 459 if (aOffset >= GetLength()) { 460 return kNotFound; 461 } 462 const AutoWhitespaceChecker checker(aOptions); 463 if (Is2b()) { 464 const char16_t* end = Get2b() + GetLength(); 465 for (const char16_t* ch = Get2b() + aOffset; ch != end; ch++) { 466 if (checker.IsNonWhitespace(*ch)) { 467 return ch - Get2b(); 468 } 469 } 470 return kNotFound; 471 } 472 const unsigned char* end = GetUnsigned1b() + GetLength(); 473 for (const unsigned char* ch = GetUnsigned1b() + aOffset; ch != end; ch++) { 474 if (checker.IsNonWhitespace(*ch)) { 475 return ch - GetUnsigned1b(); 476 } 477 } 478 return kNotFound; 479 } 480 481 /** 482 * Return the last non-whitespace character index. 483 * 484 * @param aOptions Set options to change which character should not be 485 * treated as a whitespace. 486 * @param aOffset Start offset, so, the result will equal or greater than 487 * aOffset if a char found. 488 */ 489 [[nodiscard]] uint32_t RFindNonWhitespaceChar( 490 const WhitespaceOptions& aOptions = {}, 491 uint32_t aOffset = UINT32_MAX) const { 492 const uint32_t length = GetLength(); 493 if (!length) { 494 return kNotFound; 495 } 496 const AutoWhitespaceChecker checker(aOptions); 497 aOffset = std::min(length - 1u, aOffset); 498 if (Is2b()) { 499 const char16_t* end = Get2b() - 1; 500 for (const char16_t* ch = Get2b() + aOffset; ch != end; ch--) { 501 if (checker.IsNonWhitespace(*ch)) { 502 return ch - Get2b(); 503 } 504 } 505 return kNotFound; 506 } 507 const unsigned char* end = GetUnsigned1b() - 1; 508 for (const unsigned char* ch = GetUnsigned1b() + aOffset; ch != end; ch--) { 509 if (checker.IsNonWhitespace(*ch)) { 510 return ch - GetUnsigned1b(); 511 } 512 } 513 return kNotFound; 514 } 515 516 /** 517 * Return first different char offset in this buffer after 518 * aOffsetInFragment. For example, if we have "abcdefg", aStr is "bXYe" and 519 * aOffsetInFragment is 1, scan from "b" and return the offset of "c", 520 * i.e., 2. 521 * 522 * Note that this is currently not usable to compare us with longer string. 523 */ 524 [[nodiscard]] uint32_t FindFirstDifferentCharOffset( 525 const nsAString& aStr, uint32_t aOffsetInFragment = 0u) const { 526 return FindFirstDifferentCharOffsetInternal(aStr, aOffsetInFragment); 527 } 528 [[nodiscard]] uint32_t FindFirstDifferentCharOffset( 529 const nsACString& aStr, uint32_t aOffsetInFragment = 0u) const { 530 return FindFirstDifferentCharOffsetInternal(aStr, aOffsetInFragment); 531 } 532 533 /** 534 * Return first different char offset in this buffer before 535 * aOffsetInFragment (from backward scanning point of view). 536 * For example, if we have "abcdef", aStr is "bXYe" and aOffsetInFragment is 537 * 5, scan from "e" and return the offset of "d" (vs. "Y") in this buffer, 538 * i.e., 3. In other words, aOffsetInFragment should be the next offset of 539 * you start to scan. I.e., at least 1 and at most the length of this. So, 540 * if you want to compare with start of this, you should specify 541 * aStr.Length(), and if you want to compare with end of this, you should 542 * specify GetLength() result of this (or just omit it). 543 * 544 * Note that this is currently not usable to compare us with longer string. 545 */ 546 [[nodiscard]] uint32_t RFindFirstDifferentCharOffset( 547 const nsAString& aStr, uint32_t aOffsetInFragment = UINT32_MAX) const { 548 return RFindFirstDifferentCharOffsetInternal(aStr, aOffsetInFragment); 549 } 550 [[nodiscard]] uint32_t RFindFirstDifferentCharOffset( 551 const nsACString& aStr, uint32_t aOffsetInFragment = UINT32_MAX) const { 552 return RFindFirstDifferentCharOffsetInternal(aStr, aOffsetInFragment); 553 } 554 555 private: 556 void ReleaseBuffer(); 557 558 /** 559 * Scan the contents of the buffer and turn on mState.mIsBidi if it 560 * includes any Bidi characters. 561 */ 562 void UpdateBidiFlag(const char16_t* aBuffer, uint32_t aLength); 563 564 union { 565 mozilla::StringBuffer* m2b; 566 // FIXME: m1b is actually treated as const unsigned char* since the array 567 // may contain characters between 0x80 - 0xFF. So, copying the value to 568 // char16_t might depend on how the compiler to treat the values. 569 const char* m1b; // This is const since it can point to shared data 570 }; 571 572 union { 573 uint32_t mAllBits; 574 FragmentBits mState; 575 }; 576 577 /** 578 * See the explanation of FindFirstDifferentCharOffset() for the detail. 579 * 580 * This should not be directly exposed as a public method because it will 581 * cause instantiating the method with various derived classes of nsAString 582 * and nsACString. 583 */ 584 template <typename nsAXString> 585 [[nodiscard]] uint32_t FindFirstDifferentCharOffsetInternal( 586 const nsAXString& aStr, uint32_t aOffsetInFragment) const { 587 static_assert(std::is_same_v<nsAXString, nsAString> || 588 std::is_same_v<nsAXString, nsACString>); 589 MOZ_ASSERT(!aStr.IsEmpty()); 590 const uint32_t length = GetLength(); 591 MOZ_ASSERT(aOffsetInFragment <= length); 592 if (NS_WARN_IF(aStr.IsEmpty()) || NS_WARN_IF(length <= aOffsetInFragment) || 593 NS_WARN_IF(length - aOffsetInFragment < aStr.Length())) { 594 return kNotFound; 595 } 596 if (Is2b()) { 597 const auto* ch = aStr.BeginReading(); 598 // At the first char of the scan range. 599 const char16_t* ourCh = Get2b() + aOffsetInFragment; 600 const auto* const end = aStr.EndReading(); 601 const char16_t* const ourEnd = Get2b() + length; 602 for (; ch != end && ourCh != ourEnd; ch++, ourCh++) { 603 if (*ch != *ourCh) { 604 return ourCh - Get2b(); 605 } 606 } 607 return kNotFound; 608 } 609 const auto* ch = aStr.BeginReading(); 610 // At the first char of the scan range. 611 const char* ourCh = Get1b() + aOffsetInFragment; 612 const auto* const end = aStr.EndReading(); 613 const char* ourEnd = Get1b() + length; 614 for (; ch != end && ourCh != ourEnd; ch++, ourCh++) { 615 if (*ch != *ourCh) { 616 return ourCh - Get1b(); 617 } 618 } 619 return kNotFound; 620 } 621 622 /** 623 * See the explanation of RFindFirstDifferentCharOffset() for the detail. 624 * 625 * This should not be directly exposed as a public method because it will 626 * cause instantiating the method with various derived classes of nsAString 627 * and nsACString. 628 */ 629 template <typename nsAXString> 630 [[nodiscard]] uint32_t RFindFirstDifferentCharOffsetInternal( 631 const nsAXString& aStr, uint32_t aOffsetInFragment) const { 632 static_assert(std::is_same_v<nsAXString, nsAString> || 633 std::is_same_v<nsAXString, nsACString>); 634 MOZ_ASSERT(!aStr.IsEmpty()); 635 const uint32_t length = GetLength(); 636 MOZ_ASSERT(aOffsetInFragment <= length); 637 aOffsetInFragment = std::min(length, aOffsetInFragment); 638 if (NS_WARN_IF(aStr.IsEmpty()) || NS_WARN_IF(!aOffsetInFragment) || 639 NS_WARN_IF(aOffsetInFragment < aStr.Length())) { 640 return kNotFound; 641 } 642 if (Is2b()) { 643 const auto* ch = aStr.EndReading() - 1; 644 // At the last char of the scan range 645 const char16_t* ourCh = Get2b() + aOffsetInFragment - 1; 646 const auto* const end = aStr.BeginReading() - 1; 647 const char16_t* const ourEnd = Get2b() - 1; 648 for (; ch != end && ourCh != ourEnd; ch--, ourCh--) { 649 if (*ch != *ourCh) { 650 return ourCh - Get2b(); 651 } 652 } 653 return kNotFound; 654 } 655 const auto* ch = aStr.EndReading() - 1; 656 // At the last char of the scan range 657 const char* ourCh = Get1b() + aOffsetInFragment - 1; 658 const auto* const end = aStr.BeginReading() - 1; 659 const char* const ourEnd = Get1b() - 1; 660 for (; ch != end && ourCh != ourEnd; ch--, ourCh--) { 661 if (*ch != *ourCh) { 662 return ourCh - Get1b(); 663 } 664 } 665 return kNotFound; 666 } 667 }; 668 669 } // namespace mozilla::dom 670 671 #endif /* mozilla_dom_CharacterDataBuffer_h */