tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

CharacterDataBuffer.h (23388B)


      1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
      2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
      3 /* This Source Code Form is subject to the terms of the Mozilla Public
      4 * License, v. 2.0. If a copy of the MPL was not distributed with this
      5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      6 
      7 /*
      8 * A class which represents a buffer of text (eg inside a text
      9 * node); if only codepoints below 256 are used, the text is stored as
     10 * a char*; otherwise the text is stored as a char16_t*
     11 */
     12 
     13 #ifndef mozilla_dom_CharacterDataBuffer_h
     14 #define mozilla_dom_CharacterDataBuffer_h
     15 
     16 #include "mozilla/Attributes.h"
     17 #include "mozilla/EnumSet.h"
     18 #include "mozilla/MemoryReporting.h"
     19 #include "mozilla/StringBuffer.h"
     20 #include "nsCharTraits.h"
     21 #include "nsISupportsImpl.h"
     22 #include "nsReadableUtils.h"
     23 #include "nsString.h"
     24 
     25 // XXX should this normalize the code to keep a \u0000 at the end?
     26 
     27 namespace mozilla::dom {
     28 /**
     29 * A buffer of text. If mIs2b is 1 then the m2b pointer is valid
     30 * otherwise the m1b pointer is valid. If m1b is used then each byte
     31 * of data represents a single ucs2 character with the high byte being
     32 * zero.
     33 *
     34 * This class does not have a virtual destructor therefore it is not
     35 * meant to be subclassed.
     36 */
     37 class CharacterDataBuffer final {
     38 private:
 // Characters that AutoWhitespaceChecker::IsNonWhitespace() switches on.
 // All of them are below U+0100, so each fits in an unsigned char.
 constexpr static unsigned char kFormFeed = '\f';
 constexpr static unsigned char kNewLine = '\n';
 constexpr static unsigned char kCarriageReturn = '\r';
 constexpr static unsigned char kTab = '\t';
 constexpr static unsigned char kSpace = ' ';
 constexpr static unsigned char kNBSP = 0xA0;  // U+00A0 (&nbsp;)
     45 
     46 public:
     47  static nsresult Init();
     48  static void Shutdown();
     49 
     50  /**
     51   * Default constructor. Initialize the buffer to be empty.
     52   */
     53  CharacterDataBuffer() : m1b(nullptr), mAllBits(0) {
     54    MOZ_COUNT_CTOR(CharacterDataBuffer);
     55    NS_ASSERTION(sizeof(FragmentBits) == 4, "Bad field packing!");
     56  }
     57 
     58  ~CharacterDataBuffer();
     59 
 /**
  * Change the contents of this buffer to be a copy of the argument
  * buffer, or to "" if unable to allocate enough memory.
  */
 CharacterDataBuffer& operator=(const CharacterDataBuffer& aOther);
     65 
 /**
  * Return true if this buffer is represented by char16_t data, i.e. the
  * text has to be read through Get2b() rather than Get1b().
  */
 bool Is2b() const { return mState.mIs2b; }
     70 
 /**
  * Return true if this buffer contains Bidi text.
  * For performance reasons this flag is only set if explicitly requested (by
  * setting the aUpdateBidi argument on SetTo or Append to true) or when it
  * is set directly with SetBidi().
  */
 bool IsBidi() const { return mState.mIsBidi; }
     77 
     78  /**
     79   * Get a pointer to constant char16_t data.
     80   */
     81  const char16_t* Get2b() const {
     82    MOZ_ASSERT(Is2b(), "not 2b text");
     83    return static_cast<char16_t*>(m2b->Data());
     84  }
     85 
     86  /**
     87   * Get a pointer to constant char data.
     88   * NOTE: CharacterDataBuffer treat the 1b buffer as an array of unsigned
     89   * chars. Therefore, Get1b() is not good one for looking for a character
     90   * between 0x80
     91   * - 0xFF in the buffer.
     92   */
     93  const char* Get1b() const {
     94    NS_ASSERTION(!Is2b(), "not 1b text");
     95    return (const char*)m1b;
     96  }
     97  /**
     98   * Get a pointer to constant unsigned char data.
     99   */
    100  const unsigned char* GetUnsigned1b() const {
    101    NS_ASSERTION(!Is2b(), "not 1b text");
    102    return (const unsigned char*)m1b;
    103  }
    104 
 /**
  * Get the length of the buffer. The length is the number of logical
  * characters, not the number of bytes to store the characters.  It is
  * stored in the 29-bit FragmentBits::mLength field.
  */
 uint32_t GetLength() const { return mState.mLength; }
    110 
    111 #define NS_MAX_CHARACTER_DATA_BUFFER_LENGTH (static_cast<uint32_t>(0x1FFFFFFF))
    112 
    113  bool CanGrowBy(size_t n) const {
    114    return n < (1 << 29) && mState.mLength + n < (1 << 29);
    115  }
    116 
    117  /**
    118   * Change the contents of this buffer to be a copy of the given
    119   * buffer. If aUpdateBidi is true, contents of the buffer will be scanned,
    120   * and mState.mIsBidi will be turned on if it includes any Bidi characters.
    121   * If aForce2b is true, aBuffer will be stored as char16_t as is.  Then,
    122   * you can access the value faster but may waste memory if all characters
    123   * are less than U+0100.
    124   */
    125  bool SetTo(const char16_t* aBuffer, uint32_t aLength, bool aUpdateBidi,
    126             bool aForce2b);
    127 
    128  bool SetTo(const nsString& aString, bool aUpdateBidi, bool aForce2b) {
    129    if (MOZ_UNLIKELY(aString.Length() > NS_MAX_CHARACTER_DATA_BUFFER_LENGTH)) {
    130      return false;
    131    }
    132    ReleaseBuffer();
    133    if (aForce2b && !aUpdateBidi) {
    134      if (mozilla::StringBuffer* buffer = aString.GetStringBuffer()) {
    135        NS_ADDREF(m2b = buffer);
    136        mState.mInHeap = true;
    137        mState.mIs2b = true;
    138        mState.mLength = aString.Length();
    139        return true;
    140      }
    141    }
    142 
    143    return SetTo(aString.get(), aString.Length(), aUpdateBidi, aForce2b);
    144  }
    145 
    146  /**
    147   * Append aData to the end of this buffer. If aUpdateBidi is true, contents
    148   * of the buffer will be scanned, and mState.mIsBidi will be turned on if
    149   * it includes any Bidi characters.
    150   * If aForce2b is true, the string will be stored as char16_t as is.  Then,
    151   * you can access the value faster but may waste memory if all characters
    152   * are less than U+0100.
    153   */
    154  bool Append(const char16_t* aBuffer, uint32_t aLength, bool aUpdateBidi,
    155              bool aForce2b);
    156 
    157  /**
    158   * Append the contents of this data buffer to aString
    159   */
    160  void AppendTo(nsAString& aString) const {
    161    if (!AppendTo(aString, mozilla::fallible)) {
    162      aString.AllocFailed(aString.Length() + GetLength());
    163    }
    164  }
    165 
    166  /**
    167   * Append the contents of this data buffer to aString
    168   * @return false if an out of memory condition is detected, true otherwise
    169   */
    170  [[nodiscard]] bool AppendTo(nsAString& aString,
    171                              const mozilla::fallible_t& aFallible) const {
    172    if (mState.mIs2b) {
    173      if (aString.IsEmpty()) {
    174        aString.Assign(m2b, mState.mLength);
    175        return true;
    176      }
    177      return aString.Append(Get2b(), mState.mLength, aFallible);
    178    }
    179    return AppendASCIItoUTF16(Substring(m1b, mState.mLength), aString,
    180                              aFallible);
    181  }
    182 
    183  /**
    184   * Append a substring of the contents of this data buffer to aString.
    185   * @param aOffset where to start the substring in this data buffer
    186   * @param aLength the length of the substring
    187   */
    188  void AppendTo(nsAString& aString, uint32_t aOffset, uint32_t aLength) const {
    189    if (!AppendTo(aString, aOffset, aLength, mozilla::fallible)) {
    190      aString.AllocFailed(aString.Length() + aLength);
    191    }
    192  }
    193 
    194  /**
    195   * Append a substring of the contents of this data buffer to aString.
    196   * @param aString the string in which to append
    197   * @param aOffset where to start the substring in this data buffer
    198   * @param aLength the length of the substring
    199   * @return false if an out of memory condition is detected, true otherwise
    200   */
    201  [[nodiscard]] bool AppendTo(nsAString& aString, uint32_t aOffset,
    202                              uint32_t aLength,
    203                              const mozilla::fallible_t& aFallible) const {
    204    if (mState.mIs2b) {
    205      bool ok = aString.Append(Get2b() + aOffset, aLength, aFallible);
    206      if (!ok) {
    207        return false;
    208      }
    209 
    210      return true;
    211    } else {
    212      return AppendASCIItoUTF16(Substring(m1b + aOffset, aLength), aString,
    213                                aFallible);
    214    }
    215  }
    216 
 /**
  * Make a copy of the buffer's contents starting at aOffset for
  * aCount characters. The offset and count will be adjusted to
  * lie within the buffer's data. The data is converted to char16_t if
  * necessary.
  */
 void CopyTo(char16_t* aDest, uint32_t aOffset, uint32_t aCount);
    224 
    225  /**
    226   * Return the character in the data buffer at the given
    227   * index. This always returns a char16_t.
    228   */
    229  [[nodiscard]] char16_t CharAt(uint32_t aIndex) const {
    230    MOZ_ASSERT(aIndex < mState.mLength, "bad index");
    231    return mState.mIs2b ? Get2b()[aIndex]
    232                        : static_cast<unsigned char>(m1b[aIndex]);
    233  }
    234  [[nodiscard]] char16_t SafeCharAt(uint32_t aIndex) const {
    235    return MOZ_LIKELY(aIndex < mState.mLength) ? CharAt(aIndex)
    236                                               : static_cast<char16_t>(0);
    237  }
    238 
 /**
  * Return the first char.  The buffer must not be empty (asserted); if
  * you're not sure whether this is empty, you should use SafeFirstChar()
  * instead.
  */
 [[nodiscard]] char16_t FirstChar() const {
   MOZ_ASSERT(mState.mLength);
   return CharAt(0u);
 }
    247  [[nodiscard]] char16_t SafeFirstChar() const {
    248    return MOZ_LIKELY(mState.mLength) ? FirstChar() : static_cast<char16_t>(0);
    249  }
    250 
 /**
  * Return the last char.  The buffer must not be empty (asserted); if
  * you're not sure whether this is empty, you should use SafeLastChar()
  * instead.
  */
 [[nodiscard]] char16_t LastChar() const {
   MOZ_ASSERT(mState.mLength);
   return CharAt(mState.mLength - 1);
 }
    259  [[nodiscard]] char16_t SafeLastChar() const {
    260    return MOZ_LIKELY(mState.mLength) ? LastChar() : static_cast<char16_t>(0);
    261  }
    262 
    263  /**
    264   * IsHighSurrogateFollowedByLowSurrogateAt() returns true if character at
    265   * aIndex is high surrogate and it's followed by low surrogate.
    266   */
    267  inline bool IsHighSurrogateFollowedByLowSurrogateAt(uint32_t aIndex) const {
    268    MOZ_ASSERT(aIndex < mState.mLength);
    269    if (!mState.mIs2b || aIndex + 1 >= mState.mLength) {
    270      return false;
    271    }
    272    return NS_IS_SURROGATE_PAIR(Get2b()[aIndex], Get2b()[aIndex + 1]);
    273  }
    274 
    275  /**
    276   * IsLowSurrogateFollowingHighSurrogateAt() returns true if character at
    277   * aIndex is low surrogate and it follows high surrogate.
    278   */
    279  inline bool IsLowSurrogateFollowingHighSurrogateAt(uint32_t aIndex) const {
    280    MOZ_ASSERT(aIndex < mState.mLength);
    281    if (!mState.mIs2b || !aIndex) {
    282      return false;
    283    }
    284    return NS_IS_SURROGATE_PAIR(Get2b()[aIndex - 1], Get2b()[aIndex]);
    285  }
    286 
    287  /**
    288   * ScalarValueAt() returns a Unicode scalar value at aIndex.  If the character
    289   * at aIndex is a high surrogate followed by low surrogate, returns character
    290   * code for the pair.  If the index is low surrogate, or a high surrogate but
    291   * not in a pair, returns 0.
    292   */
    293  inline char32_t ScalarValueAt(uint32_t aIndex) const {
    294    MOZ_ASSERT(aIndex < mState.mLength);
    295    if (!mState.mIs2b) {
    296      return static_cast<unsigned char>(m1b[aIndex]);
    297    }
    298    char16_t ch = Get2b()[aIndex];
    299    if (!IS_SURROGATE(ch)) {
    300      return ch;
    301    }
    302    if (aIndex + 1 < mState.mLength && NS_IS_HIGH_SURROGATE(ch)) {
    303      char16_t nextCh = Get2b()[aIndex + 1];
    304      if (NS_IS_LOW_SURROGATE(nextCh)) {
    305        return SURROGATE_TO_UCS4(ch, nextCh);
    306      }
    307    }
    308    return 0;
    309  }
    310 
 /**
  * Set or clear the flag reported by IsBidi() without scanning the text.
  */
 void SetBidi(bool aBidi) { mState.mIsBidi = aBidi; }
    312 
 /**
  * Flags and length of the buffer, packed into a single 32-bit value
  * (1 + 1 + 1 + 29 bits).
  */
 struct FragmentBits {
   // uint32_t to ensure that the values are unsigned, because we
   // want 0/1, not 0/-1!
   // Making these bool causes Windows to not actually pack them,
   // which causes crashes because we assume this structure is no more than
   // 32 bits!
   uint32_t mInHeap : 1;
   // Set when the text is stored as char16_t (m2b) instead of char (m1b).
   uint32_t mIs2b : 1;
   // Reported by IsBidi().
   uint32_t mIsBidi : 1;
   // Note that when you change the bits of mLength, you also need to change
   // NS_MAX_CHARACTER_DATA_BUFFER_LENGTH.
   uint32_t mLength : 29;
 };
    326 
    327  size_t SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) const;
    328 
    329  /**
    330   * Check whether the text in this buffer is the same as the text in the
    331   * other buffer.
    332   */
    333  [[nodiscard]] bool BufferEquals(const CharacterDataBuffer& aOther) const;
    334 
    335  // FYI: FragmentBits::mLength is only 29 bits.  Therefore, UINT32_MAX won't
    336  // be valid offset in the data.
    337  constexpr static uint32_t kNotFound = UINT32_MAX;
    338 
    339  [[nodiscard]] uint32_t FindChar(char16_t aChar, uint32_t aOffset = 0) const {
    340    if (aOffset >= GetLength()) {
    341      return kNotFound;
    342    }
    343    if (Is2b()) {
    344      const char16_t* end = Get2b() + GetLength();
    345      for (const char16_t* ch = Get2b() + aOffset; ch != end; ch++) {
    346        if (*ch == aChar) {
    347          return ch - Get2b();
    348        }
    349      }
    350      return kNotFound;
    351    }
    352    if (aChar > 0xFF) {
    353      return kNotFound;
    354    }
    355    const unsigned char* end = GetUnsigned1b() + GetLength();
    356    for (const unsigned char* ch = GetUnsigned1b() + aOffset; ch != end; ch++) {
    357      if (*ch == aChar) {
    358        return ch - GetUnsigned1b();
    359      }
    360    }
    361    return kNotFound;
    362  }
    363 
    364  [[nodiscard]] uint32_t RFindChar(char16_t aChar,
    365                                   uint32_t aOffset = UINT32_MAX) const {
    366    const uint32_t length = GetLength();
    367    if (!length) {
    368      return kNotFound;
    369    }
    370    aOffset = std::min(length - 1u, aOffset);
    371    if (Is2b()) {
    372      const char16_t* end = Get2b() - 1;
    373      for (const char16_t* ch = Get2b() + aOffset; ch != end; ch--) {
    374        if (*ch == aChar) {
    375          return ch - Get2b();
    376        }
    377      }
    378      return kNotFound;
    379    }
    380    if (aChar > 0xFF) {
    381      return kNotFound;
    382    }
    383    const unsigned char* end = GetUnsigned1b() - 1;
    384    for (const unsigned char* ch = GetUnsigned1b() + aOffset; ch != end; ch--) {
    385      if (*ch == aChar) {
    386        return ch - GetUnsigned1b();
    387      }
    388    }
    389    return kNotFound;
    390  }
    391 
 /**
  * Options for FindNonWhitespaceChar() and RFindNonWhitespaceChar() which
  * change which characters count as whitespace.
  */
 enum class WhitespaceOption {
   // If set, new lines (\n, U+000A) are treated as significant.
   NewLineIsSignificant,
   // If set, NBSPs (&nbsp;, U+00A0) are treated as collapsible whitespaces.
   // This option is useful to scan for the previous or next visible character
   // from the middle of a whitespace sequence because our editor makes
   // consecutive whitespaces visible by converting collapsible whitespaces
   // to pairs of &nbsp; and ASCII whitespace.
   TreatNBSPAsCollapsible,
   // If set, form feeds (\f, U+000C) are treated as significant.
   // Be aware, form feed is defined as a whitespace by the HTML spec, but is
   // not defined as so by the CSS spec. Therefore, it won't be rendered if it
   // appears in a whitespace sequence surrounded by block boundaries like
   // `data:text/html,%0C<div>%0Cabc%0C</div>%0C`. However, it'll be rendered
   // as a character if it is surrounded by visible contents like
   // `data:text/html,<div>abc %0C def</div>`.
   FormFeedIsSignificant,
 };
 using WhitespaceOptions = mozilla::EnumSet<WhitespaceOption>;
    411 
    412 private:
    413  // Helper class to check whether the character is a non-whitespace or not.
    414  // This avoids to call EnumSet<WhitespaceOption>::contains() a lot.
    415  class MOZ_STACK_CLASS AutoWhitespaceChecker final {
    416   public:
    417    explicit AutoWhitespaceChecker(const WhitespaceOptions& aOptions)
    418        : mNBSPIsSignificant(
    419              !aOptions.contains(WhitespaceOption::TreatNBSPAsCollapsible)),
    420          mFormFeedIsSignificant(
    421              aOptions.contains(WhitespaceOption::FormFeedIsSignificant)),
    422          mNewLineIsSignificant(
    423              aOptions.contains(WhitespaceOption::NewLineIsSignificant)) {}
    424 
    425    [[nodiscard]] bool IsNonWhitespace(char16_t aChar) const {
    426      switch (aChar) {
    427        case kNBSP:
    428          return mNBSPIsSignificant;
    429        case kFormFeed:
    430          return mFormFeedIsSignificant;
    431        case kNewLine:
    432          return mNewLineIsSignificant;
    433        case kSpace:
    434        case kTab:
    435        case kCarriageReturn:
    436          return false;
    437        default:
    438          return true;
    439      }
    440    }
    441 
    442   private:
    443    const bool mNBSPIsSignificant;
    444    const bool mFormFeedIsSignificant;
    445    const bool mNewLineIsSignificant;
    446  };
    447 
    448 public:
    449  /**
    450   * Return the first non-whitespace character index.
    451   *
    452   * @param aOptions Set options to change which character should not be treated
    453   * as a whitespace.
    454   * @param aOffset Start offset, so, the result will equal or greater than
    455   * aOffset if a char found.
    456   */
    457  [[nodiscard]] uint32_t FindNonWhitespaceChar(
    458      const WhitespaceOptions& aOptions = {}, uint32_t aOffset = 0) const {
    459    if (aOffset >= GetLength()) {
    460      return kNotFound;
    461    }
    462    const AutoWhitespaceChecker checker(aOptions);
    463    if (Is2b()) {
    464      const char16_t* end = Get2b() + GetLength();
    465      for (const char16_t* ch = Get2b() + aOffset; ch != end; ch++) {
    466        if (checker.IsNonWhitespace(*ch)) {
    467          return ch - Get2b();
    468        }
    469      }
    470      return kNotFound;
    471    }
    472    const unsigned char* end = GetUnsigned1b() + GetLength();
    473    for (const unsigned char* ch = GetUnsigned1b() + aOffset; ch != end; ch++) {
    474      if (checker.IsNonWhitespace(*ch)) {
    475        return ch - GetUnsigned1b();
    476      }
    477    }
    478    return kNotFound;
    479  }
    480 
    481  /**
    482   * Return the last non-whitespace character index.
    483   *
    484   * @param aOptions Set options to change which character should not be
    485   * treated as a whitespace.
    486   * @param aOffset Start offset, so, the result will equal or greater than
    487   * aOffset if a char found.
    488   */
    489  [[nodiscard]] uint32_t RFindNonWhitespaceChar(
    490      const WhitespaceOptions& aOptions = {},
    491      uint32_t aOffset = UINT32_MAX) const {
    492    const uint32_t length = GetLength();
    493    if (!length) {
    494      return kNotFound;
    495    }
    496    const AutoWhitespaceChecker checker(aOptions);
    497    aOffset = std::min(length - 1u, aOffset);
    498    if (Is2b()) {
    499      const char16_t* end = Get2b() - 1;
    500      for (const char16_t* ch = Get2b() + aOffset; ch != end; ch--) {
    501        if (checker.IsNonWhitespace(*ch)) {
    502          return ch - Get2b();
    503        }
    504      }
    505      return kNotFound;
    506    }
    507    const unsigned char* end = GetUnsigned1b() - 1;
    508    for (const unsigned char* ch = GetUnsigned1b() + aOffset; ch != end; ch--) {
    509      if (checker.IsNonWhitespace(*ch)) {
    510        return ch - GetUnsigned1b();
    511      }
    512    }
    513    return kNotFound;
    514  }
    515 
 /**
  * Return the offset in this buffer of the first character which differs
  * from aStr when aStr is compared with the text starting at
  * aOffsetInFragment.  For example, if we have "abcdefg", aStr is "bXYe" and
  * aOffsetInFragment is 1, scan from "b" and return the offset of "c",
  * i.e., 2.  Returns kNotFound if no difference is found.
  *
  * Note that this is currently not usable to compare us with a longer
  * string: kNotFound is returned if aStr does not fit between
  * aOffsetInFragment and the end of this buffer.
  */
 [[nodiscard]] uint32_t FindFirstDifferentCharOffset(
     const nsAString& aStr, uint32_t aOffsetInFragment = 0u) const {
   return FindFirstDifferentCharOffsetInternal(aStr, aOffsetInFragment);
 }
 [[nodiscard]] uint32_t FindFirstDifferentCharOffset(
     const nsACString& aStr, uint32_t aOffsetInFragment = 0u) const {
   return FindFirstDifferentCharOffsetInternal(aStr, aOffsetInFragment);
 }
    532 
 /**
  * Return the offset in this buffer of the first different character found
  * when scanning backward from just before aOffsetInFragment.
  * For example, if we have "abcdef", aStr is "bXYe" and aOffsetInFragment is
  * 5, scan from "e" and return the offset of "d" (vs. "Y") in this buffer,
  * i.e., 3.  In other words, aOffsetInFragment should be the offset just
  * after the character where you want to start scanning, i.e., at least 1
  * and at most the length of this.  So, if you want to compare with the
  * start of this, you should specify aStr.Length(), and if you want to
  * compare with the end of this, you should specify the GetLength() result
  * of this (or just omit it).  Returns kNotFound if no difference is found.
  *
  * Note that this is currently not usable to compare us with a longer
  * string: kNotFound is returned if aStr is longer than aOffsetInFragment.
  */
 [[nodiscard]] uint32_t RFindFirstDifferentCharOffset(
     const nsAString& aStr, uint32_t aOffsetInFragment = UINT32_MAX) const {
   return RFindFirstDifferentCharOffsetInternal(aStr, aOffsetInFragment);
 }
 [[nodiscard]] uint32_t RFindFirstDifferentCharOffset(
     const nsACString& aStr, uint32_t aOffsetInFragment = UINT32_MAX) const {
   return RFindFirstDifferentCharOffsetInternal(aStr, aOffsetInFragment);
 }
    554 
    555 private:
    556  void ReleaseBuffer();
    557 
    558  /**
    559   * Scan the contents of the buffer and turn on mState.mIsBidi if it
    560   * includes any Bidi characters.
    561   */
    562  void UpdateBidiFlag(const char16_t* aBuffer, uint32_t aLength);
    563 
 // The text itself.  Which member is valid is selected by mState.mIs2b:
 // m2b when it is set, m1b otherwise.
 union {
   mozilla::StringBuffer* m2b;
   // FIXME: m1b is actually treated as const unsigned char* since the array
   // may contain characters between 0x80 - 0xFF.  So, copying the value to
   // char16_t might depend on how the compiler to treat the values.
   const char* m1b;  // This is const since it can point to shared data
 };

 // The flags and the length.  mAllBits overlays the whole of mState so that
 // all fields can be set at once (the default constructor sets it to 0).
 union {
   uint32_t mAllBits;
   FragmentBits mState;
 };
    576 
    577  /**
    578   * See the explanation of FindFirstDifferentCharOffset() for the detail.
    579   *
    580   * This should not be directly exposed as a public method because it will
    581   * cause instantiating the method with various derived classes of nsAString
    582   * and nsACString.
    583   */
    584  template <typename nsAXString>
    585  [[nodiscard]] uint32_t FindFirstDifferentCharOffsetInternal(
    586      const nsAXString& aStr, uint32_t aOffsetInFragment) const {
    587    static_assert(std::is_same_v<nsAXString, nsAString> ||
    588                  std::is_same_v<nsAXString, nsACString>);
    589    MOZ_ASSERT(!aStr.IsEmpty());
    590    const uint32_t length = GetLength();
    591    MOZ_ASSERT(aOffsetInFragment <= length);
    592    if (NS_WARN_IF(aStr.IsEmpty()) || NS_WARN_IF(length <= aOffsetInFragment) ||
    593        NS_WARN_IF(length - aOffsetInFragment < aStr.Length())) {
    594      return kNotFound;
    595    }
    596    if (Is2b()) {
    597      const auto* ch = aStr.BeginReading();
    598      // At the first char of the scan range.
    599      const char16_t* ourCh = Get2b() + aOffsetInFragment;
    600      const auto* const end = aStr.EndReading();
    601      const char16_t* const ourEnd = Get2b() + length;
    602      for (; ch != end && ourCh != ourEnd; ch++, ourCh++) {
    603        if (*ch != *ourCh) {
    604          return ourCh - Get2b();
    605        }
    606      }
    607      return kNotFound;
    608    }
    609    const auto* ch = aStr.BeginReading();
    610    // At the first char of the scan range.
    611    const char* ourCh = Get1b() + aOffsetInFragment;
    612    const auto* const end = aStr.EndReading();
    613    const char* ourEnd = Get1b() + length;
    614    for (; ch != end && ourCh != ourEnd; ch++, ourCh++) {
    615      if (*ch != *ourCh) {
    616        return ourCh - Get1b();
    617      }
    618    }
    619    return kNotFound;
    620  }
    621 
    622  /**
    623   * See the explanation of RFindFirstDifferentCharOffset() for the detail.
    624   *
    625   * This should not be directly exposed as a public method because it will
    626   * cause instantiating the method with various derived classes of nsAString
    627   * and nsACString.
    628   */
    629  template <typename nsAXString>
    630  [[nodiscard]] uint32_t RFindFirstDifferentCharOffsetInternal(
    631      const nsAXString& aStr, uint32_t aOffsetInFragment) const {
    632    static_assert(std::is_same_v<nsAXString, nsAString> ||
    633                  std::is_same_v<nsAXString, nsACString>);
    634    MOZ_ASSERT(!aStr.IsEmpty());
    635    const uint32_t length = GetLength();
    636    MOZ_ASSERT(aOffsetInFragment <= length);
    637    aOffsetInFragment = std::min(length, aOffsetInFragment);
    638    if (NS_WARN_IF(aStr.IsEmpty()) || NS_WARN_IF(!aOffsetInFragment) ||
    639        NS_WARN_IF(aOffsetInFragment < aStr.Length())) {
    640      return kNotFound;
    641    }
    642    if (Is2b()) {
    643      const auto* ch = aStr.EndReading() - 1;
    644      // At the last char of the scan range
    645      const char16_t* ourCh = Get2b() + aOffsetInFragment - 1;
    646      const auto* const end = aStr.BeginReading() - 1;
    647      const char16_t* const ourEnd = Get2b() - 1;
    648      for (; ch != end && ourCh != ourEnd; ch--, ourCh--) {
    649        if (*ch != *ourCh) {
    650          return ourCh - Get2b();
    651        }
    652      }
    653      return kNotFound;
    654    }
    655    const auto* ch = aStr.EndReading() - 1;
    656    // At the last char of the scan range
    657    const char* ourCh = Get1b() + aOffsetInFragment - 1;
    658    const auto* const end = aStr.BeginReading() - 1;
    659    const char* const ourEnd = Get1b() - 1;
    660    for (; ch != end && ourCh != ourEnd; ch--, ourCh--) {
    661      if (*ch != *ourCh) {
    662        return ourCh - Get1b();
    663      }
    664    }
    665    return kNotFound;
    666  }
    667 };
    668 
    669 }  // namespace mozilla::dom
    670 
    671 #endif /* mozilla_dom_CharacterDataBuffer_h */