tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

CharacterDataBuffer.cpp (15590B)


      1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
      2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
      3 /* This Source Code Form is subject to the terms of the Mozilla Public
      4 * License, v. 2.0. If a copy of the MPL was not distributed with this
      5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      6 
      7 /*
      8 * A class which represents a fragment of text (eg inside a text
      9 * node); if only codepoints below 256 are used, the text is stored as
     10 * a char*; otherwise the text is stored as a char16_t*
     11 */
     12 
     13 #include "CharacterDataBuffer.h"
     14 
     15 #include <algorithm>
     16 
     17 #include "CharacterDataBufferImpl.h"
     18 #include "mozilla/CheckedInt.h"
     19 #include "mozilla/MemoryReporting.h"
     20 #include "mozilla/SSE.h"
     21 #include "mozilla/ppc.h"
     22 #include "nsBidiUtils.h"
     23 #include "nsCRT.h"
     24 #include "nsReadableUtils.h"
     25 #include "nsUnicharUtils.h"
     26 
     27 #define TEXTFRAG_WHITE_AFTER_NEWLINE 50
     28 #define TEXTFRAG_MAX_NEWLINES 7
     29 
     30 // Static buffer used for common fragments
     31 static char* sSpaceSharedString[TEXTFRAG_MAX_NEWLINES + 1];
     32 static char* sTabSharedString[TEXTFRAG_MAX_NEWLINES + 1];
     33 static char sSingleCharSharedString[256];
     34 
     35 using namespace mozilla::dom;
     36 
     37 // static
     38 nsresult CharacterDataBuffer::Init() {
     39  // Create whitespace strings
     40  uint32_t i;
     41  for (i = 0; i <= TEXTFRAG_MAX_NEWLINES; ++i) {
     42    sSpaceSharedString[i] = new char[1 + i + TEXTFRAG_WHITE_AFTER_NEWLINE];
     43    sTabSharedString[i] = new char[1 + i + TEXTFRAG_WHITE_AFTER_NEWLINE];
     44    sSpaceSharedString[i][0] = ' ';
     45    sTabSharedString[i][0] = ' ';
     46    uint32_t j;
     47    for (j = 1; j < 1 + i; ++j) {
     48      sSpaceSharedString[i][j] = '\n';
     49      sTabSharedString[i][j] = '\n';
     50    }
     51    for (; j < (1 + i + TEXTFRAG_WHITE_AFTER_NEWLINE); ++j) {
     52      sSpaceSharedString[i][j] = ' ';
     53      sTabSharedString[i][j] = '\t';
     54    }
     55  }
     56 
     57  // Create single-char strings
     58  for (i = 0; i < 256; ++i) {
     59    sSingleCharSharedString[i] = i;
     60  }
     61 
     62  return NS_OK;
     63 }
     64 
     65 // static
     66 void CharacterDataBuffer::Shutdown() {
     67  uint32_t i;
     68  for (i = 0; i <= TEXTFRAG_MAX_NEWLINES; ++i) {
     69    delete[] sSpaceSharedString[i];
     70    delete[] sTabSharedString[i];
     71    sSpaceSharedString[i] = nullptr;
     72    sTabSharedString[i] = nullptr;
     73  }
     74 }
     75 
     76 CharacterDataBuffer::~CharacterDataBuffer() {
     77  ReleaseBuffer();
     78  MOZ_COUNT_DTOR(CharacterDataBuffer);
     79 }
     80 
     81 void CharacterDataBuffer::ReleaseBuffer() {
     82  if (mState.mIs2b) {
     83    NS_RELEASE(m2b);
     84  } else if (mState.mLength && m1b && mState.mInHeap) {
     85    free(const_cast<char*>(m1b));
     86  }
     87 
     88  m1b = nullptr;
     89  mState.mIsBidi = false;
     90 
     91  // Set mState.mIs2b, mState.mInHeap, and mState.mLength = 0 with mAllBits;
     92  mAllBits = 0;
     93 }
     94 
     95 CharacterDataBuffer& CharacterDataBuffer::operator=(
     96    const CharacterDataBuffer& aOther) {
     97  ReleaseBuffer();
     98 
     99  if (aOther.mState.mLength) {
    100    if (!aOther.mState.mInHeap) {
    101      MOZ_ASSERT(!aOther.mState.mIs2b);
    102      m1b = aOther.m1b;
    103    } else if (aOther.mState.mIs2b) {
    104      m2b = aOther.m2b;
    105      NS_ADDREF(m2b);
    106    } else {
    107      m1b = static_cast<char*>(malloc(aOther.mState.mLength));
    108      if (m1b) {
    109        memcpy(const_cast<char*>(m1b), aOther.m1b, aOther.mState.mLength);
    110      } else {
    111        // allocate a buffer for a single REPLACEMENT CHARACTER
    112        m2b = StringBuffer::Alloc(sizeof(char16_t) * 2).take();
    113        if (!m2b) {
    114          MOZ_CRASH("OOM!");
    115        }
    116        char16_t* data = static_cast<char16_t*>(m2b->Data());
    117        data[0] = 0xFFFD;  // REPLACEMENT CHARACTER
    118        data[1] = char16_t(0);
    119        mState.mIs2b = true;
    120        mState.mInHeap = true;
    121        mState.mLength = 1;
    122        return *this;
    123      }
    124    }
    125 
    126    mAllBits = aOther.mAllBits;
    127  }
    128 
    129  return *this;
    130 }
    131 
    132 static inline int32_t FirstNon8BitUnvectorized(const char16_t* str,
    133                                               const char16_t* end) {
    134  using p = Non8BitParameters<sizeof(size_t)>;
    135  const size_t mask = p::mask();
    136  const uint32_t alignMask = p::alignMask();
    137  const uint32_t numUnicharsPerWord = p::numUnicharsPerWord();
    138  const int32_t len = end - str;
    139  int32_t i = 0;
    140 
    141  // Align ourselves to a word boundary.
    142  int32_t alignLen = std::min(
    143      len, int32_t(((-NS_PTR_TO_INT32(str)) & alignMask) / sizeof(char16_t)));
    144  for (; i < alignLen; i++) {
    145    if (str[i] > 255) return i;
    146  }
    147 
    148  // Check one word at a time.
    149  const int32_t wordWalkEnd =
    150      ((len - i) / numUnicharsPerWord) * numUnicharsPerWord;
    151  for (; i < wordWalkEnd; i += numUnicharsPerWord) {
    152    const size_t word = *reinterpret_cast<const size_t*>(str + i);
    153    if (word & mask) return i;
    154  }
    155 
    156  // Take care of the remainder one character at a time.
    157  for (; i < len; i++) {
    158    if (str[i] > 255) return i;
    159  }
    160 
    161  return -1;
    162 }
    163 
    164 #if defined(MOZILLA_MAY_SUPPORT_SSE2)
    165 #  include "CharacterDataBufferGenericFwd.h"
    166 #endif
    167 
    168 #ifdef __powerpc__
    169 namespace mozilla {
    170 namespace VMX {
    171 int32_t FirstNon8Bit(const char16_t* str, const char16_t* end);
    172 }  // namespace VMX
    173 }  // namespace mozilla
    174 #endif
    175 
    176 /*
    177 * This function returns -1 if all characters in str are 8 bit characters.
    178 * Otherwise, it returns a value less than or equal to the index of the first
    179 * non-8bit character in str. For example, if first non-8bit character is at
    180 * position 25, it may return 25, or for example 24, or 16. But it guarantees
    181 * there is no non-8bit character before returned value.
    182 */
    183 static inline int32_t FirstNon8Bit(const char16_t* str, const char16_t* end) {
    184 #ifdef MOZILLA_MAY_SUPPORT_SSE2
    185  if (mozilla::supports_sse2()) {
    186    return mozilla::FirstNon8Bit<xsimd::sse2>(str, end);
    187  }
    188 #elif defined(__powerpc__)
    189  if (mozilla::supports_vmx()) {
    190    return mozilla::VMX::FirstNon8Bit(str, end);
    191  }
    192 #endif
    193 
    194  return FirstNon8BitUnvectorized(str, end);
    195 }
    196 
    197 bool CharacterDataBuffer::SetTo(const char16_t* aBuffer, uint32_t aLength,
    198                                bool aUpdateBidi, bool aForce2b) {
    199  if (MOZ_UNLIKELY(aLength > NS_MAX_CHARACTER_DATA_BUFFER_LENGTH)) {
    200    return false;
    201  }
    202 
    203  if (aForce2b && mState.mIs2b && !m2b->IsReadonly()) {
    204    // Try to re-use our existing StringBuffer.
    205    uint32_t storageSize = m2b->StorageSize();
    206    uint32_t neededSize = aLength * sizeof(char16_t);
    207    if (!neededSize) {
    208      if (storageSize < AutoStringDefaultStorageSize) {
    209        // If we're storing small enough StringBuffer, let's preserve it.
    210        static_cast<char16_t*>(m2b->Data())[0] = char16_t(0);
    211        mState.mLength = 0;
    212        mState.mIsBidi = false;
    213        return true;
    214      }
    215    } else if (neededSize < storageSize &&
    216               (storageSize / 2) <
    217                   (neededSize + AutoStringDefaultStorageSize)) {
    218      // Don't try to reuse the existing StringBuffer, if it would have lots of
    219      // unused space.
    220      memcpy(m2b->Data(), aBuffer, neededSize);
    221      static_cast<char16_t*>(m2b->Data())[aLength] = char16_t(0);
    222      mState.mLength = aLength;
    223      mState.mIsBidi = false;
    224      if (aUpdateBidi) {
    225        UpdateBidiFlag(aBuffer, aLength);
    226      }
    227      return true;
    228    }
    229  }
    230 
    231  if (aLength == 0) {
    232    ReleaseBuffer();
    233    return true;
    234  }
    235 
    236  char16_t firstChar = *aBuffer;
    237  if (!aForce2b && aLength == 1 && firstChar < 256) {
    238    ReleaseBuffer();
    239    m1b = sSingleCharSharedString + firstChar;
    240    mState.mInHeap = false;
    241    mState.mIs2b = false;
    242    mState.mLength = 1;
    243    return true;
    244  }
    245 
    246  const char16_t* ucp = aBuffer;
    247  const char16_t* uend = aBuffer + aLength;
    248 
    249  // Check if we can use a shared string
    250  if (!aForce2b &&
    251      aLength <= 1 + TEXTFRAG_WHITE_AFTER_NEWLINE + TEXTFRAG_MAX_NEWLINES &&
    252      (firstChar == ' ' || firstChar == '\n' || firstChar == '\t')) {
    253    if (firstChar == ' ') {
    254      ++ucp;
    255    }
    256 
    257    const char16_t* start = ucp;
    258    while (ucp < uend && *ucp == '\n') {
    259      ++ucp;
    260    }
    261    const char16_t* endNewLine = ucp;
    262 
    263    char16_t space = ucp < uend && *ucp == '\t' ? '\t' : ' ';
    264    while (ucp < uend && *ucp == space) {
    265      ++ucp;
    266    }
    267 
    268    if (ucp == uend && endNewLine - start <= TEXTFRAG_MAX_NEWLINES &&
    269        ucp - endNewLine <= TEXTFRAG_WHITE_AFTER_NEWLINE) {
    270      ReleaseBuffer();
    271      char** strings = space == ' ' ? sSpaceSharedString : sTabSharedString;
    272      m1b = strings[endNewLine - start];
    273 
    274      // If we didn't find a space in the beginning, skip it now.
    275      if (firstChar != ' ') {
    276        ++m1b;
    277      }
    278 
    279      mState.mInHeap = false;
    280      mState.mIs2b = false;
    281      mState.mLength = aLength;
    282 
    283      return true;
    284    }
    285  }
    286 
    287  // See if we need to store the data in ucs2 or not
    288  int32_t first16bit = aForce2b ? 0 : ::FirstNon8Bit(ucp, uend);
    289 
    290  if (first16bit != -1) {  // aBuffer contains no non-8bit character
    291    // Use ucs2 storage because we have to
    292    CheckedUint32 size = CheckedUint32(aLength) + 1;
    293    if (!size.isValid()) {
    294      return false;
    295    }
    296    size *= sizeof(char16_t);
    297    if (!size.isValid()) {
    298      return false;
    299    }
    300 
    301    RefPtr<StringBuffer> newBuffer = StringBuffer::Alloc(size.value());
    302    if (!newBuffer) {
    303      return false;
    304    }
    305 
    306    ReleaseBuffer();
    307    memcpy(newBuffer->Data(), aBuffer, aLength * sizeof(char16_t));
    308    static_cast<char16_t*>(newBuffer->Data())[aLength] = char16_t(0);
    309 
    310    m2b = newBuffer.forget().take();
    311    mState.mIs2b = true;
    312    if (aUpdateBidi) {
    313      UpdateBidiFlag(aBuffer + first16bit, aLength - first16bit);
    314    }
    315  } else {
    316    // Use 1 byte storage because we can
    317    char* buff = static_cast<char*>(malloc(aLength));
    318    if (!buff) {
    319      return false;
    320    }
    321 
    322    ReleaseBuffer();
    323    // Copy data
    324    LossyConvertUtf16toLatin1(Span(aBuffer, aLength), Span(buff, aLength));
    325    m1b = buff;
    326    mState.mIs2b = false;
    327  }
    328 
    329  // Setup our fields
    330  mState.mInHeap = true;
    331  mState.mLength = aLength;
    332 
    333  return true;
    334 }
    335 
    336 void CharacterDataBuffer::CopyTo(char16_t* aDest, uint32_t aOffset,
    337                                 uint32_t aCount) {
    338  const CheckedUint32 endOffset = CheckedUint32(aOffset) + aCount;
    339  if (!endOffset.isValid() || endOffset.value() > GetLength()) {
    340    aCount = mState.mLength - aOffset;
    341  }
    342 
    343  if (aCount) {
    344    if (mState.mIs2b) {
    345      memcpy(aDest, Get2b() + aOffset, sizeof(char16_t) * aCount);
    346    } else {
    347      const char* cp = m1b + aOffset;
    348      ConvertLatin1toUtf16(Span(cp, aCount), Span(aDest, aCount));
    349    }
    350  }
    351 }
    352 
    353 bool CharacterDataBuffer::Append(const char16_t* aBuffer, uint32_t aLength,
    354                                 bool aUpdateBidi, bool aForce2b) {
    355  if (!aLength) {
    356    return true;
    357  }
    358 
    359  // This is a common case because some callsites create a textnode
    360  // with a value by creating the node and then calling AppendData.
    361  if (mState.mLength == 0) {
    362    return SetTo(aBuffer, aLength, aUpdateBidi, aForce2b);
    363  }
    364 
    365  // Should we optimize for aData.Length() == 0?
    366 
    367  // FYI: Don't use CheckedInt in this method since here is very hot path
    368  //      in some performance tests.
    369  if (NS_MAX_CHARACTER_DATA_BUFFER_LENGTH - mState.mLength < aLength) {
    370    return false;  // Would be overflown if we'd keep handling.
    371  }
    372 
    373  if (mState.mIs2b) {
    374    size_t size = mState.mLength + aLength + 1;
    375    if (SIZE_MAX / sizeof(char16_t) < size) {
    376      return false;  // Would be overflown if we'd keep handling.
    377    }
    378    size *= sizeof(char16_t);
    379 
    380    // Already a 2-byte string so the result will be too
    381    StringBuffer* buff = nullptr;
    382    StringBuffer* bufferToRelease = nullptr;
    383    if (m2b->IsReadonly()) {
    384      buff = StringBuffer::Alloc(size).take();
    385      if (!buff) {
    386        return false;
    387      }
    388      bufferToRelease = m2b;
    389      memcpy(static_cast<char16_t*>(buff->Data()), m2b->Data(),
    390             mState.mLength * sizeof(char16_t));
    391    } else {
    392      buff = StringBuffer::Realloc(m2b, size);
    393      if (!buff) {
    394        return false;
    395      }
    396    }
    397 
    398    char16_t* data = static_cast<char16_t*>(buff->Data());
    399    memcpy(data + mState.mLength, aBuffer, aLength * sizeof(char16_t));
    400    mState.mLength += aLength;
    401    m2b = buff;
    402    data[mState.mLength] = char16_t(0);
    403 
    404    NS_IF_RELEASE(bufferToRelease);
    405 
    406    if (aUpdateBidi) {
    407      UpdateBidiFlag(aBuffer, aLength);
    408    }
    409 
    410    return true;
    411  }
    412 
    413  // Current string is a 1-byte string, check if the new data fits in one byte
    414  // too.
    415  int32_t first16bit =
    416      aForce2b ? 0 : ::FirstNon8Bit(aBuffer, aBuffer + aLength);
    417 
    418  if (first16bit != -1) {  // aBuffer contains no non-8bit character
    419    size_t size = mState.mLength + aLength + 1;
    420    if (SIZE_MAX / sizeof(char16_t) < size) {
    421      return false;  // Would be overflown if we'd keep handling.
    422    }
    423    size *= sizeof(char16_t);
    424 
    425    // The old data was 1-byte, but the new is not so we have to expand it
    426    // all to 2-byte
    427    StringBuffer* buff = StringBuffer::Alloc(size).take();
    428    if (!buff) {
    429      return false;
    430    }
    431 
    432    // Copy data into buff
    433    char16_t* data = static_cast<char16_t*>(buff->Data());
    434    ConvertLatin1toUtf16(Span(m1b, mState.mLength), Span(data, mState.mLength));
    435 
    436    memcpy(data + mState.mLength, aBuffer, aLength * sizeof(char16_t));
    437    mState.mLength += aLength;
    438    mState.mIs2b = true;
    439 
    440    if (mState.mInHeap) {
    441      free(const_cast<char*>(m1b));
    442    }
    443    data[mState.mLength] = char16_t(0);
    444    m2b = buff;
    445 
    446    mState.mInHeap = true;
    447 
    448    if (aUpdateBidi) {
    449      UpdateBidiFlag(aBuffer + first16bit, aLength - first16bit);
    450    }
    451 
    452    return true;
    453  }
    454 
    455  // The new and the old data is all 1-byte
    456  size_t size = mState.mLength + aLength;
    457  MOZ_ASSERT(sizeof(char) == 1);
    458  char* buff;
    459  if (mState.mInHeap) {
    460    buff = static_cast<char*>(realloc(const_cast<char*>(m1b), size));
    461    if (!buff) {
    462      return false;
    463    }
    464  } else {
    465    buff = static_cast<char*>(malloc(size));
    466    if (!buff) {
    467      return false;
    468    }
    469 
    470    memcpy(buff, m1b, mState.mLength);
    471    mState.mInHeap = true;
    472  }
    473 
    474  // Copy aBuffer into buff.
    475  LossyConvertUtf16toLatin1(Span(aBuffer, aLength),
    476                            Span(buff + mState.mLength, aLength));
    477 
    478  m1b = buff;
    479  mState.mLength += aLength;
    480 
    481  return true;
    482 }
    483 
    484 /* virtual */
    485 size_t CharacterDataBuffer::SizeOfExcludingThis(
    486    mozilla::MallocSizeOf aMallocSizeOf) const {
    487  if (Is2b()) {
    488    return m2b->SizeOfIncludingThisIfUnshared(aMallocSizeOf);
    489  }
    490 
    491  if (mState.mInHeap) {
    492    return aMallocSizeOf(m1b);
    493  }
    494 
    495  return 0;
    496 }
    497 
    498 // To save time we only do this when we really want to know, not during
    499 // every allocation
    500 void CharacterDataBuffer::UpdateBidiFlag(const char16_t* aBuffer,
    501                                         uint32_t aLength) {
    502  if (mState.mIs2b && !mState.mIsBidi) {
    503    if (HasRTLChars(Span(aBuffer, aLength))) {
    504      mState.mIsBidi = true;
    505    }
    506  }
    507 }
    508 
    509 bool CharacterDataBuffer::BufferEquals(
    510    const CharacterDataBuffer& aOther) const {
    511  if (!Is2b()) {
    512    // We're 1-byte.
    513    if (!aOther.Is2b()) {
    514      nsDependentCSubstring ourStr(Get1b(), GetLength());
    515      return ourStr.Equals(
    516          nsDependentCSubstring(aOther.Get1b(), aOther.GetLength()));
    517    }
    518 
    519    // We're 1-byte, the other thing is 2-byte.  Instead of implementing a
    520    // separate codepath for this, just use our code below.
    521    return aOther.BufferEquals(*this);
    522  }
    523 
    524  nsDependentSubstring ourStr(Get2b(), GetLength());
    525  if (aOther.Is2b()) {
    526    return ourStr.Equals(
    527        nsDependentSubstring(aOther.Get2b(), aOther.GetLength()));
    528  }
    529 
    530  // We can't use EqualsASCII here, because the other string might not
    531  // actually be ASCII.  Just roll our own compare; do it in the simple way.
    532  // Bug 1532356 tracks not having to roll our own.
    533  if (GetLength() != aOther.GetLength()) {
    534    return false;
    535  }
    536 
    537  const char16_t* ourChars = Get2b();
    538  const char* otherChars = aOther.Get1b();
    539  for (uint32_t i = 0; i < GetLength(); ++i) {
    540    if (ourChars[i] != static_cast<char16_t>(otherChars[i])) {
    541      return false;
    542    }
    543  }
    544 
    545  return true;
    546 }