CharacterDataBuffer.cpp (15590B)
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */ 3 /* This Source Code Form is subject to the terms of the Mozilla Public 4 * License, v. 2.0. If a copy of the MPL was not distributed with this 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 6 7 /* 8 * A class which represents a fragment of text (eg inside a text 9 * node); if only codepoints below 256 are used, the text is stored as 10 * a char*; otherwise the text is stored as a char16_t* 11 */ 12 13 #include "CharacterDataBuffer.h" 14 15 #include <algorithm> 16 17 #include "CharacterDataBufferImpl.h" 18 #include "mozilla/CheckedInt.h" 19 #include "mozilla/MemoryReporting.h" 20 #include "mozilla/SSE.h" 21 #include "mozilla/ppc.h" 22 #include "nsBidiUtils.h" 23 #include "nsCRT.h" 24 #include "nsReadableUtils.h" 25 #include "nsUnicharUtils.h" 26 27 #define TEXTFRAG_WHITE_AFTER_NEWLINE 50 28 #define TEXTFRAG_MAX_NEWLINES 7 29 30 // Static buffer used for common fragments 31 static char* sSpaceSharedString[TEXTFRAG_MAX_NEWLINES + 1]; 32 static char* sTabSharedString[TEXTFRAG_MAX_NEWLINES + 1]; 33 static char sSingleCharSharedString[256]; 34 35 using namespace mozilla::dom; 36 37 // static 38 nsresult CharacterDataBuffer::Init() { 39 // Create whitespace strings 40 uint32_t i; 41 for (i = 0; i <= TEXTFRAG_MAX_NEWLINES; ++i) { 42 sSpaceSharedString[i] = new char[1 + i + TEXTFRAG_WHITE_AFTER_NEWLINE]; 43 sTabSharedString[i] = new char[1 + i + TEXTFRAG_WHITE_AFTER_NEWLINE]; 44 sSpaceSharedString[i][0] = ' '; 45 sTabSharedString[i][0] = ' '; 46 uint32_t j; 47 for (j = 1; j < 1 + i; ++j) { 48 sSpaceSharedString[i][j] = '\n'; 49 sTabSharedString[i][j] = '\n'; 50 } 51 for (; j < (1 + i + TEXTFRAG_WHITE_AFTER_NEWLINE); ++j) { 52 sSpaceSharedString[i][j] = ' '; 53 sTabSharedString[i][j] = '\t'; 54 } 55 } 56 57 // Create single-char strings 58 for (i = 0; i < 256; ++i) { 59 sSingleCharSharedString[i] = i; 60 } 61 62 return NS_OK; 63 } 64 65 // static 66 void CharacterDataBuffer::Shutdown() { 67 uint32_t i; 68 for (i = 0; i <= TEXTFRAG_MAX_NEWLINES; ++i) { 69 delete[] sSpaceSharedString[i]; 70 delete[] sTabSharedString[i]; 71 sSpaceSharedString[i] = nullptr; 72 sTabSharedString[i] = nullptr; 73 } 74 } 75 76 CharacterDataBuffer::~CharacterDataBuffer() { 77 ReleaseBuffer(); 78 MOZ_COUNT_DTOR(CharacterDataBuffer); 79 } 80 81 void CharacterDataBuffer::ReleaseBuffer() { 82 if (mState.mIs2b) { 83 NS_RELEASE(m2b); 84 } else if (mState.mLength && m1b && mState.mInHeap) { 85 free(const_cast<char*>(m1b)); 86 } 87 88 m1b = nullptr; 89 mState.mIsBidi = false; 90 91 // Set mState.mIs2b, mState.mInHeap, and mState.mLength = 0 with mAllBits; 92 mAllBits = 0; 93 } 94 95 CharacterDataBuffer& CharacterDataBuffer::operator=( 96 const CharacterDataBuffer& aOther) { 97 ReleaseBuffer(); 98 99 if (aOther.mState.mLength) { 100 if (!aOther.mState.mInHeap) { 101 MOZ_ASSERT(!aOther.mState.mIs2b); 102 m1b = aOther.m1b; 103 } else if (aOther.mState.mIs2b) { 104 m2b = aOther.m2b; 105 NS_ADDREF(m2b); 106 } else { 107 m1b = static_cast<char*>(malloc(aOther.mState.mLength)); 108 if (m1b) { 109 memcpy(const_cast<char*>(m1b), aOther.m1b, aOther.mState.mLength); 110 } else { 111 // allocate a buffer for a single REPLACEMENT CHARACTER 112 m2b = StringBuffer::Alloc(sizeof(char16_t) * 2).take(); 113 if (!m2b) { 114 MOZ_CRASH("OOM!"); 115 } 116 char16_t* data = static_cast<char16_t*>(m2b->Data()); 117 data[0] = 0xFFFD; // REPLACEMENT CHARACTER 118 data[1] = char16_t(0); 119 mState.mIs2b = true; 120 mState.mInHeap = true; 121 mState.mLength = 1; 122 return *this; 123 } 124 } 125 126 mAllBits = aOther.mAllBits; 127 } 128 129 return *this; 130 } 131 132 static inline int32_t FirstNon8BitUnvectorized(const char16_t* str, 133 const char16_t* end) { 134 using p = Non8BitParameters<sizeof(size_t)>; 135 const size_t mask = p::mask(); 136 const uint32_t alignMask = p::alignMask(); 137 const uint32_t numUnicharsPerWord = p::numUnicharsPerWord(); 138 const int32_t len = end - str; 139 int32_t i = 0; 140 141 // Align ourselves to a word boundary. 142 int32_t alignLen = std::min( 143 len, int32_t(((-NS_PTR_TO_INT32(str)) & alignMask) / sizeof(char16_t))); 144 for (; i < alignLen; i++) { 145 if (str[i] > 255) return i; 146 } 147 148 // Check one word at a time. 149 const int32_t wordWalkEnd = 150 ((len - i) / numUnicharsPerWord) * numUnicharsPerWord; 151 for (; i < wordWalkEnd; i += numUnicharsPerWord) { 152 const size_t word = *reinterpret_cast<const size_t*>(str + i); 153 if (word & mask) return i; 154 } 155 156 // Take care of the remainder one character at a time. 157 for (; i < len; i++) { 158 if (str[i] > 255) return i; 159 } 160 161 return -1; 162 } 163 164 #if defined(MOZILLA_MAY_SUPPORT_SSE2) 165 # include "CharacterDataBufferGenericFwd.h" 166 #endif 167 168 #ifdef __powerpc__ 169 namespace mozilla { 170 namespace VMX { 171 int32_t FirstNon8Bit(const char16_t* str, const char16_t* end); 172 } // namespace VMX 173 } // namespace mozilla 174 #endif 175 176 /* 177 * This function returns -1 if all characters in str are 8 bit characters. 178 * Otherwise, it returns a value less than or equal to the index of the first 179 * non-8bit character in str. For example, if first non-8bit character is at 180 * position 25, it may return 25, or for example 24, or 16. But it guarantees 181 * there is no non-8bit character before returned value. 182 */ 183 static inline int32_t FirstNon8Bit(const char16_t* str, const char16_t* end) { 184 #ifdef MOZILLA_MAY_SUPPORT_SSE2 185 if (mozilla::supports_sse2()) { 186 return mozilla::FirstNon8Bit<xsimd::sse2>(str, end); 187 } 188 #elif defined(__powerpc__) 189 if (mozilla::supports_vmx()) { 190 return mozilla::VMX::FirstNon8Bit(str, end); 191 } 192 #endif 193 194 return FirstNon8BitUnvectorized(str, end); 195 } 196 197 bool CharacterDataBuffer::SetTo(const char16_t* aBuffer, uint32_t aLength, 198 bool aUpdateBidi, bool aForce2b) { 199 if (MOZ_UNLIKELY(aLength > NS_MAX_CHARACTER_DATA_BUFFER_LENGTH)) { 200 return false; 201 } 202 203 if (aForce2b && mState.mIs2b && !m2b->IsReadonly()) { 204 // Try to re-use our existing StringBuffer. 205 uint32_t storageSize = m2b->StorageSize(); 206 uint32_t neededSize = aLength * sizeof(char16_t); 207 if (!neededSize) { 208 if (storageSize < AutoStringDefaultStorageSize) { 209 // If we're storing small enough StringBuffer, let's preserve it. 210 static_cast<char16_t*>(m2b->Data())[0] = char16_t(0); 211 mState.mLength = 0; 212 mState.mIsBidi = false; 213 return true; 214 } 215 } else if (neededSize < storageSize && 216 (storageSize / 2) < 217 (neededSize + AutoStringDefaultStorageSize)) { 218 // Don't try to reuse the existing StringBuffer, if it would have lots of 219 // unused space. 220 memcpy(m2b->Data(), aBuffer, neededSize); 221 static_cast<char16_t*>(m2b->Data())[aLength] = char16_t(0); 222 mState.mLength = aLength; 223 mState.mIsBidi = false; 224 if (aUpdateBidi) { 225 UpdateBidiFlag(aBuffer, aLength); 226 } 227 return true; 228 } 229 } 230 231 if (aLength == 0) { 232 ReleaseBuffer(); 233 return true; 234 } 235 236 char16_t firstChar = *aBuffer; 237 if (!aForce2b && aLength == 1 && firstChar < 256) { 238 ReleaseBuffer(); 239 m1b = sSingleCharSharedString + firstChar; 240 mState.mInHeap = false; 241 mState.mIs2b = false; 242 mState.mLength = 1; 243 return true; 244 } 245 246 const char16_t* ucp = aBuffer; 247 const char16_t* uend = aBuffer + aLength; 248 249 // Check if we can use a shared string 250 if (!aForce2b && 251 aLength <= 1 + TEXTFRAG_WHITE_AFTER_NEWLINE + TEXTFRAG_MAX_NEWLINES && 252 (firstChar == ' ' || firstChar == '\n' || firstChar == '\t')) { 253 if (firstChar == ' ') { 254 ++ucp; 255 } 256 257 const char16_t* start = ucp; 258 while (ucp < uend && *ucp == '\n') { 259 ++ucp; 260 } 261 const char16_t* endNewLine = ucp; 262 263 char16_t space = ucp < uend && *ucp == '\t' ? '\t' : ' '; 264 while (ucp < uend && *ucp == space) { 265 ++ucp; 266 } 267 268 if (ucp == uend && endNewLine - start <= TEXTFRAG_MAX_NEWLINES && 269 ucp - endNewLine <= TEXTFRAG_WHITE_AFTER_NEWLINE) { 270 ReleaseBuffer(); 271 char** strings = space == ' ' ? sSpaceSharedString : sTabSharedString; 272 m1b = strings[endNewLine - start]; 273 274 // If we didn't find a space in the beginning, skip it now. 275 if (firstChar != ' ') { 276 ++m1b; 277 } 278 279 mState.mInHeap = false; 280 mState.mIs2b = false; 281 mState.mLength = aLength; 282 283 return true; 284 } 285 } 286 287 // See if we need to store the data in ucs2 or not 288 int32_t first16bit = aForce2b ? 0 : ::FirstNon8Bit(ucp, uend); 289 290 if (first16bit != -1) { // aBuffer contains no non-8bit character 291 // Use ucs2 storage because we have to 292 CheckedUint32 size = CheckedUint32(aLength) + 1; 293 if (!size.isValid()) { 294 return false; 295 } 296 size *= sizeof(char16_t); 297 if (!size.isValid()) { 298 return false; 299 } 300 301 RefPtr<StringBuffer> newBuffer = StringBuffer::Alloc(size.value()); 302 if (!newBuffer) { 303 return false; 304 } 305 306 ReleaseBuffer(); 307 memcpy(newBuffer->Data(), aBuffer, aLength * sizeof(char16_t)); 308 static_cast<char16_t*>(newBuffer->Data())[aLength] = char16_t(0); 309 310 m2b = newBuffer.forget().take(); 311 mState.mIs2b = true; 312 if (aUpdateBidi) { 313 UpdateBidiFlag(aBuffer + first16bit, aLength - first16bit); 314 } 315 } else { 316 // Use 1 byte storage because we can 317 char* buff = static_cast<char*>(malloc(aLength)); 318 if (!buff) { 319 return false; 320 } 321 322 ReleaseBuffer(); 323 // Copy data 324 LossyConvertUtf16toLatin1(Span(aBuffer, aLength), Span(buff, aLength)); 325 m1b = buff; 326 mState.mIs2b = false; 327 } 328 329 // Setup our fields 330 mState.mInHeap = true; 331 mState.mLength = aLength; 332 333 return true; 334 } 335 336 void CharacterDataBuffer::CopyTo(char16_t* aDest, uint32_t aOffset, 337 uint32_t aCount) { 338 const CheckedUint32 endOffset = CheckedUint32(aOffset) + aCount; 339 if (!endOffset.isValid() || endOffset.value() > GetLength()) { 340 aCount = mState.mLength - aOffset; 341 } 342 343 if (aCount) { 344 if (mState.mIs2b) { 345 memcpy(aDest, Get2b() + aOffset, sizeof(char16_t) * aCount); 346 } else { 347 const char* cp = m1b + aOffset; 348 ConvertLatin1toUtf16(Span(cp, aCount), Span(aDest, aCount)); 349 } 350 } 351 } 352 353 bool CharacterDataBuffer::Append(const char16_t* aBuffer, uint32_t aLength, 354 bool aUpdateBidi, bool aForce2b) { 355 if (!aLength) { 356 return true; 357 } 358 359 // This is a common case because some callsites create a textnode 360 // with a value by creating the node and then calling AppendData. 361 if (mState.mLength == 0) { 362 return SetTo(aBuffer, aLength, aUpdateBidi, aForce2b); 363 } 364 365 // Should we optimize for aData.Length() == 0? 366 367 // FYI: Don't use CheckedInt in this method since here is very hot path 368 // in some performance tests. 369 if (NS_MAX_CHARACTER_DATA_BUFFER_LENGTH - mState.mLength < aLength) { 370 return false; // Would be overflown if we'd keep handling. 371 } 372 373 if (mState.mIs2b) { 374 size_t size = mState.mLength + aLength + 1; 375 if (SIZE_MAX / sizeof(char16_t) < size) { 376 return false; // Would be overflown if we'd keep handling. 377 } 378 size *= sizeof(char16_t); 379 380 // Already a 2-byte string so the result will be too 381 StringBuffer* buff = nullptr; 382 StringBuffer* bufferToRelease = nullptr; 383 if (m2b->IsReadonly()) { 384 buff = StringBuffer::Alloc(size).take(); 385 if (!buff) { 386 return false; 387 } 388 bufferToRelease = m2b; 389 memcpy(static_cast<char16_t*>(buff->Data()), m2b->Data(), 390 mState.mLength * sizeof(char16_t)); 391 } else { 392 buff = StringBuffer::Realloc(m2b, size); 393 if (!buff) { 394 return false; 395 } 396 } 397 398 char16_t* data = static_cast<char16_t*>(buff->Data()); 399 memcpy(data + mState.mLength, aBuffer, aLength * sizeof(char16_t)); 400 mState.mLength += aLength; 401 m2b = buff; 402 data[mState.mLength] = char16_t(0); 403 404 NS_IF_RELEASE(bufferToRelease); 405 406 if (aUpdateBidi) { 407 UpdateBidiFlag(aBuffer, aLength); 408 } 409 410 return true; 411 } 412 413 // Current string is a 1-byte string, check if the new data fits in one byte 414 // too. 415 int32_t first16bit = 416 aForce2b ? 0 : ::FirstNon8Bit(aBuffer, aBuffer + aLength); 417 418 if (first16bit != -1) { // aBuffer contains no non-8bit character 419 size_t size = mState.mLength + aLength + 1; 420 if (SIZE_MAX / sizeof(char16_t) < size) { 421 return false; // Would be overflown if we'd keep handling. 422 } 423 size *= sizeof(char16_t); 424 425 // The old data was 1-byte, but the new is not so we have to expand it 426 // all to 2-byte 427 StringBuffer* buff = StringBuffer::Alloc(size).take(); 428 if (!buff) { 429 return false; 430 } 431 432 // Copy data into buff 433 char16_t* data = static_cast<char16_t*>(buff->Data()); 434 ConvertLatin1toUtf16(Span(m1b, mState.mLength), Span(data, mState.mLength)); 435 436 memcpy(data + mState.mLength, aBuffer, aLength * sizeof(char16_t)); 437 mState.mLength += aLength; 438 mState.mIs2b = true; 439 440 if (mState.mInHeap) { 441 free(const_cast<char*>(m1b)); 442 } 443 data[mState.mLength] = char16_t(0); 444 m2b = buff; 445 446 mState.mInHeap = true; 447 448 if (aUpdateBidi) { 449 UpdateBidiFlag(aBuffer + first16bit, aLength - first16bit); 450 } 451 452 return true; 453 } 454 455 // The new and the old data is all 1-byte 456 size_t size = mState.mLength + aLength; 457 MOZ_ASSERT(sizeof(char) == 1); 458 char* buff; 459 if (mState.mInHeap) { 460 buff = static_cast<char*>(realloc(const_cast<char*>(m1b), size)); 461 if (!buff) { 462 return false; 463 } 464 } else { 465 buff = static_cast<char*>(malloc(size)); 466 if (!buff) { 467 return false; 468 } 469 470 memcpy(buff, m1b, mState.mLength); 471 mState.mInHeap = true; 472 } 473 474 // Copy aBuffer into buff. 475 LossyConvertUtf16toLatin1(Span(aBuffer, aLength), 476 Span(buff + mState.mLength, aLength)); 477 478 m1b = buff; 479 mState.mLength += aLength; 480 481 return true; 482 } 483 484 /* virtual */ 485 size_t CharacterDataBuffer::SizeOfExcludingThis( 486 mozilla::MallocSizeOf aMallocSizeOf) const { 487 if (Is2b()) { 488 return m2b->SizeOfIncludingThisIfUnshared(aMallocSizeOf); 489 } 490 491 if (mState.mInHeap) { 492 return aMallocSizeOf(m1b); 493 } 494 495 return 0; 496 } 497 498 // To save time we only do this when we really want to know, not during 499 // every allocation 500 void CharacterDataBuffer::UpdateBidiFlag(const char16_t* aBuffer, 501 uint32_t aLength) { 502 if (mState.mIs2b && !mState.mIsBidi) { 503 if (HasRTLChars(Span(aBuffer, aLength))) { 504 mState.mIsBidi = true; 505 } 506 } 507 } 508 509 bool CharacterDataBuffer::BufferEquals( 510 const CharacterDataBuffer& aOther) const { 511 if (!Is2b()) { 512 // We're 1-byte. 513 if (!aOther.Is2b()) { 514 nsDependentCSubstring ourStr(Get1b(), GetLength()); 515 return ourStr.Equals( 516 nsDependentCSubstring(aOther.Get1b(), aOther.GetLength())); 517 } 518 519 // We're 1-byte, the other thing is 2-byte. Instead of implementing a 520 // separate codepath for this, just use our code below. 521 return aOther.BufferEquals(*this); 522 } 523 524 nsDependentSubstring ourStr(Get2b(), GetLength()); 525 if (aOther.Is2b()) { 526 return ourStr.Equals( 527 nsDependentSubstring(aOther.Get2b(), aOther.GetLength())); 528 } 529 530 // We can't use EqualsASCII here, because the other string might not 531 // actually be ASCII. Just roll our own compare; do it in the simple way. 532 // Bug 1532356 tracks not having to roll our own. 533 if (GetLength() != aOther.GetLength()) { 534 return false; 535 } 536 537 const char16_t* ourChars = Get2b(); 538 const char* otherChars = aOther.Get1b(); 539 for (uint32_t i = 0; i < GetLength(); ++i) { 540 if (ourChars[i] != static_cast<char16_t>(otherChars[i])) { 541 return false; 542 } 543 } 544 545 return true; 546 }