nsScannerString.h (13721B)
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* vim:set ts=2 sw=2 sts=2 et cindent: */ 3 /* This Source Code Form is subject to the terms of the Mozilla Public 4 * License, v. 2.0. If a copy of the MPL was not distributed with this 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 6 7 #ifndef nsScannerString_h___ 8 #define nsScannerString_h___ 9 10 #include "nsString.h" 11 #include "nsUnicharUtils.h" // for nsCaseInsensitiveStringComparator 12 #include "mozilla/LinkedList.h" 13 #include <algorithm> 14 15 /** 16 * NOTE: nsScannerString (and the other classes defined in this file) are 17 * not related to nsAString or any of the other xpcom/string classes. 18 * 19 * nsScannerString is based on the nsSlidingString implementation that used 20 * to live in xpcom/string. Now that nsAString is limited to representing 21 * only single fragment strings, nsSlidingString can no longer be used. 22 * 23 * An advantage to this design is that it does not employ any virtual 24 * functions. 25 * 26 * This file uses SCC-style indenting in deference to the nsSlidingString 27 * code from which this code is derived ;-) 28 */ 29 30 class nsScannerIterator; 31 class nsScannerSubstring; 32 class nsScannerString; 33 34 /** 35 * nsScannerBufferList 36 * 37 * This class maintains a list of heap-allocated Buffer objects. The buffers 38 * are maintained in a circular linked list. Each buffer has a usage count 39 * that is decremented by the owning nsScannerSubstring. 40 * 41 * The buffer list itself is reference counted. This allows the buffer list 42 * to be shared by multiple nsScannerSubstring objects. The reference 43 * counting is not threadsafe, which is not at all a requirement. 44 * 45 * When a nsScannerSubstring releases its reference to a buffer list, it 46 * decrements the usage count of the first buffer in the buffer list that it 47 * was referencing. It informs the buffer list that it can discard buffers 48 * starting at that prefix. The buffer list will do so if the usage count of 49 * that buffer is 0 and if it is the first buffer in the list. It will 50 * continue to prune buffers starting from the front of the buffer list until 51 * it finds a buffer that has a usage count that is non-zero. 52 */ 53 class nsScannerBufferList { 54 public: 55 /** 56 * Buffer objects are directly followed by a data segment. The start 57 * of the data segment is determined by increment the |this| pointer 58 * by 1 unit. 59 */ 60 class Buffer : public mozilla::LinkedListElement<Buffer> { 61 public: 62 void IncrementUsageCount() { ++mUsageCount; } 63 void DecrementUsageCount() { --mUsageCount; } 64 65 bool IsInUse() const { return mUsageCount != 0; } 66 67 const char16_t* DataStart() const { return (const char16_t*)(this + 1); } 68 char16_t* DataStart() { return (char16_t*)(this + 1); } 69 70 const char16_t* DataEnd() const { return mDataEnd; } 71 char16_t* DataEnd() { return mDataEnd; } 72 73 const Buffer* Next() const { return getNext(); } 74 Buffer* Next() { return getNext(); } 75 76 const Buffer* Prev() const { return getPrevious(); } 77 Buffer* Prev() { return getPrevious(); } 78 79 uint32_t DataLength() const { return mDataEnd - DataStart(); } 80 void SetDataLength(uint32_t len) { mDataEnd = DataStart() + len; } 81 82 private: 83 friend class nsScannerBufferList; 84 85 int32_t mUsageCount; 86 char16_t* mDataEnd; 87 }; 88 89 /** 90 * Position objects serve as lightweight pointers into a buffer list. 91 * The mPosition member must be contained with mBuffer->DataStart() 92 * and mBuffer->DataEnd(). 93 */ 94 class Position { 95 public: 96 Position() : mBuffer(nullptr), mPosition(nullptr) {} 97 98 Position(Buffer* buffer, char16_t* position) 99 : mBuffer(buffer), mPosition(position) {} 100 101 inline explicit Position(const nsScannerIterator& aIter); 102 103 inline Position& operator=(const nsScannerIterator& aIter); 104 105 static size_t Distance(const Position& p1, const Position& p2); 106 107 Buffer* mBuffer; 108 char16_t* mPosition; 109 }; 110 111 static Buffer* AllocBufferFromString(const nsAString&); 112 static Buffer* AllocBuffer(uint32_t capacity); // capacity = number of chars 113 114 explicit nsScannerBufferList(Buffer* buf) : mRefCnt(0) { 115 mBuffers.insertBack(buf); 116 } 117 118 void AddRef() { ++mRefCnt; } 119 void Release() { 120 if (--mRefCnt == 0) delete this; 121 } 122 123 void Append(Buffer* buf) { mBuffers.insertBack(buf); } 124 void InsertAfter(Buffer* buf, Buffer* prev) { prev->setNext(buf); } 125 void SplitBuffer(const Position&); 126 void DiscardUnreferencedPrefix(Buffer*); 127 128 Buffer* Head() { return mBuffers.getFirst(); } 129 const Buffer* Head() const { return mBuffers.getFirst(); } 130 131 Buffer* Tail() { return mBuffers.getLast(); } 132 const Buffer* Tail() const { return mBuffers.getLast(); } 133 134 private: 135 friend class nsScannerSubstring; 136 137 ~nsScannerBufferList() { ReleaseAll(); } 138 void ReleaseAll(); 139 140 int32_t mRefCnt; 141 mozilla::LinkedList<Buffer> mBuffers; 142 }; 143 144 /** 145 * nsScannerFragment represents a "slice" of a Buffer object. 146 */ 147 struct nsScannerFragment { 148 typedef nsScannerBufferList::Buffer Buffer; 149 150 const Buffer* mBuffer; 151 const char16_t* mFragmentStart; 152 const char16_t* mFragmentEnd; 153 }; 154 155 /** 156 * nsScannerSubstring is the base class for nsScannerString. It provides 157 * access to iterators and methods to bind the substring to another 158 * substring or nsAString instance. 159 * 160 * This class owns the buffer list. 161 */ 162 class nsScannerSubstring { 163 public: 164 typedef nsScannerBufferList::Buffer Buffer; 165 typedef nsScannerBufferList::Position Position; 166 typedef uint32_t size_type; 167 168 nsScannerSubstring(); 169 explicit nsScannerSubstring(const nsAString& s); 170 171 ~nsScannerSubstring(); 172 173 nsScannerIterator& BeginReading(nsScannerIterator& iter) const; 174 nsScannerIterator& EndReading(nsScannerIterator& iter) const; 175 176 size_type Length() const { return mLength; } 177 178 void Rebind(const nsScannerSubstring&, const nsScannerIterator&, 179 const nsScannerIterator&); 180 void Rebind(const nsAString&); 181 182 bool GetNextFragment(nsScannerFragment&) const; 183 bool GetPrevFragment(nsScannerFragment&) const; 184 185 static inline Buffer* AllocBufferFromString(const nsAString& aStr) { 186 return nsScannerBufferList::AllocBufferFromString(aStr); 187 } 188 static inline Buffer* AllocBuffer(size_type aCapacity) { 189 return nsScannerBufferList::AllocBuffer(aCapacity); 190 } 191 192 protected: 193 void acquire_ownership_of_buffer_list() const { 194 mBufferList->AddRef(); 195 mStart.mBuffer->IncrementUsageCount(); 196 } 197 198 void release_ownership_of_buffer_list() { 199 if (mBufferList) { 200 mStart.mBuffer->DecrementUsageCount(); 201 mBufferList->DiscardUnreferencedPrefix(mStart.mBuffer); 202 mBufferList->Release(); 203 } 204 } 205 206 void init_range_from_buffer_list() { 207 mStart.mBuffer = mBufferList->Head(); 208 mStart.mPosition = mStart.mBuffer->DataStart(); 209 210 mEnd.mBuffer = mBufferList->Tail(); 211 mEnd.mPosition = mEnd.mBuffer->DataEnd(); 212 213 mLength = Position::Distance(mStart, mEnd); 214 } 215 216 Position mStart; 217 Position mEnd; 218 nsScannerBufferList* mBufferList; 219 size_type mLength; 220 221 friend class nsScannerSharedSubstring; 222 }; 223 224 /** 225 * nsScannerString provides methods to grow and modify a buffer list. 226 */ 227 class nsScannerString : public nsScannerSubstring { 228 public: 229 explicit nsScannerString(Buffer*); 230 231 // you are giving ownership to the string, it takes and keeps your 232 // buffer, deleting it when done. 233 // Use AllocBuffer or AllocBufferFromString to create a Buffer object 234 // for use with this function. 235 void AppendBuffer(Buffer*); 236 237 void DiscardPrefix(const nsScannerIterator&); 238 // any other way you want to do this? 239 240 void UngetReadable(const nsAString& aReadable, 241 const nsScannerIterator& aCurrentPosition); 242 }; 243 244 /** 245 * nsScannerSharedSubstring implements copy-on-write semantics for 246 * nsScannerSubstring. This class also manages releasing 247 * the reference to the scanner buffer when it is no longer needed. 248 */ 249 250 class nsScannerSharedSubstring { 251 public: 252 nsScannerSharedSubstring() : mBuffer(nullptr), mBufferList(nullptr) {} 253 254 ~nsScannerSharedSubstring() { 255 if (mBufferList) ReleaseBuffer(); 256 } 257 258 // Acquire a copy-on-write reference to the given substring. 259 void Rebind(const nsScannerIterator& aStart, const nsScannerIterator& aEnd); 260 261 // Get a const reference to this string 262 const nsAString& str() const { return mString; } 263 264 private: 265 typedef nsScannerBufferList::Buffer Buffer; 266 267 void ReleaseBuffer(); 268 269 nsDependentSubstring mString; 270 Buffer* mBuffer; 271 nsScannerBufferList* mBufferList; 272 }; 273 274 /** 275 * nsScannerIterator works just like nsReadingIterator<CharT> except that 276 * it knows how to iterate over a list of scanner buffers. 277 */ 278 class nsScannerIterator { 279 public: 280 typedef nsScannerIterator self_type; 281 typedef ptrdiff_t difference_type; 282 typedef char16_t value_type; 283 typedef const char16_t* pointer; 284 typedef const char16_t& reference; 285 typedef nsScannerSubstring::Buffer Buffer; 286 287 protected: 288 nsScannerFragment mFragment; 289 const char16_t* mPosition; 290 const nsScannerSubstring* mOwner; 291 292 friend class nsScannerSubstring; 293 friend class nsScannerSharedSubstring; 294 295 public: 296 // nsScannerIterator(); // auto-generate 297 // default constructor is OK nsScannerIterator( const nsScannerIterator& ); // 298 // auto-generated copy-constructor OK nsScannerIterator& operator=( const 299 // nsScannerIterator& ); // auto-generated copy-assignment operator OK 300 301 inline void normalize_forward(); 302 inline void normalize_backward(); 303 304 pointer get() const { return mPosition; } 305 306 char16_t operator*() const { return *get(); } 307 308 const nsScannerFragment& fragment() const { return mFragment; } 309 310 const Buffer* buffer() const { return mFragment.mBuffer; } 311 312 self_type& operator++() { 313 ++mPosition; 314 normalize_forward(); 315 return *this; 316 } 317 318 self_type operator++(int) { 319 self_type result(*this); 320 ++mPosition; 321 normalize_forward(); 322 return result; 323 } 324 325 self_type& operator--() { 326 normalize_backward(); 327 --mPosition; 328 return *this; 329 } 330 331 self_type operator--(int) { 332 self_type result(*this); 333 normalize_backward(); 334 --mPosition; 335 return result; 336 } 337 338 difference_type size_forward() const { 339 return mFragment.mFragmentEnd - mPosition; 340 } 341 342 difference_type size_backward() const { 343 return mPosition - mFragment.mFragmentStart; 344 } 345 346 self_type& advance(difference_type n) { 347 while (n > 0) { 348 difference_type one_hop = std::min(n, size_forward()); 349 350 NS_ASSERTION(one_hop > 0, 351 "Infinite loop: can't advance a reading iterator beyond the " 352 "end of a string"); 353 // perhaps I should |break| if |!one_hop|? 354 355 mPosition += one_hop; 356 normalize_forward(); 357 n -= one_hop; 358 } 359 360 while (n < 0) { 361 normalize_backward(); 362 difference_type one_hop = std::max(n, -size_backward()); 363 364 NS_ASSERTION(one_hop < 0, 365 "Infinite loop: can't advance (backward) a reading iterator " 366 "beyond the end of a string"); 367 // perhaps I should |break| if |!one_hop|? 368 369 mPosition += one_hop; 370 n -= one_hop; 371 } 372 373 return *this; 374 } 375 }; 376 377 inline bool SameFragment(const nsScannerIterator& a, 378 const nsScannerIterator& b) { 379 return a.fragment().mFragmentStart == b.fragment().mFragmentStart; 380 } 381 382 template <> 383 struct nsCharSourceTraits<nsScannerIterator> { 384 typedef nsScannerIterator::difference_type difference_type; 385 386 static uint32_t readable_distance(const nsScannerIterator& first, 387 const nsScannerIterator& last) { 388 return uint32_t(SameFragment(first, last) ? last.get() - first.get() 389 : first.size_forward()); 390 } 391 392 static const nsScannerIterator::value_type* read( 393 const nsScannerIterator& iter) { 394 return iter.get(); 395 } 396 397 static void advance(nsScannerIterator& s, difference_type n) { s.advance(n); } 398 }; 399 400 /** 401 * inline methods follow 402 */ 403 404 inline void nsScannerIterator::normalize_forward() { 405 while (mPosition == mFragment.mFragmentEnd && 406 mOwner->GetNextFragment(mFragment)) 407 mPosition = mFragment.mFragmentStart; 408 } 409 410 inline void nsScannerIterator::normalize_backward() { 411 while (mPosition == mFragment.mFragmentStart && 412 mOwner->GetPrevFragment(mFragment)) 413 mPosition = mFragment.mFragmentEnd; 414 } 415 416 inline bool operator==(const nsScannerIterator& lhs, 417 const nsScannerIterator& rhs) { 418 return lhs.get() == rhs.get(); 419 } 420 421 inline bool operator!=(const nsScannerIterator& lhs, 422 const nsScannerIterator& rhs) { 423 return lhs.get() != rhs.get(); 424 } 425 426 inline nsScannerBufferList::Position::Position(const nsScannerIterator& aIter) 427 : mBuffer(const_cast<Buffer*>(aIter.buffer())), 428 mPosition(const_cast<char16_t*>(aIter.get())) {} 429 430 inline nsScannerBufferList::Position& nsScannerBufferList::Position::operator=( 431 const nsScannerIterator& aIter) { 432 mBuffer = const_cast<Buffer*>(aIter.buffer()); 433 mPosition = const_cast<char16_t*>(aIter.get()); 434 return *this; 435 } 436 437 /** 438 * scanner string utils 439 * 440 * These methods mimic the API provided by nsReadableUtils in xpcom/string. 441 * Here we provide only the methods that the htmlparser module needs. 442 */ 443 444 inline size_t Distance(const nsScannerIterator& aStart, 445 const nsScannerIterator& aEnd) { 446 typedef nsScannerBufferList::Position Position; 447 return Position::Distance(Position(aStart), Position(aEnd)); 448 } 449 450 bool CopyUnicodeTo(const nsScannerIterator& aSrcStart, 451 const nsScannerIterator& aSrcEnd, nsAString& aDest); 452 453 bool AppendUnicodeTo(const nsScannerIterator& aSrcStart, 454 const nsScannerIterator& aSrcEnd, nsAString& aDest); 455 456 #endif // !defined(nsScannerString_h___)