tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

nsScannerString.h (13721B)


      1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
      2 /* vim:set ts=2 sw=2 sts=2 et cindent: */
      3 /* This Source Code Form is subject to the terms of the Mozilla Public
      4 * License, v. 2.0. If a copy of the MPL was not distributed with this
      5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      6 
      7 #ifndef nsScannerString_h___
      8 #define nsScannerString_h___
      9 
     10 #include "nsString.h"
     11 #include "nsUnicharUtils.h"  // for nsCaseInsensitiveStringComparator
     12 #include "mozilla/LinkedList.h"
     13 #include <algorithm>
     14 
     15 /**
     16 * NOTE: nsScannerString (and the other classes defined in this file) are
     17 * not related to nsAString or any of the other xpcom/string classes.
     18 *
     19 * nsScannerString is based on the nsSlidingString implementation that used
     20 * to live in xpcom/string.  Now that nsAString is limited to representing
     21 * only single fragment strings, nsSlidingString can no longer be used.
     22 *
     23 * An advantage to this design is that it does not employ any virtual
     24 * functions.
     25 *
     26 * This file uses SCC-style indenting in deference to the nsSlidingString
     27 * code from which this code is derived ;-)
     28 */
     29 
     30 class nsScannerIterator;
     31 class nsScannerSubstring;
     32 class nsScannerString;
     33 
     34 /**
     35 * nsScannerBufferList
     36 *
     37 * This class maintains a list of heap-allocated Buffer objects.  The buffers
     38 * are maintained in a circular linked list.  Each buffer has a usage count
     39 * that is decremented by the owning nsScannerSubstring.
     40 *
     41 * The buffer list itself is reference counted.  This allows the buffer list
     42 * to be shared by multiple nsScannerSubstring objects.  The reference
     43 * counting is not threadsafe, which is not at all a requirement.
     44 *
     45 * When a nsScannerSubstring releases its reference to a buffer list, it
     46 * decrements the usage count of the first buffer in the buffer list that it
     47 * was referencing.  It informs the buffer list that it can discard buffers
     48 * starting at that prefix.  The buffer list will do so if the usage count of
     49 * that buffer is 0 and if it is the first buffer in the list.  It will
     50 * continue to prune buffers starting from the front of the buffer list until
     51 * it finds a buffer that has a usage count that is non-zero.
     52 */
     53 class nsScannerBufferList {
     54 public:
     55  /**
     56   * Buffer objects are directly followed by a data segment.  The start
     57   * of the data segment is determined by increment the |this| pointer
     58   * by 1 unit.
     59   */
     60  class Buffer : public mozilla::LinkedListElement<Buffer> {
     61   public:
     62    void IncrementUsageCount() { ++mUsageCount; }
     63    void DecrementUsageCount() { --mUsageCount; }
     64 
     65    bool IsInUse() const { return mUsageCount != 0; }
     66 
     67    const char16_t* DataStart() const { return (const char16_t*)(this + 1); }
     68    char16_t* DataStart() { return (char16_t*)(this + 1); }
     69 
     70    const char16_t* DataEnd() const { return mDataEnd; }
     71    char16_t* DataEnd() { return mDataEnd; }
     72 
     73    const Buffer* Next() const { return getNext(); }
     74    Buffer* Next() { return getNext(); }
     75 
     76    const Buffer* Prev() const { return getPrevious(); }
     77    Buffer* Prev() { return getPrevious(); }
     78 
     79    uint32_t DataLength() const { return mDataEnd - DataStart(); }
     80    void SetDataLength(uint32_t len) { mDataEnd = DataStart() + len; }
     81 
     82   private:
     83    friend class nsScannerBufferList;
     84 
     85    int32_t mUsageCount;
     86    char16_t* mDataEnd;
     87  };
     88 
     89  /**
     90   * Position objects serve as lightweight pointers into a buffer list.
     91   * The mPosition member must be contained with mBuffer->DataStart()
     92   * and mBuffer->DataEnd().
     93   */
     94  class Position {
     95   public:
     96    Position() : mBuffer(nullptr), mPosition(nullptr) {}
     97 
     98    Position(Buffer* buffer, char16_t* position)
     99        : mBuffer(buffer), mPosition(position) {}
    100 
    101    inline explicit Position(const nsScannerIterator& aIter);
    102 
    103    inline Position& operator=(const nsScannerIterator& aIter);
    104 
    105    static size_t Distance(const Position& p1, const Position& p2);
    106 
    107    Buffer* mBuffer;
    108    char16_t* mPosition;
    109  };
    110 
    111  static Buffer* AllocBufferFromString(const nsAString&);
    112  static Buffer* AllocBuffer(uint32_t capacity);  // capacity = number of chars
    113 
    114  explicit nsScannerBufferList(Buffer* buf) : mRefCnt(0) {
    115    mBuffers.insertBack(buf);
    116  }
    117 
    118  void AddRef() { ++mRefCnt; }
    119  void Release() {
    120    if (--mRefCnt == 0) delete this;
    121  }
    122 
    123  void Append(Buffer* buf) { mBuffers.insertBack(buf); }
    124  void InsertAfter(Buffer* buf, Buffer* prev) { prev->setNext(buf); }
    125  void SplitBuffer(const Position&);
    126  void DiscardUnreferencedPrefix(Buffer*);
    127 
    128  Buffer* Head() { return mBuffers.getFirst(); }
    129  const Buffer* Head() const { return mBuffers.getFirst(); }
    130 
    131  Buffer* Tail() { return mBuffers.getLast(); }
    132  const Buffer* Tail() const { return mBuffers.getLast(); }
    133 
    134 private:
    135  friend class nsScannerSubstring;
    136 
    137  ~nsScannerBufferList() { ReleaseAll(); }
    138  void ReleaseAll();
    139 
    140  int32_t mRefCnt;
    141  mozilla::LinkedList<Buffer> mBuffers;
    142 };
    143 
    144 /**
    145 * nsScannerFragment represents a "slice" of a Buffer object.
    146 */
    147 struct nsScannerFragment {
    148  typedef nsScannerBufferList::Buffer Buffer;
    149 
    150  const Buffer* mBuffer;
    151  const char16_t* mFragmentStart;
    152  const char16_t* mFragmentEnd;
    153 };
    154 
    155 /**
    156 * nsScannerSubstring is the base class for nsScannerString.  It provides
    157 * access to iterators and methods to bind the substring to another
    158 * substring or nsAString instance.
    159 *
    160 * This class owns the buffer list.
    161 */
    162 class nsScannerSubstring {
    163 public:
    164  typedef nsScannerBufferList::Buffer Buffer;
    165  typedef nsScannerBufferList::Position Position;
    166  typedef uint32_t size_type;
    167 
    168  nsScannerSubstring();
    169  explicit nsScannerSubstring(const nsAString& s);
    170 
    171  ~nsScannerSubstring();
    172 
    173  nsScannerIterator& BeginReading(nsScannerIterator& iter) const;
    174  nsScannerIterator& EndReading(nsScannerIterator& iter) const;
    175 
    176  size_type Length() const { return mLength; }
    177 
    178  void Rebind(const nsScannerSubstring&, const nsScannerIterator&,
    179              const nsScannerIterator&);
    180  void Rebind(const nsAString&);
    181 
    182  bool GetNextFragment(nsScannerFragment&) const;
    183  bool GetPrevFragment(nsScannerFragment&) const;
    184 
    185  static inline Buffer* AllocBufferFromString(const nsAString& aStr) {
    186    return nsScannerBufferList::AllocBufferFromString(aStr);
    187  }
    188  static inline Buffer* AllocBuffer(size_type aCapacity) {
    189    return nsScannerBufferList::AllocBuffer(aCapacity);
    190  }
    191 
    192 protected:
    193  void acquire_ownership_of_buffer_list() const {
    194    mBufferList->AddRef();
    195    mStart.mBuffer->IncrementUsageCount();
    196  }
    197 
    198  void release_ownership_of_buffer_list() {
    199    if (mBufferList) {
    200      mStart.mBuffer->DecrementUsageCount();
    201      mBufferList->DiscardUnreferencedPrefix(mStart.mBuffer);
    202      mBufferList->Release();
    203    }
    204  }
    205 
    206  void init_range_from_buffer_list() {
    207    mStart.mBuffer = mBufferList->Head();
    208    mStart.mPosition = mStart.mBuffer->DataStart();
    209 
    210    mEnd.mBuffer = mBufferList->Tail();
    211    mEnd.mPosition = mEnd.mBuffer->DataEnd();
    212 
    213    mLength = Position::Distance(mStart, mEnd);
    214  }
    215 
    216  Position mStart;
    217  Position mEnd;
    218  nsScannerBufferList* mBufferList;
    219  size_type mLength;
    220 
    221  friend class nsScannerSharedSubstring;
    222 };
    223 
    224 /**
    225 * nsScannerString provides methods to grow and modify a buffer list.
    226 */
    227 class nsScannerString : public nsScannerSubstring {
    228 public:
    229  explicit nsScannerString(Buffer*);
    230 
    231  // you are giving ownership to the string, it takes and keeps your
    232  // buffer, deleting it when done.
    233  // Use AllocBuffer or AllocBufferFromString to create a Buffer object
    234  // for use with this function.
    235  void AppendBuffer(Buffer*);
    236 
    237  void DiscardPrefix(const nsScannerIterator&);
    238  // any other way you want to do this?
    239 
    240  void UngetReadable(const nsAString& aReadable,
    241                     const nsScannerIterator& aCurrentPosition);
    242 };
    243 
    244 /**
    245 * nsScannerSharedSubstring implements copy-on-write semantics for
    246 * nsScannerSubstring.  This class also manages releasing
    247 * the reference to the scanner buffer when it is no longer needed.
    248 */
    249 
    250 class nsScannerSharedSubstring {
    251 public:
    252  nsScannerSharedSubstring() : mBuffer(nullptr), mBufferList(nullptr) {}
    253 
    254  ~nsScannerSharedSubstring() {
    255    if (mBufferList) ReleaseBuffer();
    256  }
    257 
    258  // Acquire a copy-on-write reference to the given substring.
    259  void Rebind(const nsScannerIterator& aStart, const nsScannerIterator& aEnd);
    260 
    261  // Get a const reference to this string
    262  const nsAString& str() const { return mString; }
    263 
    264 private:
    265  typedef nsScannerBufferList::Buffer Buffer;
    266 
    267  void ReleaseBuffer();
    268 
    269  nsDependentSubstring mString;
    270  Buffer* mBuffer;
    271  nsScannerBufferList* mBufferList;
    272 };
    273 
    274 /**
    275 * nsScannerIterator works just like nsReadingIterator<CharT> except that
    276 * it knows how to iterate over a list of scanner buffers.
    277 */
    278 class nsScannerIterator {
    279 public:
    280  typedef nsScannerIterator self_type;
    281  typedef ptrdiff_t difference_type;
    282  typedef char16_t value_type;
    283  typedef const char16_t* pointer;
    284  typedef const char16_t& reference;
    285  typedef nsScannerSubstring::Buffer Buffer;
    286 
    287 protected:
    288  nsScannerFragment mFragment;
    289  const char16_t* mPosition;
    290  const nsScannerSubstring* mOwner;
    291 
    292  friend class nsScannerSubstring;
    293  friend class nsScannerSharedSubstring;
    294 
    295 public:
    296  // nsScannerIterator();                                       // auto-generate
    297  // default constructor is OK nsScannerIterator( const nsScannerIterator& ); //
    298  // auto-generated copy-constructor OK nsScannerIterator& operator=( const
    299  // nsScannerIterator& );  // auto-generated copy-assignment operator OK
    300 
    301  inline void normalize_forward();
    302  inline void normalize_backward();
    303 
    304  pointer get() const { return mPosition; }
    305 
    306  char16_t operator*() const { return *get(); }
    307 
    308  const nsScannerFragment& fragment() const { return mFragment; }
    309 
    310  const Buffer* buffer() const { return mFragment.mBuffer; }
    311 
    312  self_type& operator++() {
    313    ++mPosition;
    314    normalize_forward();
    315    return *this;
    316  }
    317 
    318  self_type operator++(int) {
    319    self_type result(*this);
    320    ++mPosition;
    321    normalize_forward();
    322    return result;
    323  }
    324 
    325  self_type& operator--() {
    326    normalize_backward();
    327    --mPosition;
    328    return *this;
    329  }
    330 
    331  self_type operator--(int) {
    332    self_type result(*this);
    333    normalize_backward();
    334    --mPosition;
    335    return result;
    336  }
    337 
    338  difference_type size_forward() const {
    339    return mFragment.mFragmentEnd - mPosition;
    340  }
    341 
    342  difference_type size_backward() const {
    343    return mPosition - mFragment.mFragmentStart;
    344  }
    345 
    346  self_type& advance(difference_type n) {
    347    while (n > 0) {
    348      difference_type one_hop = std::min(n, size_forward());
    349 
    350      NS_ASSERTION(one_hop > 0,
    351                   "Infinite loop: can't advance a reading iterator beyond the "
    352                   "end of a string");
    353      // perhaps I should |break| if |!one_hop|?
    354 
    355      mPosition += one_hop;
    356      normalize_forward();
    357      n -= one_hop;
    358    }
    359 
    360    while (n < 0) {
    361      normalize_backward();
    362      difference_type one_hop = std::max(n, -size_backward());
    363 
    364      NS_ASSERTION(one_hop < 0,
    365                   "Infinite loop: can't advance (backward) a reading iterator "
    366                   "beyond the end of a string");
    367      // perhaps I should |break| if |!one_hop|?
    368 
    369      mPosition += one_hop;
    370      n -= one_hop;
    371    }
    372 
    373    return *this;
    374  }
    375 };
    376 
    377 inline bool SameFragment(const nsScannerIterator& a,
    378                         const nsScannerIterator& b) {
    379  return a.fragment().mFragmentStart == b.fragment().mFragmentStart;
    380 }
    381 
    382 template <>
    383 struct nsCharSourceTraits<nsScannerIterator> {
    384  typedef nsScannerIterator::difference_type difference_type;
    385 
    386  static uint32_t readable_distance(const nsScannerIterator& first,
    387                                    const nsScannerIterator& last) {
    388    return uint32_t(SameFragment(first, last) ? last.get() - first.get()
    389                                              : first.size_forward());
    390  }
    391 
    392  static const nsScannerIterator::value_type* read(
    393      const nsScannerIterator& iter) {
    394    return iter.get();
    395  }
    396 
    397  static void advance(nsScannerIterator& s, difference_type n) { s.advance(n); }
    398 };
    399 
    400 /**
    401 * inline methods follow
    402 */
    403 
    404 inline void nsScannerIterator::normalize_forward() {
    405  while (mPosition == mFragment.mFragmentEnd &&
    406         mOwner->GetNextFragment(mFragment))
    407    mPosition = mFragment.mFragmentStart;
    408 }
    409 
    410 inline void nsScannerIterator::normalize_backward() {
    411  while (mPosition == mFragment.mFragmentStart &&
    412         mOwner->GetPrevFragment(mFragment))
    413    mPosition = mFragment.mFragmentEnd;
    414 }
    415 
    416 inline bool operator==(const nsScannerIterator& lhs,
    417                       const nsScannerIterator& rhs) {
    418  return lhs.get() == rhs.get();
    419 }
    420 
    421 inline bool operator!=(const nsScannerIterator& lhs,
    422                       const nsScannerIterator& rhs) {
    423  return lhs.get() != rhs.get();
    424 }
    425 
    426 inline nsScannerBufferList::Position::Position(const nsScannerIterator& aIter)
    427    : mBuffer(const_cast<Buffer*>(aIter.buffer())),
    428      mPosition(const_cast<char16_t*>(aIter.get())) {}
    429 
    430 inline nsScannerBufferList::Position& nsScannerBufferList::Position::operator=(
    431    const nsScannerIterator& aIter) {
    432  mBuffer = const_cast<Buffer*>(aIter.buffer());
    433  mPosition = const_cast<char16_t*>(aIter.get());
    434  return *this;
    435 }
    436 
    437 /**
    438 * scanner string utils
    439 *
    440 * These methods mimic the API provided by nsReadableUtils in xpcom/string.
    441 * Here we provide only the methods that the htmlparser module needs.
    442 */
    443 
    444 inline size_t Distance(const nsScannerIterator& aStart,
    445                       const nsScannerIterator& aEnd) {
    446  typedef nsScannerBufferList::Position Position;
    447  return Position::Distance(Position(aStart), Position(aEnd));
    448 }
    449 
    450 bool CopyUnicodeTo(const nsScannerIterator& aSrcStart,
    451                   const nsScannerIterator& aSrcEnd, nsAString& aDest);
    452 
    453 bool AppendUnicodeTo(const nsScannerIterator& aSrcStart,
    454                     const nsScannerIterator& aSrcEnd, nsAString& aDest);
    455 
    456 #endif  // !defined(nsScannerString_h___)