tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

nsScanner.cpp (9371B)


      1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
      2 /* vim: set ts=2 sw=2 et tw=78: */
      3 /* This Source Code Form is subject to the terms of the Mozilla Public
      4 * License, v. 2.0. If a copy of the MPL was not distributed with this
      5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      6 
      7 // #define __INCREMENTAL 1
      8 
      9 #include "nsScanner.h"
     10 
     11 #include "mozilla/Encoding.h"
     12 #include "mozilla/UniquePtr.h"
     13 #include "nsDebug.h"
     14 #include "nsReadableUtils.h"
     15 #include "nsUTF8Utils.h"  // for LossyConvertEncoding
     16 #include "nsCRT.h"
     17 #include "nsParser.h"
     18 #include "nsCharsetSource.h"
     19 
     20 nsReadEndCondition::nsReadEndCondition(const char16_t* aTerminateChars)
     21    : mChars(aTerminateChars),
     22      mFilter(char16_t(~0))  // All bits set
     23 {
     24  // Build filter that will be used to filter out characters with
     25  // bits that none of the terminal chars have. This works very well
     26  // because terminal chars often have only the last 4-6 bits set and
     27  // normal ascii letters have bit 7 set. Other letters have even higher
     28  // bits set.
     29 
     30  // Calculate filter
     31  const char16_t* current = aTerminateChars;
     32  char16_t terminalChar = *current;
     33  while (terminalChar) {
     34    mFilter &= ~terminalChar;
     35    ++current;
     36    terminalChar = *current;
     37  }
     38 }
     39 
     40 /**
     41 *  Use this constructor if you want i/o to be based on
     42 *  a single string you hand in during construction.
     43 *  This short cut was added for Javascript.
     44 *
     45 *  @update  gess 5/12/98
     46 *  @param   aMode represents the parser mode (nav, other)
     47 *  @return
     48 */
     49 nsScanner::nsScanner(const nsAString& anHTMLString, bool aIncremental)
     50    : mIncremental(aIncremental) {
     51  MOZ_COUNT_CTOR(nsScanner);
     52 
     53  AppendToBuffer(anHTMLString);
     54  MOZ_ASSERT(mMarkPosition == mCurrentPosition);
     55 }
     56 
     57 /**
     58 *  Use this constructor if you want i/o to be based on strings
     59 *  the scanner receives. If you pass a null filename, you
     60 *  can still provide data to the scanner via append.
     61 */
     62 nsScanner::nsScanner(nsIURI* aURI) : mURI(aURI), mIncremental(true) {
     63  MOZ_COUNT_CTOR(nsScanner);
     64 
     65  // XXX This is a big hack.  We need to initialize the iterators to something.
     66  // What matters is that mCurrentPosition == mEndPosition, so that our methods
     67  // believe that we are at EOF (see bug 182067).  We null out mCurrentPosition
     68  // so that we have some hope of catching null pointer dereferences associated
     69  // with this hack. --darin
     70  memset(&mCurrentPosition, 0, sizeof(mCurrentPosition));
     71  mMarkPosition = mCurrentPosition;
     72  mEndPosition = mCurrentPosition;
     73 
     74  // XML defaults to UTF-8 and about:blank is UTF-8, too.
     75  SetDocumentCharset(UTF_8_ENCODING, kCharsetFromDocTypeDefault);
     76 }
     77 
     78 nsresult nsScanner::SetDocumentCharset(NotNull<const Encoding*> aEncoding,
     79                                       int32_t aSource) {
     80  if (aSource < mCharsetSource)  // priority is lower than the current one
     81    return NS_OK;
     82 
     83  mCharsetSource = aSource;
     84  nsCString charsetName;
     85  aEncoding->Name(charsetName);
     86  if (!mCharset.IsEmpty() && charsetName.Equals(mCharset)) {
     87    return NS_OK;  // no difference, don't change it
     88  }
     89 
     90  // different, need to change it
     91 
     92  mCharset.Assign(charsetName);
     93 
     94  mUnicodeDecoder = aEncoding->NewDecoderWithBOMRemoval();
     95 
     96  return NS_OK;
     97 }
     98 
     99 /**
    100 *  default destructor
    101 *
    102 *  @update  gess 3/25/98
    103 *  @param
    104 *  @return
    105 */
    106 nsScanner::~nsScanner() { MOZ_COUNT_DTOR(nsScanner); }
    107 
    108 /**
    109 *  Resets current offset position of input stream to marked position.
    110 *  This allows us to back up to this point if the need should arise,
    111 *  such as when tokenization gets interrupted.
    112 *  NOTE: IT IS REALLY BAD FORM TO CALL RELEASE WITHOUT CALLING MARK FIRST!
    113 *
    114 *  @update  gess 5/12/98
    115 *  @param
    116 *  @return
    117 */
    118 void nsScanner::RewindToMark(void) {
    119  if (mSlidingBuffer) {
    120    mCurrentPosition = mMarkPosition;
    121  }
    122 }
    123 
    124 /**
    125 *  Records current offset position in input stream. This allows us
    126 *  to back up to this point if the need should arise, such as when
    127 *  tokenization gets interrupted.
    128 *
    129 *  @update  gess 7/29/98
    130 *  @param
    131 *  @return
    132 */
    133 int32_t nsScanner::Mark() {
    134  int32_t distance = 0;
    135  if (mSlidingBuffer) {
    136    nsScannerIterator oldStart;
    137    mSlidingBuffer->BeginReading(oldStart);
    138 
    139    distance = Distance(oldStart, mCurrentPosition);
    140 
    141    mSlidingBuffer->DiscardPrefix(mCurrentPosition);
    142    mSlidingBuffer->BeginReading(mCurrentPosition);
    143    mMarkPosition = mCurrentPosition;
    144  }
    145 
    146  return distance;
    147 }
    148 
    149 /**
    150 * Insert data to our underlying input buffer as
    151 * if it were read from an input stream.
    152 *
    153 * @update  harishd 01/12/99
    154 * @return  error code
    155 */
    156 bool nsScanner::UngetReadable(const nsAString& aBuffer) {
    157  if (!mSlidingBuffer) {
    158    return false;
    159  }
    160 
    161  mSlidingBuffer->UngetReadable(aBuffer, mCurrentPosition);
    162  mSlidingBuffer->BeginReading(
    163      mCurrentPosition);  // Insertion invalidated our iterators
    164  mSlidingBuffer->EndReading(mEndPosition);
    165 
    166  return true;
    167 }
    168 
    169 /**
    170 * Append data to our underlying input buffer as
    171 * if it were read from an input stream.
    172 *
    173 * @update  gess4/3/98
    174 * @return  error code
    175 */
    176 nsresult nsScanner::Append(const nsAString& aBuffer) {
    177  if (!AppendToBuffer(aBuffer)) return NS_ERROR_OUT_OF_MEMORY;
    178  return NS_OK;
    179 }
    180 
    181 /**
    182 *
    183 *
    184 *  @update  gess 5/21/98
    185 *  @param
    186 *  @return
    187 */
    188 nsresult nsScanner::Append(const char* aBuffer, uint32_t aLen) {
    189  nsresult res = NS_OK;
    190  if (mUnicodeDecoder) {
    191    mozilla::CheckedInt<size_t> needed =
    192        mUnicodeDecoder->MaxUTF16BufferLength(aLen);
    193    if (!needed.isValid()) {
    194      return NS_ERROR_OUT_OF_MEMORY;
    195    }
    196    mozilla::CheckedInt<uint32_t> allocLen(
    197        1);  // null terminator due to legacy sadness
    198    allocLen += needed.value();
    199    if (!allocLen.isValid()) {
    200      return NS_ERROR_OUT_OF_MEMORY;
    201    }
    202    nsScannerString::Buffer* buffer =
    203        nsScannerString::AllocBuffer(allocLen.value());
    204    NS_ENSURE_TRUE(buffer, NS_ERROR_OUT_OF_MEMORY);
    205    char16_t* unichars = buffer->DataStart();
    206 
    207    uint32_t result;
    208    size_t read;
    209    size_t written;
    210    // Do not use structured binding lest deal with [-Werror=unused-variable]
    211    std::tie(result, read, written) =
    212        mUnicodeDecoder->DecodeToUTF16WithoutReplacement(
    213            AsBytes(mozilla::Span(aBuffer, aLen)),
    214            mozilla::Span(unichars, needed.value()),
    215            false);  // Retain bug about failure to handle EOF
    216    MOZ_ASSERT(result != mozilla::kOutputFull);
    217    MOZ_ASSERT(read <= aLen);
    218    MOZ_ASSERT(written <= needed.value());
    219    if (result != mozilla::kInputEmpty) {
    220      // Since about:blank is empty, this line runs only for XML. Use a
    221      // character that's illegal in XML instead of U+FFFD in order to make
    222      // expat flag the error. There is no need to loop and convert more, since
    223      // expat will stop here anyway.
    224      unichars[written++] = 0xFFFF;
    225    }
    226    buffer->SetDataLength(written);
    227    // Don't propagate return code of unicode decoder
    228    // since it doesn't reflect on our success or failure
    229    // - Ref. bug 87110
    230    res = NS_OK;
    231    AppendToBuffer(buffer);
    232  } else {
    233    NS_WARNING("No decoder found.");
    234    res = NS_ERROR_FAILURE;
    235  }
    236 
    237  return res;
    238 }
    239 
    240 /**
    241 *  retrieve next char from scanners internal input stream
    242 *
    243 *  @update  gess 3/25/98
    244 *  @param
    245 *  @return  error code reflecting read status
    246 */
    247 nsresult nsScanner::GetChar(char16_t& aChar) {
    248  if (!mSlidingBuffer || mCurrentPosition == mEndPosition) {
    249    aChar = 0;
    250    return NS_ERROR_HTMLPARSER_EOF;
    251  }
    252 
    253  aChar = *mCurrentPosition++;
    254 
    255  return NS_OK;
    256 }
    257 
    258 void nsScanner::BindSubstring(nsScannerSubstring& aSubstring,
    259                              const nsScannerIterator& aStart,
    260                              const nsScannerIterator& aEnd) {
    261  aSubstring.Rebind(*mSlidingBuffer, aStart, aEnd);
    262 }
    263 
    264 void nsScanner::CurrentPosition(nsScannerIterator& aPosition) {
    265  aPosition = mCurrentPosition;
    266 }
    267 
    268 void nsScanner::EndReading(nsScannerIterator& aPosition) {
    269  aPosition = mEndPosition;
    270 }
    271 
    272 void nsScanner::SetPosition(nsScannerIterator& aPosition, bool aTerminate) {
    273  if (mSlidingBuffer) {
    274    mCurrentPosition = aPosition;
    275    if (aTerminate && (mCurrentPosition == mEndPosition)) {
    276      mMarkPosition = mCurrentPosition;
    277      mSlidingBuffer->DiscardPrefix(mCurrentPosition);
    278    }
    279  }
    280 }
    281 
    282 void nsScanner::AppendToBuffer(nsScannerString::Buffer* aBuf) {
    283  if (!mSlidingBuffer) {
    284    mSlidingBuffer = mozilla::MakeUnique<nsScannerString>(aBuf);
    285    mSlidingBuffer->BeginReading(mCurrentPosition);
    286    mMarkPosition = mCurrentPosition;
    287  } else {
    288    mSlidingBuffer->AppendBuffer(aBuf);
    289    if (mCurrentPosition == mEndPosition) {
    290      mSlidingBuffer->BeginReading(mCurrentPosition);
    291    }
    292  }
    293  mSlidingBuffer->EndReading(mEndPosition);
    294 }
    295 
    296 /**
    297 *  call this to copy bytes out of the scanner that have not yet been consumed
    298 *  by the tokenization process.
    299 *
    300 *  @update  gess 5/12/98
    301 *  @param   aCopyBuffer is where the scanner buffer will be copied to
    302 *  @return  true if OK or false on OOM
    303 */
    304 bool nsScanner::CopyUnusedData(nsString& aCopyBuffer) {
    305  if (!mSlidingBuffer) {
    306    aCopyBuffer.Truncate();
    307    return true;
    308  }
    309 
    310  nsScannerIterator start, end;
    311  start = mCurrentPosition;
    312  end = mEndPosition;
    313 
    314  return CopyUnicodeTo(start, end, aCopyBuffer);
    315 }
    316 
    317 /**
    318 *  Conduct self test. Actually, selftesting for this class
    319 *  occurs in the parser selftest.
    320 *
    321 *  @update  gess 3/25/98
    322 *  @param
    323 *  @return
    324 */
    325 
    326 void nsScanner::SelfTest(void) {
    327 #ifdef _DEBUG
    328 #endif
    329 }