tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

nsConverterInputStream.cpp (8255B)


      1 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
      2 /* This Source Code Form is subject to the terms of the Mozilla Public
      3 * License, v. 2.0. If a copy of the MPL was not distributed with this
      4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      5 
      6 #include "nsConverterInputStream.h"
      7 #include "nsIInputStream.h"
      8 #include "nsReadLine.h"
      9 #include "nsStreamUtils.h"
     10 
     11 #include <tuple>
     12 
     13 using namespace mozilla;
     14 
     15 #define CONVERTER_BUFFER_SIZE 8192
     16 
     17 NS_IMPL_ISUPPORTS(nsConverterInputStream, nsIConverterInputStream,
     18                  nsIUnicharInputStream, nsIUnicharLineInputStream)
     19 
     20 NS_IMETHODIMP
     21 nsConverterInputStream::Init(nsIInputStream* aStream, const char* aCharset,
     22                             int32_t aBufferSize, char16_t aReplacementChar) {
     23  nsAutoCString label;
     24  if (!aCharset) {
     25    label.AssignLiteral("UTF-8");
     26  } else {
     27    label = aCharset;
     28  }
     29 
     30  auto encoding = Encoding::ForLabelNoReplacement(label);
     31  if (!encoding) {
     32    return NS_ERROR_UCONV_NOCONV;
     33  }
     34  // Previously, the implementation auto-switched only
     35  // between the two UTF-16 variants and only when
     36  // initialized with an endianness-unspecific label.
     37  mConverter = encoding->NewDecoder();
     38 
     39  size_t outputBufferSize;
     40  if (aBufferSize <= 0) {
     41    aBufferSize = CONVERTER_BUFFER_SIZE;
     42    outputBufferSize = CONVERTER_BUFFER_SIZE;
     43  } else {
     44    // NetUtil.sys.mjs assumes that if buffer size equals
     45    // the input size, the whole stream will be processed
     46    // as one readString. This is not true with encoding_rs,
     47    // because encoding_rs might want to see space for a
     48    // surrogate pair, so let's compute a larger output
     49    // buffer length.
     50    CheckedInt<size_t> needed = mConverter->MaxUTF16BufferLength(aBufferSize);
     51    if (!needed.isValid()) {
     52      return NS_ERROR_OUT_OF_MEMORY;
     53    }
     54    outputBufferSize = needed.value();
     55  }
     56 
     57  // set up our buffers.
     58  if (!mByteData.SetCapacity(aBufferSize, mozilla::fallible) ||
     59      !mUnicharData.SetLength(outputBufferSize, mozilla::fallible)) {
     60    return NS_ERROR_OUT_OF_MEMORY;
     61  }
     62 
     63  mInput = aStream;
     64  mErrorsAreFatal = !aReplacementChar;
     65  return NS_OK;
     66 }
     67 
     68 NS_IMETHODIMP
     69 nsConverterInputStream::Close() {
     70  nsresult rv = mInput ? mInput->Close() : NS_OK;
     71  mLineBuffer = nullptr;
     72  mInput = nullptr;
     73  mConverter = nullptr;
     74  mByteData.Clear();
     75  mUnicharData.Clear();
     76  return rv;
     77 }
     78 
     79 NS_IMETHODIMP
     80 nsConverterInputStream::Read(char16_t* aBuf, uint32_t aCount,
     81                             uint32_t* aReadCount) {
     82  NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");
     83  uint32_t readCount = mUnicharDataLength - mUnicharDataOffset;
     84  if (0 == readCount) {
     85    // Fill the unichar buffer
     86    readCount = Fill(&mLastErrorCode);
     87    if (readCount == 0) {
     88      *aReadCount = 0;
     89      return mLastErrorCode;
     90    }
     91  }
     92  if (readCount > aCount) {
     93    readCount = aCount;
     94  }
     95  memcpy(aBuf, mUnicharData.Elements() + mUnicharDataOffset,
     96         readCount * sizeof(char16_t));
     97  mUnicharDataOffset += readCount;
     98  *aReadCount = readCount;
     99  return NS_OK;
    100 }
    101 
    102 NS_IMETHODIMP
    103 nsConverterInputStream::ReadSegments(nsWriteUnicharSegmentFun aWriter,
    104                                     void* aClosure, uint32_t aCount,
    105                                     uint32_t* aReadCount) {
    106  NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");
    107  uint32_t codeUnitsToWrite = mUnicharDataLength - mUnicharDataOffset;
    108  if (0 == codeUnitsToWrite) {
    109    // Fill the unichar buffer
    110    codeUnitsToWrite = Fill(&mLastErrorCode);
    111    if (codeUnitsToWrite == 0) {
    112      *aReadCount = 0;
    113      return mLastErrorCode;
    114    }
    115  }
    116 
    117  if (codeUnitsToWrite > aCount) {
    118    codeUnitsToWrite = aCount;
    119  }
    120 
    121  uint32_t codeUnitsWritten;
    122  uint32_t totalCodeUnitsWritten = 0;
    123 
    124  while (codeUnitsToWrite) {
    125    nsresult rv =
    126        aWriter(this, aClosure, mUnicharData.Elements() + mUnicharDataOffset,
    127                totalCodeUnitsWritten, codeUnitsToWrite, &codeUnitsWritten);
    128    if (NS_FAILED(rv)) {
    129      // don't propagate errors to the caller
    130      break;
    131    }
    132 
    133    codeUnitsToWrite -= codeUnitsWritten;
    134    totalCodeUnitsWritten += codeUnitsWritten;
    135    mUnicharDataOffset += codeUnitsWritten;
    136  }
    137 
    138  *aReadCount = totalCodeUnitsWritten;
    139 
    140  return NS_OK;
    141 }
    142 
    143 NS_IMETHODIMP
    144 nsConverterInputStream::ReadString(uint32_t aCount, nsAString& aString,
    145                                   uint32_t* aReadCount) {
    146  NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");
    147  uint32_t readCount = mUnicharDataLength - mUnicharDataOffset;
    148  if (0 == readCount) {
    149    // Fill the unichar buffer
    150    readCount = Fill(&mLastErrorCode);
    151    if (readCount == 0) {
    152      *aReadCount = 0;
    153      return mLastErrorCode;
    154    }
    155  }
    156  if (readCount > aCount) {
    157    readCount = aCount;
    158  }
    159  const char16_t* buf = mUnicharData.Elements() + mUnicharDataOffset;
    160  aString.Assign(buf, readCount);
    161  mUnicharDataOffset += readCount;
    162  *aReadCount = readCount;
    163  return NS_OK;
    164 }
    165 
    166 uint32_t nsConverterInputStream::Fill(nsresult* aErrorCode) {
    167  if (!mInput) {
    168    // We already closed the stream!
    169    *aErrorCode = NS_BASE_STREAM_CLOSED;
    170    return 0;
    171  }
    172 
    173  if (NS_FAILED(mLastErrorCode)) {
    174    // We failed to completely convert last time, and error-recovery
    175    // is disabled.  We will fare no better this time, so...
    176    *aErrorCode = mLastErrorCode;
    177    return 0;
    178  }
    179 
    180  // mUnicharData.Length() is the buffer length, not the fill status.
    181  // mUnicharDataLength reflects the current fill status.
    182  mUnicharDataLength = 0;
    183  // Whenever we convert, mUnicharData is logically empty.
    184  mUnicharDataOffset = 0;
    185 
    186  // Continue trying to read from the source stream until we successfully decode
    187  // a character or encounter an error, as returning `0` here implies that the
    188  // stream is complete.
    189  //
    190  // If the converter has been cleared, we've fully consumed the stream, and
    191  // want to report EOF.
    192  while (mUnicharDataLength == 0 && mConverter) {
    193    // We assume a many to one conversion and are using equal sizes for
    194    // the two buffers.  However if an error happens at the very start
    195    // of a byte buffer we may end up in a situation where n bytes lead
    196    // to n+1 unicode chars.  Thus we need to keep track of the leftover
    197    // bytes as we convert.
    198 
    199    uint32_t nb;
    200    *aErrorCode = NS_FillArray(mByteData, mInput, mLeftOverBytes, &nb);
    201    if (NS_FAILED(*aErrorCode)) {
    202      return 0;
    203    }
    204 
    205    NS_ASSERTION(uint32_t(nb) + mLeftOverBytes == mByteData.Length(),
    206                 "mByteData is lying to us somewhere");
    207 
    208    // If `NS_FillArray` failed to read any new bytes, this is the last read,
    209    // and we're at the end of the stream.
    210    bool last = (nb == 0);
    211 
    212    // Now convert as much of the byte buffer to unicode as possible
    213    auto src = AsBytes(Span(mByteData));
    214    auto dst = Span(mUnicharData);
    215 
    216    // Truncation from size_t to uint32_t below is OK, because the sizes
    217    // are bounded by the lengths of mByteData and mUnicharData.
    218    uint32_t result;
    219    size_t read;
    220    size_t written;
    221    if (mErrorsAreFatal) {
    222      std::tie(result, read, written) =
    223          mConverter->DecodeToUTF16WithoutReplacement(src, dst, last);
    224    } else {
    225      std::tie(result, read, written, std::ignore) =
    226          mConverter->DecodeToUTF16(src, dst, last);
    227    }
    228    mLeftOverBytes = mByteData.Length() - read;
    229    mUnicharDataLength = written;
    230    // Clear `mConverter` if we reached the end of the stream, as we can't
    231    // call methods on it anymore. This will also signal EOF to the caller
    232    // through the loop condition.
    233    if (last) {
    234      MOZ_ASSERT(mLeftOverBytes == 0,
    235                 "Failed to read all bytes on the last pass?");
    236      mConverter = nullptr;
    237    }
    238    // If we got a decode error, we're done.
    239    if (result != kInputEmpty && result != kOutputFull) {
    240      MOZ_ASSERT(mErrorsAreFatal, "How come DecodeToUTF16() reported error?");
    241      *aErrorCode = NS_ERROR_UDEC_ILLEGALINPUT;
    242      return 0;
    243    }
    244  }
    245  *aErrorCode = NS_OK;
    246  return mUnicharDataLength;
    247 }
    248 
    249 NS_IMETHODIMP
    250 nsConverterInputStream::ReadLine(nsAString& aLine, bool* aResult) {
    251  if (!mLineBuffer) {
    252    mLineBuffer = MakeUnique<nsLineBuffer<char16_t>>();
    253  }
    254  return NS_ReadLine(this, mLineBuffer.get(), aLine, aResult);
    255 }