tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

nsScriptableUConv.cpp (6178B)


      1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
      2 /* This Source Code Form is subject to the terms of the Mozilla Public
      3 * License, v. 2.0. If a copy of the MPL was not distributed with this
      4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      5 
      6 #include "nsString.h"
      7 #include "nsIScriptableUConv.h"
      8 #include "nsScriptableUConv.h"
      9 #include "nsComponentManagerUtils.h"
     10 
     11 #include <tuple>
     12 
     13 using namespace mozilla;
     14 
     15 /* Implementation file */
     16 NS_IMPL_ISUPPORTS(nsScriptableUnicodeConverter, nsIScriptableUnicodeConverter)
     17 
     18 nsScriptableUnicodeConverter::nsScriptableUnicodeConverter()
     19    : mIsInternal(false) {}
     20 
     21 nsScriptableUnicodeConverter::~nsScriptableUnicodeConverter() = default;
     22 
     23 NS_IMETHODIMP
     24 nsScriptableUnicodeConverter::ConvertFromUnicode(const nsAString& aSrc,
     25                                                 nsACString& _retval) {
     26  if (!mEncoder) return NS_ERROR_FAILURE;
     27 
     28  // We can compute the length without replacement, because the
     29  // the replacement is only one byte long and a mappable character
     30  // would always output something, i.e. at least one byte.
     31  // When encoding to ISO-2022-JP, unmappables shouldn't be able
     32  // to cause more escape sequences to be emitted than the mappable
     33  // worst case where every input character causes an escape into
     34  // a different state.
     35  CheckedInt<size_t> needed =
     36      mEncoder->MaxBufferLengthFromUTF16WithoutReplacement(aSrc.Length());
     37  if (!needed.isValid() || needed.value() > UINT32_MAX) {
     38    return NS_ERROR_OUT_OF_MEMORY;
     39  }
     40 
     41  auto dstChars = _retval.GetMutableData(needed.value(), fallible);
     42  if (!dstChars) {
     43    return NS_ERROR_OUT_OF_MEMORY;
     44  }
     45 
     46  auto src = Span(aSrc);
     47  auto dst = AsWritableBytes(*dstChars);
     48  size_t totalWritten = 0;
     49  for (;;) {
     50    auto [result, read, written] =
     51        mEncoder->EncodeFromUTF16WithoutReplacement(src, dst, false);
     52    if (result != kInputEmpty && result != kOutputFull) {
     53      MOZ_RELEASE_ASSERT(written < dst.Length(),
     54                         "Unmappables with one-byte replacement should not "
     55                         "exceed mappable worst case.");
     56      dst[written++] = '?';
     57    }
     58    totalWritten += written;
     59    if (result == kInputEmpty) {
     60      MOZ_ASSERT(totalWritten <= UINT32_MAX);
     61      if (!_retval.SetLength(totalWritten, fallible)) {
     62        return NS_ERROR_OUT_OF_MEMORY;
     63      }
     64      return NS_OK;
     65    }
     66    src = src.From(read);
     67    dst = dst.From(written);
     68  }
     69 }
     70 
     71 NS_IMETHODIMP
     72 nsScriptableUnicodeConverter::Finish(nsACString& _retval) {
     73  // The documentation for this method says it should be called after
     74  // ConvertFromUnicode(). However, our own tests called it after
     75  // convertFromByteArray(), i.e. when *decoding*.
     76  // Assuming that there exists extensions that similarly call
     77  // this at the wrong time, let's deal. In general, it is a design
     78  // error for this class to handle conversions in both directions.
     79  if (!mEncoder) {
     80    _retval.Truncate();
     81    mDecoder->Encoding()->NewDecoderWithBOMRemovalInto(*mDecoder);
     82    return NS_OK;
     83  }
     84  // If we are encoding to ISO-2022-JP, potentially
     85  // transition back to the ASCII state. The buffer
     86  // needs to be large enough for an additional NCR,
     87  // though.
     88  _retval.SetLength(13);
     89  auto dst = AsWritableBytes(_retval.GetMutableData(13));
     90  Span<char16_t> src(nullptr);
     91  uint32_t result;
     92  size_t read;
     93  size_t written;
     94  std::tie(result, read, written, std::ignore) =
     95      mEncoder->EncodeFromUTF16(src, dst, true);
     96  MOZ_ASSERT(!read);
     97  MOZ_ASSERT(result == kInputEmpty);
     98  _retval.SetLength(written);
     99 
    100  mDecoder->Encoding()->NewDecoderWithBOMRemovalInto(*mDecoder);
    101  mEncoder->Encoding()->NewEncoderInto(*mEncoder);
    102  return NS_OK;
    103 }
    104 
    105 NS_IMETHODIMP
    106 nsScriptableUnicodeConverter::ConvertToUnicode(const nsACString& aSrc,
    107                                               nsAString& _retval) {
    108  if (!mDecoder) return NS_ERROR_FAILURE;
    109 
    110  uint32_t length = aSrc.Length();
    111 
    112  CheckedInt<size_t> needed = mDecoder->MaxUTF16BufferLength(length);
    113  if (!needed.isValid() || needed.value() > UINT32_MAX) {
    114    return NS_ERROR_OUT_OF_MEMORY;
    115  }
    116 
    117  auto dst = _retval.GetMutableData(needed.value(), fallible);
    118  if (!dst) {
    119    return NS_ERROR_OUT_OF_MEMORY;
    120  }
    121 
    122  auto src =
    123      Span(reinterpret_cast<const uint8_t*>(aSrc.BeginReading()), length);
    124  uint32_t result;
    125  size_t read;
    126  size_t written;
    127  // The UTF-8 decoder used to throw regardless of the error behavior.
    128  // Simulating the old behavior for compatibility with legacy callers.
    129  // If callers want control over the behavior, they should switch to
    130  // TextDecoder.
    131  if (mDecoder->Encoding() == UTF_8_ENCODING) {
    132    std::tie(result, read, written) =
    133        mDecoder->DecodeToUTF16WithoutReplacement(src, *dst, false);
    134    if (result != kInputEmpty) {
    135      return NS_ERROR_UDEC_ILLEGALINPUT;
    136    }
    137  } else {
    138    std::tie(result, read, written, std::ignore) =
    139        mDecoder->DecodeToUTF16(src, *dst, false);
    140  }
    141  MOZ_ASSERT(result == kInputEmpty);
    142  MOZ_ASSERT(read == length);
    143  MOZ_ASSERT(written <= needed.value());
    144  if (!_retval.SetLength(written, fallible)) {
    145    return NS_ERROR_OUT_OF_MEMORY;
    146  }
    147  return NS_OK;
    148 }
    149 
    150 NS_IMETHODIMP
    151 nsScriptableUnicodeConverter::GetCharset(nsACString& aCharset) {
    152  if (!mDecoder) {
    153    aCharset.Truncate();
    154  } else {
    155    mDecoder->Encoding()->Name(aCharset);
    156  }
    157  return NS_OK;
    158 }
    159 
    160 NS_IMETHODIMP
    161 nsScriptableUnicodeConverter::SetCharset(const nsACString& aCharset) {
    162  return InitConverter(aCharset);
    163 }
    164 
    165 NS_IMETHODIMP
    166 nsScriptableUnicodeConverter::GetIsInternal(bool* aIsInternal) {
    167  *aIsInternal = mIsInternal;
    168  return NS_OK;
    169 }
    170 
    171 NS_IMETHODIMP
    172 nsScriptableUnicodeConverter::SetIsInternal(const bool aIsInternal) {
    173  mIsInternal = aIsInternal;
    174  return NS_OK;
    175 }
    176 
    177 nsresult nsScriptableUnicodeConverter::InitConverter(
    178    const nsACString& aCharset) {
    179  mEncoder = nullptr;
    180  mDecoder = nullptr;
    181 
    182  auto encoding = Encoding::ForLabelNoReplacement(aCharset);
    183  if (!encoding) {
    184    return NS_ERROR_UCONV_NOCONV;
    185  }
    186  if (!(encoding == UTF_16LE_ENCODING || encoding == UTF_16BE_ENCODING)) {
    187    mEncoder = encoding->NewEncoder();
    188  }
    189  mDecoder = encoding->NewDecoderWithBOMRemoval();
    190  return NS_OK;
    191 }