nsScriptableUConv.cpp (6178B)
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* This Source Code Form is subject to the terms of the Mozilla Public 3 * License, v. 2.0. If a copy of the MPL was not distributed with this 4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 5 6 #include "nsString.h" 7 #include "nsIScriptableUConv.h" 8 #include "nsScriptableUConv.h" 9 #include "nsComponentManagerUtils.h" 10 11 #include <tuple> 12 13 using namespace mozilla; 14 15 /* Implementation file */ 16 NS_IMPL_ISUPPORTS(nsScriptableUnicodeConverter, nsIScriptableUnicodeConverter) 17 18 nsScriptableUnicodeConverter::nsScriptableUnicodeConverter() 19 : mIsInternal(false) {} 20 21 nsScriptableUnicodeConverter::~nsScriptableUnicodeConverter() = default; 22 23 NS_IMETHODIMP 24 nsScriptableUnicodeConverter::ConvertFromUnicode(const nsAString& aSrc, 25 nsACString& _retval) { 26 if (!mEncoder) return NS_ERROR_FAILURE; 27 28 // We can compute the length without replacement, because the 29 // the replacement is only one byte long and a mappable character 30 // would always output something, i.e. at least one byte. 31 // When encoding to ISO-2022-JP, unmappables shouldn't be able 32 // to cause more escape sequences to be emitted than the mappable 33 // worst case where every input character causes an escape into 34 // a different state. 35 CheckedInt<size_t> needed = 36 mEncoder->MaxBufferLengthFromUTF16WithoutReplacement(aSrc.Length()); 37 if (!needed.isValid() || needed.value() > UINT32_MAX) { 38 return NS_ERROR_OUT_OF_MEMORY; 39 } 40 41 auto dstChars = _retval.GetMutableData(needed.value(), fallible); 42 if (!dstChars) { 43 return NS_ERROR_OUT_OF_MEMORY; 44 } 45 46 auto src = Span(aSrc); 47 auto dst = AsWritableBytes(*dstChars); 48 size_t totalWritten = 0; 49 for (;;) { 50 auto [result, read, written] = 51 mEncoder->EncodeFromUTF16WithoutReplacement(src, dst, false); 52 if (result != kInputEmpty && result != kOutputFull) { 53 MOZ_RELEASE_ASSERT(written < dst.Length(), 54 "Unmappables with one-byte replacement should not " 55 "exceed mappable worst case."); 56 dst[written++] = '?'; 57 } 58 totalWritten += written; 59 if (result == kInputEmpty) { 60 MOZ_ASSERT(totalWritten <= UINT32_MAX); 61 if (!_retval.SetLength(totalWritten, fallible)) { 62 return NS_ERROR_OUT_OF_MEMORY; 63 } 64 return NS_OK; 65 } 66 src = src.From(read); 67 dst = dst.From(written); 68 } 69 } 70 71 NS_IMETHODIMP 72 nsScriptableUnicodeConverter::Finish(nsACString& _retval) { 73 // The documentation for this method says it should be called after 74 // ConvertFromUnicode(). However, our own tests called it after 75 // convertFromByteArray(), i.e. when *decoding*. 76 // Assuming that there exists extensions that similarly call 77 // this at the wrong time, let's deal. In general, it is a design 78 // error for this class to handle conversions in both directions. 79 if (!mEncoder) { 80 _retval.Truncate(); 81 mDecoder->Encoding()->NewDecoderWithBOMRemovalInto(*mDecoder); 82 return NS_OK; 83 } 84 // If we are encoding to ISO-2022-JP, potentially 85 // transition back to the ASCII state. The buffer 86 // needs to be large enough for an additional NCR, 87 // though. 88 _retval.SetLength(13); 89 auto dst = AsWritableBytes(_retval.GetMutableData(13)); 90 Span<char16_t> src(nullptr); 91 uint32_t result; 92 size_t read; 93 size_t written; 94 std::tie(result, read, written, std::ignore) = 95 mEncoder->EncodeFromUTF16(src, dst, true); 96 MOZ_ASSERT(!read); 97 MOZ_ASSERT(result == kInputEmpty); 98 _retval.SetLength(written); 99 100 mDecoder->Encoding()->NewDecoderWithBOMRemovalInto(*mDecoder); 101 mEncoder->Encoding()->NewEncoderInto(*mEncoder); 102 return NS_OK; 103 } 104 105 NS_IMETHODIMP 106 nsScriptableUnicodeConverter::ConvertToUnicode(const nsACString& aSrc, 107 nsAString& _retval) { 108 if (!mDecoder) return NS_ERROR_FAILURE; 109 110 uint32_t length = aSrc.Length(); 111 112 CheckedInt<size_t> needed = mDecoder->MaxUTF16BufferLength(length); 113 if (!needed.isValid() || needed.value() > UINT32_MAX) { 114 return NS_ERROR_OUT_OF_MEMORY; 115 } 116 117 auto dst = _retval.GetMutableData(needed.value(), fallible); 118 if (!dst) { 119 return NS_ERROR_OUT_OF_MEMORY; 120 } 121 122 auto src = 123 Span(reinterpret_cast<const uint8_t*>(aSrc.BeginReading()), length); 124 uint32_t result; 125 size_t read; 126 size_t written; 127 // The UTF-8 decoder used to throw regardless of the error behavior. 128 // Simulating the old behavior for compatibility with legacy callers. 129 // If callers want control over the behavior, they should switch to 130 // TextDecoder. 131 if (mDecoder->Encoding() == UTF_8_ENCODING) { 132 std::tie(result, read, written) = 133 mDecoder->DecodeToUTF16WithoutReplacement(src, *dst, false); 134 if (result != kInputEmpty) { 135 return NS_ERROR_UDEC_ILLEGALINPUT; 136 } 137 } else { 138 std::tie(result, read, written, std::ignore) = 139 mDecoder->DecodeToUTF16(src, *dst, false); 140 } 141 MOZ_ASSERT(result == kInputEmpty); 142 MOZ_ASSERT(read == length); 143 MOZ_ASSERT(written <= needed.value()); 144 if (!_retval.SetLength(written, fallible)) { 145 return NS_ERROR_OUT_OF_MEMORY; 146 } 147 return NS_OK; 148 } 149 150 NS_IMETHODIMP 151 nsScriptableUnicodeConverter::GetCharset(nsACString& aCharset) { 152 if (!mDecoder) { 153 aCharset.Truncate(); 154 } else { 155 mDecoder->Encoding()->Name(aCharset); 156 } 157 return NS_OK; 158 } 159 160 NS_IMETHODIMP 161 nsScriptableUnicodeConverter::SetCharset(const nsACString& aCharset) { 162 return InitConverter(aCharset); 163 } 164 165 NS_IMETHODIMP 166 nsScriptableUnicodeConverter::GetIsInternal(bool* aIsInternal) { 167 *aIsInternal = mIsInternal; 168 return NS_OK; 169 } 170 171 NS_IMETHODIMP 172 nsScriptableUnicodeConverter::SetIsInternal(const bool aIsInternal) { 173 mIsInternal = aIsInternal; 174 return NS_OK; 175 } 176 177 nsresult nsScriptableUnicodeConverter::InitConverter( 178 const nsACString& aCharset) { 179 mEncoder = nullptr; 180 mDecoder = nullptr; 181 182 auto encoding = Encoding::ForLabelNoReplacement(aCharset); 183 if (!encoding) { 184 return NS_ERROR_UCONV_NOCONV; 185 } 186 if (!(encoding == UTF_16LE_ENCODING || encoding == UTF_16BE_ENCODING)) { 187 mEncoder = encoding->NewEncoder(); 188 } 189 mDecoder = encoding->NewDecoderWithBOMRemoval(); 190 return NS_OK; 191 }