nsConverterInputStream.cpp (8255B)
1 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* This Source Code Form is subject to the terms of the Mozilla Public 3 * License, v. 2.0. If a copy of the MPL was not distributed with this 4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 5 6 #include "nsConverterInputStream.h" 7 #include "nsIInputStream.h" 8 #include "nsReadLine.h" 9 #include "nsStreamUtils.h" 10 11 #include <tuple> 12 13 using namespace mozilla; 14 15 #define CONVERTER_BUFFER_SIZE 8192 16 17 NS_IMPL_ISUPPORTS(nsConverterInputStream, nsIConverterInputStream, 18 nsIUnicharInputStream, nsIUnicharLineInputStream) 19 20 NS_IMETHODIMP 21 nsConverterInputStream::Init(nsIInputStream* aStream, const char* aCharset, 22 int32_t aBufferSize, char16_t aReplacementChar) { 23 nsAutoCString label; 24 if (!aCharset) { 25 label.AssignLiteral("UTF-8"); 26 } else { 27 label = aCharset; 28 } 29 30 auto encoding = Encoding::ForLabelNoReplacement(label); 31 if (!encoding) { 32 return NS_ERROR_UCONV_NOCONV; 33 } 34 // Previously, the implementation auto-switched only 35 // between the two UTF-16 variants and only when 36 // initialized with an endianness-unspecific label. 37 mConverter = encoding->NewDecoder(); 38 39 size_t outputBufferSize; 40 if (aBufferSize <= 0) { 41 aBufferSize = CONVERTER_BUFFER_SIZE; 42 outputBufferSize = CONVERTER_BUFFER_SIZE; 43 } else { 44 // NetUtil.sys.mjs assumes that if buffer size equals 45 // the input size, the whole stream will be processed 46 // as one readString. This is not true with encoding_rs, 47 // because encoding_rs might want to see space for a 48 // surrogate pair, so let's compute a larger output 49 // buffer length. 50 CheckedInt<size_t> needed = mConverter->MaxUTF16BufferLength(aBufferSize); 51 if (!needed.isValid()) { 52 return NS_ERROR_OUT_OF_MEMORY; 53 } 54 outputBufferSize = needed.value(); 55 } 56 57 // set up our buffers. 58 if (!mByteData.SetCapacity(aBufferSize, mozilla::fallible) || 59 !mUnicharData.SetLength(outputBufferSize, mozilla::fallible)) { 60 return NS_ERROR_OUT_OF_MEMORY; 61 } 62 63 mInput = aStream; 64 mErrorsAreFatal = !aReplacementChar; 65 return NS_OK; 66 } 67 68 NS_IMETHODIMP 69 nsConverterInputStream::Close() { 70 nsresult rv = mInput ? mInput->Close() : NS_OK; 71 mLineBuffer = nullptr; 72 mInput = nullptr; 73 mConverter = nullptr; 74 mByteData.Clear(); 75 mUnicharData.Clear(); 76 return rv; 77 } 78 79 NS_IMETHODIMP 80 nsConverterInputStream::Read(char16_t* aBuf, uint32_t aCount, 81 uint32_t* aReadCount) { 82 NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness"); 83 uint32_t readCount = mUnicharDataLength - mUnicharDataOffset; 84 if (0 == readCount) { 85 // Fill the unichar buffer 86 readCount = Fill(&mLastErrorCode); 87 if (readCount == 0) { 88 *aReadCount = 0; 89 return mLastErrorCode; 90 } 91 } 92 if (readCount > aCount) { 93 readCount = aCount; 94 } 95 memcpy(aBuf, mUnicharData.Elements() + mUnicharDataOffset, 96 readCount * sizeof(char16_t)); 97 mUnicharDataOffset += readCount; 98 *aReadCount = readCount; 99 return NS_OK; 100 } 101 102 NS_IMETHODIMP 103 nsConverterInputStream::ReadSegments(nsWriteUnicharSegmentFun aWriter, 104 void* aClosure, uint32_t aCount, 105 uint32_t* aReadCount) { 106 NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness"); 107 uint32_t codeUnitsToWrite = mUnicharDataLength - mUnicharDataOffset; 108 if (0 == codeUnitsToWrite) { 109 // Fill the unichar buffer 110 codeUnitsToWrite = Fill(&mLastErrorCode); 111 if (codeUnitsToWrite == 0) { 112 *aReadCount = 0; 113 return mLastErrorCode; 114 } 115 } 116 117 if (codeUnitsToWrite > aCount) { 118 codeUnitsToWrite = aCount; 119 } 120 121 uint32_t codeUnitsWritten; 122 uint32_t totalCodeUnitsWritten = 0; 123 124 while (codeUnitsToWrite) { 125 nsresult rv = 126 aWriter(this, aClosure, mUnicharData.Elements() + mUnicharDataOffset, 127 totalCodeUnitsWritten, codeUnitsToWrite, &codeUnitsWritten); 128 if (NS_FAILED(rv)) { 129 // don't propagate errors to the caller 130 break; 131 } 132 133 codeUnitsToWrite -= codeUnitsWritten; 134 totalCodeUnitsWritten += codeUnitsWritten; 135 mUnicharDataOffset += codeUnitsWritten; 136 } 137 138 *aReadCount = totalCodeUnitsWritten; 139 140 return NS_OK; 141 } 142 143 NS_IMETHODIMP 144 nsConverterInputStream::ReadString(uint32_t aCount, nsAString& aString, 145 uint32_t* aReadCount) { 146 NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness"); 147 uint32_t readCount = mUnicharDataLength - mUnicharDataOffset; 148 if (0 == readCount) { 149 // Fill the unichar buffer 150 readCount = Fill(&mLastErrorCode); 151 if (readCount == 0) { 152 *aReadCount = 0; 153 return mLastErrorCode; 154 } 155 } 156 if (readCount > aCount) { 157 readCount = aCount; 158 } 159 const char16_t* buf = mUnicharData.Elements() + mUnicharDataOffset; 160 aString.Assign(buf, readCount); 161 mUnicharDataOffset += readCount; 162 *aReadCount = readCount; 163 return NS_OK; 164 } 165 166 uint32_t nsConverterInputStream::Fill(nsresult* aErrorCode) { 167 if (!mInput) { 168 // We already closed the stream! 169 *aErrorCode = NS_BASE_STREAM_CLOSED; 170 return 0; 171 } 172 173 if (NS_FAILED(mLastErrorCode)) { 174 // We failed to completely convert last time, and error-recovery 175 // is disabled. We will fare no better this time, so... 176 *aErrorCode = mLastErrorCode; 177 return 0; 178 } 179 180 // mUnicharData.Length() is the buffer length, not the fill status. 181 // mUnicharDataLength reflects the current fill status. 182 mUnicharDataLength = 0; 183 // Whenever we convert, mUnicharData is logically empty. 184 mUnicharDataOffset = 0; 185 186 // Continue trying to read from the source stream until we successfully decode 187 // a character or encounter an error, as returning `0` here implies that the 188 // stream is complete. 189 // 190 // If the converter has been cleared, we've fully consumed the stream, and 191 // want to report EOF. 192 while (mUnicharDataLength == 0 && mConverter) { 193 // We assume a many to one conversion and are using equal sizes for 194 // the two buffers. However if an error happens at the very start 195 // of a byte buffer we may end up in a situation where n bytes lead 196 // to n+1 unicode chars. Thus we need to keep track of the leftover 197 // bytes as we convert. 198 199 uint32_t nb; 200 *aErrorCode = NS_FillArray(mByteData, mInput, mLeftOverBytes, &nb); 201 if (NS_FAILED(*aErrorCode)) { 202 return 0; 203 } 204 205 NS_ASSERTION(uint32_t(nb) + mLeftOverBytes == mByteData.Length(), 206 "mByteData is lying to us somewhere"); 207 208 // If `NS_FillArray` failed to read any new bytes, this is the last read, 209 // and we're at the end of the stream. 210 bool last = (nb == 0); 211 212 // Now convert as much of the byte buffer to unicode as possible 213 auto src = AsBytes(Span(mByteData)); 214 auto dst = Span(mUnicharData); 215 216 // Truncation from size_t to uint32_t below is OK, because the sizes 217 // are bounded by the lengths of mByteData and mUnicharData. 218 uint32_t result; 219 size_t read; 220 size_t written; 221 if (mErrorsAreFatal) { 222 std::tie(result, read, written) = 223 mConverter->DecodeToUTF16WithoutReplacement(src, dst, last); 224 } else { 225 std::tie(result, read, written, std::ignore) = 226 mConverter->DecodeToUTF16(src, dst, last); 227 } 228 mLeftOverBytes = mByteData.Length() - read; 229 mUnicharDataLength = written; 230 // Clear `mConverter` if we reached the end of the stream, as we can't 231 // call methods on it anymore. This will also signal EOF to the caller 232 // through the loop condition. 233 if (last) { 234 MOZ_ASSERT(mLeftOverBytes == 0, 235 "Failed to read all bytes on the last pass?"); 236 mConverter = nullptr; 237 } 238 // If we got a decode error, we're done. 239 if (result != kInputEmpty && result != kOutputFull) { 240 MOZ_ASSERT(mErrorsAreFatal, "How come DecodeToUTF16() reported error?"); 241 *aErrorCode = NS_ERROR_UDEC_ILLEGALINPUT; 242 return 0; 243 } 244 } 245 *aErrorCode = NS_OK; 246 return mUnicharDataLength; 247 } 248 249 NS_IMETHODIMP 250 nsConverterInputStream::ReadLine(nsAString& aLine, bool* aResult) { 251 if (!mLineBuffer) { 252 mLineBuffer = MakeUnique<nsLineBuffer<char16_t>>(); 253 } 254 return NS_ReadLine(this, mLineBuffer.get(), aLine, aResult); 255 }