BodyUtil.cpp (15388B)
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */ 3 /* This Source Code Form is subject to the terms of the Mozilla Public 4 * License, v. 2.0. If a copy of the MPL was not distributed with this 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 6 7 #include "BodyUtil.h" 8 9 #include "js/ArrayBuffer.h" // JS::NewArrayBufferWithContents 10 #include "js/JSON.h" 11 #include "mozilla/Encoding.h" 12 #include "mozilla/ErrorResult.h" 13 #include "mozilla/dom/Exceptions.h" 14 #include "mozilla/dom/FetchUtil.h" 15 #include "mozilla/dom/File.h" 16 #include "mozilla/dom/FormData.h" 17 #include "mozilla/dom/Headers.h" 18 #include "mozilla/dom/MimeType.h" 19 #include "mozilla/dom/Promise.h" 20 #include "nsCRT.h" 21 #include "nsCharSeparatedTokenizer.h" 22 #include "nsDOMString.h" 23 #include "nsError.h" 24 #include "nsIGlobalObject.h" 25 #include "nsNetUtil.h" 26 #include "nsReadableUtils.h" 27 #include "nsStreamUtils.h" 28 #include "nsString.h" 29 #include "nsStringStream.h" 30 #include "nsURLHelper.h" 31 32 namespace mozilla::dom { 33 34 namespace { 35 36 // Reads over a CRLF and positions start after it. 37 static bool PushOverLine(nsACString::const_iterator& aStart, 38 const nsACString::const_iterator& aEnd) { 39 if (*aStart == nsCRT::CR && (aEnd - aStart > 1) && *(++aStart) == nsCRT::LF) { 40 ++aStart; // advance to after CRLF 41 return true; 42 } 43 44 return false; 45 } 46 47 /** 48 * A simple multipart/form-data parser as defined in RFC 2388 and RFC 2046. 49 * This does not respect any encoding specified per entry, using UTF-8 50 * throughout. This is as the Fetch spec states in the consume body algorithm. 51 * Borrows some things from Necko's nsMultiMixedConv, but is simpler since 52 * unlike Necko we do not have to deal with receiving incomplete chunks of data. 53 * 54 * This parser will fail the entire parse on any invalid entry, so it will 55 * never return a partially filled FormData. 56 * The content-disposition header is used to figure out the name and filename 57 * entries. The inclusion of the filename parameter decides if the entry is 58 * inserted into the FormData as a string or a File. 59 * 60 * File blobs are copies of the underlying data string since we cannot adopt 61 * char* chunks embedded within the larger body without significant effort. 62 * FIXME(nsm): Bug 1127552 - We should add telemetry to calls to formData() and 63 * friends to figure out if Fetch ends up copying big blobs to see if this is 64 * worth optimizing. 65 */ 66 class MOZ_STACK_CLASS FormDataParser { 67 private: 68 RefPtr<FormData> mFormData; 69 nsCString mMimeType; 70 nsCString mMixedCaseMimeType; 71 nsCString mData; 72 73 // Entry state, reset in START_PART. 74 nsCString mName; 75 nsCString mFilename; 76 nsCString mContentType; 77 78 enum { 79 START_PART, 80 PARSE_HEADER, 81 PARSE_BODY, 82 } mState; 83 84 nsIGlobalObject* mParentObject; 85 86 // Reads over a boundary and sets start to the position after the end of the 87 // boundary. Returns false if no boundary is found immediately. 88 bool PushOverBoundary(const nsACString& aBoundaryString, 89 nsACString::const_iterator& aStart, 90 nsACString::const_iterator& aEnd) { 91 // We copy the end iterator to keep the original pointing to the real end 92 // of the string. 93 nsACString::const_iterator end(aEnd); 94 const char* beginning = aStart.get(); 95 if (FindInReadable(aBoundaryString, aStart, end)) { 96 // We either should find the body immediately, or after 2 chars with the 97 // 2 chars being '-', everything else is failure. 98 if ((aStart.get() - beginning) == 0) { 99 aStart.advance(aBoundaryString.Length()); 100 return true; 101 } 102 103 if ((aStart.get() - beginning) == 2) { 104 if (*(--aStart) == '-' && *(--aStart) == '-') { 105 aStart.advance(aBoundaryString.Length() + 2); 106 return true; 107 } 108 } 109 } 110 111 return false; 112 } 113 114 bool ParseHeader(nsACString::const_iterator& aStart, 115 nsACString::const_iterator& aEnd, bool* aWasEmptyHeader) { 116 nsAutoCString headerName, headerValue; 117 if (!FetchUtil::ExtractHeader(aStart, aEnd, headerName, headerValue, 118 aWasEmptyHeader)) { 119 return false; 120 } 121 if (*aWasEmptyHeader) { 122 return true; 123 } 124 125 if (headerName.LowerCaseEqualsLiteral("content-disposition")) { 126 bool seenFormData = false; 127 for (const nsACString& token : 128 nsCCharSeparatedTokenizer(headerValue, ';').ToRange()) { 129 if (token.IsEmpty()) { 130 continue; 131 } 132 133 if (token.EqualsLiteral("form-data")) { 134 seenFormData = true; 135 continue; 136 } 137 138 if (seenFormData && StringBeginsWith(token, "name="_ns)) { 139 mName = StringTail(token, token.Length() - 5); 140 mName.Trim(" \""); 141 continue; 142 } 143 144 if (seenFormData && StringBeginsWith(token, "filename="_ns)) { 145 mFilename = StringTail(token, token.Length() - 9); 146 mFilename.Trim(" \""); 147 continue; 148 } 149 } 150 151 if (mName.IsVoid()) { 152 // Could not parse a valid entry name. 153 return false; 154 } 155 } else if (headerName.LowerCaseEqualsLiteral("content-type")) { 156 mContentType = headerValue; 157 } 158 159 return true; 160 } 161 162 // The end of a body is marked by a CRLF followed by the boundary. So the 163 // CRLF is part of the boundary and not the body, but any prior CRLFs are 164 // part of the body. This will position the iterator at the beginning of the 165 // boundary (after the CRLF). 166 bool ParseBody(const nsACString& aBoundaryString, 167 nsACString::const_iterator& aStart, 168 nsACString::const_iterator& aEnd) { 169 const char* beginning = aStart.get(); 170 171 // Find the boundary marking the end of the body. 172 nsACString::const_iterator end(aEnd); 173 if (!FindInReadable(aBoundaryString, aStart, end)) { 174 return false; 175 } 176 177 // We found a boundary, strip the just prior CRLF, and consider 178 // everything else the body section. 179 if (aStart.get() - beginning < 2) { 180 // Only the first entry can have a boundary right at the beginning. Even 181 // an empty body will have a CRLF before the boundary. So this is 182 // a failure. 183 return false; 184 } 185 186 // Check that there is a CRLF right before the boundary. 187 aStart.advance(-2); 188 189 // Skip optional hyphens. 190 if (*aStart == '-' && *(aStart.get() + 1) == '-') { 191 if (aStart.get() - beginning < 2) { 192 return false; 193 } 194 195 aStart.advance(-2); 196 } 197 198 if (*aStart != nsCRT::CR || *(aStart.get() + 1) != nsCRT::LF) { 199 return false; 200 } 201 202 nsAutoCString body(beginning, aStart.get() - beginning); 203 204 // Restore iterator to after the \r\n as we promised. 205 // We do not need to handle the extra hyphens case since our boundary 206 // parser in PushOverBoundary() 207 aStart.advance(2); 208 209 if (!mFormData) { 210 mFormData = new FormData(); 211 } 212 213 NS_ConvertUTF8toUTF16 name(mName); 214 215 if (mFilename.IsVoid()) { 216 ErrorResult rv; 217 mFormData->Append(name, NS_ConvertUTF8toUTF16(body), rv); 218 MOZ_ASSERT(!rv.Failed()); 219 } else { 220 // Unfortunately we've to copy the data first since all our strings are 221 // going to free it. We also need fallible alloc, so we can't just use 222 // ToNewCString(). 223 char* copy = static_cast<char*>(moz_xmalloc(body.Length())); 224 nsCString::const_iterator bodyIter, bodyEnd; 225 body.BeginReading(bodyIter); 226 body.EndReading(bodyEnd); 227 char* p = copy; 228 while (bodyIter != bodyEnd) { 229 *p++ = *bodyIter++; 230 } 231 p = nullptr; 232 233 RefPtr<Blob> file = File::CreateMemoryFileWithCustomLastModified( 234 mParentObject, reinterpret_cast<void*>(copy), body.Length(), 235 NS_ConvertUTF8toUTF16(mFilename), NS_ConvertUTF8toUTF16(mContentType), 236 /* aLastModifiedDate */ 0); 237 if (NS_WARN_IF(!file)) { 238 return false; 239 } 240 241 Optional<nsAString> dummy; 242 ErrorResult rv; 243 mFormData->Append(name, *file, dummy, rv); 244 if (NS_WARN_IF(rv.Failed())) { 245 rv.SuppressException(); 246 return false; 247 } 248 } 249 250 return true; 251 } 252 253 public: 254 FormDataParser(const nsACString& aMimeType, 255 const nsACString& aMixedCaseMimeType, const nsACString& aData, 256 nsIGlobalObject* aParent) 257 : mMimeType(aMimeType), 258 mMixedCaseMimeType(aMixedCaseMimeType), 259 mData(aData), 260 mState(START_PART), 261 mParentObject(aParent) {} 262 263 bool Parse() { 264 if (mData.IsEmpty()) { 265 return false; 266 } 267 268 // Determine boundary from mimetype. 269 RefPtr<CMimeType> parsed = CMimeType::Parse(mMixedCaseMimeType); 270 if (!parsed) { 271 return false; 272 } 273 274 nsAutoCString boundaryString; 275 if (!parsed->GetParameterValue("boundary"_ns, boundaryString)) { 276 return false; 277 } 278 279 nsACString::const_iterator start, end; 280 mData.BeginReading(start); 281 // This should ALWAYS point to the end of data. 282 // Helpers make copies. 283 mData.EndReading(end); 284 285 while (start != end) { 286 switch (mState) { 287 case START_PART: 288 mName.SetIsVoid(true); 289 mFilename.SetIsVoid(true); 290 mContentType = "text/plain"_ns; 291 292 while (start != end && NS_IsHTTPWhitespace(*start)) { 293 ++start; 294 } 295 296 // MUST start with boundary. 297 if (!PushOverBoundary(boundaryString, start, end)) { 298 return false; 299 } 300 301 if (start != end && *start == '-') { 302 // End of data. 303 if (!mFormData) { 304 mFormData = new FormData(); 305 } 306 return true; 307 } 308 309 if (!PushOverLine(start, end)) { 310 return false; 311 } 312 mState = PARSE_HEADER; 313 break; 314 315 case PARSE_HEADER: 316 bool emptyHeader; 317 if (!ParseHeader(start, end, &emptyHeader)) { 318 return false; 319 } 320 321 if (emptyHeader && !PushOverLine(start, end)) { 322 return false; 323 } 324 325 mState = emptyHeader ? PARSE_BODY : PARSE_HEADER; 326 break; 327 328 case PARSE_BODY: 329 if (mName.IsVoid()) { 330 NS_WARNING( 331 "No content-disposition header with a valid name was " 332 "found. Failing at body parse."); 333 return false; 334 } 335 336 if (!ParseBody(boundaryString, start, end)) { 337 return false; 338 } 339 340 mState = START_PART; 341 break; 342 343 default: 344 MOZ_CRASH("Invalid case"); 345 } 346 } 347 348 MOZ_ASSERT_UNREACHABLE("Should never reach here."); 349 return false; 350 } 351 352 already_AddRefed<FormData> GetFormData() { return mFormData.forget(); } 353 }; 354 } // namespace 355 356 // static 357 void BodyUtil::ConsumeArrayBuffer(JSContext* aCx, 358 JS::MutableHandle<JSObject*> aValue, 359 uint32_t aInputLength, 360 UniquePtr<uint8_t[], JS::FreePolicy> aInput, 361 ErrorResult& aRv) { 362 aRv.MightThrowJSException(); 363 364 JS::Rooted<JSObject*> arrayBuffer(aCx); 365 arrayBuffer = 366 JS::NewArrayBufferWithContents(aCx, aInputLength, std::move(aInput)); 367 if (!arrayBuffer) { 368 aRv.StealExceptionFromJSContext(aCx); 369 return; 370 } 371 aValue.set(arrayBuffer); 372 } 373 374 // static 375 already_AddRefed<Blob> BodyUtil::ConsumeBlob(nsIGlobalObject* aParent, 376 const nsString& aMimeType, 377 uint32_t aInputLength, 378 uint8_t* aInput, 379 ErrorResult& aRv) { 380 RefPtr<Blob> blob = Blob::CreateMemoryBlob( 381 aParent, reinterpret_cast<void*>(aInput), aInputLength, aMimeType); 382 383 if (!blob) { 384 aRv.Throw(NS_ERROR_DOM_UNKNOWN_ERR); 385 return nullptr; 386 } 387 return blob.forget(); 388 } 389 390 // static 391 void BodyUtil::ConsumeBytes(JSContext* aCx, JS::MutableHandle<JSObject*> aValue, 392 uint32_t aInputLength, 393 UniquePtr<uint8_t[], JS::FreePolicy> aInput, 394 ErrorResult& aRv) { 395 aRv.MightThrowJSException(); 396 397 JS::Rooted<JSObject*> arrayBuffer(aCx); 398 ConsumeArrayBuffer(aCx, &arrayBuffer, aInputLength, std::move(aInput), aRv); 399 if (aRv.Failed()) { 400 return; 401 } 402 403 JS::Rooted<JSObject*> bytes( 404 aCx, JS_NewUint8ArrayWithBuffer(aCx, arrayBuffer, 0, aInputLength)); 405 if (!bytes) { 406 aRv.StealExceptionFromJSContext(aCx); 407 return; 408 } 409 aValue.set(bytes); 410 } 411 412 // static 413 already_AddRefed<FormData> BodyUtil::ConsumeFormData( 414 nsIGlobalObject* aParent, const nsCString& aMimeType, 415 const nsACString& aMixedCaseMimeType, const nsCString& aStr, 416 ErrorResult& aRv) { 417 constexpr auto formDataMimeType = "multipart/form-data"_ns; 418 419 // Allow semicolon separated boundary/encoding suffix like 420 // multipart/form-data; boundary= but disallow multipart/form-datafoobar. 421 bool isValidFormDataMimeType = StringBeginsWith(aMimeType, formDataMimeType); 422 423 if (isValidFormDataMimeType && 424 aMimeType.Length() > formDataMimeType.Length()) { 425 isValidFormDataMimeType = aMimeType[formDataMimeType.Length()] == ';'; 426 } 427 428 if (isValidFormDataMimeType) { 429 FormDataParser parser(aMimeType, aMixedCaseMimeType, aStr, aParent); 430 if (!parser.Parse()) { 431 aRv.ThrowTypeError<MSG_BAD_FORMDATA>(); 432 return nullptr; 433 } 434 435 RefPtr<FormData> fd = parser.GetFormData(); 436 MOZ_ASSERT(fd); 437 return fd.forget(); 438 } 439 440 constexpr auto urlDataMimeType = "application/x-www-form-urlencoded"_ns; 441 bool isValidUrlEncodedMimeType = StringBeginsWith(aMimeType, urlDataMimeType); 442 443 if (isValidUrlEncodedMimeType && 444 aMimeType.Length() > urlDataMimeType.Length()) { 445 isValidUrlEncodedMimeType = aMimeType[urlDataMimeType.Length()] == ';'; 446 } 447 448 if (isValidUrlEncodedMimeType) { 449 RefPtr<FormData> fd = new FormData(aParent); 450 DebugOnly<bool> status = URLParams::Parse( 451 aStr, true, [&fd](const nsACString& aName, const nsACString& aValue) { 452 IgnoredErrorResult rv; 453 fd->Append(NS_ConvertUTF8toUTF16(aName), 454 NS_ConvertUTF8toUTF16(aValue), rv); 455 MOZ_ASSERT(!rv.Failed()); 456 return true; 457 }); 458 MOZ_ASSERT(status); 459 460 return fd.forget(); 461 } 462 463 aRv.ThrowTypeError<MSG_BAD_FORMDATA>(); 464 return nullptr; 465 } 466 467 // static 468 nsresult BodyUtil::ConsumeText(uint32_t aInputLength, uint8_t* aInput, 469 nsString& aText) { 470 nsresult rv = 471 UTF_8_ENCODING->DecodeWithBOMRemoval(Span(aInput, aInputLength), aText); 472 if (NS_FAILED(rv)) { 473 return rv; 474 } 475 return NS_OK; 476 } 477 478 // static 479 void BodyUtil::ConsumeJson(JSContext* aCx, JS::MutableHandle<JS::Value> aValue, 480 const nsString& aStr, ErrorResult& aRv) { 481 aRv.MightThrowJSException(); 482 483 JS::Rooted<JS::Value> json(aCx); 484 if (!JS_ParseJSON(aCx, aStr.get(), aStr.Length(), &json)) { 485 if (!JS_IsExceptionPending(aCx)) { 486 aRv.Throw(NS_ERROR_DOM_UNKNOWN_ERR); 487 return; 488 } 489 490 JS::Rooted<JS::Value> exn(aCx); 491 DebugOnly<bool> gotException = JS_GetPendingException(aCx, &exn); 492 MOZ_ASSERT(gotException); 493 494 JS_ClearPendingException(aCx); 495 aRv.ThrowJSException(aCx, exn); 496 return; 497 } 498 499 aValue.set(json); 500 } 501 502 } // namespace mozilla::dom