tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

BodyUtil.cpp (15388B)


      1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
      2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
      3 /* This Source Code Form is subject to the terms of the Mozilla Public
      4 * License, v. 2.0. If a copy of the MPL was not distributed with this
      5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      6 
      7 #include "BodyUtil.h"
      8 
      9 #include "js/ArrayBuffer.h"  // JS::NewArrayBufferWithContents
     10 #include "js/JSON.h"
     11 #include "mozilla/Encoding.h"
     12 #include "mozilla/ErrorResult.h"
     13 #include "mozilla/dom/Exceptions.h"
     14 #include "mozilla/dom/FetchUtil.h"
     15 #include "mozilla/dom/File.h"
     16 #include "mozilla/dom/FormData.h"
     17 #include "mozilla/dom/Headers.h"
     18 #include "mozilla/dom/MimeType.h"
     19 #include "mozilla/dom/Promise.h"
     20 #include "nsCRT.h"
     21 #include "nsCharSeparatedTokenizer.h"
     22 #include "nsDOMString.h"
     23 #include "nsError.h"
     24 #include "nsIGlobalObject.h"
     25 #include "nsNetUtil.h"
     26 #include "nsReadableUtils.h"
     27 #include "nsStreamUtils.h"
     28 #include "nsString.h"
     29 #include "nsStringStream.h"
     30 #include "nsURLHelper.h"
     31 
     32 namespace mozilla::dom {
     33 
     34 namespace {
     35 
     36 // Reads over a CRLF and positions start after it.
     37 static bool PushOverLine(nsACString::const_iterator& aStart,
     38                         const nsACString::const_iterator& aEnd) {
     39  if (*aStart == nsCRT::CR && (aEnd - aStart > 1) && *(++aStart) == nsCRT::LF) {
     40    ++aStart;  // advance to after CRLF
     41    return true;
     42  }
     43 
     44  return false;
     45 }
     46 
     47 /**
     48 * A simple multipart/form-data parser as defined in RFC 2388 and RFC 2046.
     49 * This does not respect any encoding specified per entry, using UTF-8
     50 * throughout. This is as the Fetch spec states in the consume body algorithm.
     51 * Borrows some things from Necko's nsMultiMixedConv, but is simpler since
     52 * unlike Necko we do not have to deal with receiving incomplete chunks of data.
     53 *
     54 * This parser will fail the entire parse on any invalid entry, so it will
     55 * never return a partially filled FormData.
     56 * The content-disposition header is used to figure out the name and filename
     57 * entries. The inclusion of the filename parameter decides if the entry is
     58 * inserted into the FormData as a string or a File.
     59 *
     60 * File blobs are copies of the underlying data string since we cannot adopt
     61 * char* chunks embedded within the larger body without significant effort.
     62 * FIXME(nsm): Bug 1127552 - We should add telemetry to calls to formData() and
     63 * friends to figure out if Fetch ends up copying big blobs to see if this is
     64 * worth optimizing.
     65 */
     66 class MOZ_STACK_CLASS FormDataParser {
     67 private:
     68  RefPtr<FormData> mFormData;
     69  nsCString mMimeType;
     70  nsCString mMixedCaseMimeType;
     71  nsCString mData;
     72 
     73  // Entry state, reset in START_PART.
     74  nsCString mName;
     75  nsCString mFilename;
     76  nsCString mContentType;
     77 
     78  enum {
     79    START_PART,
     80    PARSE_HEADER,
     81    PARSE_BODY,
     82  } mState;
     83 
     84  nsIGlobalObject* mParentObject;
     85 
     86  // Reads over a boundary and sets start to the position after the end of the
     87  // boundary. Returns false if no boundary is found immediately.
     88  bool PushOverBoundary(const nsACString& aBoundaryString,
     89                        nsACString::const_iterator& aStart,
     90                        nsACString::const_iterator& aEnd) {
     91    // We copy the end iterator to keep the original pointing to the real end
     92    // of the string.
     93    nsACString::const_iterator end(aEnd);
     94    const char* beginning = aStart.get();
     95    if (FindInReadable(aBoundaryString, aStart, end)) {
     96      // We either should find the body immediately, or after 2 chars with the
     97      // 2 chars being '-', everything else is failure.
     98      if ((aStart.get() - beginning) == 0) {
     99        aStart.advance(aBoundaryString.Length());
    100        return true;
    101      }
    102 
    103      if ((aStart.get() - beginning) == 2) {
    104        if (*(--aStart) == '-' && *(--aStart) == '-') {
    105          aStart.advance(aBoundaryString.Length() + 2);
    106          return true;
    107        }
    108      }
    109    }
    110 
    111    return false;
    112  }
    113 
    114  bool ParseHeader(nsACString::const_iterator& aStart,
    115                   nsACString::const_iterator& aEnd, bool* aWasEmptyHeader) {
    116    nsAutoCString headerName, headerValue;
    117    if (!FetchUtil::ExtractHeader(aStart, aEnd, headerName, headerValue,
    118                                  aWasEmptyHeader)) {
    119      return false;
    120    }
    121    if (*aWasEmptyHeader) {
    122      return true;
    123    }
    124 
    125    if (headerName.LowerCaseEqualsLiteral("content-disposition")) {
    126      bool seenFormData = false;
    127      for (const nsACString& token :
    128           nsCCharSeparatedTokenizer(headerValue, ';').ToRange()) {
    129        if (token.IsEmpty()) {
    130          continue;
    131        }
    132 
    133        if (token.EqualsLiteral("form-data")) {
    134          seenFormData = true;
    135          continue;
    136        }
    137 
    138        if (seenFormData && StringBeginsWith(token, "name="_ns)) {
    139          mName = StringTail(token, token.Length() - 5);
    140          mName.Trim(" \"");
    141          continue;
    142        }
    143 
    144        if (seenFormData && StringBeginsWith(token, "filename="_ns)) {
    145          mFilename = StringTail(token, token.Length() - 9);
    146          mFilename.Trim(" \"");
    147          continue;
    148        }
    149      }
    150 
    151      if (mName.IsVoid()) {
    152        // Could not parse a valid entry name.
    153        return false;
    154      }
    155    } else if (headerName.LowerCaseEqualsLiteral("content-type")) {
    156      mContentType = headerValue;
    157    }
    158 
    159    return true;
    160  }
    161 
    162  // The end of a body is marked by a CRLF followed by the boundary. So the
    163  // CRLF is part of the boundary and not the body, but any prior CRLFs are
    164  // part of the body. This will position the iterator at the beginning of the
    165  // boundary (after the CRLF).
    166  bool ParseBody(const nsACString& aBoundaryString,
    167                 nsACString::const_iterator& aStart,
    168                 nsACString::const_iterator& aEnd) {
    169    const char* beginning = aStart.get();
    170 
    171    // Find the boundary marking the end of the body.
    172    nsACString::const_iterator end(aEnd);
    173    if (!FindInReadable(aBoundaryString, aStart, end)) {
    174      return false;
    175    }
    176 
    177    // We found a boundary, strip the just prior CRLF, and consider
    178    // everything else the body section.
    179    if (aStart.get() - beginning < 2) {
    180      // Only the first entry can have a boundary right at the beginning. Even
    181      // an empty body will have a CRLF before the boundary. So this is
    182      // a failure.
    183      return false;
    184    }
    185 
    186    // Check that there is a CRLF right before the boundary.
    187    aStart.advance(-2);
    188 
    189    // Skip optional hyphens.
    190    if (*aStart == '-' && *(aStart.get() + 1) == '-') {
    191      if (aStart.get() - beginning < 2) {
    192        return false;
    193      }
    194 
    195      aStart.advance(-2);
    196    }
    197 
    198    if (*aStart != nsCRT::CR || *(aStart.get() + 1) != nsCRT::LF) {
    199      return false;
    200    }
    201 
    202    nsAutoCString body(beginning, aStart.get() - beginning);
    203 
    204    // Restore iterator to after the \r\n as we promised.
    205    // We do not need to handle the extra hyphens case since our boundary
    206    // parser in PushOverBoundary()
    207    aStart.advance(2);
    208 
    209    if (!mFormData) {
    210      mFormData = new FormData();
    211    }
    212 
    213    NS_ConvertUTF8toUTF16 name(mName);
    214 
    215    if (mFilename.IsVoid()) {
    216      ErrorResult rv;
    217      mFormData->Append(name, NS_ConvertUTF8toUTF16(body), rv);
    218      MOZ_ASSERT(!rv.Failed());
    219    } else {
    220      // Unfortunately we've to copy the data first since all our strings are
    221      // going to free it. We also need fallible alloc, so we can't just use
    222      // ToNewCString().
    223      char* copy = static_cast<char*>(moz_xmalloc(body.Length()));
    224      nsCString::const_iterator bodyIter, bodyEnd;
    225      body.BeginReading(bodyIter);
    226      body.EndReading(bodyEnd);
    227      char* p = copy;
    228      while (bodyIter != bodyEnd) {
    229        *p++ = *bodyIter++;
    230      }
    231      p = nullptr;
    232 
    233      RefPtr<Blob> file = File::CreateMemoryFileWithCustomLastModified(
    234          mParentObject, reinterpret_cast<void*>(copy), body.Length(),
    235          NS_ConvertUTF8toUTF16(mFilename), NS_ConvertUTF8toUTF16(mContentType),
    236          /* aLastModifiedDate */ 0);
    237      if (NS_WARN_IF(!file)) {
    238        return false;
    239      }
    240 
    241      Optional<nsAString> dummy;
    242      ErrorResult rv;
    243      mFormData->Append(name, *file, dummy, rv);
    244      if (NS_WARN_IF(rv.Failed())) {
    245        rv.SuppressException();
    246        return false;
    247      }
    248    }
    249 
    250    return true;
    251  }
    252 
    253 public:
    254  FormDataParser(const nsACString& aMimeType,
    255                 const nsACString& aMixedCaseMimeType, const nsACString& aData,
    256                 nsIGlobalObject* aParent)
    257      : mMimeType(aMimeType),
    258        mMixedCaseMimeType(aMixedCaseMimeType),
    259        mData(aData),
    260        mState(START_PART),
    261        mParentObject(aParent) {}
    262 
    263  bool Parse() {
    264    if (mData.IsEmpty()) {
    265      return false;
    266    }
    267 
    268    // Determine boundary from mimetype.
    269    RefPtr<CMimeType> parsed = CMimeType::Parse(mMixedCaseMimeType);
    270    if (!parsed) {
    271      return false;
    272    }
    273 
    274    nsAutoCString boundaryString;
    275    if (!parsed->GetParameterValue("boundary"_ns, boundaryString)) {
    276      return false;
    277    }
    278 
    279    nsACString::const_iterator start, end;
    280    mData.BeginReading(start);
    281    // This should ALWAYS point to the end of data.
    282    // Helpers make copies.
    283    mData.EndReading(end);
    284 
    285    while (start != end) {
    286      switch (mState) {
    287        case START_PART:
    288          mName.SetIsVoid(true);
    289          mFilename.SetIsVoid(true);
    290          mContentType = "text/plain"_ns;
    291 
    292          while (start != end && NS_IsHTTPWhitespace(*start)) {
    293            ++start;
    294          }
    295 
    296          // MUST start with boundary.
    297          if (!PushOverBoundary(boundaryString, start, end)) {
    298            return false;
    299          }
    300 
    301          if (start != end && *start == '-') {
    302            // End of data.
    303            if (!mFormData) {
    304              mFormData = new FormData();
    305            }
    306            return true;
    307          }
    308 
    309          if (!PushOverLine(start, end)) {
    310            return false;
    311          }
    312          mState = PARSE_HEADER;
    313          break;
    314 
    315        case PARSE_HEADER:
    316          bool emptyHeader;
    317          if (!ParseHeader(start, end, &emptyHeader)) {
    318            return false;
    319          }
    320 
    321          if (emptyHeader && !PushOverLine(start, end)) {
    322            return false;
    323          }
    324 
    325          mState = emptyHeader ? PARSE_BODY : PARSE_HEADER;
    326          break;
    327 
    328        case PARSE_BODY:
    329          if (mName.IsVoid()) {
    330            NS_WARNING(
    331                "No content-disposition header with a valid name was "
    332                "found. Failing at body parse.");
    333            return false;
    334          }
    335 
    336          if (!ParseBody(boundaryString, start, end)) {
    337            return false;
    338          }
    339 
    340          mState = START_PART;
    341          break;
    342 
    343        default:
    344          MOZ_CRASH("Invalid case");
    345      }
    346    }
    347 
    348    MOZ_ASSERT_UNREACHABLE("Should never reach here.");
    349    return false;
    350  }
    351 
    352  already_AddRefed<FormData> GetFormData() { return mFormData.forget(); }
    353 };
    354 }  // namespace
    355 
    356 // static
    357 void BodyUtil::ConsumeArrayBuffer(JSContext* aCx,
    358                                  JS::MutableHandle<JSObject*> aValue,
    359                                  uint32_t aInputLength,
    360                                  UniquePtr<uint8_t[], JS::FreePolicy> aInput,
    361                                  ErrorResult& aRv) {
    362  aRv.MightThrowJSException();
    363 
    364  JS::Rooted<JSObject*> arrayBuffer(aCx);
    365  arrayBuffer =
    366      JS::NewArrayBufferWithContents(aCx, aInputLength, std::move(aInput));
    367  if (!arrayBuffer) {
    368    aRv.StealExceptionFromJSContext(aCx);
    369    return;
    370  }
    371  aValue.set(arrayBuffer);
    372 }
    373 
    374 // static
    375 already_AddRefed<Blob> BodyUtil::ConsumeBlob(nsIGlobalObject* aParent,
    376                                             const nsString& aMimeType,
    377                                             uint32_t aInputLength,
    378                                             uint8_t* aInput,
    379                                             ErrorResult& aRv) {
    380  RefPtr<Blob> blob = Blob::CreateMemoryBlob(
    381      aParent, reinterpret_cast<void*>(aInput), aInputLength, aMimeType);
    382 
    383  if (!blob) {
    384    aRv.Throw(NS_ERROR_DOM_UNKNOWN_ERR);
    385    return nullptr;
    386  }
    387  return blob.forget();
    388 }
    389 
    390 // static
    391 void BodyUtil::ConsumeBytes(JSContext* aCx, JS::MutableHandle<JSObject*> aValue,
    392                            uint32_t aInputLength,
    393                            UniquePtr<uint8_t[], JS::FreePolicy> aInput,
    394                            ErrorResult& aRv) {
    395  aRv.MightThrowJSException();
    396 
    397  JS::Rooted<JSObject*> arrayBuffer(aCx);
    398  ConsumeArrayBuffer(aCx, &arrayBuffer, aInputLength, std::move(aInput), aRv);
    399  if (aRv.Failed()) {
    400    return;
    401  }
    402 
    403  JS::Rooted<JSObject*> bytes(
    404      aCx, JS_NewUint8ArrayWithBuffer(aCx, arrayBuffer, 0, aInputLength));
    405  if (!bytes) {
    406    aRv.StealExceptionFromJSContext(aCx);
    407    return;
    408  }
    409  aValue.set(bytes);
    410 }
    411 
    412 // static
    413 already_AddRefed<FormData> BodyUtil::ConsumeFormData(
    414    nsIGlobalObject* aParent, const nsCString& aMimeType,
    415    const nsACString& aMixedCaseMimeType, const nsCString& aStr,
    416    ErrorResult& aRv) {
    417  constexpr auto formDataMimeType = "multipart/form-data"_ns;
    418 
    419  // Allow semicolon separated boundary/encoding suffix like
    420  // multipart/form-data; boundary= but disallow multipart/form-datafoobar.
    421  bool isValidFormDataMimeType = StringBeginsWith(aMimeType, formDataMimeType);
    422 
    423  if (isValidFormDataMimeType &&
    424      aMimeType.Length() > formDataMimeType.Length()) {
    425    isValidFormDataMimeType = aMimeType[formDataMimeType.Length()] == ';';
    426  }
    427 
    428  if (isValidFormDataMimeType) {
    429    FormDataParser parser(aMimeType, aMixedCaseMimeType, aStr, aParent);
    430    if (!parser.Parse()) {
    431      aRv.ThrowTypeError<MSG_BAD_FORMDATA>();
    432      return nullptr;
    433    }
    434 
    435    RefPtr<FormData> fd = parser.GetFormData();
    436    MOZ_ASSERT(fd);
    437    return fd.forget();
    438  }
    439 
    440  constexpr auto urlDataMimeType = "application/x-www-form-urlencoded"_ns;
    441  bool isValidUrlEncodedMimeType = StringBeginsWith(aMimeType, urlDataMimeType);
    442 
    443  if (isValidUrlEncodedMimeType &&
    444      aMimeType.Length() > urlDataMimeType.Length()) {
    445    isValidUrlEncodedMimeType = aMimeType[urlDataMimeType.Length()] == ';';
    446  }
    447 
    448  if (isValidUrlEncodedMimeType) {
    449    RefPtr<FormData> fd = new FormData(aParent);
    450    DebugOnly<bool> status = URLParams::Parse(
    451        aStr, true, [&fd](const nsACString& aName, const nsACString& aValue) {
    452          IgnoredErrorResult rv;
    453          fd->Append(NS_ConvertUTF8toUTF16(aName),
    454                     NS_ConvertUTF8toUTF16(aValue), rv);
    455          MOZ_ASSERT(!rv.Failed());
    456          return true;
    457        });
    458    MOZ_ASSERT(status);
    459 
    460    return fd.forget();
    461  }
    462 
    463  aRv.ThrowTypeError<MSG_BAD_FORMDATA>();
    464  return nullptr;
    465 }
    466 
    467 // static
    468 nsresult BodyUtil::ConsumeText(uint32_t aInputLength, uint8_t* aInput,
    469                               nsString& aText) {
    470  nsresult rv =
    471      UTF_8_ENCODING->DecodeWithBOMRemoval(Span(aInput, aInputLength), aText);
    472  if (NS_FAILED(rv)) {
    473    return rv;
    474  }
    475  return NS_OK;
    476 }
    477 
    478 // static
    479 void BodyUtil::ConsumeJson(JSContext* aCx, JS::MutableHandle<JS::Value> aValue,
    480                           const nsString& aStr, ErrorResult& aRv) {
    481  aRv.MightThrowJSException();
    482 
    483  JS::Rooted<JS::Value> json(aCx);
    484  if (!JS_ParseJSON(aCx, aStr.get(), aStr.Length(), &json)) {
    485    if (!JS_IsExceptionPending(aCx)) {
    486      aRv.Throw(NS_ERROR_DOM_UNKNOWN_ERR);
    487      return;
    488    }
    489 
    490    JS::Rooted<JS::Value> exn(aCx);
    491    DebugOnly<bool> gotException = JS_GetPendingException(aCx, &exn);
    492    MOZ_ASSERT(gotException);
    493 
    494    JS_ClearPendingException(aCx);
    495    aRv.ThrowJSException(aCx, exn);
    496    return;
    497  }
    498 
    499  aValue.set(json);
    500 }
    501 
    502 }  // namespace mozilla::dom