tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

nsExpatDriver.cpp (64659B)


      1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
      2 /* This Source Code Form is subject to the terms of the Mozilla Public
      3 * License, v. 2.0. If a copy of the MPL was not distributed with this
      4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      5 
      6 #include "nsExpatDriver.h"
      7 #include "mozilla/fallible.h"
      8 #include "nsCOMPtr.h"
      9 #include "CParserContext.h"
     10 #include "nsIExpatSink.h"
     11 #include "nsIContentSink.h"
     12 #include "nsIDocShell.h"
     13 #include "nsParserMsgUtils.h"
     14 #include "nsIURL.h"
     15 #include "nsIUnicharInputStream.h"
     16 #include "nsIProtocolHandler.h"
     17 #include "nsNetUtil.h"
     18 #include "nsString.h"
     19 #include "nsTextFormatter.h"
     20 #include "nsDirectoryServiceDefs.h"
     21 #include "nsCRT.h"
     22 #include "nsIConsoleService.h"
     23 #include "nsIScriptError.h"
     24 #include "nsIScriptGlobalObject.h"
     25 #include "nsIContentPolicy.h"
     26 #include "nsComponentManagerUtils.h"
     27 #include "nsContentPolicyUtils.h"
     28 #include "nsError.h"
     29 #include "nsXPCOMCIDInternal.h"
     30 #include "nsUnicharInputStream.h"
     31 #include "nsContentUtils.h"
     32 #include "mozilla/BasePrincipal.h"
     33 #include "mozilla/NullPrincipal.h"
     34 #include "mozilla/RandomNum.h"
     35 #include "mozilla/glean/ParserHtmlparserMetrics.h"
     36 
     37 #include "nsThreadUtils.h"
     38 #include "mozilla/ClearOnShutdown.h"
     39 #include "mozilla/RLBoxUtils.h"
     40 #include "mozilla/UniquePtr.h"
     41 
     42 #include "mozilla/Logging.h"
     43 
     44 using mozilla::fallible;
     45 using mozilla::LogLevel;
     46 using mozilla::MakeStringSpan;
     47 using mozilla::Maybe;
     48 using mozilla::dom::Document;
     49 
     50 // We only pass chunks of length sMaxChunkLength to Expat in the RLBOX sandbox.
     51 // The RLBOX sandbox has a limited amount of memory, and we have to account for
     52 // other memory use by Expat (including the buffering it does).
     53 // Note that sMaxChunkLength is in number of characters.
     54 #ifdef DEBUG
     55 // On debug builds we set a much lower limit (1kB) to try to hit boundary
     56 // conditions more frequently.
     57 static const uint32_t sMaxChunkLength = 1024 / sizeof(char16_t);
     58 #else
     59 static const uint32_t sMaxChunkLength = (128 * 1024) / sizeof(char16_t);
     60 #endif
     61 
     62 #define kExpatSeparatorChar 0xFFFF
     63 
     64 static const char16_t kUTF16[] = {'U', 'T', 'F', '-', '1', '6', '\0'};
     65 
     66 static mozilla::LazyLogModule gExpatDriverLog("expatdriver");
     67 
     68 // Use the same maximum tree depth as Chromium (see
     69 // https://chromium.googlesource.com/chromium/src/+/f464165c1dedff1c955d3c051c5a9a1c6a0e8f6b/third_party/WebKit/Source/core/xml/parser/XMLDocumentParser.cpp#85).
     70 static const uint16_t sMaxXMLTreeDepth = 5000;
     71 
     72 /***************************** RLBOX HELPERS ********************************/
     73 // Helpers for calling sandboxed expat functions in handlers
     74 
     75 #define RLBOX_EXPAT_SAFE_CALL(foo, verifier, ...)                          \
     76  aSandbox.invoke_sandbox_function(foo, self->mExpatParser, ##__VA_ARGS__) \
     77      .copy_and_verify(verifier)
     78 
     79 #define RLBOX_EXPAT_SAFE_MCALL(foo, verifier, ...)                \
     80  Sandbox()                                                       \
     81      ->invoke_sandbox_function(foo, mExpatParser, ##__VA_ARGS__) \
     82      .copy_and_verify(verifier)
     83 
     84 #define RLBOX_EXPAT_CALL(foo, ...) \
     85  aSandbox.invoke_sandbox_function(foo, self->mExpatParser, ##__VA_ARGS__)
     86 
     87 #define RLBOX_EXPAT_MCALL(foo, ...) \
     88  Sandbox()->invoke_sandbox_function(foo, mExpatParser, ##__VA_ARGS__)
     89 
     90 #define RLBOX_SAFE_PRINT "Value used only for printing"
     91 #define MOZ_RELEASE_ASSERT_TAINTED(cond, ...)                        \
     92  MOZ_RELEASE_ASSERT((cond).unverified_safe_because("Sanity check"), \
     93                     ##__VA_ARGS__)
     94 
     95 /* safe_unverified is used whenever it's safe to not use a validator */
     96 template <typename T>
     97 static T safe_unverified(T val) {
     98  return val;
     99 }
    100 
    101 /* status_verifier is a type validator for XML_Status */
    102 inline enum XML_Status status_verifier(enum XML_Status s) {
    103  MOZ_RELEASE_ASSERT(s >= XML_STATUS_ERROR && s <= XML_STATUS_SUSPENDED,
    104                     "unexpected status code");
    105  return s;
    106 }
    107 
    108 /* error_verifier is a type validator for XML_Error */
    109 inline enum XML_Error error_verifier(enum XML_Error code) {
    110  MOZ_RELEASE_ASSERT(
    111      code >= XML_ERROR_NONE && code <= XML_ERROR_INVALID_ARGUMENT,
    112      "unexpected XML error code");
    113  return code;
    114 }
    115 
    116 /* We use unverified_xml_string to just expose sandbox expat strings to Firefox
    117 * without any validation. On 64-bit we have guard pages at the sandbox
    118 * boundary; on 32-bit we don't and a string could be used to read beyond the
    119 * sandbox boundary. In our attacker model this is okay (the attacker can just
    120 * Spectre).
    121 *
    122 * Nevertheless, we should try to add strings validators to the consumer code
    123 * of expat whenever we have some semantics. At the very lest we should make
    124 * sure that the strings are never written to. Bug 1693991 tracks this.
    125 */
    126 static const XML_Char* unverified_xml_string(uintptr_t ptr) {
    127  return reinterpret_cast<const XML_Char*>(ptr);
    128 }
    129 
    130 /* The TransferBuffer class is used to copy (or directly expose in the
    131 * noop-sandbox case) buffers into the expat sandbox (and automatically
    132 * when out of scope).
    133 */
    134 template <typename T>
    135 using TransferBuffer =
    136    mozilla::RLBoxTransferBufferToSandbox<T, rlbox_expat_sandbox_type>;
    137 
    138 /*************************** END RLBOX HELPERS ******************************/
    139 
    140 /***************************** EXPAT CALL BACKS ******************************/
    141 // The callback handlers that get called from the expat parser.
    142 
    143 static void Driver_HandleXMLDeclaration(
    144    rlbox_sandbox_expat& aSandbox, tainted_expat<void*> /* aUserData */,
    145    tainted_expat<const XML_Char*> aVersion,
    146    tainted_expat<const XML_Char*> aEncoding, tainted_expat<int> aStandalone) {
    147  nsExpatDriver* driver = static_cast<nsExpatDriver*>(aSandbox.sandbox_storage);
    148  MOZ_ASSERT(driver);
    149 
    150  int standalone = aStandalone.copy_and_verify([&](auto a) {
    151    // Standalone argument can be -1, 0, or 1 (see
    152    // /parser/expat/lib/expat.h#185)
    153    MOZ_RELEASE_ASSERT(a >= -1 && a <= 1, "Unexpected standalone parameter");
    154    return a;
    155  });
    156 
    157  const auto* version = aVersion.copy_and_verify_address(unverified_xml_string);
    158  const auto* encoding =
    159      aEncoding.copy_and_verify_address(unverified_xml_string);
    160  driver->HandleXMLDeclaration(version, encoding, standalone);
    161 }
    162 
    163 static void Driver_HandleCharacterData(rlbox_sandbox_expat& aSandbox,
    164                                       tainted_expat<void*> /* aUserData */,
    165                                       tainted_expat<const XML_Char*> aData,
    166                                       tainted_expat<int> aLength) {
    167  nsExpatDriver* driver = static_cast<nsExpatDriver*>(aSandbox.sandbox_storage);
    168  MOZ_ASSERT(driver);
    169  // aData is not null terminated; even with bad length we will not span beyond
    170  // sandbox boundary
    171  uint32_t length =
    172      static_cast<uint32_t>(aLength.copy_and_verify(safe_unverified<int>));
    173  const auto* data = aData.unverified_safe_pointer_because(
    174      length, "Only care that the data is within sandbox boundary.");
    175  driver->HandleCharacterData(data, length);
    176 }
    177 
    178 static void Driver_HandleComment(rlbox_sandbox_expat& aSandbox,
    179                                 tainted_expat<void*> /* aUserData */,
    180                                 tainted_expat<const XML_Char*> aName) {
    181  nsExpatDriver* driver = static_cast<nsExpatDriver*>(aSandbox.sandbox_storage);
    182  MOZ_ASSERT(driver);
    183  const auto* name = aName.copy_and_verify_address(unverified_xml_string);
    184  driver->HandleComment(name);
    185 }
    186 
    187 static void Driver_HandleProcessingInstruction(
    188    rlbox_sandbox_expat& aSandbox, tainted_expat<void*> /* aUserData */,
    189    tainted_expat<const XML_Char*> aTarget,
    190    tainted_expat<const XML_Char*> aData) {
    191  nsExpatDriver* driver = static_cast<nsExpatDriver*>(aSandbox.sandbox_storage);
    192  MOZ_ASSERT(driver);
    193  const auto* target = aTarget.copy_and_verify_address(unverified_xml_string);
    194  const auto* data = aData.copy_and_verify_address(unverified_xml_string);
    195  driver->HandleProcessingInstruction(target, data);
    196 }
    197 
    198 static void Driver_HandleDefault(rlbox_sandbox_expat& aSandbox,
    199                                 tainted_expat<void*> /* aUserData */,
    200                                 tainted_expat<const XML_Char*> aData,
    201                                 tainted_expat<int> aLength) {
    202  nsExpatDriver* driver = static_cast<nsExpatDriver*>(aSandbox.sandbox_storage);
    203  MOZ_ASSERT(driver);
    204  // aData is not null terminated; even with bad length we will not span
    205  // beyond sandbox boundary
    206  uint32_t length =
    207      static_cast<uint32_t>(aLength.copy_and_verify(safe_unverified<int>));
    208  const auto* data = aData.unverified_safe_pointer_because(
    209      length, "Only care that the data is within sandbox boundary.");
    210  driver->HandleDefault(data, length);
    211 }
    212 
    213 static void Driver_HandleStartCdataSection(
    214    rlbox_sandbox_expat& aSandbox, tainted_expat<void*> /* aUserData */) {
    215  nsExpatDriver* driver = static_cast<nsExpatDriver*>(aSandbox.sandbox_storage);
    216  MOZ_ASSERT(driver);
    217  driver->HandleStartCdataSection();
    218 }
    219 
    220 static void Driver_HandleEndCdataSection(rlbox_sandbox_expat& aSandbox,
    221                                         tainted_expat<void*> /* aUserData */) {
    222  nsExpatDriver* driver = static_cast<nsExpatDriver*>(aSandbox.sandbox_storage);
    223  MOZ_ASSERT(driver);
    224  driver->HandleEndCdataSection();
    225 }
    226 
    227 static void Driver_HandleStartDoctypeDecl(
    228    rlbox_sandbox_expat& aSandbox, tainted_expat<void*> /* aUserData */,
    229    tainted_expat<const XML_Char*> aDoctypeName,
    230    tainted_expat<const XML_Char*> aSysid,
    231    tainted_expat<const XML_Char*> aPubid,
    232    tainted_expat<int> aHasInternalSubset) {
    233  nsExpatDriver* driver = static_cast<nsExpatDriver*>(aSandbox.sandbox_storage);
    234  MOZ_ASSERT(driver);
    235  const auto* doctypeName =
    236      aDoctypeName.copy_and_verify_address(unverified_xml_string);
    237  const auto* sysid = aSysid.copy_and_verify_address(unverified_xml_string);
    238  const auto* pubid = aPubid.copy_and_verify_address(unverified_xml_string);
    239  bool hasInternalSubset =
    240      !!(aHasInternalSubset.copy_and_verify(safe_unverified<int>));
    241  driver->HandleStartDoctypeDecl(doctypeName, sysid, pubid, hasInternalSubset);
    242 }
    243 
    244 static void Driver_HandleEndDoctypeDecl(rlbox_sandbox_expat& aSandbox,
    245                                        tainted_expat<void*> /* aUserData */) {
    246  nsExpatDriver* driver = static_cast<nsExpatDriver*>(aSandbox.sandbox_storage);
    247  MOZ_ASSERT(driver);
    248  driver->HandleEndDoctypeDecl();
    249 }
    250 
    251 static tainted_expat<int> Driver_HandleExternalEntityRef(
    252    rlbox_sandbox_expat& aSandbox, tainted_expat<XML_Parser> /* aParser */,
    253    tainted_expat<const XML_Char*> aOpenEntityNames,
    254    tainted_expat<const XML_Char*> aBase,
    255    tainted_expat<const XML_Char*> aSystemId,
    256    tainted_expat<const XML_Char*> aPublicId) {
    257  nsExpatDriver* driver = static_cast<nsExpatDriver*>(aSandbox.sandbox_storage);
    258  MOZ_ASSERT(driver);
    259 
    260  const auto* openEntityNames =
    261      aOpenEntityNames.copy_and_verify_address(unverified_xml_string);
    262  const auto* base = aBase.copy_and_verify_address(unverified_xml_string);
    263  const auto* systemId =
    264      aSystemId.copy_and_verify_address(unverified_xml_string);
    265  const auto* publicId =
    266      aPublicId.copy_and_verify_address(unverified_xml_string);
    267  return driver->HandleExternalEntityRef(openEntityNames, base, systemId,
    268                                         publicId);
    269 }
    270 
    271 /***************************** END CALL BACKS ********************************/
    272 
    273 /***************************** CATALOG UTILS *********************************/
    274 
    275 // Initially added for bug 113400 to switch from the remote "XHTML 1.0 plus
    276 // MathML 2.0" DTD to the the lightweight customized version that Mozilla uses.
    277 // Since Mozilla is not validating, no need to fetch a *huge* file at each
    278 // click.
    279 // XXX The cleanest solution here would be to fix Bug 98413: Implement XML
    280 // Catalogs.
    281 struct nsCatalogData {
    282  const char* mPublicID;
    283  const char* mLocalDTD;
    284  const char* mAgentSheet;
    285 };
    286 
    287 // The order of this table is guestimated to be in the optimum order
    288 static const nsCatalogData kCatalogTable[] = {
    289    {"-//W3C//DTD XHTML 1.0 Transitional//EN", "htmlmathml-f.ent", nullptr},
    290    {"-//W3C//DTD XHTML 1.1//EN", "htmlmathml-f.ent", nullptr},
    291    {"-//W3C//DTD XHTML 1.0 Strict//EN", "htmlmathml-f.ent", nullptr},
    292    {"-//W3C//DTD XHTML 1.0 Frameset//EN", "htmlmathml-f.ent", nullptr},
    293    {"-//W3C//DTD XHTML Basic 1.0//EN", "htmlmathml-f.ent", nullptr},
    294    {"-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN", "htmlmathml-f.ent", nullptr},
    295    {"-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN",
    296     "htmlmathml-f.ent", nullptr},
    297    {"-//W3C//DTD MathML 2.0//EN", "htmlmathml-f.ent", nullptr},
    298    {"-//WAPFORUM//DTD XHTML Mobile 1.0//EN", "htmlmathml-f.ent", nullptr},
    299    {"-//WAPFORUM//DTD XHTML Mobile 1.1//EN", "htmlmathml-f.ent", nullptr},
    300    {"-//WAPFORUM//DTD XHTML Mobile 1.2//EN", "htmlmathml-f.ent", nullptr},
    301    {nullptr, nullptr, nullptr}};
    302 
    303 static const nsCatalogData* LookupCatalogData(const char16_t* aPublicID) {
    304  nsDependentString publicID(aPublicID);
    305 
    306  // linear search for now since the number of entries is going to
    307  // be negligible, and the fix for bug 98413 would get rid of this
    308  // code anyway
    309  const nsCatalogData* data = kCatalogTable;
    310  while (data->mPublicID) {
    311    if (publicID.EqualsASCII(data->mPublicID)) {
    312      return data;
    313    }
    314    ++data;
    315  }
    316 
    317  return nullptr;
    318 }
    319 
    320 // This function provides a resource URI to a local DTD
    321 // in resource://gre/res/dtd/ which may or may not exist.
    322 // If aCatalogData is provided, it is used to remap the
    323 // DTD instead of taking the filename from the URI.  aDTD
    324 // may be null in some cases that are relying on
    325 // aCatalogData working for them.
    326 static void GetLocalDTDURI(const nsCatalogData* aCatalogData, nsIURI* aDTD,
    327                           nsIURI** aResult) {
    328  nsAutoCString fileName;
    329  if (aCatalogData) {
    330    // remap the DTD to a known local DTD
    331    fileName.Assign(aCatalogData->mLocalDTD);
    332  }
    333 
    334  if (fileName.IsEmpty()) {
    335    // Try to see if the user has installed the DTD file -- we extract the
    336    // filename.ext of the DTD here. Hence, for any DTD for which we have
    337    // no predefined mapping, users just have to copy the DTD file to our
    338    // special DTD directory and it will be picked.
    339    nsCOMPtr<nsIURL> dtdURL = do_QueryInterface(aDTD);
    340    if (!dtdURL) {
    341      // Not a URL with a filename, or maybe it was null.  Either way, nothing
    342      // else we can do here.
    343      return;
    344    }
    345 
    346    dtdURL->GetFileName(fileName);
    347    if (fileName.IsEmpty()) {
    348      return;
    349    }
    350  }
    351 
    352  nsAutoCString respath("resource://gre/res/dtd/");
    353  respath += fileName;
    354  NS_NewURI(aResult, respath);
    355 }
    356 
    357 /***************************** END CATALOG UTILS *****************************/
    358 
// XPCOM boilerplate for nsExpatDriver: the interface map (only nsISupports is
// listed), the cycle-collecting AddRef/Release implementations, and the cycle
// collection implementation, which names mSink as its participating member.
NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsExpatDriver)
  NS_INTERFACE_MAP_ENTRY(nsISupports)
NS_INTERFACE_MAP_END

NS_IMPL_CYCLE_COLLECTING_ADDREF(nsExpatDriver)
NS_IMPL_CYCLE_COLLECTING_RELEASE(nsExpatDriver)

NS_IMPL_CYCLE_COLLECTION(nsExpatDriver, mSink)
    367 
    368 nsExpatDriver::nsExpatDriver()
    369    : mExpatParser(nullptr),
    370      mInCData(false),
    371      mInInternalSubset(false),
    372      mInExternalDTD(false),
    373      mMadeFinalCallToExpat(false),
    374      mInParser(false),
    375      mInternalState(NS_OK),
    376      mExpatBuffered(0),
    377      mTagDepth(0),
    378      mCatalogData(nullptr),
    379      mInnerWindowID(0) {}
    380 
    381 nsExpatDriver::~nsExpatDriver() { Destroy(); }
    382 
    383 void nsExpatDriver::Destroy() {
    384  if (mSandboxPoolData) {
    385    SandboxData()->DetachDriver();
    386    if (mExpatParser) {
    387      RLBOX_EXPAT_MCALL(MOZ_XML_ParserFree);
    388    }
    389  }
    390  mSandboxPoolData.reset();
    391  mURIs.Clear();
    392  mExpatParser = nullptr;
    393 }
    394 
    395 // The AllocAttrs class is used to speed up copying attributes from the
    396 // sandboxed expat by fast allocating attributes on the stack and only falling
    397 // back to malloc when we need to allocate lots of attributes.
    398 class MOZ_STACK_CLASS AllocAttrs {
    399 #define NUM_STACK_SLOTS 16
    400 public:
    401  const char16_t** Init(size_t size) {
    402    if (size <= NUM_STACK_SLOTS) {
    403      return mInlineArr;
    404    }
    405    mHeapPtr = mozilla::MakeUnique<const char16_t*[]>(size);
    406    return mHeapPtr.get();
    407  }
    408 
    409 private:
    410  const char16_t* mInlineArr[NUM_STACK_SLOTS];
    411  mozilla::UniquePtr<const char16_t*[]> mHeapPtr;
    412 #undef NUM_STACK_SLOTS
    413 };
    414 
    415 /* static */
    416 void nsExpatDriver::HandleStartElement(rlbox_sandbox_expat& aSandbox,
    417                                       tainted_expat<void*> /* aUserData */,
    418                                       tainted_expat<const char16_t*> aName,
    419                                       tainted_expat<const char16_t**> aAttrs) {
    420  nsExpatDriver* self = static_cast<nsExpatDriver*>(aSandbox.sandbox_storage);
    421  MOZ_ASSERT(self && self->mSink);
    422 
    423  const auto* name = aName.copy_and_verify_address(unverified_xml_string);
    424 
    425  // Calculate the total number of elements in aAttrs.
    426  // XML_GetSpecifiedAttributeCount will only give us the number of specified
    427  // attrs (twice that number, actually), so we have to check for default
    428  // attrs ourselves.
    429  tainted_expat<int> count =
    430      RLBOX_EXPAT_CALL(MOZ_XML_GetSpecifiedAttributeCount);
    431  MOZ_RELEASE_ASSERT_TAINTED(count >= 0, "Unexpected attribute count");
    432 
    433  tainted_expat<uint64_t> attrArrayLengthTainted;
    434  for (attrArrayLengthTainted = rlbox::sandbox_static_cast<uint64_t>(count);
    435       (aAttrs[attrArrayLengthTainted] != nullptr)
    436           .unverified_safe_because("Bad length is checked later");
    437       attrArrayLengthTainted += 2) {
    438    // Just looping till we find out what the length is
    439  }
    440 
    441  uint32_t attrArrayLength =
    442      attrArrayLengthTainted.copy_and_verify([&](uint64_t value) {
    443        // A malicious length could result in an overflow when we allocate
    444        // aAttrs and then access elements of the array.
    445        MOZ_RELEASE_ASSERT(value < UINT32_MAX, "Overflow attempt");
    446        return value;
    447      });
    448 
    449  // Copy tainted aAttrs from sandbox
    450  AllocAttrs allocAttrs;
    451  const char16_t** attrs = allocAttrs.Init(attrArrayLength + 1);
    452  if (NS_WARN_IF(!aAttrs || !attrs)) {
    453    self->MaybeStopParser(NS_ERROR_OUT_OF_MEMORY);
    454    return;
    455  }
    456 
    457  for (uint32_t i = 0; i < attrArrayLength; i++) {
    458    attrs[i] = aAttrs[i].copy_and_verify_address(unverified_xml_string);
    459  }
    460  attrs[attrArrayLength] = nullptr;
    461 
    462  if (self->mSink) {
    463    // We store the tagdepth in a PRUint16, so make sure the limit fits in a
    464    // PRUint16.
    465    static_assert(
    466        sMaxXMLTreeDepth <=
    467        std::numeric_limits<decltype(nsExpatDriver::mTagDepth)>::max());
    468 
    469    if (++self->mTagDepth > sMaxXMLTreeDepth) {
    470      self->MaybeStopParser(NS_ERROR_HTMLPARSER_HIERARCHYTOODEEP);
    471      return;
    472    }
    473 
    474    nsresult rv = self->mSink->HandleStartElement(
    475        name, attrs, attrArrayLength,
    476        RLBOX_EXPAT_SAFE_CALL(MOZ_XML_GetCurrentLineNumber,
    477                              safe_unverified<XML_Size>),
    478        RLBOX_EXPAT_SAFE_CALL(MOZ_XML_GetCurrentColumnNumber,
    479                              safe_unverified<XML_Size>));
    480    self->MaybeStopParser(rv);
    481  }
    482 }
    483 
    484 /* static */
    485 void nsExpatDriver::HandleStartElementForSystemPrincipal(
    486    rlbox_sandbox_expat& aSandbox, tainted_expat<void*> aUserData,
    487    tainted_expat<const char16_t*> aName,
    488    tainted_expat<const char16_t**> aAttrs) {
    489  nsExpatDriver* self = static_cast<nsExpatDriver*>(aSandbox.sandbox_storage);
    490  MOZ_ASSERT(self);
    491  if (!RLBOX_EXPAT_SAFE_CALL(MOZ_XML_ProcessingEntityValue,
    492                             safe_unverified<XML_Bool>)) {
    493    HandleStartElement(aSandbox, aUserData, aName, aAttrs);
    494  } else {
    495    nsCOMPtr<Document> doc =
    496        do_QueryInterface(self->mOriginalSink->GetTarget());
    497 
    498    // Adjust the column number so that it is one based rather than zero
    499    // based.
    500    tainted_expat<XML_Size> colNumber =
    501        RLBOX_EXPAT_CALL(MOZ_XML_GetCurrentColumnNumber) + 1;
    502    tainted_expat<XML_Size> lineNumber =
    503        RLBOX_EXPAT_CALL(MOZ_XML_GetCurrentLineNumber);
    504 
    505    int32_t nameSpaceID;
    506    RefPtr<nsAtom> prefix, localName;
    507    const auto* name = aName.copy_and_verify_address(unverified_xml_string);
    508    nsContentUtils::SplitExpatName(name, getter_AddRefs(prefix),
    509                                   getter_AddRefs(localName), &nameSpaceID);
    510 
    511    nsAutoString error;
    512    error.AppendLiteral("Ignoring element <");
    513    if (prefix) {
    514      error.Append(prefix->GetUTF16String());
    515      error.Append(':');
    516    }
    517    error.Append(localName->GetUTF16String());
    518    error.AppendLiteral("> created from entity value.");
    519 
    520    nsContentUtils::ReportToConsoleNonLocalized(
    521        error, nsIScriptError::warningFlag, "XML Document"_ns, doc,
    522        mozilla::SourceLocation(
    523            doc->GetDocumentURI(),
    524            lineNumber.unverified_safe_because(RLBOX_SAFE_PRINT),
    525            colNumber.unverified_safe_because(RLBOX_SAFE_PRINT)));
    526  }
    527 }
    528 
    529 /* static */
    530 void nsExpatDriver::HandleEndElement(rlbox_sandbox_expat& aSandbox,
    531                                     tainted_expat<void*> aUserData,
    532                                     tainted_expat<const char16_t*> aName) {
    533  nsExpatDriver* self = static_cast<nsExpatDriver*>(aSandbox.sandbox_storage);
    534  MOZ_ASSERT(self);
    535  const auto* name = aName.copy_and_verify_address(unverified_xml_string);
    536 
    537  NS_ASSERTION(self->mSink, "content sink not found!");
    538  NS_ASSERTION(self->mInternalState != NS_ERROR_HTMLPARSER_BLOCK,
    539               "Shouldn't block from HandleStartElement.");
    540 
    541  if (self->mSink && self->mInternalState != NS_ERROR_HTMLPARSER_STOPPARSING) {
    542    nsresult rv = self->mSink->HandleEndElement(name);
    543    --self->mTagDepth;
    544    self->MaybeStopParser(rv);
    545  }
    546 }
    547 
    548 /* static */
    549 void nsExpatDriver::HandleEndElementForSystemPrincipal(
    550    rlbox_sandbox_expat& aSandbox, tainted_expat<void*> aUserData,
    551    tainted_expat<const char16_t*> aName) {
    552  nsExpatDriver* self = static_cast<nsExpatDriver*>(aSandbox.sandbox_storage);
    553  MOZ_ASSERT(self);
    554  if (!RLBOX_EXPAT_SAFE_CALL(MOZ_XML_ProcessingEntityValue,
    555                             safe_unverified<XML_Bool>)) {
    556    HandleEndElement(aSandbox, aUserData, aName);
    557  }
    558 }
    559 
    560 nsresult nsExpatDriver::HandleCharacterData(const char16_t* aValue,
    561                                            const uint32_t aLength) {
    562  NS_ASSERTION(mSink, "content sink not found!");
    563 
    564  if (mInCData) {
    565    if (!mCDataText.Append(aValue, aLength, fallible)) {
    566      MaybeStopParser(NS_ERROR_OUT_OF_MEMORY);
    567    }
    568  } else if (mSink) {
    569    nsresult rv = mSink->HandleCharacterData(aValue, aLength);
    570    MaybeStopParser(rv);
    571  }
    572 
    573  return NS_OK;
    574 }
    575 
    576 nsresult nsExpatDriver::HandleComment(const char16_t* aValue) {
    577  NS_ASSERTION(mSink, "content sink not found!");
    578 
    579  if (mInExternalDTD) {
    580    // Ignore comments from external DTDs
    581    return NS_OK;
    582  }
    583 
    584  if (mInInternalSubset) {
    585    mInternalSubset.AppendLiteral("<!--");
    586    mInternalSubset.Append(aValue);
    587    mInternalSubset.AppendLiteral("-->");
    588  } else if (mSink) {
    589    nsresult rv = mSink->HandleComment(aValue);
    590    MaybeStopParser(rv);
    591  }
    592 
    593  return NS_OK;
    594 }
    595 
    596 nsresult nsExpatDriver::HandleProcessingInstruction(const char16_t* aTarget,
    597                                                    const char16_t* aData) {
    598  NS_ASSERTION(mSink, "content sink not found!");
    599 
    600  if (mInExternalDTD) {
    601    // Ignore PIs in external DTDs for now.  Eventually we want to
    602    // pass them to the sink in a way that doesn't put them in the DOM
    603    return NS_OK;
    604  }
    605 
    606  if (mInInternalSubset) {
    607    mInternalSubset.AppendLiteral("<?");
    608    mInternalSubset.Append(aTarget);
    609    mInternalSubset.Append(' ');
    610    mInternalSubset.Append(aData);
    611    mInternalSubset.AppendLiteral("?>");
    612  } else if (mSink) {
    613    nsresult rv = mSink->HandleProcessingInstruction(aTarget, aData);
    614    MaybeStopParser(rv);
    615  }
    616 
    617  return NS_OK;
    618 }
    619 
    620 nsresult nsExpatDriver::HandleXMLDeclaration(const char16_t* aVersion,
    621                                             const char16_t* aEncoding,
    622                                             int32_t aStandalone) {
    623  if (mSink) {
    624    nsresult rv = mSink->HandleXMLDeclaration(aVersion, aEncoding, aStandalone);
    625    MaybeStopParser(rv);
    626  }
    627 
    628  return NS_OK;
    629 }
    630 
    631 nsresult nsExpatDriver::HandleDefault(const char16_t* aValue,
    632                                      const uint32_t aLength) {
    633  NS_ASSERTION(mSink, "content sink not found!");
    634 
    635  if (mInExternalDTD) {
    636    // Ignore newlines in external DTDs
    637    return NS_OK;
    638  }
    639 
    640  if (mInInternalSubset) {
    641    mInternalSubset.Append(aValue, aLength);
    642  } else if (mSink) {
    643    uint32_t i;
    644    nsresult rv = mInternalState;
    645    for (i = 0; i < aLength && NS_SUCCEEDED(rv); ++i) {
    646      if (aValue[i] == '\n' || aValue[i] == '\r') {
    647        rv = mSink->HandleCharacterData(&aValue[i], 1);
    648      }
    649    }
    650    MaybeStopParser(rv);
    651  }
    652 
    653  return NS_OK;
    654 }
    655 
    656 nsresult nsExpatDriver::HandleStartCdataSection() {
    657  mInCData = true;
    658 
    659  return NS_OK;
    660 }
    661 
    662 nsresult nsExpatDriver::HandleEndCdataSection() {
    663  NS_ASSERTION(mSink, "content sink not found!");
    664 
    665  mInCData = false;
    666  if (mSink) {
    667    nsresult rv =
    668        mSink->HandleCDataSection(mCDataText.get(), mCDataText.Length());
    669    MaybeStopParser(rv);
    670  }
    671  mCDataText.Truncate();
    672 
    673  return NS_OK;
    674 }
    675 
    676 nsresult nsExpatDriver::HandleStartDoctypeDecl(const char16_t* aDoctypeName,
    677                                               const char16_t* aSysid,
    678                                               const char16_t* aPubid,
    679                                               bool aHasInternalSubset) {
    680  mDoctypeName = aDoctypeName;
    681  mSystemID = aSysid;
    682  mPublicID = aPubid;
    683 
    684  if (aHasInternalSubset) {
    685    // Consuming a huge internal subset translates to numerous
    686    // allocations. In an effort to avoid too many allocations
    687    // setting mInternalSubset's capacity to be 1K ( just a guesstimate! ).
    688    mInInternalSubset = true;
    689    mInternalSubset.SetCapacity(1024);
    690  } else {
    691    // Distinguish missing internal subset from an empty one
    692    mInternalSubset.SetIsVoid(true);
    693  }
    694 
    695  return NS_OK;
    696 }
    697 
    698 nsresult nsExpatDriver::HandleEndDoctypeDecl() {
    699  NS_ASSERTION(mSink, "content sink not found!");
    700 
    701  mInInternalSubset = false;
    702 
    703  if (mSink) {
    704    // let the sink know any additional knowledge that we have about the
    705    // document (currently, from bug 124570, we only expect to pass additional
    706    // agent sheets needed to layout the XML vocabulary of the document)
    707    nsCOMPtr<nsIURI> data;
    708 #if 0
    709    if (mCatalogData && mCatalogData->mAgentSheet) {
    710      NS_NewURI(getter_AddRefs(data), mCatalogData->mAgentSheet);
    711    }
    712 #endif
    713 
    714    // The unused support for "catalog style sheets" was removed. It doesn't
    715    // look like we'll ever fix bug 98413 either.
    716    MOZ_ASSERT(!mCatalogData || !mCatalogData->mAgentSheet,
    717               "Need to add back support for catalog style sheets");
    718 
    719    // Note: mInternalSubset already doesn't include the [] around it.
    720    nsresult rv = mSink->HandleDoctypeDecl(mInternalSubset, mDoctypeName,
    721                                           mSystemID, mPublicID, data);
    722    MaybeStopParser(rv);
    723  }
    724 
    725  mInternalSubset.Truncate();
    726 
    727  return NS_OK;
    728 }
    729 
    730 // Wrapper class for passing the sandbox data and parser as a closure to
    731 // ExternalDTDStreamReaderFunc.
    732 class RLBoxExpatClosure {
    733 public:
    734  RLBoxExpatClosure(RLBoxExpatSandboxData* aSbxData,
    735                    tainted_expat<XML_Parser> aExpatParser)
    736      : mSbxData(aSbxData), mExpatParser(aExpatParser) {};
    737  inline rlbox_sandbox_expat* Sandbox() const { return mSbxData->Sandbox(); };
    738  inline tainted_expat<XML_Parser> Parser() const { return mExpatParser; };
    739 
    740 private:
    741  RLBoxExpatSandboxData* mSbxData;
    742  tainted_expat<XML_Parser> mExpatParser;
    743 };
    744 
    745 static nsresult ExternalDTDStreamReaderFunc(nsIUnicharInputStream* aIn,
    746                                            void* aClosure,
    747                                            const char16_t* aFromSegment,
    748                                            uint32_t aToOffset, uint32_t aCount,
    749                                            uint32_t* aWriteCount) {
    750  MOZ_ASSERT(aClosure && aFromSegment && aWriteCount);
    751 
    752  *aWriteCount = 0;
    753 
    754  // Get sandbox and parser
    755  auto* closure = reinterpret_cast<RLBoxExpatClosure*>(aClosure);
    756  MOZ_ASSERT(closure);
    757 
    758  // Transfer segment into the sandbox
    759  auto fromSegment =
    760      TransferBuffer<char16_t>(closure->Sandbox(), aFromSegment, aCount);
    761  NS_ENSURE_TRUE(*fromSegment, NS_ERROR_OUT_OF_MEMORY);
    762 
    763  // Pass the buffer to expat for parsing.
    764  if (closure->Sandbox()
    765          ->invoke_sandbox_function(
    766              MOZ_XML_Parse, closure->Parser(),
    767              rlbox::sandbox_reinterpret_cast<const char*>(*fromSegment),
    768              aCount * sizeof(char16_t), 0)
    769          .copy_and_verify(status_verifier) == XML_STATUS_OK) {
    770    *aWriteCount = aCount;
    771    return NS_OK;
    772  }
    773 
    774  return NS_ERROR_FAILURE;
    775 }
    776 
    777 int nsExpatDriver::HandleExternalEntityRef(const char16_t* openEntityNames,
    778                                           const char16_t* base,
    779                                           const char16_t* systemId,
    780                                           const char16_t* publicId) {
    781  if (mInInternalSubset && !mInExternalDTD && openEntityNames) {
    782    mInternalSubset.Append(char16_t('%'));
    783    mInternalSubset.Append(nsDependentString(openEntityNames));
    784    mInternalSubset.Append(char16_t(';'));
    785  }
    786 
    787  nsCOMPtr<nsIURI> baseURI = GetBaseURI(base);
    788  NS_ENSURE_TRUE(baseURI, 1);
    789 
    790  // Load the external entity into a buffer.
    791  nsCOMPtr<nsIInputStream> in;
    792  nsCOMPtr<nsIURI> absURI;
    793  nsresult rv = OpenInputStreamFromExternalDTD(
    794      publicId, systemId, baseURI, getter_AddRefs(in), getter_AddRefs(absURI));
    795  if (NS_FAILED(rv)) {
    796 #ifdef DEBUG
    797    nsCString message("Failed to open external DTD: publicId \"");
    798    AppendUTF16toUTF8(MakeStringSpan(publicId), message);
    799    message += "\" systemId \"";
    800    AppendUTF16toUTF8(MakeStringSpan(systemId), message);
    801    message += "\" base \"";
    802    message.Append(baseURI->GetSpecOrDefault());
    803    message += "\" URL \"";
    804    if (absURI) {
    805      message.Append(absURI->GetSpecOrDefault());
    806    }
    807    message += "\"";
    808    NS_WARNING(message.get());
    809 #endif
    810    return 1;
    811  }
    812 
    813  nsCOMPtr<nsIUnicharInputStream> uniIn;
    814  rv = NS_NewUnicharInputStream(in, getter_AddRefs(uniIn));
    815  NS_ENSURE_SUCCESS(rv, 1);
    816 
    817  int result = 1;
    818  if (uniIn) {
    819    auto utf16 = TransferBuffer<char16_t>(
    820        Sandbox(), kUTF16, nsCharTraits<char16_t>::length(kUTF16) + 1);
    821    NS_ENSURE_TRUE(*utf16, 1);
    822    tainted_expat<XML_Parser> entParser;
    823    entParser =
    824        RLBOX_EXPAT_MCALL(MOZ_XML_ExternalEntityParserCreate, nullptr, *utf16);
    825    if (entParser) {
    826      auto baseURI = GetExpatBaseURI(absURI);
    827      auto url =
    828          TransferBuffer<XML_Char>(Sandbox(), &baseURI[0], std::size(baseURI));
    829      NS_ENSURE_TRUE(*url, 1);
    830      Sandbox()->invoke_sandbox_function(MOZ_XML_SetBase, entParser, *url);
    831 
    832      mInExternalDTD = true;
    833 
    834      bool inParser = mInParser;  // Save in-parser status
    835      mInParser = true;
    836 
    837      RLBoxExpatClosure closure(SandboxData(), entParser);
    838      uint32_t totalRead;
    839      do {
    840        rv = uniIn->ReadSegments(ExternalDTDStreamReaderFunc, &closure,
    841                                 uint32_t(-1), &totalRead);
    842      } while (NS_SUCCEEDED(rv) && totalRead > 0);
    843 
    844      result =
    845          Sandbox()
    846              ->invoke_sandbox_function(MOZ_XML_Parse, entParser, nullptr, 0, 1)
    847              .copy_and_verify(status_verifier);
    848 
    849      mInParser = inParser;  // Restore in-parser status
    850      mInExternalDTD = false;
    851 
    852      Sandbox()->invoke_sandbox_function(MOZ_XML_ParserFree, entParser);
    853    }
    854  }
    855 
    856  return result;
    857 }
    858 
    859 nsresult nsExpatDriver::OpenInputStreamFromExternalDTD(const char16_t* aFPIStr,
    860                                                       const char16_t* aURLStr,
    861                                                       nsIURI* aBaseURI,
    862                                                       nsIInputStream** aStream,
    863                                                       nsIURI** aAbsURI) {
    864  nsCOMPtr<nsIURI> uri;
    865  nsresult rv = NS_NewURI(getter_AddRefs(uri), NS_ConvertUTF16toUTF8(aURLStr),
    866                          nullptr, aBaseURI);
    867  // Even if the URI is malformed (most likely because we have a
    868  // non-hierarchical base URI and a relative DTD URI, with the latter
    869  // being the normal XHTML DTD case), we can try to see whether we
    870  // have catalog data for aFPIStr.
    871  if (NS_WARN_IF(NS_FAILED(rv) && rv != NS_ERROR_MALFORMED_URI)) {
    872    return rv;
    873  }
    874 
    875  // make sure the URI, if we have one, is allowed to be loaded in sync
    876  bool isUIResource = false;
    877  if (uri) {
    878    rv = NS_URIChainHasFlags(uri, nsIProtocolHandler::URI_IS_UI_RESOURCE,
    879                             &isUIResource);
    880    NS_ENSURE_SUCCESS(rv, rv);
    881  }
    882 
    883  nsCOMPtr<nsIURI> localURI;
    884  if (!isUIResource) {
    885    // Check to see if we can map the DTD to a known local DTD, or if a DTD
    886    // file of the same name exists in the special DTD directory
    887    if (aFPIStr) {
    888      // see if the Formal Public Identifier (FPI) maps to a catalog entry
    889      mCatalogData = LookupCatalogData(aFPIStr);
    890      GetLocalDTDURI(mCatalogData, uri, getter_AddRefs(localURI));
    891    }
    892    if (!localURI) {
    893      return NS_ERROR_NOT_IMPLEMENTED;
    894    }
    895  }
    896 
    897  nsCOMPtr<nsIChannel> channel;
    898  if (localURI) {
    899    localURI.swap(uri);
    900    rv = NS_NewChannel(getter_AddRefs(channel), uri,
    901                       nsContentUtils::GetSystemPrincipal(),
    902                       nsILoadInfo::SEC_ALLOW_CROSS_ORIGIN_SEC_CONTEXT_IS_NULL,
    903                       nsIContentPolicy::TYPE_DTD);
    904    NS_ENSURE_SUCCESS(rv, rv);
    905  } else {
    906    NS_ASSERTION(
    907        mSink == nsCOMPtr<nsIExpatSink>(do_QueryInterface(mOriginalSink)),
    908        "In nsExpatDriver::OpenInputStreamFromExternalDTD: "
    909        "mOriginalSink not the same object as mSink?");
    910    nsContentPolicyType policyType = nsIContentPolicy::TYPE_INTERNAL_DTD;
    911    if (mOriginalSink) {
    912      nsCOMPtr<Document> doc;
    913      doc = do_QueryInterface(mOriginalSink->GetTarget());
    914      if (doc) {
    915        if (doc->SkipDTDSecurityChecks()) {
    916          policyType = nsIContentPolicy::TYPE_INTERNAL_FORCE_ALLOWED_DTD;
    917        }
    918        rv = NS_NewChannel(
    919            getter_AddRefs(channel), uri, doc,
    920            nsILoadInfo::SEC_ALLOW_CROSS_ORIGIN_INHERITS_SEC_CONTEXT |
    921                nsILoadInfo::SEC_ALLOW_CHROME,
    922            policyType);
    923        NS_ENSURE_SUCCESS(rv, rv);
    924      }
    925    }
    926    if (!channel) {
    927      nsCOMPtr<nsIPrincipal> nullPrincipal =
    928          mozilla::NullPrincipal::CreateWithoutOriginAttributes();
    929      rv = NS_NewChannel(
    930          getter_AddRefs(channel), uri, nullPrincipal,
    931          nsILoadInfo::SEC_ALLOW_CROSS_ORIGIN_INHERITS_SEC_CONTEXT |
    932              nsILoadInfo::SEC_ALLOW_CHROME,
    933          policyType);
    934      NS_ENSURE_SUCCESS(rv, rv);
    935    }
    936  }
    937 
    938  uri.forget(aAbsURI);
    939 
    940  channel->SetContentType("application/xml"_ns);
    941  return channel->Open(aStream);
    942 }
    943 
    944 static nsresult CreateErrorText(const char16_t* aDescription,
    945                                const char16_t* aSourceURL,
    946                                tainted_expat<XML_Size> aLineNumber,
    947                                tainted_expat<XML_Size> aColNumber,
    948                                nsString& aErrorString, bool spoofEnglish) {
    949  aErrorString.Truncate();
    950 
    951  nsAutoString msg;
    952  nsresult rv = nsParserMsgUtils::GetLocalizedStringByName(
    953      spoofEnglish ? XMLPARSER_PROPERTIES_en_US : XMLPARSER_PROPERTIES,
    954      "XMLParsingError", msg);
    955  NS_ENSURE_SUCCESS(rv, rv);
    956 
    957  // XML Parsing Error: %1$S\nLocation: %2$S\nLine Number %3$u, Column %4$u:
    958  nsTextFormatter::ssprintf(
    959      aErrorString, msg.get(), aDescription, aSourceURL,
    960      aLineNumber.unverified_safe_because(RLBOX_SAFE_PRINT),
    961      aColNumber.unverified_safe_because(RLBOX_SAFE_PRINT));
    962  return NS_OK;
    963 }
    964 
    965 static nsresult AppendErrorPointer(tainted_expat<XML_Size> aColNumber,
    966                                   const char16_t* aSourceLine,
    967                                   size_t aSourceLineLength,
    968                                   nsString& aSourceString) {
    969  aSourceString.Append(char16_t('\n'));
    970 
    971  MOZ_RELEASE_ASSERT_TAINTED(aColNumber != static_cast<XML_Size>(0),
    972                             "Unexpected value of column");
    973 
    974  // Last character will be '^'.
    975  XML_Size last =
    976      (aColNumber - 1).copy_and_verify([&](XML_Size val) -> XML_Size {
    977        if (val > aSourceLineLength) {
    978          // Unexpected value of last column, just return a safe value
    979          return 0;
    980        }
    981        return val;
    982      });
    983 
    984  XML_Size i;
    985  uint32_t minuses = 0;
    986  for (i = 0; i < last; ++i) {
    987    if (aSourceLine[i] == '\t') {
    988      // Since this uses |white-space: pre;| a tab stop equals 8 spaces.
    989      uint32_t add = 8 - (minuses % 8);
    990      aSourceString.AppendASCII("--------", add);
    991      minuses += add;
    992    } else {
    993      aSourceString.Append(char16_t('-'));
    994      ++minuses;
    995    }
    996  }
    997  aSourceString.Append(char16_t('^'));
    998 
    999  return NS_OK;
   1000 }
   1001 
   1002 nsresult nsExpatDriver::HandleError() {
   1003  int32_t code =
   1004      RLBOX_EXPAT_MCALL(MOZ_XML_GetErrorCode).copy_and_verify(error_verifier);
   1005 
   1006  // Map Expat error code to an error string
   1007  // XXX Deal with error returns.
   1008  nsAutoString description;
   1009  nsCOMPtr<Document> doc;
   1010  if (mOriginalSink) {
   1011    doc = do_QueryInterface(mOriginalSink->GetTarget());
   1012  }
   1013 
   1014  bool spoofEnglish = nsContentUtils::ShouldResistFingerprinting(
   1015      doc, mozilla::RFPTarget::JSLocale);
   1016  nsParserMsgUtils::GetLocalizedStringByID(
   1017      spoofEnglish ? XMLPARSER_PROPERTIES_en_US : XMLPARSER_PROPERTIES, code,
   1018      description);
   1019 
   1020  if (code == XML_ERROR_TAG_MISMATCH) {
   1021    /**
   1022     *  Expat can send the following:
   1023     *    localName
   1024     *    namespaceURI<separator>localName
   1025     *    namespaceURI<separator>localName<separator>prefix
   1026     *
   1027     *  and we use 0xFFFF for the <separator>.
   1028     *
   1029     */
   1030 
   1031    const char16_t* mismatch =
   1032        RLBOX_EXPAT_MCALL(MOZ_XML_GetMismatchedTag)
   1033            .copy_and_verify_address(unverified_xml_string);
   1034    const char16_t* uriEnd = nullptr;
   1035    const char16_t* nameEnd = nullptr;
   1036    const char16_t* pos;
   1037    for (pos = mismatch; *pos; ++pos) {
   1038      if (*pos == kExpatSeparatorChar) {
   1039        if (uriEnd) {
   1040          nameEnd = pos;
   1041        } else {
   1042          uriEnd = pos;
   1043        }
   1044      }
   1045    }
   1046 
   1047    nsAutoString tagName;
   1048    if (uriEnd && nameEnd) {
   1049      // We have a prefix.
   1050      tagName.Append(nameEnd + 1, pos - nameEnd - 1);
   1051      tagName.Append(char16_t(':'));
   1052    }
   1053    const char16_t* nameStart = uriEnd ? uriEnd + 1 : mismatch;
   1054    tagName.Append(nameStart, (nameEnd ? nameEnd : pos) - nameStart);
   1055 
   1056    nsAutoString msg;
   1057    nsParserMsgUtils::GetLocalizedStringByName(
   1058        spoofEnglish ? XMLPARSER_PROPERTIES_en_US : XMLPARSER_PROPERTIES,
   1059        "Expected", msg);
   1060 
   1061    // . Expected: </%S>.
   1062    nsAutoString message;
   1063    nsTextFormatter::ssprintf(message, msg.get(), tagName.get());
   1064    description.Append(message);
   1065  }
   1066 
   1067  // Adjust the column number so that it is one based rather than zero based.
   1068  tainted_expat<XML_Size> colNumber =
   1069      RLBOX_EXPAT_MCALL(MOZ_XML_GetCurrentColumnNumber) + 1;
   1070  tainted_expat<XML_Size> lineNumber =
   1071      RLBOX_EXPAT_MCALL(MOZ_XML_GetCurrentLineNumber);
   1072 
   1073  // Copy out the two character bufer that holds the expatBase
   1074  const std::unique_ptr<XML_Char[]> expatBase =
   1075      RLBOX_EXPAT_MCALL(MOZ_XML_GetBase)
   1076          .copy_and_verify_range(
   1077              [](std::unique_ptr<XML_Char[]> val) {
   1078                // No additional checks needed as this is sent to GetBaseURI
   1079                // which checks its inputs
   1080                return val;
   1081              },
   1082              ExpatBaseURI::Length);
   1083  nsAutoString uri;
   1084  nsCOMPtr<nsIURI> baseURI;
   1085  if (expatBase && (baseURI = GetBaseURI(expatBase.get()))) {
   1086    // Let's ignore if this fails, we're already reporting a parse error.
   1087    (void)CopyUTF8toUTF16(baseURI->GetSpecOrDefault(), uri, fallible);
   1088  }
   1089  nsAutoString errorText;
   1090  CreateErrorText(description.get(), uri.get(), lineNumber, colNumber,
   1091                  errorText, spoofEnglish);
   1092 
   1093  nsAutoString sourceText(mLastLine);
   1094  AppendErrorPointer(colNumber, mLastLine.get(), mLastLine.Length(),
   1095                     sourceText);
   1096 
   1097  if (doc && nsContentUtils::IsChromeDoc(doc)) {
   1098    nsCString path = doc->GetDocumentURI()->GetSpecOrDefault();
   1099    nsCOMPtr<nsISupports> container = doc->GetContainer();
   1100    nsCOMPtr<nsIDocShell> docShell = do_QueryInterface(container);
   1101    nsCString docShellDestroyed("unknown"_ns);
   1102    if (docShell) {
   1103      bool destroyed = false;
   1104      docShell->IsBeingDestroyed(&destroyed);
   1105      docShellDestroyed.Assign(destroyed ? "true"_ns : "false"_ns);
   1106    }
   1107 
   1108    mozilla::glean::ysod::ShownYsodExtra extra = {
   1109        .destroyed = mozilla::Some(docShellDestroyed),
   1110        .errorCode = mozilla::Some(code),
   1111        .hidden = mozilla::Some(doc->Hidden()),
   1112        .lastLine = mozilla::Some(NS_ConvertUTF16toUTF8(mLastLine)),
   1113        .lastLineLen = mozilla::Some(mLastLine.Length()),
   1114        .location = mozilla::Some(nsPrintfCString(
   1115            "%lu:%lu", lineNumber.unverified_safe_because(RLBOX_SAFE_PRINT),
   1116            colNumber.unverified_safe_because(RLBOX_SAFE_PRINT))),
   1117        .value = mozilla::Some(path),
   1118    };
   1119    mozilla::glean::ysod::shown_ysod.Record(mozilla::Some(extra));
   1120  }
   1121 
   1122  // Try to create and initialize the script error.
   1123  nsCOMPtr<nsIScriptError> serr(do_CreateInstance(NS_SCRIPTERROR_CONTRACTID));
   1124  nsresult rv = NS_ERROR_FAILURE;
   1125  if (serr) {
   1126    rv = serr->InitWithSourceURI(
   1127        errorText, mURIs.SafeElementAt(0),
   1128        lineNumber.unverified_safe_because(RLBOX_SAFE_PRINT),
   1129        colNumber.unverified_safe_because(RLBOX_SAFE_PRINT),
   1130        nsIScriptError::errorFlag, "malformed-xml", mInnerWindowID);
   1131  }
   1132 
   1133  // If it didn't initialize, we can't do any logging.
   1134  bool shouldReportError = NS_SUCCEEDED(rv);
   1135 
   1136  // mSink might be null here if our parser was terminated.
   1137  if (mSink && shouldReportError) {
   1138    rv = mSink->ReportError(errorText.get(), sourceText.get(), serr,
   1139                            &shouldReportError);
   1140    if (NS_FAILED(rv)) {
   1141      shouldReportError = true;
   1142    }
   1143  }
   1144 
   1145  // mOriginalSink might be null here if our parser was terminated.
   1146  if (mOriginalSink) {
   1147    nsCOMPtr<Document> doc = do_QueryInterface(mOriginalSink->GetTarget());
   1148    if (doc && doc->SuppressParserErrorConsoleMessages()) {
   1149      shouldReportError = false;
   1150    }
   1151  }
   1152 
   1153  if (shouldReportError) {
   1154    nsCOMPtr<nsIConsoleService> cs(do_GetService(NS_CONSOLESERVICE_CONTRACTID));
   1155    if (cs) {
   1156      cs->LogMessage(serr);
   1157    }
   1158  }
   1159 
   1160  return NS_ERROR_HTMLPARSER_STOPPARSING;
   1161 }
   1162 
   1163 // Because we need to allocate a buffer in the RLBOX sandbox, and copy the data
   1164 // to it for Expat to parse, we are limited in size by the memory available in
   1165 // the RLBOX sandbox. nsExpatDriver::ChunkAndParseBuffer divides the buffer into
   1166 // chunks of sMaxChunkLength characters or less, and passes them to
   1167 // nsExpatDriver::ParseBuffer. That should ensure that we almost never run out
   1168 // of memory in the sandbox.
   1169 void nsExpatDriver::ChunkAndParseBuffer(const char16_t* aBuffer,
   1170                                        uint32_t aLength, bool aIsFinal,
   1171                                        uint32_t* aPassedToExpat,
   1172                                        uint32_t* aConsumed,
   1173                                        XML_Size* aLastLineLength) {
   1174  *aConsumed = 0;
   1175  *aLastLineLength = 0;
   1176 
   1177  uint32_t remainder = aLength;
   1178  while (remainder > sMaxChunkLength) {
   1179    ParseChunk(aBuffer, sMaxChunkLength, ChunkOrBufferIsFinal::None, aConsumed,
   1180               aLastLineLength);
   1181    aBuffer += sMaxChunkLength;
   1182    remainder -= sMaxChunkLength;
   1183    if (NS_FAILED(mInternalState)) {
   1184      // Stop parsing if there's an error (including if we're blocked or
   1185      // interrupted).
   1186      *aPassedToExpat = aLength - remainder;
   1187      return;
   1188    }
   1189  }
   1190 
   1191  ParseChunk(aBuffer, remainder,
   1192             aIsFinal ? ChunkOrBufferIsFinal::FinalChunkAndBuffer
   1193                      : ChunkOrBufferIsFinal::FinalChunk,
   1194             aConsumed, aLastLineLength);
   1195  *aPassedToExpat = aLength;
   1196 }
   1197 
   1198 void nsExpatDriver::ParseChunk(const char16_t* aBuffer, uint32_t aLength,
   1199                               ChunkOrBufferIsFinal aIsFinal,
   1200                               uint32_t* aConsumed, XML_Size* aLastLineLength) {
   1201  NS_ASSERTION((aBuffer && aLength != 0) || (!aBuffer && aLength == 0), "?");
   1202  NS_ASSERTION(mInternalState != NS_OK ||
   1203                   (aIsFinal == ChunkOrBufferIsFinal::FinalChunkAndBuffer) ||
   1204                   aBuffer,
   1205               "Useless call, we won't call Expat");
   1206  MOZ_ASSERT(!BlockedOrInterrupted() || !aBuffer,
   1207             "Non-null buffer when resuming");
   1208  MOZ_ASSERT(mExpatParser);
   1209 
   1210  auto parserBytesBefore_verifier = [&](auto parserBytesBefore) {
   1211    MOZ_RELEASE_ASSERT(parserBytesBefore >= 0, "Unexpected value");
   1212    MOZ_RELEASE_ASSERT(parserBytesBefore % sizeof(char16_t) == 0,
   1213                       "Consumed part of a char16_t?");
   1214    return parserBytesBefore;
   1215  };
   1216  int32_t parserBytesBefore = RLBOX_EXPAT_SAFE_MCALL(
   1217      MOZ_XML_GetCurrentByteIndex, parserBytesBefore_verifier);
   1218 
   1219  if (mInternalState != NS_OK && !BlockedOrInterrupted()) {
   1220    return;
   1221  }
   1222 
   1223  XML_Status status;
   1224  bool inParser = mInParser;  // Save in-parser status
   1225  mInParser = true;
   1226  Maybe<TransferBuffer<char16_t>> buffer;
   1227  if (BlockedOrInterrupted()) {
   1228    mInternalState = NS_OK;  // Resume in case we're blocked.
   1229    status = RLBOX_EXPAT_SAFE_MCALL(MOZ_XML_ResumeParser, status_verifier);
   1230  } else {
   1231    buffer.emplace(Sandbox(), aBuffer, aLength);
   1232    MOZ_RELEASE_ASSERT(!aBuffer || !!*buffer.ref(),
   1233                       "Chunking should avoid OOM in ParseBuffer");
   1234 
   1235    status = RLBOX_EXPAT_SAFE_MCALL(
   1236        MOZ_XML_Parse, status_verifier,
   1237        rlbox::sandbox_reinterpret_cast<const char*>(*buffer.ref()),
   1238        aLength * sizeof(char16_t),
   1239        aIsFinal == ChunkOrBufferIsFinal::FinalChunkAndBuffer);
   1240  }
   1241  mInParser = inParser;  // Restore in-parser status
   1242 
   1243  auto parserBytesConsumed_verifier = [&](auto parserBytesConsumed) {
   1244    MOZ_RELEASE_ASSERT(parserBytesConsumed >= 0, "Unexpected value");
   1245    MOZ_RELEASE_ASSERT(parserBytesConsumed >= parserBytesBefore,
   1246                       "How'd this happen?");
   1247    MOZ_RELEASE_ASSERT(parserBytesConsumed % sizeof(char16_t) == 0,
   1248                       "Consumed part of a char16_t?");
   1249    return parserBytesConsumed;
   1250  };
   1251  int32_t parserBytesConsumed = RLBOX_EXPAT_SAFE_MCALL(
   1252      MOZ_XML_GetCurrentByteIndex, parserBytesConsumed_verifier);
   1253 
   1254  // Consumed something.
   1255  *aConsumed += (parserBytesConsumed - parserBytesBefore) / sizeof(char16_t);
   1256 
   1257  NS_ASSERTION(status != XML_STATUS_SUSPENDED || BlockedOrInterrupted(),
   1258               "Inconsistent expat suspension state.");
   1259 
   1260  if (status == XML_STATUS_ERROR) {
   1261    mInternalState = NS_ERROR_HTMLPARSER_STOPPARSING;
   1262  }
   1263 
   1264  if (*aConsumed > 0 &&
   1265      (aIsFinal != ChunkOrBufferIsFinal::None || NS_FAILED(mInternalState))) {
   1266    *aLastLineLength = RLBOX_EXPAT_SAFE_MCALL(MOZ_XML_GetCurrentColumnNumber,
   1267                                              safe_unverified<XML_Size>);
   1268  }
   1269 }
   1270 
   1271 nsresult nsExpatDriver::ResumeParse(nsScanner& aScanner, bool aIsFinalChunk) {
   1272  // We keep the scanner pointing to the position where Expat will start
   1273  // parsing.
   1274  nsScannerIterator currentExpatPosition;
   1275  aScanner.CurrentPosition(currentExpatPosition);
   1276 
   1277  // This is the start of the first buffer that we need to pass to Expat.
   1278  nsScannerIterator start = currentExpatPosition;
   1279  start.advance(mExpatBuffered);
   1280 
   1281  // This is the end of the last buffer (at this point, more data could come in
   1282  // later).
   1283  nsScannerIterator end;
   1284  aScanner.EndReading(end);
   1285 
   1286  MOZ_LOG(gExpatDriverLog, LogLevel::Debug,
   1287          ("Remaining in expat's buffer: %i, remaining in scanner: %zu.",
   1288           mExpatBuffered, Distance(start, end)));
   1289 
   1290  // We want to call Expat if we have more buffers, or if we know there won't
   1291  // be more buffers (and so we want to flush the remaining data), or if we're
   1292  // currently blocked and there's data in Expat's buffer.
   1293  while (start != end || (aIsFinalChunk && !mMadeFinalCallToExpat) ||
   1294         (BlockedOrInterrupted() && mExpatBuffered > 0)) {
   1295    bool noMoreBuffers = start == end && aIsFinalChunk;
   1296    bool blocked = BlockedOrInterrupted();
   1297 
   1298    const char16_t* buffer;
   1299    uint32_t length;
   1300    if (blocked || noMoreBuffers) {
   1301      // If we're blocked we just resume Expat so we don't need a buffer, if
   1302      // there aren't any more buffers we pass a null buffer to Expat.
   1303      buffer = nullptr;
   1304      length = 0;
   1305 
   1306      if (blocked) {
   1307        MOZ_LOG(
   1308            gExpatDriverLog, LogLevel::Debug,
   1309            ("Resuming Expat, will parse data remaining in Expat's "
   1310             "buffer.\nContent of Expat's buffer:\n-----\n%s\n-----\n",
   1311             NS_ConvertUTF16toUTF8(currentExpatPosition.get(), mExpatBuffered)
   1312                 .get()));
   1313      } else {
   1314        NS_ASSERTION(mExpatBuffered == Distance(currentExpatPosition, end),
   1315                     "Didn't pass all the data to Expat?");
   1316        MOZ_LOG(
   1317            gExpatDriverLog, LogLevel::Debug,
   1318            ("Last call to Expat, will parse data remaining in Expat's "
   1319             "buffer.\nContent of Expat's buffer:\n-----\n%s\n-----\n",
   1320             NS_ConvertUTF16toUTF8(currentExpatPosition.get(), mExpatBuffered)
   1321                 .get()));
   1322      }
   1323    } else {
   1324      buffer = start.get();
   1325      length = uint32_t(start.size_forward());
   1326 
   1327      MOZ_LOG(gExpatDriverLog, LogLevel::Debug,
   1328              ("Calling Expat, will parse data remaining in Expat's buffer and "
   1329               "new data.\nContent of Expat's buffer:\n-----\n%s\n-----\nNew "
   1330               "data:\n-----\n%s\n-----\n",
   1331               NS_ConvertUTF16toUTF8(currentExpatPosition.get(), mExpatBuffered)
   1332                   .get(),
   1333               NS_ConvertUTF16toUTF8(start.get(), length).get()));
   1334    }
   1335 
   1336    uint32_t passedToExpat;
   1337    uint32_t consumed;
   1338    XML_Size lastLineLength;
   1339    ChunkAndParseBuffer(buffer, length, noMoreBuffers, &passedToExpat,
   1340                        &consumed, &lastLineLength);
   1341    MOZ_ASSERT_IF(passedToExpat != length, NS_FAILED(mInternalState));
   1342    MOZ_ASSERT(consumed <= passedToExpat + mExpatBuffered);
   1343    if (consumed > 0) {
   1344      nsScannerIterator oldExpatPosition = currentExpatPosition;
   1345      currentExpatPosition.advance(consumed);
   1346 
   1347      // We consumed some data, we want to store the last line of data that
   1348      // was consumed in case we run into an error (to show the line in which
   1349      // the error occurred).
   1350 
   1351      if (lastLineLength <= consumed) {
   1352        // The length of the last line was less than what expat consumed, so
   1353        // there was at least one line break in the consumed data. Store the
   1354        // last line until the point where we stopped parsing.
   1355        nsScannerIterator startLastLine = currentExpatPosition;
   1356        startLastLine.advance(-((ptrdiff_t)lastLineLength));
   1357        if (!CopyUnicodeTo(startLastLine, currentExpatPosition, mLastLine)) {
   1358          return (mInternalState = NS_ERROR_OUT_OF_MEMORY);
   1359        }
   1360      } else {
   1361        // There was no line break in the consumed data, append the consumed
   1362        // data.
   1363        if (!AppendUnicodeTo(oldExpatPosition, currentExpatPosition,
   1364                             mLastLine)) {
   1365          return (mInternalState = NS_ERROR_OUT_OF_MEMORY);
   1366        }
   1367      }
   1368    }
   1369 
   1370    mExpatBuffered += passedToExpat - consumed;
   1371 
   1372    if (BlockedOrInterrupted()) {
   1373      MOZ_LOG(gExpatDriverLog, LogLevel::Debug,
   1374              ("Blocked or interrupted parser (probably for loading linked "
   1375               "stylesheets or scripts)."));
   1376 
   1377      aScanner.SetPosition(currentExpatPosition, true);
   1378      aScanner.Mark();
   1379 
   1380      return mInternalState;
   1381    }
   1382 
   1383    if (noMoreBuffers && mExpatBuffered == 0) {
   1384      mMadeFinalCallToExpat = true;
   1385    }
   1386 
   1387    if (NS_FAILED(mInternalState)) {
   1388      if (RLBOX_EXPAT_SAFE_MCALL(MOZ_XML_GetErrorCode, error_verifier) !=
   1389          XML_ERROR_NONE) {
   1390        NS_ASSERTION(mInternalState == NS_ERROR_HTMLPARSER_STOPPARSING,
   1391                     "Unexpected error");
   1392 
   1393        // Look for the next newline after the last one we consumed
   1394        nsScannerIterator lastLine = currentExpatPosition;
   1395        while (lastLine != end) {
   1396          length = uint32_t(lastLine.size_forward());
   1397          uint32_t endOffset = 0;
   1398          const char16_t* buffer = lastLine.get();
   1399          while (endOffset < length && buffer[endOffset] != '\n' &&
   1400                 buffer[endOffset] != '\r') {
   1401            ++endOffset;
   1402          }
   1403          mLastLine.Append(Substring(buffer, buffer + endOffset));
   1404          if (endOffset < length) {
   1405            // We found a newline.
   1406            break;
   1407          }
   1408 
   1409          lastLine.advance(length);
   1410        }
   1411 
   1412        HandleError();
   1413      }
   1414 
   1415      return mInternalState;
   1416    }
   1417 
   1418    // Either we have more buffers, or we were blocked (and we'll flush in the
   1419    // next iteration), or we should have emptied Expat's buffer.
   1420    NS_ASSERTION(!noMoreBuffers || blocked ||
   1421                     (mExpatBuffered == 0 && currentExpatPosition == end),
   1422                 "Unreachable data left in Expat's buffer");
   1423 
   1424    start.advance(length);
   1425 
   1426    // It's possible for start to have passed end if we received more data
   1427    // (e.g. if we spun the event loop in an inline script). Reload end now
   1428    // to compensate.
   1429    aScanner.EndReading(end);
   1430  }
   1431 
   1432  aScanner.SetPosition(currentExpatPosition, true);
   1433  aScanner.Mark();
   1434 
   1435  MOZ_LOG(gExpatDriverLog, LogLevel::Debug,
   1436          ("Remaining in expat's buffer: %i, remaining in scanner: %zu.",
   1437           mExpatBuffered, Distance(currentExpatPosition, end)));
   1438 
   1439  return NS_SUCCEEDED(mInternalState) ? NS_ERROR_HTMLPARSER_EOF : NS_OK;
   1440 }
   1441 
   1442 mozilla::UniquePtr<mozilla::RLBoxSandboxDataBase>
   1443 RLBoxExpatSandboxPool::CreateSandboxData(uint64_t aSize) {
   1444  // Create expat sandbox
   1445  auto sandbox = mozilla::MakeUnique<rlbox_sandbox_expat>();
   1446 
   1447 #ifdef MOZ_WASM_SANDBOXING_EXPAT
   1448  const w2c_mem_capacity capacity =
   1449      get_valid_wasm2c_memory_capacity(aSize, true /* 32-bit wasm memory*/);
   1450  bool create_ok = sandbox->create_sandbox(/* shouldAbortOnFailure = */ false,
   1451                                           &capacity, "rlbox_wasm2c_expat");
   1452 #else
   1453  bool create_ok = sandbox->create_sandbox();
   1454 #endif
   1455 
   1456  NS_ENSURE_TRUE(create_ok, nullptr);
   1457 
   1458  mozilla::UniquePtr<RLBoxExpatSandboxData> sbxData =
   1459      mozilla::MakeUnique<RLBoxExpatSandboxData>(aSize);
   1460 
   1461  // Register callbacks common to both system and non-system principals
   1462  sbxData->mHandleXMLDeclaration =
   1463      sandbox->register_callback(Driver_HandleXMLDeclaration);
   1464  sbxData->mHandleCharacterData =
   1465      sandbox->register_callback(Driver_HandleCharacterData);
   1466  sbxData->mHandleProcessingInstruction =
   1467      sandbox->register_callback(Driver_HandleProcessingInstruction);
   1468  sbxData->mHandleDefault = sandbox->register_callback(Driver_HandleDefault);
   1469  sbxData->mHandleExternalEntityRef =
   1470      sandbox->register_callback(Driver_HandleExternalEntityRef);
   1471  sbxData->mHandleComment = sandbox->register_callback(Driver_HandleComment);
   1472  sbxData->mHandleStartCdataSection =
   1473      sandbox->register_callback(Driver_HandleStartCdataSection);
   1474  sbxData->mHandleEndCdataSection =
   1475      sandbox->register_callback(Driver_HandleEndCdataSection);
   1476  sbxData->mHandleStartDoctypeDecl =
   1477      sandbox->register_callback(Driver_HandleStartDoctypeDecl);
   1478  sbxData->mHandleEndDoctypeDecl =
   1479      sandbox->register_callback(Driver_HandleEndDoctypeDecl);
   1480 
   1481  sbxData->mSandbox = std::move(sandbox);
   1482 
   1483  return sbxData;
   1484 }
   1485 
   1486 mozilla::StaticRefPtr<RLBoxExpatSandboxPool> RLBoxExpatSandboxPool::sSingleton;
   1487 
   1488 void RLBoxExpatSandboxPool::Initialize(size_t aDelaySeconds) {
   1489  mozilla::AssertIsOnMainThread();
   1490  RLBoxExpatSandboxPool::sSingleton = new RLBoxExpatSandboxPool(aDelaySeconds);
   1491  ClearOnShutdown(&RLBoxExpatSandboxPool::sSingleton);
   1492 }
   1493 
   1494 void RLBoxExpatSandboxData::AttachDriver(bool aIsSystemPrincipal,
   1495                                         void* aDriver) {
   1496  MOZ_ASSERT(!mSandbox->sandbox_storage);
   1497  MOZ_ASSERT(mHandleStartElement.is_unregistered());
   1498  MOZ_ASSERT(mHandleEndElement.is_unregistered());
   1499 
   1500  if (aIsSystemPrincipal) {
   1501    mHandleStartElement = mSandbox->register_callback(
   1502        nsExpatDriver::HandleStartElementForSystemPrincipal);
   1503    mHandleEndElement = mSandbox->register_callback(
   1504        nsExpatDriver::HandleEndElementForSystemPrincipal);
   1505  } else {
   1506    mHandleStartElement =
   1507        mSandbox->register_callback(nsExpatDriver::HandleStartElement);
   1508    mHandleEndElement =
   1509        mSandbox->register_callback(nsExpatDriver::HandleEndElement);
   1510  }
   1511 
   1512  mSandbox->sandbox_storage = aDriver;
   1513 }
   1514 
// Reverses AttachDriver(): clears the driver pointer kept in the sandbox's
// sandbox_storage slot and unregisters the start/end element callbacks, which
// restores the state that AttachDriver() and the destructor assert on.
void RLBoxExpatSandboxData::DetachDriver() {
  mSandbox->sandbox_storage = nullptr;
  mHandleStartElement.unregister();
  mHandleEndElement.unregister();
}
   1520 
// Tears down the sandbox: unregisters the callbacks that were registered when
// the sandbox data was created, then destroys the sandbox itself. The
// per-driver element callbacks must already have been released through
// DetachDriver() (asserted below).
RLBoxExpatSandboxData::~RLBoxExpatSandboxData() {
  MOZ_ASSERT(mSandbox);

  // DetachDriver should always be called before a sandbox goes back into the
  // pool, and thus before it's freed.
  MOZ_ASSERT(!mSandbox->sandbox_storage);
  MOZ_ASSERT(mHandleStartElement.is_unregistered());
  MOZ_ASSERT(mHandleEndElement.is_unregistered());

  // Unregister callbacks
  mHandleXMLDeclaration.unregister();
  mHandleCharacterData.unregister();
  mHandleProcessingInstruction.unregister();
  mHandleDefault.unregister();
  mHandleExternalEntityRef.unregister();
  mHandleComment.unregister();
  mHandleStartCdataSection.unregister();
  mHandleEndCdataSection.unregister();
  mHandleStartDoctypeDecl.unregister();
  mHandleEndDoctypeDecl.unregister();
  // Destroy sandbox
  mSandbox->destroy_sandbox();
  MOZ_COUNT_DTOR(RLBoxExpatSandboxData);
}
   1545 
   1546 nsresult nsExpatDriver::Initialize(nsIURI* aURI, nsIContentSink* aSink) {
   1547  mSink = do_QueryInterface(aSink);
   1548  if (!mSink) {
   1549    NS_ERROR("nsExpatDriver didn't get an nsIExpatSink");
   1550    // Make sure future calls to us bail out as needed
   1551    mInternalState = NS_ERROR_UNEXPECTED;
   1552    return mInternalState;
   1553  }
   1554 
   1555  mOriginalSink = aSink;
   1556 
   1557  static const char16_t kExpatSeparator[] = {kExpatSeparatorChar, '\0'};
   1558 
   1559  // Get the doc if any
   1560  nsCOMPtr<Document> doc = do_QueryInterface(mOriginalSink->GetTarget());
   1561  if (doc) {
   1562    nsCOMPtr<nsPIDOMWindowOuter> win = doc->GetWindow();
   1563    nsCOMPtr<nsPIDOMWindowInner> inner;
   1564    if (win) {
   1565      inner = win->GetCurrentInnerWindow();
   1566    } else {
   1567      bool aHasHadScriptHandlingObject;
   1568      nsIScriptGlobalObject* global =
   1569          doc->GetScriptHandlingObject(aHasHadScriptHandlingObject);
   1570      if (global) {
   1571        inner = do_QueryInterface(global);
   1572      }
   1573    }
   1574    if (inner) {
   1575      mInnerWindowID = inner->WindowID();
   1576    }
   1577  }
   1578 
   1579  // Create sandbox
   1580  //
   1581  // We have to make sure the sandbox is large enough. We unscientifically
   1582  // request two MB. Note that the parsing itself is chunked so as not to
   1583  // require a large sandbox.
   1584  static const uint64_t minSandboxSize = 2 * 1024 * 1024;
   1585  MOZ_ASSERT(!mSandboxPoolData);
   1586  mSandboxPoolData =
   1587      RLBoxExpatSandboxPool::sSingleton->PopOrCreate(minSandboxSize);
   1588  NS_ENSURE_TRUE(mSandboxPoolData, NS_ERROR_OUT_OF_MEMORY);
   1589 
   1590  MOZ_ASSERT(SandboxData());
   1591 
   1592  SandboxData()->AttachDriver(doc && doc->NodePrincipal()->IsSystemPrincipal(),
   1593                              static_cast<void*>(this));
   1594 
   1595  // Create expat parser.
   1596  // We need to copy the encoding and namespace separator into the sandbox.
   1597  // For the noop sandbox we pass in the memsuite; for the Wasm sandbox, we
   1598  // pass in nullptr to let expat use the standard library memory suite.
   1599  auto expatSeparator = TransferBuffer<char16_t>(
   1600      Sandbox(), kExpatSeparator,
   1601      nsCharTraits<char16_t>::length(kExpatSeparator) + 1);
   1602  MOZ_RELEASE_ASSERT(*expatSeparator);
   1603  auto utf16 = TransferBuffer<char16_t>(
   1604      Sandbox(), kUTF16, nsCharTraits<char16_t>::length(kUTF16) + 1);
   1605  MOZ_RELEASE_ASSERT(*utf16);
   1606  mExpatParser = Sandbox()->invoke_sandbox_function(
   1607      MOZ_XML_ParserCreate_MM, *utf16, nullptr, *expatSeparator);
   1608  NS_ENSURE_TRUE(mExpatParser, NS_ERROR_FAILURE);
   1609 
   1610  RLBOX_EXPAT_MCALL(MOZ_XML_SetReturnNSTriplet, XML_TRUE);
   1611 
   1612 #ifdef XML_DTD
   1613  RLBOX_EXPAT_MCALL(MOZ_XML_SetParamEntityParsing,
   1614                    XML_PARAM_ENTITY_PARSING_ALWAYS);
   1615 #endif
   1616 
   1617  rlbox_sandbox_expat::convert_to_sandbox_equivalent_nonclass_t<unsigned long>
   1618      salt;
   1619  MOZ_RELEASE_ASSERT(mozilla::GenerateRandomBytesFromOS(&salt, sizeof(salt)));
   1620  MOZ_RELEASE_ASSERT(
   1621      RLBOX_EXPAT_SAFE_MCALL(MOZ_XML_SetHashSalt, safe_unverified<int>, salt));
   1622  MOZ_RELEASE_ASSERT(RLBOX_EXPAT_SAFE_MCALL(
   1623      MOZ_XML_SetReparseDeferralEnabled, safe_unverified<XML_Bool>, XML_FALSE));
   1624 
   1625  auto baseURI = GetExpatBaseURI(aURI);
   1626  auto uri =
   1627      TransferBuffer<XML_Char>(Sandbox(), &baseURI[0], std::size(baseURI));
   1628  RLBOX_EXPAT_MCALL(MOZ_XML_SetBase, *uri);
   1629 
   1630  // Set up the callbacks
   1631  RLBOX_EXPAT_MCALL(MOZ_XML_SetXmlDeclHandler,
   1632                    SandboxData()->mHandleXMLDeclaration);
   1633  RLBOX_EXPAT_MCALL(MOZ_XML_SetElementHandler,
   1634                    SandboxData()->mHandleStartElement,
   1635                    SandboxData()->mHandleEndElement);
   1636  RLBOX_EXPAT_MCALL(MOZ_XML_SetCharacterDataHandler,
   1637                    SandboxData()->mHandleCharacterData);
   1638  RLBOX_EXPAT_MCALL(MOZ_XML_SetProcessingInstructionHandler,
   1639                    SandboxData()->mHandleProcessingInstruction);
   1640  RLBOX_EXPAT_MCALL(MOZ_XML_SetDefaultHandlerExpand,
   1641                    SandboxData()->mHandleDefault);
   1642  RLBOX_EXPAT_MCALL(MOZ_XML_SetExternalEntityRefHandler,
   1643                    SandboxData()->mHandleExternalEntityRef);
   1644  RLBOX_EXPAT_MCALL(MOZ_XML_SetCommentHandler, SandboxData()->mHandleComment);
   1645  RLBOX_EXPAT_MCALL(MOZ_XML_SetCdataSectionHandler,
   1646                    SandboxData()->mHandleStartCdataSection,
   1647                    SandboxData()->mHandleEndCdataSection);
   1648 
   1649  RLBOX_EXPAT_MCALL(MOZ_XML_SetParamEntityParsing,
   1650                    XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
   1651  RLBOX_EXPAT_MCALL(MOZ_XML_SetDoctypeDeclHandler,
   1652                    SandboxData()->mHandleStartDoctypeDecl,
   1653                    SandboxData()->mHandleEndDoctypeDecl);
   1654 
   1655  return mInternalState;
   1656 }
   1657 
// Performs no work of its own; simply reports the driver's current internal
// state.
nsresult nsExpatDriver::BuildModel() { return mInternalState; }
   1659 
// Called once model building is done: calls Destroy() (unless we are being
// re-entered from the parser) and drops the references to both sinks.
void nsExpatDriver::DidBuildModel() {
  if (!mInParser) {
    // Because nsExpatDriver is cycle-collected, it gets destroyed
    // asynchronously. We want to eagerly release the sandbox back into the
    // pool so that it can be reused immediately, unless this is a reentrant
    // call (which we track with mInParser).
    Destroy();
  }
  mOriginalSink = nullptr;
  mSink = nullptr;
}
   1671 
// Stops parsing for good: if an expat parser exists it is stopped with
// XML_FALSE as the argument to MOZ_XML_StopParser, and the internal state is
// unconditionally set to NS_ERROR_HTMLPARSER_STOPPARSING.
void nsExpatDriver::Terminate() {
  // XXX - not sure what happens to the unparsed data.
  if (mExpatParser) {
    RLBOX_EXPAT_MCALL(MOZ_XML_StopParser, XML_FALSE);
  }
  mInternalState = NS_ERROR_HTMLPARSER_STOPPARSING;
}
   1679 
   1680 /*************************** Unused methods **********************************/
   1681 
   1682 void nsExpatDriver::MaybeStopParser(nsresult aState) {
   1683  if (NS_FAILED(aState)) {
   1684    // If we had a failure we want to override NS_ERROR_HTMLPARSER_INTERRUPTED
   1685    // and we want to override NS_ERROR_HTMLPARSER_BLOCK but not with
   1686    // NS_ERROR_HTMLPARSER_INTERRUPTED.
   1687    if (NS_SUCCEEDED(mInternalState) ||
   1688        mInternalState == NS_ERROR_HTMLPARSER_INTERRUPTED ||
   1689        (mInternalState == NS_ERROR_HTMLPARSER_BLOCK &&
   1690         aState != NS_ERROR_HTMLPARSER_INTERRUPTED)) {
   1691      mInternalState = (aState == NS_ERROR_HTMLPARSER_INTERRUPTED ||
   1692                        aState == NS_ERROR_HTMLPARSER_BLOCK)
   1693                           ? aState
   1694                           : NS_ERROR_HTMLPARSER_STOPPARSING;
   1695    }
   1696 
   1697    // If we get an error then we need to stop Expat (by calling XML_StopParser
   1698    // with false as the last argument). If the parser should be blocked or
   1699    // interrupted we need to pause Expat (by calling XML_StopParser with
   1700    // true as the last argument).
   1701 
   1702    // Note that due to Bug 1742913, we need to explicitly cast the parameter to
   1703    // an int so that the value is correctly zero extended.
   1704    int resumable = BlockedOrInterrupted();
   1705    RLBOX_EXPAT_MCALL(MOZ_XML_StopParser, resumable);
   1706  } else if (NS_SUCCEEDED(mInternalState)) {
   1707    // Only clobber mInternalState with the success code if we didn't block or
   1708    // interrupt before.
   1709    mInternalState = aState;
   1710  }
   1711 }
   1712 
   1713 nsExpatDriver::ExpatBaseURI nsExpatDriver::GetExpatBaseURI(nsIURI* aURI) {
   1714  mURIs.AppendElement(aURI);
   1715 
   1716  MOZ_RELEASE_ASSERT(mURIs.Length() <= std::numeric_limits<XML_Char>::max());
   1717 
   1718  return ExpatBaseURI(static_cast<XML_Char>(mURIs.Length()), XML_T('\0'));
   1719 }
   1720 
   1721 nsIURI* nsExpatDriver::GetBaseURI(const XML_Char* aBase) const {
   1722  MOZ_ASSERT(aBase[0] != '\0' && aBase[1] == '\0');
   1723 
   1724  if (aBase[0] == '\0' || aBase[1] != '\0') {
   1725    return nullptr;
   1726  }
   1727 
   1728  uint32_t index = aBase[0] - 1;
   1729  MOZ_ASSERT(index < mURIs.Length());
   1730 
   1731  return mURIs.SafeElementAt(index);
   1732 }
   1733 
   1734 inline RLBoxExpatSandboxData* nsExpatDriver::SandboxData() const {
   1735  return reinterpret_cast<RLBoxExpatSandboxData*>(
   1736      mSandboxPoolData->SandboxData());
   1737 }
   1738 
   1739 inline rlbox_sandbox_expat* nsExpatDriver::Sandbox() const {
   1740  return SandboxData()->Sandbox();
   1741 }