tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

nsHtml5TokenizerCppSupplement.h (14112B)


      1 /* This Source Code Form is subject to the terms of the Mozilla Public
      2 * License, v. 2.0. If a copy of the MPL was not distributed with this
      3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      4 
      5 #include "mozilla/CheckedInt.h"
      6 #include "mozilla/Likely.h"
      7 
      // Largest buffer length accepted below, counted in char16_t units.
      // INT32_MAX is (2^31)-1, so 2^30 is the highest power of two that fits in
      // an int32_t. The size in bytes is twice that; it still fits in a 32-bit
      // size_t, although a contiguous chunk of memory that large is pretty
      // unlikely to be available on a 32-bit system.
      #define MAX_POWER_OF_TWO_IN_INT32 (1 << 30)
     14 
     15 bool nsHtml5Tokenizer::EnsureBufferSpace(int32_t aLength) {
     16  MOZ_RELEASE_ASSERT(aLength >= 0, "Negative length.");
     17  if (aLength > MAX_POWER_OF_TWO_IN_INT32) {
     18    // Can't happen when loading from network.
     19    return false;
     20  }
     21  mozilla::CheckedInt<int32_t> worstCase(strBufLen);
     22  worstCase += aLength;
     23  worstCase += charRefBufLen;
     24  // Add 2 to account for emissions of LT_GT, LT_SOLIDUS and RSQB_RSQB.
     25  // Adding to the general worst case instead of only the
     26  // TreeBuilder-exposed worst case to avoid re-introducing a bug when
     27  // unifying the tokenizer and tree builder buffers in the future.
     28  worstCase += 2;
     29  if (!worstCase.isValid()) {
     30    return false;
     31  }
     32  if (worstCase.value() > MAX_POWER_OF_TWO_IN_INT32) {
     33    return false;
     34  }
     35  // TODO: Unify nsHtml5Tokenizer::strBuf and nsHtml5TreeBuilder::charBuffer
     36  // so that the call below becomes unnecessary.
     37  if (!tokenHandler->EnsureBufferSpace(worstCase.value())) {
     38    return false;
     39  }
     40  if (!strBuf) {
     41    if (worstCase.value() < MAX_POWER_OF_TWO_IN_INT32) {
     42      // Add one to round to the next power of two to avoid immediate
     43      // reallocation once there are a few characters in the buffer.
     44      worstCase += 1;
     45    }
     46    strBuf = jArray<char16_t, int32_t>::newFallibleJArray(
     47        mozilla::RoundUpPow2(worstCase.value()));
     48    if (!strBuf) {
     49      return false;
     50    }
     51  } else if (worstCase.value() > strBuf.length) {
     52    jArray<char16_t, int32_t> newBuf =
     53        jArray<char16_t, int32_t>::newFallibleJArray(
     54            mozilla::RoundUpPow2(worstCase.value()));
     55    if (!newBuf) {
     56      return false;
     57    }
     58    memcpy(newBuf, strBuf, sizeof(char16_t) * size_t(strBufLen));
     59    strBuf = newBuf;
     60  }
     61  return true;
     62 }
     63 
     64 MOZ_COLD MOZ_NEVER_INLINE void
     65 nsHtml5Tokenizer::EnsureBufferSpaceShouldNeverHappen(int32_t aLength) {
     66  MOZ_DIAGNOSTIC_ASSERT(false,
     67                        "This is never supposed to happen. Please file a bug "
     68                        "with steps to reproduce!");
     69  if (!EnsureBufferSpace(aLength)) {
     70    MOZ_CRASH(
     71        "Unrecovable allocation failure in situation that should never happen. "
     72        "Please file a bug with steps to reproduce!");
     73  }
     74 }
     75 
     76 bool nsHtml5Tokenizer::TemplatePushedOrHeadPopped() {
     77  if (encodingDeclarationHandler) {
     78    return encodingDeclarationHandler->TemplatePushedOrHeadPopped();
     79  }
     80  return false;
     81 }
     82 
     83 void nsHtml5Tokenizer::RememberGt(int32_t aPos) {
     84  if (encodingDeclarationHandler) {
     85    return encodingDeclarationHandler->RememberGt(aPos);
     86  }
     87 }
     88 
     89 void nsHtml5Tokenizer::StartPlainText() {
     90  stateSave = nsHtml5Tokenizer::PLAINTEXT;
     91 }
     92 
     93 void nsHtml5Tokenizer::EnableViewSource(nsHtml5Highlighter* aHighlighter) {
     94  mViewSource = mozilla::WrapUnique(aHighlighter);
     95 }
     96 
     97 bool nsHtml5Tokenizer::ShouldFlushViewSource() {
     98  return mViewSource->ShouldFlushOps();
     99 }
    100 
    101 mozilla::Result<bool, nsresult> nsHtml5Tokenizer::FlushViewSource() {
    102  return mViewSource->FlushOps();
    103 }
    104 
    105 void nsHtml5Tokenizer::StartViewSource(const nsAutoString& aTitle) {
    106  mViewSource->Start(aTitle);
    107 }
    108 
    109 void nsHtml5Tokenizer::StartViewSourceBodyContents() {
    110  mViewSource->StartBodyContents();
    111 }
    112 
    113 [[nodiscard]] bool nsHtml5Tokenizer::EndViewSource() {
    114  return mViewSource->End();
    115 }
    116 
    117 void nsHtml5Tokenizer::SetViewSourceOpSink(nsAHtml5TreeOpSink* aOpSink) {
    118  mViewSource->SetOpSink(aOpSink);
    119 }
    120 
    121 void nsHtml5Tokenizer::RewindViewSource() { mViewSource->Rewind(); }
    122 
    123 nsHtml5String nsHtml5Tokenizer::TryAtomizeForSingleDigit() {
    124  if (!newAttributesEachTime && strBufLen == 1 && strBuf[0] >= '0' &&
    125      strBuf[0] <= '9') {
    126    static nsStaticAtom* const digitAtoms[10] = {
    127        nsGkAtoms::_0, nsGkAtoms::_1, nsGkAtoms::_2, nsGkAtoms::_3,
    128        nsGkAtoms::_4, nsGkAtoms::_5, nsGkAtoms::_6, nsGkAtoms::_7,
    129        nsGkAtoms::_8, nsGkAtoms::_9};
    130    nsAtom* atom = digitAtoms[strBuf[0] - '0'];
    131    nsHtml5String result = nsHtml5String::FromAtom(do_AddRef(atom));
    132    clearStrBufAfterUse();
    133    return result;
    134  }
    135  return nullptr;
    136 }
    137 
    138 void nsHtml5Tokenizer::errWarnLtSlashInRcdata() {}
    139 
    140 // The null checks below annotated MOZ_LIKELY are not actually necessary.
    141 
    142 void nsHtml5Tokenizer::errUnquotedAttributeValOrNull(char16_t c) {
    143  if (MOZ_LIKELY(mViewSource)) {
    144    switch (c) {
    145      case '<':
    146        mViewSource->AddErrorToCurrentNode("errUnquotedAttributeLt");
    147        return;
    148      case '`':
    149        mViewSource->AddErrorToCurrentNode("errUnquotedAttributeGrave");
    150        return;
    151      case '\'':
    152      case '"':
    153        mViewSource->AddErrorToCurrentNode("errUnquotedAttributeQuote");
    154        return;
    155      case '=':
    156        mViewSource->AddErrorToCurrentNode("errUnquotedAttributeEquals");
    157        return;
    158    }
    159  }
    160 }
    161 
    162 void nsHtml5Tokenizer::errLtOrEqualsOrGraveInUnquotedAttributeOrNull(
    163    char16_t c) {
    164  if (MOZ_LIKELY(mViewSource)) {
    165    switch (c) {
    166      case '=':
    167        mViewSource->AddErrorToCurrentNode("errUnquotedAttributeStartEquals");
    168        return;
    169      case '<':
    170        mViewSource->AddErrorToCurrentNode("errUnquotedAttributeStartLt");
    171        return;
    172      case '`':
    173        mViewSource->AddErrorToCurrentNode("errUnquotedAttributeStartGrave");
    174        return;
    175    }
    176  }
    177 }
    178 
    179 void nsHtml5Tokenizer::errBadCharBeforeAttributeNameOrNull(char16_t c) {
    180  if (MOZ_LIKELY(mViewSource)) {
    181    if (c == '<') {
    182      mViewSource->AddErrorToCurrentNode("errBadCharBeforeAttributeNameLt");
    183    } else if (c == '=') {
    184      errEqualsSignBeforeAttributeName();
    185    } else if (c != 0xFFFD) {
    186      errQuoteBeforeAttributeName(c);
    187    }
    188  }
    189 }
    190 
    191 void nsHtml5Tokenizer::errBadCharAfterLt(char16_t c) {
    192  if (MOZ_LIKELY(mViewSource)) {
    193    mViewSource->AddErrorToCurrentNode("errBadCharAfterLt");
    194  }
    195 }
    196 
    197 void nsHtml5Tokenizer::errQuoteOrLtInAttributeNameOrNull(char16_t c) {
    198  if (MOZ_LIKELY(mViewSource)) {
    199    if (c == '<') {
    200      mViewSource->AddErrorToCurrentNode("errLtInAttributeName");
    201    } else if (c != 0xFFFD) {
    202      mViewSource->AddErrorToCurrentNode("errQuoteInAttributeName");
    203    }
    204  }
    205 }
    206 
    207 void nsHtml5Tokenizer::maybeErrAttributesOnEndTag(
    208    nsHtml5HtmlAttributes* attrs) {
    209  if (mViewSource && attrs->getLength() != 0) {
    210    /*
    211     * When an end tag token is emitted with attributes, that is a parse
    212     * error.
    213     */
    214    mViewSource->AddErrorToCurrentRun("maybeErrAttributesOnEndTag");
    215  }
    216 }
    217 
    218 void nsHtml5Tokenizer::maybeErrSlashInEndTag(bool selfClosing) {
    219  if (mViewSource && selfClosing && endTag) {
    220    mViewSource->AddErrorToCurrentSlash("maybeErrSlashInEndTag");
    221  }
    222 }
    223 
    224 void nsHtml5Tokenizer::errGarbageAfterLtSlash() {
    225  if (MOZ_LIKELY(mViewSource)) {
    226    mViewSource->AddErrorToCurrentNode("errGarbageAfterLtSlash");
    227  }
    228 }
    229 
    230 void nsHtml5Tokenizer::errLtSlashGt() {
    231  if (MOZ_LIKELY(mViewSource)) {
    232    mViewSource->AddErrorToCurrentNode("errLtSlashGt");
    233  }
    234 }
    235 
    236 void nsHtml5Tokenizer::errCharRefLacksSemicolon() {
    237  if (MOZ_UNLIKELY(mViewSource)) {
    238    mViewSource->AddErrorToCurrentNode("errCharRefLacksSemicolon");
    239  }
    240 }
    241 
    242 void nsHtml5Tokenizer::errNoDigitsInNCR() {
    243  if (MOZ_UNLIKELY(mViewSource)) {
    244    mViewSource->AddErrorToCurrentNode("errNoDigitsInNCR");
    245  }
    246 }
    247 
    248 void nsHtml5Tokenizer::errGtInSystemId() {
    249  if (MOZ_LIKELY(mViewSource)) {
    250    mViewSource->AddErrorToCurrentNode("errGtInSystemId");
    251  }
    252 }
    253 
    254 void nsHtml5Tokenizer::errGtInPublicId() {
    255  if (MOZ_LIKELY(mViewSource)) {
    256    mViewSource->AddErrorToCurrentNode("errGtInPublicId");
    257  }
    258 }
    259 
    260 void nsHtml5Tokenizer::errNamelessDoctype() {
    261  if (MOZ_LIKELY(mViewSource)) {
    262    mViewSource->AddErrorToCurrentNode("errNamelessDoctype");
    263  }
    264 }
    265 
    266 void nsHtml5Tokenizer::errConsecutiveHyphens() {
    267  if (MOZ_UNLIKELY(mViewSource)) {
    268    mViewSource->AddErrorToCurrentNode("errConsecutiveHyphens");
    269  }
    270 }
    271 
    272 void nsHtml5Tokenizer::errPrematureEndOfComment() {
    273  if (MOZ_LIKELY(mViewSource)) {
    274    mViewSource->AddErrorToCurrentNode("errPrematureEndOfComment");
    275  }
    276 }
    277 
    278 void nsHtml5Tokenizer::errBogusComment() {
    279  if (MOZ_UNLIKELY(mViewSource)) {
    280    mViewSource->AddErrorToCurrentNode("errBogusComment");
    281  }
    282 }
    283 
    284 void nsHtml5Tokenizer::errSlashNotFollowedByGt() {
    285  if (MOZ_LIKELY(mViewSource)) {
    286    mViewSource->AddErrorToCurrentSlash("errSlashNotFollowedByGt");
    287  }
    288 }
    289 
    290 void nsHtml5Tokenizer::errNoSpaceBetweenAttributes() {
    291  if (MOZ_LIKELY(mViewSource)) {
    292    mViewSource->AddErrorToCurrentNode("errNoSpaceBetweenAttributes");
    293  }
    294 }
    295 
    296 void nsHtml5Tokenizer::errAttributeValueMissing() {
    297  if (MOZ_LIKELY(mViewSource)) {
    298    mViewSource->AddErrorToCurrentNode("errAttributeValueMissing");
    299  }
    300 }
    301 
    302 void nsHtml5Tokenizer::errEqualsSignBeforeAttributeName() {
    303  if (MOZ_LIKELY(mViewSource)) {
    304    mViewSource->AddErrorToCurrentNode("errEqualsSignBeforeAttributeName");
    305  }
    306 }
    307 
    308 void nsHtml5Tokenizer::errLtGt() {
    309  if (MOZ_LIKELY(mViewSource)) {
    310    mViewSource->AddErrorToCurrentNode("errLtGt");
    311  }
    312 }
    313 
    314 void nsHtml5Tokenizer::errProcessingInstruction() {
    315  if (MOZ_LIKELY(mViewSource)) {
    316    mViewSource->AddErrorToCurrentNode("errProcessingInstruction");
    317  }
    318 }
    319 
    320 void nsHtml5Tokenizer::errUnescapedAmpersandInterpretedAsCharacterReference() {
    321  if (MOZ_UNLIKELY(mViewSource)) {
    322    mViewSource->AddErrorToCurrentAmpersand(
    323        "errUnescapedAmpersandInterpretedAsCharacterReference");
    324  }
    325 }
    326 
    327 void nsHtml5Tokenizer::errNotSemicolonTerminated() {
    328  if (MOZ_UNLIKELY(mViewSource)) {
    329    mViewSource->AddErrorToCurrentNode("errNotSemicolonTerminated");
    330  }
    331 }
    332 
    333 void nsHtml5Tokenizer::errNoNamedCharacterMatch() {
    334  if (MOZ_UNLIKELY(mViewSource)) {
    335    mViewSource->AddErrorToCurrentAmpersand("errNoNamedCharacterMatch");
    336  }
    337 }
    338 
    339 void nsHtml5Tokenizer::errQuoteBeforeAttributeName(char16_t c) {
    340  if (MOZ_LIKELY(mViewSource)) {
    341    mViewSource->AddErrorToCurrentNode("errQuoteBeforeAttributeName");
    342  }
    343 }
    344 
    345 void nsHtml5Tokenizer::errExpectedPublicId() {
    346  if (MOZ_LIKELY(mViewSource)) {
    347    mViewSource->AddErrorToCurrentNode("errExpectedPublicId");
    348  }
    349 }
    350 
    351 void nsHtml5Tokenizer::errBogusDoctype() {
    352  if (MOZ_UNLIKELY(mViewSource)) {
    353    mViewSource->AddErrorToCurrentNode("errBogusDoctype");
    354  }
    355 }
    356 
    357 void nsHtml5Tokenizer::errNcrSurrogate() {
    358  if (MOZ_UNLIKELY(mViewSource)) {
    359    mViewSource->AddErrorToCurrentNode("errNcrSurrogate");
    360  }
    361 }
    362 
    363 void nsHtml5Tokenizer::errNcrInC1Range() {
    364  if (MOZ_UNLIKELY(mViewSource)) {
    365    mViewSource->AddErrorToCurrentNode("errNcrInC1Range");
    366  }
    367 }
    368 
    369 void nsHtml5Tokenizer::errEofInPublicId() {
    370  if (MOZ_UNLIKELY(mViewSource)) {
    371    mViewSource->AddErrorToCurrentRun("errEofInPublicId");
    372  }
    373 }
    374 
    375 void nsHtml5Tokenizer::errEofInComment() {
    376  if (MOZ_UNLIKELY(mViewSource)) {
    377    mViewSource->AddErrorToCurrentRun("errEofInComment");
    378  }
    379 }
    380 
    381 void nsHtml5Tokenizer::errEofInDoctype() {
    382  if (MOZ_UNLIKELY(mViewSource)) {
    383    mViewSource->AddErrorToCurrentRun("errEofInDoctype");
    384  }
    385 }
    386 
    387 void nsHtml5Tokenizer::errEofInAttributeValue() {
    388  if (MOZ_UNLIKELY(mViewSource)) {
    389    mViewSource->AddErrorToCurrentRun("errEofInAttributeValue");
    390  }
    391 }
    392 
    393 void nsHtml5Tokenizer::errEofInAttributeName() {
    394  if (MOZ_UNLIKELY(mViewSource)) {
    395    mViewSource->AddErrorToCurrentRun("errEofInAttributeName");
    396  }
    397 }
    398 
    399 void nsHtml5Tokenizer::errEofWithoutGt() {
    400  if (MOZ_UNLIKELY(mViewSource)) {
    401    mViewSource->AddErrorToCurrentRun("errEofWithoutGt");
    402  }
    403 }
    404 
    405 void nsHtml5Tokenizer::errEofInTagName() {
    406  if (MOZ_UNLIKELY(mViewSource)) {
    407    mViewSource->AddErrorToCurrentRun("errEofInTagName");
    408  }
    409 }
    410 
    411 void nsHtml5Tokenizer::errEofInEndTag() {
    412  if (MOZ_UNLIKELY(mViewSource)) {
    413    mViewSource->AddErrorToCurrentRun("errEofInEndTag");
    414  }
    415 }
    416 
    417 void nsHtml5Tokenizer::errEofAfterLt() {
    418  if (MOZ_UNLIKELY(mViewSource)) {
    419    mViewSource->AddErrorToCurrentRun("errEofAfterLt");
    420  }
    421 }
    422 
    423 void nsHtml5Tokenizer::errNcrOutOfRange() {
    424  if (MOZ_UNLIKELY(mViewSource)) {
    425    mViewSource->AddErrorToCurrentNode("errNcrOutOfRange");
    426  }
    427 }
    428 
    429 void nsHtml5Tokenizer::errNcrUnassigned() {
    430  if (MOZ_UNLIKELY(mViewSource)) {
    431    mViewSource->AddErrorToCurrentNode("errNcrUnassigned");
    432  }
    433 }
    434 
    435 void nsHtml5Tokenizer::errDuplicateAttribute() {
    436  if (attributes) {
    437    // There is an open issue for properly specifying this:
    438    // https://github.com/whatwg/html/issues/3257
    439    attributes->setDuplicateAttributeError();
    440  }
    441 
    442  if (MOZ_UNLIKELY(mViewSource)) {
    443    mViewSource->AddErrorToCurrentNode("errDuplicateAttribute");
    444  }
    445 }
    446 
    447 void nsHtml5Tokenizer::errEofInSystemId() {
    448  if (MOZ_UNLIKELY(mViewSource)) {
    449    mViewSource->AddErrorToCurrentRun("errEofInSystemId");
    450  }
    451 }
    452 
    453 void nsHtml5Tokenizer::errExpectedSystemId() {
    454  if (MOZ_LIKELY(mViewSource)) {
    455    mViewSource->AddErrorToCurrentNode("errExpectedSystemId");
    456  }
    457 }
    458 
    459 void nsHtml5Tokenizer::errMissingSpaceBeforeDoctypeName() {
    460  if (MOZ_LIKELY(mViewSource)) {
    461    mViewSource->AddErrorToCurrentNode("errMissingSpaceBeforeDoctypeName");
    462  }
    463 }
    464 
    465 void nsHtml5Tokenizer::errNestedComment() {
    466  if (MOZ_LIKELY(mViewSource)) {
    467    mViewSource->AddErrorToCurrentNode("errNestedComment");
    468  }
    469 }
    470 
    471 void nsHtml5Tokenizer::errNcrZero() {
    472  if (MOZ_UNLIKELY(mViewSource)) {
    473    mViewSource->AddErrorToCurrentNode("errNcrZero");
    474  }
    475 }
    476 
    477 void nsHtml5Tokenizer::errNoSpaceBetweenDoctypeSystemKeywordAndQuote() {
    478  if (MOZ_LIKELY(mViewSource)) {
    479    mViewSource->AddErrorToCurrentNode(
    480        "errNoSpaceBetweenDoctypeSystemKeywordAndQuote");
    481  }
    482 }
    483 
    484 void nsHtml5Tokenizer::errNoSpaceBetweenPublicAndSystemIds() {
    485  if (MOZ_LIKELY(mViewSource)) {
    486    mViewSource->AddErrorToCurrentNode("errNoSpaceBetweenPublicAndSystemIds");
    487  }
    488 }
    489 
    490 void nsHtml5Tokenizer::errNoSpaceBetweenDoctypePublicKeywordAndQuote() {
    491  if (MOZ_LIKELY(mViewSource)) {
    492    mViewSource->AddErrorToCurrentNode(
    493        "errNoSpaceBetweenDoctypePublicKeywordAndQuote");
    494  }
    495 }