nsHtml5TokenizerCppSupplement.h (14112B)
1 /* This Source Code Form is subject to the terms of the Mozilla Public 2 * License, v. 2.0. If a copy of the MPL was not distributed with this 3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 4 5 #include "mozilla/CheckedInt.h" 6 #include "mozilla/Likely.h" 7 8 // INT32_MAX is (2^31)-1. Therefore, the highest power-of-two that fits 9 // is 2^30. Note that this is counting char16_t units. The underlying 10 // bytes will be twice that, but they fit even in 32-bit size_t even 11 // if a contiguous chunk of memory of that size is pretty unlikely to 12 // be available on a 32-bit system. 13 #define MAX_POWER_OF_TWO_IN_INT32 0x40000000 14 15 bool nsHtml5Tokenizer::EnsureBufferSpace(int32_t aLength) { 16 MOZ_RELEASE_ASSERT(aLength >= 0, "Negative length."); 17 if (aLength > MAX_POWER_OF_TWO_IN_INT32) { 18 // Can't happen when loading from network. 19 return false; 20 } 21 mozilla::CheckedInt<int32_t> worstCase(strBufLen); 22 worstCase += aLength; 23 worstCase += charRefBufLen; 24 // Add 2 to account for emissions of LT_GT, LT_SOLIDUS and RSQB_RSQB. 25 // Adding to the general worst case instead of only the 26 // TreeBuilder-exposed worst case to avoid re-introducing a bug when 27 // unifying the tokenizer and tree builder buffers in the future. 28 worstCase += 2; 29 if (!worstCase.isValid()) { 30 return false; 31 } 32 if (worstCase.value() > MAX_POWER_OF_TWO_IN_INT32) { 33 return false; 34 } 35 // TODO: Unify nsHtml5Tokenizer::strBuf and nsHtml5TreeBuilder::charBuffer 36 // so that the call below becomes unnecessary. 37 if (!tokenHandler->EnsureBufferSpace(worstCase.value())) { 38 return false; 39 } 40 if (!strBuf) { 41 if (worstCase.value() < MAX_POWER_OF_TWO_IN_INT32) { 42 // Add one to round to the next power of two to avoid immediate 43 // reallocation once there are a few characters in the buffer. 44 worstCase += 1; 45 } 46 strBuf = jArray<char16_t, int32_t>::newFallibleJArray( 47 mozilla::RoundUpPow2(worstCase.value())); 48 if (!strBuf) { 49 return false; 50 } 51 } else if (worstCase.value() > strBuf.length) { 52 jArray<char16_t, int32_t> newBuf = 53 jArray<char16_t, int32_t>::newFallibleJArray( 54 mozilla::RoundUpPow2(worstCase.value())); 55 if (!newBuf) { 56 return false; 57 } 58 memcpy(newBuf, strBuf, sizeof(char16_t) * size_t(strBufLen)); 59 strBuf = newBuf; 60 } 61 return true; 62 } 63 64 MOZ_COLD MOZ_NEVER_INLINE void 65 nsHtml5Tokenizer::EnsureBufferSpaceShouldNeverHappen(int32_t aLength) { 66 MOZ_DIAGNOSTIC_ASSERT(false, 67 "This is never supposed to happen. Please file a bug " 68 "with steps to reproduce!"); 69 if (!EnsureBufferSpace(aLength)) { 70 MOZ_CRASH( 71 "Unrecovable allocation failure in situation that should never happen. " 72 "Please file a bug with steps to reproduce!"); 73 } 74 } 75 76 bool nsHtml5Tokenizer::TemplatePushedOrHeadPopped() { 77 if (encodingDeclarationHandler) { 78 return encodingDeclarationHandler->TemplatePushedOrHeadPopped(); 79 } 80 return false; 81 } 82 83 void nsHtml5Tokenizer::RememberGt(int32_t aPos) { 84 if (encodingDeclarationHandler) { 85 return encodingDeclarationHandler->RememberGt(aPos); 86 } 87 } 88 89 void nsHtml5Tokenizer::StartPlainText() { 90 stateSave = nsHtml5Tokenizer::PLAINTEXT; 91 } 92 93 void nsHtml5Tokenizer::EnableViewSource(nsHtml5Highlighter* aHighlighter) { 94 mViewSource = mozilla::WrapUnique(aHighlighter); 95 } 96 97 bool nsHtml5Tokenizer::ShouldFlushViewSource() { 98 return mViewSource->ShouldFlushOps(); 99 } 100 101 mozilla::Result<bool, nsresult> nsHtml5Tokenizer::FlushViewSource() { 102 return mViewSource->FlushOps(); 103 } 104 105 void nsHtml5Tokenizer::StartViewSource(const nsAutoString& aTitle) { 106 mViewSource->Start(aTitle); 107 } 108 109 void nsHtml5Tokenizer::StartViewSourceBodyContents() { 110 mViewSource->StartBodyContents(); 111 } 112 113 [[nodiscard]] bool nsHtml5Tokenizer::EndViewSource() { 114 return mViewSource->End(); 115 } 116 117 void nsHtml5Tokenizer::SetViewSourceOpSink(nsAHtml5TreeOpSink* aOpSink) { 118 mViewSource->SetOpSink(aOpSink); 119 } 120 121 void nsHtml5Tokenizer::RewindViewSource() { mViewSource->Rewind(); } 122 123 nsHtml5String nsHtml5Tokenizer::TryAtomizeForSingleDigit() { 124 if (!newAttributesEachTime && strBufLen == 1 && strBuf[0] >= '0' && 125 strBuf[0] <= '9') { 126 static nsStaticAtom* const digitAtoms[10] = { 127 nsGkAtoms::_0, nsGkAtoms::_1, nsGkAtoms::_2, nsGkAtoms::_3, 128 nsGkAtoms::_4, nsGkAtoms::_5, nsGkAtoms::_6, nsGkAtoms::_7, 129 nsGkAtoms::_8, nsGkAtoms::_9}; 130 nsAtom* atom = digitAtoms[strBuf[0] - '0']; 131 nsHtml5String result = nsHtml5String::FromAtom(do_AddRef(atom)); 132 clearStrBufAfterUse(); 133 return result; 134 } 135 return nullptr; 136 } 137 138 void nsHtml5Tokenizer::errWarnLtSlashInRcdata() {} 139 140 // The null checks below annotated MOZ_LIKELY are not actually necessary. 141 142 void nsHtml5Tokenizer::errUnquotedAttributeValOrNull(char16_t c) { 143 if (MOZ_LIKELY(mViewSource)) { 144 switch (c) { 145 case '<': 146 mViewSource->AddErrorToCurrentNode("errUnquotedAttributeLt"); 147 return; 148 case '`': 149 mViewSource->AddErrorToCurrentNode("errUnquotedAttributeGrave"); 150 return; 151 case '\'': 152 case '"': 153 mViewSource->AddErrorToCurrentNode("errUnquotedAttributeQuote"); 154 return; 155 case '=': 156 mViewSource->AddErrorToCurrentNode("errUnquotedAttributeEquals"); 157 return; 158 } 159 } 160 } 161 162 void nsHtml5Tokenizer::errLtOrEqualsOrGraveInUnquotedAttributeOrNull( 163 char16_t c) { 164 if (MOZ_LIKELY(mViewSource)) { 165 switch (c) { 166 case '=': 167 mViewSource->AddErrorToCurrentNode("errUnquotedAttributeStartEquals"); 168 return; 169 case '<': 170 mViewSource->AddErrorToCurrentNode("errUnquotedAttributeStartLt"); 171 return; 172 case '`': 173 mViewSource->AddErrorToCurrentNode("errUnquotedAttributeStartGrave"); 174 return; 175 } 176 } 177 } 178 179 void nsHtml5Tokenizer::errBadCharBeforeAttributeNameOrNull(char16_t c) { 180 if (MOZ_LIKELY(mViewSource)) { 181 if (c == '<') { 182 mViewSource->AddErrorToCurrentNode("errBadCharBeforeAttributeNameLt"); 183 } else if (c == '=') { 184 errEqualsSignBeforeAttributeName(); 185 } else if (c != 0xFFFD) { 186 errQuoteBeforeAttributeName(c); 187 } 188 } 189 } 190 191 void nsHtml5Tokenizer::errBadCharAfterLt(char16_t c) { 192 if (MOZ_LIKELY(mViewSource)) { 193 mViewSource->AddErrorToCurrentNode("errBadCharAfterLt"); 194 } 195 } 196 197 void nsHtml5Tokenizer::errQuoteOrLtInAttributeNameOrNull(char16_t c) { 198 if (MOZ_LIKELY(mViewSource)) { 199 if (c == '<') { 200 mViewSource->AddErrorToCurrentNode("errLtInAttributeName"); 201 } else if (c != 0xFFFD) { 202 mViewSource->AddErrorToCurrentNode("errQuoteInAttributeName"); 203 } 204 } 205 } 206 207 void nsHtml5Tokenizer::maybeErrAttributesOnEndTag( 208 nsHtml5HtmlAttributes* attrs) { 209 if (mViewSource && attrs->getLength() != 0) { 210 /* 211 * When an end tag token is emitted with attributes, that is a parse 212 * error. 213 */ 214 mViewSource->AddErrorToCurrentRun("maybeErrAttributesOnEndTag"); 215 } 216 } 217 218 void nsHtml5Tokenizer::maybeErrSlashInEndTag(bool selfClosing) { 219 if (mViewSource && selfClosing && endTag) { 220 mViewSource->AddErrorToCurrentSlash("maybeErrSlashInEndTag"); 221 } 222 } 223 224 void nsHtml5Tokenizer::errGarbageAfterLtSlash() { 225 if (MOZ_LIKELY(mViewSource)) { 226 mViewSource->AddErrorToCurrentNode("errGarbageAfterLtSlash"); 227 } 228 } 229 230 void nsHtml5Tokenizer::errLtSlashGt() { 231 if (MOZ_LIKELY(mViewSource)) { 232 mViewSource->AddErrorToCurrentNode("errLtSlashGt"); 233 } 234 } 235 236 void nsHtml5Tokenizer::errCharRefLacksSemicolon() { 237 if (MOZ_UNLIKELY(mViewSource)) { 238 mViewSource->AddErrorToCurrentNode("errCharRefLacksSemicolon"); 239 } 240 } 241 242 void nsHtml5Tokenizer::errNoDigitsInNCR() { 243 if (MOZ_UNLIKELY(mViewSource)) { 244 mViewSource->AddErrorToCurrentNode("errNoDigitsInNCR"); 245 } 246 } 247 248 void nsHtml5Tokenizer::errGtInSystemId() { 249 if (MOZ_LIKELY(mViewSource)) { 250 mViewSource->AddErrorToCurrentNode("errGtInSystemId"); 251 } 252 } 253 254 void nsHtml5Tokenizer::errGtInPublicId() { 255 if (MOZ_LIKELY(mViewSource)) { 256 mViewSource->AddErrorToCurrentNode("errGtInPublicId"); 257 } 258 } 259 260 void nsHtml5Tokenizer::errNamelessDoctype() { 261 if (MOZ_LIKELY(mViewSource)) { 262 mViewSource->AddErrorToCurrentNode("errNamelessDoctype"); 263 } 264 } 265 266 void nsHtml5Tokenizer::errConsecutiveHyphens() { 267 if (MOZ_UNLIKELY(mViewSource)) { 268 mViewSource->AddErrorToCurrentNode("errConsecutiveHyphens"); 269 } 270 } 271 272 void nsHtml5Tokenizer::errPrematureEndOfComment() { 273 if (MOZ_LIKELY(mViewSource)) { 274 mViewSource->AddErrorToCurrentNode("errPrematureEndOfComment"); 275 } 276 } 277 278 void nsHtml5Tokenizer::errBogusComment() { 279 if (MOZ_UNLIKELY(mViewSource)) { 280 mViewSource->AddErrorToCurrentNode("errBogusComment"); 281 } 282 } 283 284 void nsHtml5Tokenizer::errSlashNotFollowedByGt() { 285 if (MOZ_LIKELY(mViewSource)) { 286 mViewSource->AddErrorToCurrentSlash("errSlashNotFollowedByGt"); 287 } 288 } 289 290 void nsHtml5Tokenizer::errNoSpaceBetweenAttributes() { 291 if (MOZ_LIKELY(mViewSource)) { 292 mViewSource->AddErrorToCurrentNode("errNoSpaceBetweenAttributes"); 293 } 294 } 295 296 void nsHtml5Tokenizer::errAttributeValueMissing() { 297 if (MOZ_LIKELY(mViewSource)) { 298 mViewSource->AddErrorToCurrentNode("errAttributeValueMissing"); 299 } 300 } 301 302 void nsHtml5Tokenizer::errEqualsSignBeforeAttributeName() { 303 if (MOZ_LIKELY(mViewSource)) { 304 mViewSource->AddErrorToCurrentNode("errEqualsSignBeforeAttributeName"); 305 } 306 } 307 308 void nsHtml5Tokenizer::errLtGt() { 309 if (MOZ_LIKELY(mViewSource)) { 310 mViewSource->AddErrorToCurrentNode("errLtGt"); 311 } 312 } 313 314 void nsHtml5Tokenizer::errProcessingInstruction() { 315 if (MOZ_LIKELY(mViewSource)) { 316 mViewSource->AddErrorToCurrentNode("errProcessingInstruction"); 317 } 318 } 319 320 void nsHtml5Tokenizer::errUnescapedAmpersandInterpretedAsCharacterReference() { 321 if (MOZ_UNLIKELY(mViewSource)) { 322 mViewSource->AddErrorToCurrentAmpersand( 323 "errUnescapedAmpersandInterpretedAsCharacterReference"); 324 } 325 } 326 327 void nsHtml5Tokenizer::errNotSemicolonTerminated() { 328 if (MOZ_UNLIKELY(mViewSource)) { 329 mViewSource->AddErrorToCurrentNode("errNotSemicolonTerminated"); 330 } 331 } 332 333 void nsHtml5Tokenizer::errNoNamedCharacterMatch() { 334 if (MOZ_UNLIKELY(mViewSource)) { 335 mViewSource->AddErrorToCurrentAmpersand("errNoNamedCharacterMatch"); 336 } 337 } 338 339 void nsHtml5Tokenizer::errQuoteBeforeAttributeName(char16_t c) { 340 if (MOZ_LIKELY(mViewSource)) { 341 mViewSource->AddErrorToCurrentNode("errQuoteBeforeAttributeName"); 342 } 343 } 344 345 void nsHtml5Tokenizer::errExpectedPublicId() { 346 if (MOZ_LIKELY(mViewSource)) { 347 mViewSource->AddErrorToCurrentNode("errExpectedPublicId"); 348 } 349 } 350 351 void nsHtml5Tokenizer::errBogusDoctype() { 352 if (MOZ_UNLIKELY(mViewSource)) { 353 mViewSource->AddErrorToCurrentNode("errBogusDoctype"); 354 } 355 } 356 357 void nsHtml5Tokenizer::errNcrSurrogate() { 358 if (MOZ_UNLIKELY(mViewSource)) { 359 mViewSource->AddErrorToCurrentNode("errNcrSurrogate"); 360 } 361 } 362 363 void nsHtml5Tokenizer::errNcrInC1Range() { 364 if (MOZ_UNLIKELY(mViewSource)) { 365 mViewSource->AddErrorToCurrentNode("errNcrInC1Range"); 366 } 367 } 368 369 void nsHtml5Tokenizer::errEofInPublicId() { 370 if (MOZ_UNLIKELY(mViewSource)) { 371 mViewSource->AddErrorToCurrentRun("errEofInPublicId"); 372 } 373 } 374 375 void nsHtml5Tokenizer::errEofInComment() { 376 if (MOZ_UNLIKELY(mViewSource)) { 377 mViewSource->AddErrorToCurrentRun("errEofInComment"); 378 } 379 } 380 381 void nsHtml5Tokenizer::errEofInDoctype() { 382 if (MOZ_UNLIKELY(mViewSource)) { 383 mViewSource->AddErrorToCurrentRun("errEofInDoctype"); 384 } 385 } 386 387 void nsHtml5Tokenizer::errEofInAttributeValue() { 388 if (MOZ_UNLIKELY(mViewSource)) { 389 mViewSource->AddErrorToCurrentRun("errEofInAttributeValue"); 390 } 391 } 392 393 void nsHtml5Tokenizer::errEofInAttributeName() { 394 if (MOZ_UNLIKELY(mViewSource)) { 395 mViewSource->AddErrorToCurrentRun("errEofInAttributeName"); 396 } 397 } 398 399 void nsHtml5Tokenizer::errEofWithoutGt() { 400 if (MOZ_UNLIKELY(mViewSource)) { 401 mViewSource->AddErrorToCurrentRun("errEofWithoutGt"); 402 } 403 } 404 405 void nsHtml5Tokenizer::errEofInTagName() { 406 if (MOZ_UNLIKELY(mViewSource)) { 407 mViewSource->AddErrorToCurrentRun("errEofInTagName"); 408 } 409 } 410 411 void nsHtml5Tokenizer::errEofInEndTag() { 412 if (MOZ_UNLIKELY(mViewSource)) { 413 mViewSource->AddErrorToCurrentRun("errEofInEndTag"); 414 } 415 } 416 417 void nsHtml5Tokenizer::errEofAfterLt() { 418 if (MOZ_UNLIKELY(mViewSource)) { 419 mViewSource->AddErrorToCurrentRun("errEofAfterLt"); 420 } 421 } 422 423 void nsHtml5Tokenizer::errNcrOutOfRange() { 424 if (MOZ_UNLIKELY(mViewSource)) { 425 mViewSource->AddErrorToCurrentNode("errNcrOutOfRange"); 426 } 427 } 428 429 void nsHtml5Tokenizer::errNcrUnassigned() { 430 if (MOZ_UNLIKELY(mViewSource)) { 431 mViewSource->AddErrorToCurrentNode("errNcrUnassigned"); 432 } 433 } 434 435 void nsHtml5Tokenizer::errDuplicateAttribute() { 436 if (attributes) { 437 // There is an open issue for properly specifying this: 438 // https://github.com/whatwg/html/issues/3257 439 attributes->setDuplicateAttributeError(); 440 } 441 442 if (MOZ_UNLIKELY(mViewSource)) { 443 mViewSource->AddErrorToCurrentNode("errDuplicateAttribute"); 444 } 445 } 446 447 void nsHtml5Tokenizer::errEofInSystemId() { 448 if (MOZ_UNLIKELY(mViewSource)) { 449 mViewSource->AddErrorToCurrentRun("errEofInSystemId"); 450 } 451 } 452 453 void nsHtml5Tokenizer::errExpectedSystemId() { 454 if (MOZ_LIKELY(mViewSource)) { 455 mViewSource->AddErrorToCurrentNode("errExpectedSystemId"); 456 } 457 } 458 459 void nsHtml5Tokenizer::errMissingSpaceBeforeDoctypeName() { 460 if (MOZ_LIKELY(mViewSource)) { 461 mViewSource->AddErrorToCurrentNode("errMissingSpaceBeforeDoctypeName"); 462 } 463 } 464 465 void nsHtml5Tokenizer::errNestedComment() { 466 if (MOZ_LIKELY(mViewSource)) { 467 mViewSource->AddErrorToCurrentNode("errNestedComment"); 468 } 469 } 470 471 void nsHtml5Tokenizer::errNcrZero() { 472 if (MOZ_UNLIKELY(mViewSource)) { 473 mViewSource->AddErrorToCurrentNode("errNcrZero"); 474 } 475 } 476 477 void nsHtml5Tokenizer::errNoSpaceBetweenDoctypeSystemKeywordAndQuote() { 478 if (MOZ_LIKELY(mViewSource)) { 479 mViewSource->AddErrorToCurrentNode( 480 "errNoSpaceBetweenDoctypeSystemKeywordAndQuote"); 481 } 482 } 483 484 void nsHtml5Tokenizer::errNoSpaceBetweenPublicAndSystemIds() { 485 if (MOZ_LIKELY(mViewSource)) { 486 mViewSource->AddErrorToCurrentNode("errNoSpaceBetweenPublicAndSystemIds"); 487 } 488 } 489 490 void nsHtml5Tokenizer::errNoSpaceBetweenDoctypePublicKeywordAndQuote() { 491 if (MOZ_LIKELY(mViewSource)) { 492 mViewSource->AddErrorToCurrentNode( 493 "errNoSpaceBetweenDoctypePublicKeywordAndQuote"); 494 } 495 }