nsHtml5Highlighter.cpp (25912B)
1 /* This Source Code Form is subject to the terms of the Mozilla Public 2 * License, v. 2.0. If a copy of the MPL was not distributed with this 3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 4 5 #include "nsHtml5Highlighter.h" 6 #include "ErrorList.h" 7 #include "nsDebug.h" 8 #include "nsHtml5AttributeName.h" 9 #include "nsHtml5Tokenizer.h" 10 #include "nsHtml5ViewSourceUtils.h" 11 #include "nsString.h" 12 #include "nsThreadUtils.h" 13 14 using namespace mozilla; 15 16 nsHtml5Highlighter::nsHtml5Highlighter(nsAHtml5TreeOpSink* aOpSink) 17 : mState(nsHtml5Tokenizer::DATA), 18 mCStart(INT32_MAX), 19 mPos(0), 20 mLineNumber(1), 21 mInlinesOpen(0), 22 mInCharacters(false), 23 mBuffer(nullptr), 24 mOpSink(aOpSink), 25 mCurrentRun(nullptr), 26 mAmpersand(nullptr), 27 mSlash(nullptr), 28 mHandles( 29 MakeUnique<nsIContent*[]>(NS_HTML5_HIGHLIGHTER_HANDLE_ARRAY_LENGTH)), 30 mHandlesUsed(0), 31 mSeenBase(false) { 32 NS_ASSERTION(NS_IsMainThread(), "Wrong thread!"); 33 } 34 35 nsHtml5Highlighter::~nsHtml5Highlighter() { 36 NS_ASSERTION(NS_IsMainThread(), "Wrong thread!"); 37 } 38 39 void nsHtml5Highlighter::SetOpSink(nsAHtml5TreeOpSink* aOpSink) { 40 mOpSink = aOpSink; 41 } 42 43 void nsHtml5Highlighter::Rewind() { 44 mState = 0; 45 mCStart = INT32_MAX; 46 mPos = 0; 47 mLineNumber = 1; 48 mInlinesOpen = 0; 49 mInCharacters = false; 50 mBuffer = nullptr; 51 mOpQueue.Clear(); 52 mCurrentRun = nullptr; 53 mAmpersand = nullptr; 54 mSlash = nullptr; 55 mSeenBase = false; 56 57 // Pop until we have two elements on the stack: html and body. 58 while (mStack.Length() > 2) { 59 Pop(); 60 } 61 } 62 63 void nsHtml5Highlighter::Start(const nsAutoString& aTitle) { 64 // Doctype 65 opAppendDoctypeToDocument operation(nsGkAtoms::html, u""_ns, u""_ns); 66 mOpQueue.AppendElement()->Init(mozilla::AsVariant(operation)); 67 68 mOpQueue.AppendElement()->Init(mozilla::AsVariant(STANDARDS_MODE)); 69 70 // <html> uses NS_NewHTMLSharedElement creator 71 nsIContent** root = 72 CreateElement(nsGkAtoms::html, nullptr, nullptr, NS_NewHTMLSharedElement); 73 opAppendToDocument appendOp(root); 74 mOpQueue.AppendElement()->Init(mozilla::AsVariant(appendOp)); 75 mStack.AppendElement(root); 76 77 // <head> uses NS_NewHTMLSharedElement creator 78 Push(nsGkAtoms::head, nullptr, NS_NewHTMLSharedElement); 79 80 Push(nsGkAtoms::meta, nsHtml5ViewSourceUtils::NewMetaViewportAttributes(), 81 NS_NewHTMLMetaElement); 82 Pop(); // meta 83 84 Push(nsGkAtoms::title, nullptr, NS_NewHTMLTitleElement); 85 // XUL will add the "Source of: " prefix. 86 uint32_t length = aTitle.Length(); 87 if (length > INT32_MAX) { 88 length = INT32_MAX; 89 } 90 AppendCharacters(aTitle.BeginReading(), 0, (int32_t)length); 91 Pop(); // title 92 93 Push(nsGkAtoms::link, nsHtml5ViewSourceUtils::NewLinkAttributes(), 94 NS_NewHTMLLinkElement); 95 96 opUpdateStyleSheet updateOp(CurrentNode()); 97 mOpQueue.AppendElement()->Init(mozilla::AsVariant(updateOp)); 98 99 Pop(); // link 100 101 Pop(); // head 102 103 Push(nsGkAtoms::body, nsHtml5ViewSourceUtils::NewBodyAttributes(), 104 NS_NewHTMLBodyElement); 105 106 // Don't call StartBodyContents here in order to be able to put it in a 107 // speculation. 108 mOpQueue.AppendElement()->Init(mozilla::AsVariant(opStartLayout())); 109 } 110 111 void nsHtml5Highlighter::UpdateCharsetSource(nsCharsetSource aCharsetSource) { 112 opUpdateCharsetSource operation(aCharsetSource); 113 mOpQueue.AppendElement()->Init(mozilla::AsVariant(operation)); 114 } 115 116 int32_t nsHtml5Highlighter::Transition(int32_t aState, bool aReconsume, 117 int32_t aPos) { 118 mPos = aPos; 119 switch (mState) { 120 case nsHtml5Tokenizer::SCRIPT_DATA: 121 case nsHtml5Tokenizer::RAWTEXT: 122 case nsHtml5Tokenizer::RCDATA: 123 case nsHtml5Tokenizer::DATA: 124 // We can transition on < and on &. Either way, we don't yet know the 125 // role of the token, so open a span without class. 126 if (aState == nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE) { 127 StartSpan(); 128 // Start another span for highlighting the ampersand 129 StartSpan(); 130 mAmpersand = CurrentNode(); 131 } else { 132 EndCharactersAndStartMarkupRun(); 133 } 134 break; 135 case nsHtml5Tokenizer::TAG_OPEN: 136 switch (aState) { 137 case nsHtml5Tokenizer::TAG_NAME: 138 StartSpan(u"start-tag"); 139 break; 140 case nsHtml5Tokenizer::DATA: 141 FinishTag(); // DATA 142 break; 143 case nsHtml5Tokenizer::PROCESSING_INSTRUCTION: 144 AddClass(u"pi"); 145 break; 146 } 147 break; 148 case nsHtml5Tokenizer::TAG_NAME: 149 switch (aState) { 150 case nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME: 151 EndSpanOrA(); // nsHtml5Tokenizer::TAG_NAME 152 break; 153 case nsHtml5Tokenizer::SELF_CLOSING_START_TAG: 154 EndSpanOrA(); // nsHtml5Tokenizer::TAG_NAME 155 StartSpan(); // for highlighting the slash 156 mSlash = CurrentNode(); 157 break; 158 default: 159 FinishTag(); 160 break; 161 } 162 break; 163 case nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME: 164 switch (aState) { 165 case nsHtml5Tokenizer::ATTRIBUTE_NAME: 166 StartSpan(u"attribute-name"); 167 break; 168 case nsHtml5Tokenizer::SELF_CLOSING_START_TAG: 169 StartSpan(); // for highlighting the slash 170 mSlash = CurrentNode(); 171 break; 172 default: 173 FinishTag(); 174 break; 175 } 176 break; 177 case nsHtml5Tokenizer::ATTRIBUTE_NAME: 178 switch (aState) { 179 case nsHtml5Tokenizer::AFTER_ATTRIBUTE_NAME: 180 case nsHtml5Tokenizer::BEFORE_ATTRIBUTE_VALUE: 181 EndSpanOrA(); // nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME 182 break; 183 case nsHtml5Tokenizer::SELF_CLOSING_START_TAG: 184 EndSpanOrA(); // nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME 185 StartSpan(); // for highlighting the slash 186 mSlash = CurrentNode(); 187 break; 188 default: 189 FinishTag(); 190 break; 191 } 192 break; 193 case nsHtml5Tokenizer::BEFORE_ATTRIBUTE_VALUE: 194 switch (aState) { 195 case nsHtml5Tokenizer::ATTRIBUTE_VALUE_DOUBLE_QUOTED: 196 case nsHtml5Tokenizer::ATTRIBUTE_VALUE_SINGLE_QUOTED: 197 FlushCurrent(); 198 StartA(); 199 break; 200 case nsHtml5Tokenizer::ATTRIBUTE_VALUE_UNQUOTED: 201 StartA(); 202 break; 203 default: 204 FinishTag(); 205 break; 206 } 207 break; 208 case nsHtml5Tokenizer::ATTRIBUTE_VALUE_DOUBLE_QUOTED: 209 case nsHtml5Tokenizer::ATTRIBUTE_VALUE_SINGLE_QUOTED: 210 switch (aState) { 211 case nsHtml5Tokenizer::AFTER_ATTRIBUTE_VALUE_QUOTED: 212 EndSpanOrA(); 213 break; 214 case nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE: 215 StartSpan(); 216 StartSpan(); // for ampersand itself 217 mAmpersand = CurrentNode(); 218 break; 219 default: 220 MOZ_ASSERT_UNREACHABLE("Impossible transition."); 221 break; 222 } 223 break; 224 case nsHtml5Tokenizer::AFTER_ATTRIBUTE_VALUE_QUOTED: 225 switch (aState) { 226 case nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME: 227 break; 228 case nsHtml5Tokenizer::SELF_CLOSING_START_TAG: 229 StartSpan(); // for highlighting the slash 230 mSlash = CurrentNode(); 231 break; 232 default: 233 FinishTag(); 234 break; 235 } 236 break; 237 case nsHtml5Tokenizer::SELF_CLOSING_START_TAG: 238 EndSpanOrA(); // end the slash highlight 239 switch (aState) { 240 case nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME: 241 break; 242 default: 243 FinishTag(); 244 break; 245 } 246 break; 247 case nsHtml5Tokenizer::ATTRIBUTE_VALUE_UNQUOTED: 248 switch (aState) { 249 case nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME: 250 EndSpanOrA(); 251 break; 252 case nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE: 253 StartSpan(); 254 StartSpan(); // for ampersand itself 255 mAmpersand = CurrentNode(); 256 break; 257 default: 258 FinishTag(); 259 break; 260 } 261 break; 262 case nsHtml5Tokenizer::AFTER_ATTRIBUTE_NAME: 263 switch (aState) { 264 case nsHtml5Tokenizer::SELF_CLOSING_START_TAG: 265 StartSpan(); // for highlighting the slash 266 mSlash = CurrentNode(); 267 break; 268 case nsHtml5Tokenizer::BEFORE_ATTRIBUTE_VALUE: 269 break; 270 case nsHtml5Tokenizer::ATTRIBUTE_NAME: 271 StartSpan(u"attribute-name"); 272 break; 273 default: 274 FinishTag(); 275 break; 276 } 277 break; 278 // most comment states are omitted, because they don't matter to 279 // highlighting 280 case nsHtml5Tokenizer::COMMENT_START: 281 case nsHtml5Tokenizer::BOGUS_COMMENT: 282 AddClass(u"comment"); 283 [[fallthrough]]; 284 case nsHtml5Tokenizer::COMMENT_END: 285 case nsHtml5Tokenizer::COMMENT_END_BANG: 286 case nsHtml5Tokenizer::COMMENT_START_DASH: 287 case nsHtml5Tokenizer::BOGUS_COMMENT_HYPHEN: 288 case nsHtml5Tokenizer::COMMENT_LESSTHAN_BANG_DASH_DASH: 289 if (aState == nsHtml5Tokenizer::DATA) { 290 FinishTag(); 291 } 292 break; 293 // most cdata states are omitted, because they don't matter to 294 // highlighting 295 case nsHtml5Tokenizer::CDATA_RSQB_RSQB: 296 if (aState == nsHtml5Tokenizer::DATA) { 297 AddClass(u"cdata"); 298 FinishTag(); 299 } 300 break; 301 case nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE: 302 EndSpanOrA(); // the span for the ampersand 303 switch (aState) { 304 case nsHtml5Tokenizer::CONSUME_NCR: 305 case nsHtml5Tokenizer::CHARACTER_REFERENCE_HILO_LOOKUP: 306 break; 307 default: 308 // not actually a character reference 309 EndSpanOrA(); 310 break; 311 } 312 break; 313 case nsHtml5Tokenizer::CHARACTER_REFERENCE_HILO_LOOKUP: 314 if (aState == nsHtml5Tokenizer::CHARACTER_REFERENCE_TAIL) { 315 break; 316 } 317 // not actually a character reference 318 EndSpanOrA(); 319 break; 320 case nsHtml5Tokenizer::CHARACTER_REFERENCE_TAIL: 321 if (!aReconsume) { 322 FlushCurrent(); 323 } 324 EndSpanOrA(); 325 break; 326 case nsHtml5Tokenizer::DECIMAL_NRC_LOOP: 327 case nsHtml5Tokenizer::HEX_NCR_LOOP: 328 switch (aState) { 329 case nsHtml5Tokenizer::HANDLE_NCR_VALUE: 330 AddClass(u"entity"); 331 FlushCurrent(); 332 break; 333 case nsHtml5Tokenizer::HANDLE_NCR_VALUE_RECONSUME: 334 AddClass(u"entity"); 335 break; 336 } 337 EndSpanOrA(); 338 break; 339 case nsHtml5Tokenizer::CLOSE_TAG_OPEN: 340 switch (aState) { 341 case nsHtml5Tokenizer::DATA: 342 FinishTag(); 343 break; 344 case nsHtml5Tokenizer::TAG_NAME: 345 StartSpan(u"end-tag"); 346 break; 347 } 348 break; 349 case nsHtml5Tokenizer::RAWTEXT_RCDATA_LESS_THAN_SIGN: 350 if (aState == nsHtml5Tokenizer::NON_DATA_END_TAG_NAME) { 351 FlushCurrent(); 352 StartSpan(); // don't know if it is "end-tag" yet :-( 353 break; 354 } 355 EndSpanOrA(); 356 StartCharacters(); 357 break; 358 case nsHtml5Tokenizer::NON_DATA_END_TAG_NAME: 359 switch (aState) { 360 case nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME: 361 AddClass(u"end-tag"); 362 EndSpanOrA(); 363 break; 364 case nsHtml5Tokenizer::SELF_CLOSING_START_TAG: 365 AddClass(u"end-tag"); 366 EndSpanOrA(); 367 StartSpan(); // for highlighting the slash 368 mSlash = CurrentNode(); 369 break; 370 case nsHtml5Tokenizer::DATA: // yes, as a result of emitting the token 371 AddClass(u"end-tag"); 372 FinishTag(); 373 break; 374 default: 375 FinishTag(); 376 break; 377 } 378 break; 379 case nsHtml5Tokenizer::SCRIPT_DATA_LESS_THAN_SIGN: 380 case nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN: 381 if (aState == nsHtml5Tokenizer::NON_DATA_END_TAG_NAME) { 382 FlushCurrent(); 383 StartSpan(); // don't know if it is "end-tag" yet :-( 384 break; 385 } 386 FinishTag(); 387 break; 388 case nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_DASH_DASH: 389 case nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED: 390 case nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_DASH: 391 if (aState == nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN) { 392 EndCharactersAndStartMarkupRun(); 393 } 394 break; 395 // Lots of double escape states omitted, because they don't highlight. 396 // Likewise, only doctype states that can emit the doctype are of 397 // interest. Otherwise, the transition out of bogus comment deals. 398 case nsHtml5Tokenizer::BEFORE_DOCTYPE_NAME: 399 case nsHtml5Tokenizer::DOCTYPE_NAME: 400 case nsHtml5Tokenizer::AFTER_DOCTYPE_NAME: 401 case nsHtml5Tokenizer::AFTER_DOCTYPE_PUBLIC_KEYWORD: 402 case nsHtml5Tokenizer::BEFORE_DOCTYPE_PUBLIC_IDENTIFIER: 403 case nsHtml5Tokenizer::DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED: 404 case nsHtml5Tokenizer::AFTER_DOCTYPE_PUBLIC_IDENTIFIER: 405 case nsHtml5Tokenizer::BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS: 406 case nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED: 407 case nsHtml5Tokenizer::AFTER_DOCTYPE_SYSTEM_IDENTIFIER: 408 case nsHtml5Tokenizer::BOGUS_DOCTYPE: 409 case nsHtml5Tokenizer::AFTER_DOCTYPE_SYSTEM_KEYWORD: 410 case nsHtml5Tokenizer::BEFORE_DOCTYPE_SYSTEM_IDENTIFIER: 411 case nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED: 412 case nsHtml5Tokenizer::DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED: 413 if (aState == nsHtml5Tokenizer::DATA) { 414 AddClass(u"doctype"); 415 FinishTag(); 416 } 417 break; 418 case nsHtml5Tokenizer::PROCESSING_INSTRUCTION_QUESTION_MARK: 419 if (aState == nsHtml5Tokenizer::DATA) { 420 FinishTag(); 421 } 422 break; 423 default: 424 break; 425 } 426 mState = aState; 427 return aState; 428 } 429 430 [[nodiscard]] bool nsHtml5Highlighter::End() { 431 switch (mState) { 432 case nsHtml5Tokenizer::COMMENT_END: 433 case nsHtml5Tokenizer::COMMENT_END_BANG: 434 case nsHtml5Tokenizer::COMMENT_START_DASH: 435 case nsHtml5Tokenizer::BOGUS_COMMENT: 436 case nsHtml5Tokenizer::BOGUS_COMMENT_HYPHEN: 437 AddClass(u"comment"); 438 break; 439 case nsHtml5Tokenizer::CDATA_RSQB_RSQB: 440 AddClass(u"cdata"); 441 break; 442 case nsHtml5Tokenizer::DECIMAL_NRC_LOOP: 443 case nsHtml5Tokenizer::HEX_NCR_LOOP: 444 // XXX need tokenizer help here 445 break; 446 case nsHtml5Tokenizer::BEFORE_DOCTYPE_NAME: 447 case nsHtml5Tokenizer::DOCTYPE_NAME: 448 case nsHtml5Tokenizer::AFTER_DOCTYPE_NAME: 449 case nsHtml5Tokenizer::AFTER_DOCTYPE_PUBLIC_KEYWORD: 450 case nsHtml5Tokenizer::BEFORE_DOCTYPE_PUBLIC_IDENTIFIER: 451 case nsHtml5Tokenizer::DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED: 452 case nsHtml5Tokenizer::AFTER_DOCTYPE_PUBLIC_IDENTIFIER: 453 case nsHtml5Tokenizer::BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS: 454 case nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED: 455 case nsHtml5Tokenizer::AFTER_DOCTYPE_SYSTEM_IDENTIFIER: 456 case nsHtml5Tokenizer::BOGUS_DOCTYPE: 457 case nsHtml5Tokenizer::AFTER_DOCTYPE_SYSTEM_KEYWORD: 458 case nsHtml5Tokenizer::BEFORE_DOCTYPE_SYSTEM_IDENTIFIER: 459 case nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED: 460 case nsHtml5Tokenizer::DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED: 461 AddClass(u"doctype"); 462 break; 463 default: 464 break; 465 } 466 nsHtml5TreeOperation* treeOp = mOpQueue.AppendElement(); 467 NS_ASSERTION(treeOp, "Tree op allocation failed."); 468 treeOp->Init(mozilla::AsVariant(opStreamEnded())); 469 return FlushOps().isOk(); 470 } 471 472 void nsHtml5Highlighter::SetBuffer(nsHtml5UTF16Buffer* aBuffer) { 473 MOZ_ASSERT(!mBuffer, "Old buffer still here!"); 474 mBuffer = aBuffer; 475 mCStart = aBuffer->getStart(); 476 } 477 478 void nsHtml5Highlighter::DropBuffer(int32_t aPos) { 479 MOZ_ASSERT(mBuffer, "No buffer to drop!"); 480 mPos = aPos; 481 FlushChars(); 482 mBuffer = nullptr; 483 } 484 485 void nsHtml5Highlighter::StartSpan() { 486 FlushChars(); 487 Push(nsGkAtoms::span, nullptr, NS_NewHTMLSpanElement); 488 ++mInlinesOpen; 489 } 490 491 void nsHtml5Highlighter::StartSpan(const char16_t* aClass) { 492 StartSpan(); 493 AddClass(aClass); 494 } 495 496 void nsHtml5Highlighter::EndSpanOrA() { 497 FlushChars(); 498 Pop(); 499 --mInlinesOpen; 500 } 501 502 void nsHtml5Highlighter::StartBodyContents() { 503 MOZ_ASSERT(mLineNumber == 1); 504 PushCurrentLineContainer(); 505 StartCharacters(); 506 } 507 508 void nsHtml5Highlighter::StartCharacters() { 509 MOZ_ASSERT(!mInCharacters, "Already in characters!"); 510 FlushChars(); 511 Push(nsGkAtoms::span, nullptr, NS_NewHTMLSpanElement); 512 mCurrentRun = CurrentNode(); 513 mInCharacters = true; 514 } 515 516 void nsHtml5Highlighter::EndCharactersAndStartMarkupRun() { 517 MOZ_ASSERT(mInCharacters, "Not in characters!"); 518 FlushChars(); 519 Pop(); 520 mInCharacters = false; 521 // Now start markup run 522 StartSpan(); 523 mCurrentRun = CurrentNode(); 524 } 525 526 void nsHtml5Highlighter::StartA() { 527 FlushChars(); 528 Push(nsGkAtoms::a, nullptr, NS_NewHTMLAnchorElement); 529 AddClass(u"attribute-value"); 530 ++mInlinesOpen; 531 } 532 533 void nsHtml5Highlighter::FinishTag() { 534 while (mInlinesOpen > 1) { 535 EndSpanOrA(); 536 } 537 FlushCurrent(); // > 538 EndSpanOrA(); // DATA 539 NS_ASSERTION(!mInlinesOpen, "mInlinesOpen got out of sync!"); 540 StartCharacters(); 541 } 542 543 void nsHtml5Highlighter::FlushChars() { 544 if (mCStart < mPos) { 545 char16_t* buf = mBuffer->getBuffer(); 546 int32_t i = mCStart; 547 while (i < mPos) { 548 char16_t c = buf[i]; 549 switch (c) { 550 case '\r': 551 // The input this code sees has been normalized so that there are 552 // CR breaks and LF breaks but no CRLF breaks. Overwrite CR with LF 553 // to show consistent LF line breaks to layout. It is OK to mutate 554 // the input data, because there are no reparses in the View Source 555 // case, so we won't need the original data in the buffer anymore. 556 buf[i] = '\n'; 557 [[fallthrough]]; 558 case '\n': { 559 ++i; 560 if (mCStart < i) { 561 int32_t len = i - mCStart; 562 AppendCharacters(buf, mCStart, len); 563 mCStart = i; 564 } 565 NewLine(); 566 break; 567 } 568 default: 569 ++i; 570 break; 571 } 572 } 573 if (mCStart < mPos) { 574 int32_t len = mPos - mCStart; 575 AppendCharacters(buf, mCStart, len); 576 mCStart = mPos; 577 } 578 } 579 } 580 581 void nsHtml5Highlighter::PushCurrentLineContainer() { 582 Push(nsGkAtoms::span, nullptr, NS_NewHTMLSpanElement); 583 mOpQueue.AppendElement()->Init( 584 mozilla::AsVariant(opAddLineNumberId(CurrentNode(), mLineNumber))); 585 } 586 587 // NOTE(emilio): It's important that nothing here ends up calling FlushChars(), 588 // since we're in the middle of a flush. 589 void nsHtml5Highlighter::NewLine() { 590 ++mLineNumber; 591 AutoTArray<nsIContent**, 8> handleStack; 592 const bool wasInCharacters = mInCharacters; 593 if (mInCharacters) { 594 Pop(); 595 mInCharacters = false; 596 } 597 while (mInlinesOpen) { 598 handleStack.AppendElement(CurrentNode()); 599 Pop(); 600 mInlinesOpen--; 601 } 602 Pop(); // Pop the existing container. 603 PushCurrentLineContainer(); 604 for (nsIContent** handle : Reversed(handleStack)) { 605 nsIContent** dest = AllocateContentHandle(); 606 mOpQueue.AppendElement()->Init(mozilla::AsVariant(opShallowCloneInto( 607 handle, dest, CurrentNode(), mozilla::dom::FROM_PARSER_NETWORK))); 608 mStack.AppendElement(dest); 609 ++mInlinesOpen; 610 } 611 if (wasInCharacters) { 612 Push(nsGkAtoms::span, nullptr, NS_NewHTMLSpanElement); 613 mCurrentRun = CurrentNode(); 614 mInCharacters = true; 615 } 616 } 617 618 void nsHtml5Highlighter::FlushCurrent() { 619 mPos++; 620 FlushChars(); 621 } 622 623 bool nsHtml5Highlighter::ShouldFlushOps() { 624 // Arbitrary threshold that doesn't have an exact justification. 625 // The general idea is to flush much, much sooner than reaching 626 // the maximum size of `nsTArray`. 627 return mOpQueue.Length() > 100000; 628 } 629 630 mozilla::Result<bool, nsresult> nsHtml5Highlighter::FlushOps() { 631 bool hasOps = !mOpQueue.IsEmpty(); 632 if (hasOps) { 633 if (!mOpSink->MoveOpsFrom(mOpQueue)) { 634 return Err(NS_ERROR_OUT_OF_MEMORY); 635 } 636 } 637 return hasOps; 638 } 639 640 void nsHtml5Highlighter::MaybeLinkifyAttributeValue(nsHtml5AttributeName* aName, 641 nsHtml5String aValue) { 642 if (!(nsHtml5AttributeName::ATTR_HREF == aName || 643 nsHtml5AttributeName::ATTR_SRC == aName || 644 nsHtml5AttributeName::ATTR_ACTION == aName || 645 nsHtml5AttributeName::ATTR_CITE == aName || 646 nsHtml5AttributeName::ATTR_BACKGROUND == aName || 647 nsHtml5AttributeName::ATTR_LONGDESC == aName || 648 nsHtml5AttributeName::ATTR_XLINK_HREF == aName || 649 nsHtml5AttributeName::ATTR_DEFINITIONURL == aName)) { 650 return; 651 } 652 AddViewSourceHref(aValue); 653 } 654 655 void nsHtml5Highlighter::CompletedNamedCharacterReference() { 656 AddClass(u"entity"); 657 } 658 659 nsIContent** nsHtml5Highlighter::AllocateContentHandle() { 660 if (mHandlesUsed == NS_HTML5_HIGHLIGHTER_HANDLE_ARRAY_LENGTH) { 661 mOldHandles.AppendElement(std::move(mHandles)); 662 mHandles = 663 MakeUnique<nsIContent*[]>(NS_HTML5_HIGHLIGHTER_HANDLE_ARRAY_LENGTH); 664 mHandlesUsed = 0; 665 } 666 #ifdef DEBUG 667 mHandles[mHandlesUsed] = reinterpret_cast<nsIContent*>(uintptr_t(0xC0DEDBAD)); 668 #endif 669 return &mHandles[mHandlesUsed++]; 670 } 671 672 nsIContent** nsHtml5Highlighter::CreateElement( 673 nsAtom* aName, nsHtml5HtmlAttributes* aAttributes, 674 nsIContent** aIntendedParent, 675 mozilla::dom::HTMLContentCreatorFunction aCreator) { 676 MOZ_ASSERT(aName, "Got null name."); 677 nsIContent** content = AllocateContentHandle(); 678 opCreateHTMLElement opeation(content, aName, aAttributes, aCreator, 679 aIntendedParent, 680 mozilla::dom::FROM_PARSER_NETWORK); 681 mOpQueue.AppendElement()->Init(mozilla::AsVariant(opeation)); 682 return content; 683 } 684 685 nsIContent** nsHtml5Highlighter::CurrentNode() { 686 MOZ_ASSERT(!mStack.IsEmpty(), "Must have something on stack."); 687 return mStack.LastElement(); 688 } 689 690 void nsHtml5Highlighter::Push( 691 nsAtom* aName, nsHtml5HtmlAttributes* aAttributes, 692 mozilla::dom::HTMLContentCreatorFunction aCreator) { 693 MOZ_ASSERT(!mStack.IsEmpty(), "Pushing without root."); 694 nsIContent** elt = CreateElement(aName, aAttributes, CurrentNode(), 695 aCreator); // Don't inline below! 696 opAppend operation(elt, CurrentNode(), mozilla::dom::FROM_PARSER_NETWORK); 697 mOpQueue.AppendElement()->Init(mozilla::AsVariant(operation)); 698 mStack.AppendElement(elt); 699 } 700 701 void nsHtml5Highlighter::Pop() { 702 MOZ_ASSERT(mStack.Length() >= 2, "Popping when stack too short."); 703 mStack.RemoveLastElement(); 704 } 705 706 void nsHtml5Highlighter::AppendCharacters(const char16_t* aBuffer, 707 int32_t aStart, int32_t aLength) { 708 MOZ_ASSERT(aBuffer, "Null buffer"); 709 710 char16_t* bufferCopy = new char16_t[aLength]; 711 memcpy(bufferCopy, aBuffer + aStart, aLength * sizeof(char16_t)); 712 713 opAppendText operation(CurrentNode(), bufferCopy, aLength); 714 mOpQueue.AppendElement()->Init(mozilla::AsVariant(operation)); 715 } 716 717 void nsHtml5Highlighter::AddClass(const char16_t* aClass) { 718 opAddClass operation(CurrentNode(), (char16_t*)aClass); 719 mOpQueue.AppendElement()->Init(mozilla::AsVariant(operation)); 720 } 721 722 void nsHtml5Highlighter::AddViewSourceHref(nsHtml5String aValue) { 723 char16_t* bufferCopy = new char16_t[aValue.Length() + 1]; 724 aValue.CopyToBuffer(bufferCopy); 725 bufferCopy[aValue.Length()] = 0; 726 727 opAddViewSourceHref operation(CurrentNode(), bufferCopy, aValue.Length()); 728 mOpQueue.AppendElement()->Init(mozilla::AsVariant(operation)); 729 } 730 731 void nsHtml5Highlighter::AddBase(nsHtml5String aValue) { 732 if (mSeenBase) { 733 return; 734 } 735 mSeenBase = true; 736 char16_t* bufferCopy = new char16_t[aValue.Length() + 1]; 737 aValue.CopyToBuffer(bufferCopy); 738 bufferCopy[aValue.Length()] = 0; 739 740 opAddViewSourceBase operation(bufferCopy, aValue.Length()); 741 mOpQueue.AppendElement()->Init(mozilla::AsVariant(operation)); 742 } 743 744 void nsHtml5Highlighter::AddErrorToCurrentNode(const char* aMsgId) { 745 nsHtml5TreeOperation* treeOp = mOpQueue.AppendElement(); 746 NS_ASSERTION(treeOp, "Tree op allocation failed."); 747 opAddErrorType operation(CurrentNode(), (char*)aMsgId); 748 treeOp->Init(mozilla::AsVariant(operation)); 749 } 750 751 void nsHtml5Highlighter::AddErrorToCurrentRun(const char* aMsgId) { 752 MOZ_ASSERT(mCurrentRun, "Adding error to run without one!"); 753 nsHtml5TreeOperation* treeOp = mOpQueue.AppendElement(); 754 NS_ASSERTION(treeOp, "Tree op allocation failed."); 755 opAddErrorType operation(mCurrentRun, (char*)aMsgId); 756 treeOp->Init(mozilla::AsVariant(operation)); 757 } 758 759 void nsHtml5Highlighter::AddErrorToCurrentRun(const char* aMsgId, 760 nsAtom* aName) { 761 MOZ_ASSERT(mCurrentRun, "Adding error to run without one!"); 762 nsHtml5TreeOperation* treeOp = mOpQueue.AppendElement(); 763 NS_ASSERTION(treeOp, "Tree op allocation failed."); 764 opAddErrorType operation(mCurrentRun, (char*)aMsgId, aName); 765 treeOp->Init(mozilla::AsVariant(operation)); 766 } 767 768 void nsHtml5Highlighter::AddErrorToCurrentRun(const char* aMsgId, nsAtom* aName, 769 nsAtom* aOther) { 770 MOZ_ASSERT(mCurrentRun, "Adding error to run without one!"); 771 nsHtml5TreeOperation* treeOp = mOpQueue.AppendElement(); 772 NS_ASSERTION(treeOp, "Tree op allocation failed."); 773 opAddErrorType operation(mCurrentRun, (char*)aMsgId, aName, aOther); 774 treeOp->Init(mozilla::AsVariant(operation)); 775 } 776 777 void nsHtml5Highlighter::AddErrorToCurrentAmpersand(const char* aMsgId) { 778 MOZ_ASSERT(mAmpersand, "Adding error to ampersand without one!"); 779 nsHtml5TreeOperation* treeOp = mOpQueue.AppendElement(); 780 NS_ASSERTION(treeOp, "Tree op allocation failed."); 781 opAddErrorType operation(mAmpersand, (char*)aMsgId); 782 treeOp->Init(mozilla::AsVariant(operation)); 783 } 784 785 void nsHtml5Highlighter::AddErrorToCurrentSlash(const char* aMsgId) { 786 MOZ_ASSERT(mSlash, "Adding error to slash without one!"); 787 nsHtml5TreeOperation* treeOp = mOpQueue.AppendElement(); 788 NS_ASSERTION(treeOp, "Tree op allocation failed."); 789 opAddErrorType operation(mSlash, (char*)aMsgId); 790 treeOp->Init(mozilla::AsVariant(operation)); 791 }