nsPlainTextSerializer.cpp (63420B)
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/*
 * nsIContentSerializer implementation that can be used with an
 * nsIDocumentEncoder to convert a DOM into plaintext in a nice way
 * (eg for copy/paste as plaintext).
 */

#include "nsPlainTextSerializer.h"

#include "mozilla/Casting.h"
#include "mozilla/Preferences.h"
#include "mozilla/Span.h"
#include "mozilla/StaticPrefs_converter.h"
#include "mozilla/TextEditor.h"
#include "mozilla/dom/AbstractRange.h"
#include "mozilla/dom/CharacterData.h"
#include "mozilla/dom/CharacterDataBuffer.h"
#include "mozilla/dom/Element.h"
#include "mozilla/dom/HTMLBRElement.h"
#include "mozilla/dom/Text.h"
#include "mozilla/intl/Segmenter.h"
#include "mozilla/intl/UnicodeProperties.h"
#include "nsCRT.h"
#include "nsComputedDOMStyle.h"
#include "nsContentUtils.h"
#include "nsDebug.h"
#include "nsGkAtoms.h"
#include "nsIDocumentEncoder.h"
#include "nsNameSpaceManager.h"
#include "nsPrintfCString.h"
#include "nsReadableUtils.h"
#include "nsUnicharUtils.h"
#include "nsUnicodeProperties.h"

namespace mozilla {
class Encoding;
}

using namespace mozilla;
using namespace mozilla::dom;

// Names of the preferences read by nsPlainTextSerializer::Settings::Init when
// nsIDocumentEncoder::OutputFormatted is requested.
#define PREF_STRUCTS "converter.html2txt.structs"
#define PREF_HEADER_STRATEGY "converter.html2txt.header_strategy"

// Basic indentation unit, in columns. Lists, <dd> and non-cite <blockquote>
// indentation are all expressed in terms of it.
static const int32_t kTabSize = 4;

// Indentation of h1, if mHeaderStrategy = kIndentIncreasedWithHeaderLevel
// or = kNumberHeadingsAndIndentSlightly. Indentation of other headers is
// derived from that.
static const int32_t kIndentSizeHeaders = 2;

// If mHeaderStrategy = kIndentIncreasedWithHeaderLevel, indent h(x+1) this
// many columns more than h(x).
static const int32_t kIndentIncrementHeaders = 2;

// Indentation of non-first lines of ul and ol.
static const int32_t kIndentSizeList = kTabSize;

// Indentation of <dd>.
static const int32_t kIndentSizeDD = kTabSize;

// U+00A0 NO-BREAK SPACE and the plain ASCII space that replaces it unless
// nsIDocumentEncoder::OutputPersistNBSP is set.
static const char16_t kNBSP = 160;
static const char16_t kSPACE = ' ';

// File-local helpers; their definitions are not part of this section of the
// file.
static int32_t HeaderLevel(const nsAtom* aTag);
static int32_t GetUnicharWidth(char32_t ucs);
static int32_t GetUnicharStringWidth(Span<const char16_t> aString);

// Number of entries allocated for mTagStack in the constructor.
// Someday may want to make this non-const:
static const uint32_t TagStackSize = 500;

NS_IMPL_CYCLE_COLLECTING_ADDREF(nsPlainTextSerializer)
NS_IMPL_CYCLE_COLLECTING_RELEASE(nsPlainTextSerializer)

NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsPlainTextSerializer)
  NS_INTERFACE_MAP_ENTRY(nsIContentSerializer)
  NS_INTERFACE_MAP_ENTRY(nsISupports)
NS_INTERFACE_MAP_END

NS_IMPL_CYCLE_COLLECTION(nsPlainTextSerializer)

// Factory function: creates a new nsPlainTextSerializer and hands the
// reference over to the caller through aSerializer.
//
// @param aSerializer Out-parameter receiving the new serializer.
// @return Always NS_OK.
nsresult NS_NewPlainTextSerializer(nsIContentSerializer** aSerializer) {
  RefPtr<nsPlainTextSerializer> it = new nsPlainTextSerializer();
  // forget() transfers the reference held by `it` to the out-parameter.
  it.forget(aSerializer);
  return NS_OK;
}

// @param aFlags As defined in nsIDocumentEncoder.idl.
// @param aLineBreak Receives the line break sequence selected by aFlags:
//                   CR+LF when both OutputCRLineBreak and OutputLFLineBreak
//                   are set, CR or LF when only one of them is set, and
//                   NS_ULINEBREAK when neither is set.
static void DetermineLineBreak(const int32_t aFlags, nsAString& aLineBreak) {
  // Set the line break character:
  if ((aFlags & nsIDocumentEncoder::OutputCRLineBreak) &&
      (aFlags & nsIDocumentEncoder::OutputLFLineBreak)) {
    // Windows
    aLineBreak.AssignLiteral(u"\r\n");
  } else if (aFlags & nsIDocumentEncoder::OutputCRLineBreak) {
    // Mac
    aLineBreak.AssignLiteral(u"\r");
  } else if (aFlags & nsIDocumentEncoder::OutputLFLineBreak) {
    // Unix/DOM
    aLineBreak.AssignLiteral(u"\n");
  } else {
    // Platform/default
    aLineBreak.AssignLiteral(NS_ULINEBREAK);
  }
}

// Replaces every no-break space in the line's content with an ASCII space,
// unless aFlags contains nsIDocumentEncoder::OutputPersistNBSP.
//
// @param aFlags As defined in nsIDocumentEncoder.idl.
void nsPlainTextSerializer::CurrentLine::MaybeReplaceNbspsInContent(
    const int32_t aFlags) {
  if (!(aFlags & nsIDocumentEncoder::OutputPersistNBSP)) {
    // First, replace all nbsp characters with spaces,
    // which the unicode encoder won't do for us.
    mContent.ReplaceChar(kNBSP, kSPACE);
  }
}

// Empties the line's content and its indentation header. Other members of
// the line (e.g. the indentation length) are left untouched.
void nsPlainTextSerializer::CurrentLine::ResetContentAndIndentationHeader() {
  mContent.Truncate();
  mIndentation.mHeader.Truncate();
}

// Finds the index in mContent at which the line should be wrapped so that the
// prefix (as measured by DeterminePrefixWidth()) plus the content before the
// index fits into aWrapColumn where possible. mContent must not be empty.
//
// @param aWrapColumn     Column the line should not exceed.
// @param aUseLineBreaker If true, candidate positions come from
//                        intl::LineBreakIteratorUtf16; otherwise only ASCII
//                        spaces are considered.
// @return With the line breaker: the last break position that still fits, or
//         0 if not even the first one fits. Without it: the index of the
//         closest ASCII space at or before the wrap column if there is one,
//         otherwise the index of the first ASCII space after it, otherwise
//         the content length.
int32_t nsPlainTextSerializer::CurrentLine::FindWrapIndexForContent(
    const uint32_t aWrapColumn, bool aUseLineBreaker) const {
  MOZ_ASSERT(!mContent.IsEmpty());

  const uint32_t prefixwidth = DeterminePrefixWidth();
  int32_t goodSpace = 0;

  if (aUseLineBreaker) {
    // We advance one line break point at a time from the beginning of the
    // mContent until we find a width less than or equal to wrap column.
    uint32_t width = 0;
    intl::LineBreakIteratorUtf16 lineBreakIter(mContent);
    while (Maybe<uint32_t> nextGoodSpace = lineBreakIter.Next()) {
      // Trim space at the tail. UAX#14 doesn't have break opportunity for
      // ASCII space at the tail.
      const Maybe<uint32_t> originalNextGoodSpace = nextGoodSpace;
      while (*nextGoodSpace > 0 &&
             mContent.CharAt(*nextGoodSpace - 1) == 0x20) {
        nextGoodSpace = Some(*nextGoodSpace - 1);
      }
      if (*nextGoodSpace == 0) {
        // Restore the original nextGoodSpace.
        nextGoodSpace = originalNextGoodSpace;
      }

      // Only the stretch between the last accepted break (goodSpace) and the
      // candidate is measured; `width` accumulates the total so far.
      width += GetUnicharStringWidth(Span<const char16_t>(
          mContent.get() + goodSpace, *nextGoodSpace - goodSpace));
      if (prefixwidth + width > aWrapColumn) {
        // The next break point makes the width exceeding the wrap column, so
        // goodSpace is what we want.
        break;
      }
      goodSpace = AssertedCast<int32_t>(*nextGoodSpace);
    }

    return goodSpace;
  }

  // In this case we don't want strings, especially CJK-ones, to be split. See
  // bug 333064 for more information. We break only at ASCII spaces.
  if (aWrapColumn >= prefixwidth) {
    // Search backward from the adjusted wrap column or from the text end.
    goodSpace =
        std::min<int32_t>(aWrapColumn - prefixwidth, mContent.Length() - 1);
    while (goodSpace >= 0) {
      if (nsCRT::IsAsciiSpace(mContent.CharAt(goodSpace))) {
        return goodSpace;
      }
      goodSpace--;
    }
  }

  // Search forward from the adjusted wrap column. When the prefix alone is
  // wider than the wrap column the search starts at index 1.
  goodSpace = (prefixwidth > aWrapColumn) ? 1 : aWrapColumn - prefixwidth;
  const int32_t contentLength = mContent.Length();
  while (goodSpace < contentLength &&
         !nsCRT::IsAsciiSpace(mContent.CharAt(goodSpace))) {
    goodSpace++;
  }

  return goodSpace;
}

// Binds the manager to the output string, which must be empty, and derives
// the line break sequence from aFlags. The manager starts at the first
// column.
//
// @param aFlags  As defined in nsIDocumentEncoder.idl.
// @param aOutput String that receives all serialized text.
nsPlainTextSerializer::OutputManager::OutputManager(const int32_t aFlags,
                                                    nsAString& aOutput)
    : mFlags{aFlags}, mOutput{aOutput}, mAtFirstColumn{true} {
  MOZ_ASSERT(aOutput.IsEmpty());

  DetermineLineBreak(mFlags, mLineBreak);
}

// Appends aLine to the output. When the output is at the first column, the
// line's quote/indentation prefix (CreateQuotesAndIndent) is written first.
//
// @param aLine                     Line whose prefix and content are written.
// @param aStripTrailingWhitespaces With kMaybe, the prefix is trimmed with
//                                  Trim(" ", ...) when the line's content is
//                                  empty.
void nsPlainTextSerializer::OutputManager::Append(
    const CurrentLine& aLine,
    const StripTrailingWhitespaces aStripTrailingWhitespaces) {
  if (IsAtFirstColumn()) {
    nsAutoString quotesAndIndent;
    aLine.CreateQuotesAndIndent(quotesAndIndent);

    if ((aStripTrailingWhitespaces == StripTrailingWhitespaces::kMaybe)) {
      // Only a prefix that is not followed by any content gets trimmed.
      const bool stripTrailingSpaces = aLine.mContent.IsEmpty();
      if (stripTrailingSpaces) {
        quotesAndIndent.Trim(" ", false, true, false);
      }
    }

    Append(quotesAndIndent);
  }

  Append(aLine.mContent);
}

// Appends aString to the output. A non-empty string moves the output away
// from the first column; an empty string is a no-op.
void nsPlainTextSerializer::OutputManager::Append(const nsAString& aString) {
  if (!aString.IsEmpty()) {
    mOutput.Append(aString);
    mAtFirstColumn = false;
  }
}

// Appends a line break and marks the output as being at the first column.
//
// @param aForceCRLF If true, "\r\n" is written instead of the line break
//                   sequence derived from the flags.
void nsPlainTextSerializer::OutputManager::AppendLineBreak(bool aForceCRLF) {
  mOutput.Append(aForceCRLF ? u"\r\n"_ns : mLineBreak);
  mAtFirstColumn = true;
}

// Returns the current length of the output string.
uint32_t nsPlainTextSerializer::OutputManager::GetOutputLength() const {
  return mOutput.Length();
}

// Puts the serializer into its start-of-document state and allocates the tag
// stack (TagStackSize entries, released in the destructor).
// NOTE(review): mHeadLevel, mLineBreakDue, mHasWrittenCiteBlockquote and
// mIgnoredChildNodeLevel are read elsewhere in this file but not set here;
// presumably they have default member initializers in the header - confirm.
nsPlainTextSerializer::nsPlainTextSerializer()
    : mFloatingLines(-1),
      kSpace(u" "_ns)  // Init of "constant"
{
  mSpanLevel = 0;
  // Index 0 is cleared as well; the numbering code only uses indices 1..6.
  for (int32_t i = 0; i <= 6; i++) {
    mHeaderCounter[i] = 0;
  }

  // Flow
  mEmptyLines = 1;  // The start of the document is an "empty line" in itself,
  mInWhitespace = false;
  mPreFormattedMail = false;

  mPreformattedBlockBoundary = false;

  // initialize the tag stack to zero:
  // The stack only ever contains pointers to static atoms, so they don't
  // need refcounting.
  mTagStack = new const nsAtom*[TagStackSize];
  mTagStackIndex = 0;
  // kNotFound means "not currently ignoring any subtree".
  mIgnoreAboveIndex = (uint32_t)kNotFound;

  mULCount = 0;
}

// Releases the tag stack. Warns if <head> open/close notifications were not
// balanced.
nsPlainTextSerializer::~nsPlainTextSerializer() {
  delete[] mTagStack;
  NS_WARNING_ASSERTION(mHeadLevel == 0, "Wrong head level!");
}

// Maps the integer value of the header strategy preference to the
// HeaderStrategy enum: 0 -> kNoIndentation,
// 1 -> kIndentIncreasedWithHeaderLevel,
// 2 -> kNumberHeadingsAndIndentSlightly. Any other value triggers a warning
// and yields kIndentIncreasedWithHeaderLevel.
nsPlainTextSerializer::Settings::HeaderStrategy
nsPlainTextSerializer::Settings::Convert(const int32_t aPrefHeaderStrategy) {
  // Fallback used for undefined preference values.
  HeaderStrategy result{HeaderStrategy::kIndentIncreasedWithHeaderLevel};

  switch (aPrefHeaderStrategy) {
    case 0: {
      result = HeaderStrategy::kNoIndentation;
      break;
    }
    case 1: {
      result = HeaderStrategy::kIndentIncreasedWithHeaderLevel;
      break;
    }
    case 2: {
      result = HeaderStrategy::kNumberHeadingsAndIndentSlightly;
      break;
    }
    default: {
      NS_WARNING(
          nsPrintfCString("Header strategy pref contains undefined value: %i",
                          aPrefHeaderStrategy)
              .get());
    }
  }

  return result;
}

// Value passed to Preferences::GetInt as the default for
// PREF_HEADER_STRATEGY; Convert() maps 1 to kIndentIncreasedWithHeaderLevel.
const int32_t kDefaultHeaderStrategy = 1;

// Stores the flags and the wrap column. For formatted output it also reads
// the structs and header strategy preferences. OutputNoFramesContent is
// always cleared from the stored flags.
//
// @param aFlags      As defined in nsIDocumentEncoder.idl.
// @param aWrapColumn Column at which lines are wrapped.
void nsPlainTextSerializer::Settings::Init(const int32_t aFlags,
                                           const uint32_t aWrapColumn) {
  mFlags = aFlags;

  if (mFlags & nsIDocumentEncoder::OutputFormatted) {
    // Get some prefs
// that controls how we do formatted output
    // The current mStructs value is the fallback handed to GetBool.
    mStructs = Preferences::GetBool(PREF_STRUCTS, mStructs);

    int32_t headerStrategy =
        Preferences::GetInt(PREF_HEADER_STRATEGY, kDefaultHeaderStrategy);
    mHeaderStrategy = Convert(headerStrategy);
  }

  // Ruby annotations are included when either the pref or the flag asks for
  // them.
  mWithRubyAnnotation = StaticPrefs::converter_html2txt_always_include_ruby() ||
                        (mFlags & nsIDocumentEncoder::OutputRubyAnnotation);

  // XXX We should let the caller decide whether to do this or not
  mFlags &= ~nsIDocumentEncoder::OutputNoFramesContent;

  mWrapColumn = aWrapColumn;
}

// Hard-wraps aString in place at aWrapColumn, using temporary settings, a
// temporary line and a temporary output manager instead of any serializer
// instance state.
//
// @param aString     In/out: the text to wrap; replaced by the wrapped text.
// @param aWrapColumn Wrap column; must be non-zero.
// @param aFlags      As defined in nsIDocumentEncoder.idl; must contain
//                    OutputWrap.
void nsPlainTextSerializer::HardWrapString(nsAString& aString,
                                           uint32_t aWrapColumn,
                                           int32_t aFlags) {
  MOZ_ASSERT(aFlags & nsIDocumentEncoder::OutputWrap, "Why?");
  MOZ_ASSERT(aWrapColumn, "Why?");

  Settings settings;
  settings.Init(aFlags, aWrapColumn);

  // Line breaker will do the right thing, no need to split manually.
  CurrentLine line;
  line.mContent.Assign(aString);

  nsAutoString output;
  {
    // The manager is scoped so that it is gone before `output` is copied
    // back into aString.
    OutputManager manager(aFlags, output);
    PerformWrapAndOutputCompleteLines(settings, line, manager,
                                      /* aUseLineBreaker = */ true, nullptr);
    // Write whatever remains in `line` after the complete lines.
    manager.Flush(line);
  }
  aString.Assign(output);
}

// Prepares the serializer for a new serialization run that writes to
// aOutput.
//
// @param aFlags                  As defined in nsIDocumentEncoder.idl.
// @param aWrapColumn             Column at which lines are wrapped.
// @param aEncoding               Not used by this implementation.
// @param aIsCopying              Not used by this implementation.
// @param aIsWholeDocument        Not used by this implementation.
// @param aNeedsPreformatScanning Out: always set to true.
// @param aOutput                 String that receives the serialized text.
// @return Always NS_OK.
NS_IMETHODIMP
nsPlainTextSerializer::Init(const uint32_t aFlags, uint32_t aWrapColumn,
                            const Encoding* aEncoding, bool aIsCopying,
                            bool aIsWholeDocument,
                            bool* aNeedsPreformatScanning, nsAString& aOutput) {
#ifdef DEBUG
  // Check if the major control flags are set correctly.
  if (aFlags & nsIDocumentEncoder::OutputFormatFlowed) {
    // One of OutputFormatted or OutputWrap must be set, but not both.
    // NOTE(review): this compares the two masked values rather than two
    // booleans. As the flags are distinct bits, the values also differ when
    // both are set, so only the "neither is set" case can fire - confirm
    // whether "but not both" is meant to be enforced.
    NS_ASSERTION((aFlags & nsIDocumentEncoder::OutputFormatted) !=
                     (aFlags & nsIDocumentEncoder::OutputWrap),
                 "If you want format=flowed, you must combine it "
                 "with either nsIDocumentEncoder::OutputFormatted "
                 "or nsIDocumentEncoder::OutputWrap");
  }

  if (aFlags & nsIDocumentEncoder::OutputFormatted) {
    NS_ASSERTION(
        !(aFlags & nsIDocumentEncoder::OutputPreformatted),
        "Can't do formatted and preformatted output at the same time!");
  }
#endif
  // OutputFormatDelSp is only valid together with OutputFormatFlowed.
  MOZ_ASSERT(!(aFlags & nsIDocumentEncoder::OutputFormatDelSp) ||
             (aFlags & nsIDocumentEncoder::OutputFormatFlowed));

  *aNeedsPreformatScanning = true;
  mSettings.Init(aFlags, aWrapColumn);
  // Use the flags as stored by the settings: Settings::Init clears
  // OutputNoFramesContent.
  mOutputManager.emplace(mSettings.GetFlags(), aOutput);

  mUseLineBreaker = mSettings.MayWrap() && mSettings.MayBreakLines();

  mLineBreakDue = false;
  mFloatingLines = -1;

  mPreformattedBlockBoundary = false;

  MOZ_ASSERT(mOLStack.IsEmpty());

  return NS_OK;
}

// Returns the top of the bool stack, or false if the stack is empty.
bool nsPlainTextSerializer::GetLastBool(const nsTArray<bool>& aStack) {
  uint32_t size = aStack.Length();
  if (size == 0) {
    return false;
  }
  return aStack.ElementAt(size - 1);
}

// Overwrites the top of the bool stack with aValue. Reports an error and
// leaves the stack unchanged if it is empty.
void nsPlainTextSerializer::SetLastBool(nsTArray<bool>& aStack, bool aValue) {
  uint32_t size = aStack.Length();
  if (size > 0) {
    aStack.ElementAt(size - 1) = aValue;
  } else {
    NS_ERROR("There is no \"Last\" value");
  }
}

// Pushes aValue onto the bool stack.
void nsPlainTextSerializer::PushBool(nsTArray<bool>& aStack, bool aValue) {
  aStack.AppendElement(bool(aValue));
}

// Removes and returns the top of the bool stack, or returns false if the
// stack is empty.
bool nsPlainTextSerializer::PopBool(nsTArray<bool>& aStack) {
  return aStack.Length() ?
aStack.PopLastElement() : false;
}

// Tells whether aTag is a ruby annotation element (<rp>, <rt> or <rtc>) that
// must be left out of the output. Always false when the settings ask for
// ruby annotations to be included.
bool nsPlainTextSerializer::IsIgnorableRubyAnnotation(
    const nsAtom* aTag) const {
  if (mSettings.GetWithRubyAnnotation()) {
    return false;
  }

  return aTag == nsGkAtoms::rp || aTag == nsGkAtoms::rt ||
         aTag == nsGkAtoms::rtc;
}

// Return true if aElement has 'display:none' or if we just don't know.
// "Don't know" means that no computed style could be obtained; styles are
// not flushed to find out.
static bool IsDisplayNone(Element* aElement) {
  RefPtr<const ComputedStyle> computedStyle =
      nsComputedDOMStyle::GetComputedStyleNoFlush(aElement);
  return !computedStyle ||
         computedStyle->StyleDisplay()->mDisplay == StyleDisplay::None;
}

// Tells whether aElement is an HTML <script> or <style> element for which
// IsDisplayNone() is true. The callers skip such elements together with
// their content.
static bool IsIgnorableScriptOrStyle(Element* aElement) {
  return aElement->IsAnyOfHTMLElements(nsGkAtoms::script, nsGkAtoms::style) &&
         IsDisplayNone(aElement);
}

// Serializes the characters [aStartOffset, aEndOffset) of aText.
//
// @param aText        Text node to serialize.
// @param aStartOffset First character to serialize; must not be negative.
// @param aEndOffset   One past the last character to serialize; -1 means
//                     "up to the end of the text". Values beyond the end are
//                     clamped.
// @return NS_ERROR_INVALID_ARG for a negative start offset, the result of
//         NS_ENSURE_ARG for a null aText, NS_ERROR_FAILURE if the node has
//         no character data buffer, NS_OK otherwise (including all cases in
//         which nothing is written).
NS_IMETHODIMP
nsPlainTextSerializer::AppendText(Text* aText, int32_t aStartOffset,
                                  int32_t aEndOffset) {
  // Inside a subtree that is being ignored (see DoOpenContainer).
  if (mIgnoreAboveIndex != (uint32_t)kNotFound) {
    return NS_OK;
  }

  NS_ASSERTION(aStartOffset >= 0, "Negative start offset for text fragment!");
  if (aStartOffset < 0) return NS_ERROR_INVALID_ARG;

  NS_ENSURE_ARG(aText);

  nsresult rv = NS_OK;

  const CharacterDataBuffer* characterDataBuffer = nullptr;
  if (!(characterDataBuffer = aText->GetCharacterDataBuffer())) {
    return NS_ERROR_FAILURE;
  }

  int32_t fragLength = characterDataBuffer->GetLength();
  int32_t endoffset =
      (aEndOffset == -1) ? fragLength : std::min(aEndOffset, fragLength);
  NS_ASSERTION(aStartOffset <= endoffset,
               "A start offset is beyond the end of the text fragment!");

  // Nothing to do for an empty or inverted range.
  int32_t length = endoffset - aStartOffset;
  if (length <= 0) {
    return NS_OK;
  }

  // If we don't want any output, just return.
  if (!DoOutput()) {
    return NS_OK;
  }

  // Emit the vertical space scheduled by a preceding close tag before any
  // text is written.
  if (mLineBreakDue) {
    EnsureVerticalSpace(mFloatingLines);
  }

  // Check whether this text node is under an element that doesn't need to be
  // serialized. If so, we can return early here.
  if (MustSuppressLeaf()) {
    return NS_OK;
  }

  // Copy the requested range into a UTF-16 string; the buffer stores either
  // two-byte or one-byte characters.
  nsAutoString textstr;
  if (characterDataBuffer->Is2b()) {
    textstr.Assign(characterDataBuffer->Get2b() + aStartOffset, length);
  } else {
    // AssignASCII is for 7-bit character only, so don't use it
    const char* data = characterDataBuffer->Get1b();
    CopyASCIItoUTF16(Substring(data + aStartOffset, data + endoffset), textstr);
  }

  // Mask the text if the text node is in a password field.
  if (aText->HasFlag(NS_MAYBE_MASKED)) {
    TextEditor::MaskString(textstr, *aText, 0, aStartOffset);
  }

  if (mSettings.HasFlag(nsIDocumentEncoder::OutputForPlainTextClipboardCopy)) {
    // XXX it would be nice if we could just use the Write() to handle the line
    // breaks for all cases (bug 1993406).
    Write(textstr);
    return rv;
  }

  // We have to split the string across newlines
  // to match parser behavior
  // Each '\n' and each '\r' produces its own DoAddLineBreak() call.
  int32_t start = 0;
  int32_t offset = textstr.FindCharInSet(u"\n\r");
  while (offset != kNotFound) {
    if (offset > start) {
      // Pass in the line
      DoAddText(Substring(textstr, start, offset - start));
    }

    // Pass in a newline
    DoAddLineBreak();

    start = offset + 1;
    offset = textstr.FindCharInSet(u"\n\r", start);
  }

  // Consume the last bit of the string if there's any left
  if (start < length) {
    if (start) {
      DoAddText(Substring(textstr, start, length - start));
    } else {
      // No line break was found: pass the whole string on as-is.
      DoAddText(textstr);
    }
  }

  return rv;
}

// Serializes a CDATA section exactly like a text node, by forwarding to
// AppendText.
NS_IMETHODIMP
nsPlainTextSerializer::AppendCDATASection(Text* aCDATASection,
                                          int32_t aStartOffset,
                                          int32_t aEndOffset) {
  MOZ_ASSERT(!aCDATASection ||
             aCDATASection->NodeType() == nsINode::CDATA_SECTION_NODE);
  return AppendText(aCDATASection, aStartOffset, aEndOffset);
}

// Pushes the result of IsElementPreformatted(aElement) onto the preformat
// stack. Paired with ForgetElementForPreformat.
NS_IMETHODIMP
nsPlainTextSerializer::ScanElementForPreformat(Element* aElement) {
  mPreformatStack.push(IsElementPreformatted(aElement));
  return NS_OK;
}

// Pops the entry pushed by the matching ScanElementForPreformat call. Popping
// an empty stack is a release-mode assertion failure.
NS_IMETHODIMP
nsPlainTextSerializer::ForgetElementForPreformat(Element* aElement) {
  MOZ_RELEASE_ASSERT(!mPreformatStack.empty(),
                     "Tried to pop without previous push.");
  mPreformatStack.pop();
  return NS_OK;
}

// Handles the start of aElement: void HTML elements are treated as leaves
// (DoAddLeaf), everything else as containers (DoOpenContainer). Also tracks
// how deeply we are nested inside <head>.
//
// @param aElement         Element being opened; must not be null.
// @param aOriginalElement Not used by this implementation.
// @return The result of DoOpenContainer / DoAddLeaf.
NS_IMETHODIMP
nsPlainTextSerializer::AppendElementStart(Element* aElement,
                                          Element* aOriginalElement) {
  NS_ENSURE_ARG(aElement);

  nsresult rv = NS_OK;
  nsAtom* id = GetIdForContent(aElement);
  if (!FragmentOrElement::IsHTMLVoid(id)) {
    rv = DoOpenContainer(aElement, id);
  } else {
    rv = DoAddLeaf(aElement, id);
  }

  // Incremented only after the element itself has been processed.
  if (id == nsGkAtoms::head) {
    ++mHeadLevel;
  }

  return rv;
}

NS_IMETHODIMP
nsPlainTextSerializer::AppendElementEnd(Element* aElement,
                                        Element*
aOriginalElement) {
  // End-of-element counterpart of AppendElementStart: containers are closed
  // through DoCloseContainer, void elements need no closing work.
  NS_ENSURE_ARG(aElement);

  nsresult rv = NS_OK;
  nsAtom* id = GetIdForContent(aElement);
  if (!FragmentOrElement::IsHTMLVoid(id)) {
    rv = DoCloseContainer(aElement, id);
  }

  if (id == nsGkAtoms::head) {
    NS_ASSERTION(mHeadLevel != 0, "mHeadLevel being decremented below 0");
    --mHeadLevel;
  }

  return rv;
}

// Writes the pending content of the current line to the output and then
// finishes the run (see Finish). Init must have been called before.
NS_IMETHODIMP
nsPlainTextSerializer::FlushAndFinish() {
  MOZ_ASSERT(mOutputManager);

  mOutputManager->Flush(mCurrentLine);
  return Finish();
}

// Ends the serialization run by dropping the output manager. The pending
// line is not flushed here.
NS_IMETHODIMP
nsPlainTextSerializer::Finish() {
  mOutputManager.reset();

  return NS_OK;
}

// Reports the length of the text written to the output so far. Init must
// have been called before.
//
// @param aLength Out: current output length.
NS_IMETHODIMP
nsPlainTextSerializer::GetOutputLength(uint32_t& aLength) const {
  MOZ_ASSERT(mOutputManager);

  aLength = mOutputManager->GetOutputLength();

  return NS_OK;
}

// Nothing is emitted at the start of a document.
NS_IMETHODIMP
nsPlainTextSerializer::AppendDocumentStart(Document* aDocument) {
  return NS_OK;
}

// Pushed onto mOLStack for an <ol> when the output is not formatted, so that
// the stack depth still matches the <ol> nesting.
constexpr int32_t kOlStackDummyValue = 0;

// Handles the start tag of a non-void element.
//
// In order, this:
//  1. skips ignorable ruby annotations and non-displayed <script>/<style>
//     elements, counting them in mIgnoredChildNodeLevel;
//  2. for plain-text clipboard copies, schedules a line break if a
//     preformatted block was closed just before;
//  3. returns immediately for OutputRaw;
//  4. pushes aTag onto the tag stack and returns if a subtree is currently
//     being ignored;
//  5. starts ignoring the subtree of <noscript>, <iframe> and <noframes>
//     unless the corresponding flag asks for their content;
//  6. inspects <body> for a preformatted-mail style attribute;
//  7. emits the vertical space, indentation, list bullets/numbers, table
//     cell separators and quote characters that belong to aTag;
//  8. for formatted output, delegates the remaining decoration to
//     OpenContainerForOutputFormatted.
//
// @param aElement Element being opened; must not be null.
// @param aTag     GetIdForContent(aElement); must not be a void HTML tag.
// @return Always NS_OK.
nsresult nsPlainTextSerializer::DoOpenContainer(Element* aElement,
                                                const nsAtom* aTag) {
  MOZ_ASSERT(aElement);
  MOZ_ASSERT(GetIdForContent(aElement) == aTag);
  MOZ_ASSERT(!FragmentOrElement::IsHTMLVoid(aTag));

  if (IsIgnorableRubyAnnotation(aTag)) {
    // Ignorable ruby annotation shouldn't be replaced by a placeholder
    // character, neither any of its descendants.
    mIgnoredChildNodeLevel++;
    return NS_OK;
  }
  if (IsIgnorableScriptOrStyle(aElement)) {
    mIgnoredChildNodeLevel++;
    return NS_OK;
  }

  if (mSettings.HasFlag(nsIDocumentEncoder::OutputForPlainTextClipboardCopy)) {
    // DoCloseContainer sets mPreformattedBlockBoundary when a preformatted
    // block element is closed; the line break is scheduled only now that
    // another container follows.
    if (mPreformattedBlockBoundary && DoOutput()) {
      // Should always end a line, but get no more whitespace
      if (mFloatingLines < 0) mFloatingLines = 0;
      mLineBreakDue = true;
    }
    mPreformattedBlockBoundary = false;
  }

  if (mSettings.HasFlag(nsIDocumentEncoder::OutputRaw)) {
    // Raw means raw.  Don't even think about doing anything fancy
    // here like indenting, adding line breaks or any other
    // characters such as list item bullets, quote characters
    // around <q>, etc.

    return NS_OK;
  }

  // NOTE(review): once the stack holds TagStackSize entries the tag is not
  // pushed, while DoCloseContainer still decrements mTagStackIndex for every
  // close tag. Nesting deeper than TagStackSize therefore looks like it can
  // unbalance the stack - confirm.
  if (mTagStackIndex < TagStackSize) {
    mTagStack[mTagStackIndex++] = aTag;
  }

  if (mIgnoreAboveIndex != (uint32_t)kNotFound) {
    return NS_OK;
  }

  // Reset this so that <blockquote type=cite> doesn't affect the whitespace
  // above random <pre>s below it.
  mHasWrittenCiteBlockquote =
      mHasWrittenCiteBlockquote && aTag == nsGkAtoms::pre;

  bool isInCiteBlockquote = false;

  // XXX special-case <blockquote type=cite> so that we don't add additional
  // newlines before the text.
  if (aTag == nsGkAtoms::blockquote) {
    nsAutoString value;
    nsresult rv = GetAttributeValue(aElement, nsGkAtoms::type, value);
    isInCiteBlockquote = NS_SUCCEEDED(rv) && value.EqualsIgnoreCase("cite");
  }

  if (mLineBreakDue && !isInCiteBlockquote) EnsureVerticalSpace(mFloatingLines);

  // Check if this tag's content that should not be output
  // (Settings::Init always clears OutputNoFramesContent, so with the stored
  // flags the <iframe>/<noframes> part is taken whenever those tags occur.)
  if ((aTag == nsGkAtoms::noscript &&
       !mSettings.HasFlag(nsIDocumentEncoder::OutputNoScriptContent)) ||
      ((aTag == nsGkAtoms::iframe || aTag == nsGkAtoms::noframes) &&
       !mSettings.HasFlag(nsIDocumentEncoder::OutputNoFramesContent))) {
    // Ignore everything that follows the current tag in
    // question until a matching end tag is encountered.
    mIgnoreAboveIndex = mTagStackIndex - 1;
    return NS_OK;
  }

  if (aTag == nsGkAtoms::body) {
    // Try to figure out here whether we have a
    // preformatted style attribute set by Thunderbird.
    //
    // Trigger on the presence of a "pre-wrap" in the
    // style attribute. That's a very simplistic way to do
    // it, but better than nothing.
    nsAutoString style;
    int32_t whitespace;
    if (NS_SUCCEEDED(GetAttributeValue(aElement, nsGkAtoms::style, style)) &&
        (kNotFound != (whitespace = style.Find(u"white-space:")))) {
      // Both "pre-wrap" and plain "pre" after "white-space:" mark the mail
      // as preformatted; the branches differ only in their debug output.
      if (kNotFound != style.LowerCaseFindASCII("pre-wrap", whitespace)) {
#ifdef DEBUG_preformatted
        printf("Set mPreFormattedMail based on style pre-wrap\n");
#endif
        mPreFormattedMail = true;
      } else if (kNotFound != style.LowerCaseFindASCII("pre", whitespace)) {
#ifdef DEBUG_preformatted
        printf("Set mPreFormattedMail based on style pre\n");
#endif
        mPreFormattedMail = true;
      }
    } else {
      /* See comment at end of function. */
      // (Presumably the mInWhitespace comment that now sits at the end of
      // OpenContainerForOutputFormatted.)
      mInWhitespace = true;
      mPreFormattedMail = false;
    }

    return NS_OK;
  }

  // Keep this in sync with DoCloseContainer!
  if (!DoOutput()) {
    return NS_OK;
  }

  if (aTag == nsGkAtoms::p)
    EnsureVerticalSpace(1);
  else if (aTag == nsGkAtoms::pre) {
    // No blank line before a <pre> that is inside, or directly follows, a
    // cite blockquote.
    if (GetLastBool(mIsInCiteBlockquote))
      EnsureVerticalSpace(0);
    else if (mHasWrittenCiteBlockquote) {
      EnsureVerticalSpace(0);
      mHasWrittenCiteBlockquote = false;
    } else
      EnsureVerticalSpace(1);
  } else if (aTag == nsGkAtoms::tr) {
    // New row: no cell has been written for it yet.
    PushBool(mHasWrittenCellsForRow, false);
  } else if (aTag == nsGkAtoms::td || aTag == nsGkAtoms::th) {
    // We must make sure that the content of two table cells get a
    // space between them.

    // To make the separation between cells most obvious and
    // importable, we use a TAB.
    if (mHasWrittenCellsForRow.IsEmpty()) {
      // We don't always see a <tr> (nor a <table>) before the <td> if we're
      // copying part of a table
      PushBool(mHasWrittenCellsForRow, true);  // will never be popped
    } else if (GetLastBool(mHasWrittenCellsForRow)) {
      // Bypass |Write| so that the TAB isn't compressed away.
      AddToLine(u"\t", 1);
      mInWhitespace = true;
    } else {
      // First cell of the row: no separator, just remember it.
      SetLastBool(mHasWrittenCellsForRow, true);
    }
  } else if (aTag == nsGkAtoms::ul) {
    // Indent here to support nested lists, which aren't included in li :-(
    EnsureVerticalSpace(IsInOlOrUl() ? 0 : 1);
    // Must end the current line before we change indention
    mCurrentLine.mIndentation.mLength += kIndentSizeList;
    mULCount++;
  } else if (aTag == nsGkAtoms::ol) {
    EnsureVerticalSpace(IsInOlOrUl() ? 0 : 1);
    if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatted)) {
      // Must end the current line before we change indention
      // The list counter starts at the `start` attribute if it parses as an
      // integer, at 1 otherwise.
      nsAutoString startAttr;
      int32_t startVal = 1;
      if (NS_SUCCEEDED(
              GetAttributeValue(aElement, nsGkAtoms::start, startAttr))) {
        nsresult rv = NS_OK;
        startVal = startAttr.ToInteger(&rv);
        if (NS_FAILED(rv)) {
          startVal = 1;
        }
      }
      mOLStack.AppendElement(startVal);
    } else {
      mOLStack.AppendElement(kOlStackDummyValue);
    }
    mCurrentLine.mIndentation.mLength += kIndentSizeList;  // see ul
  } else if (aTag == nsGkAtoms::li &&
             mSettings.HasFlag(nsIDocumentEncoder::OutputFormatted)) {
    if (mTagStackIndex > 1 && IsInOL()) {
      if (!mOLStack.IsEmpty()) {
        // A valid `value` attribute overrides the running counter.
        nsAutoString valueAttr;
        if (NS_SUCCEEDED(
                GetAttributeValue(aElement, nsGkAtoms::value, valueAttr))) {
          nsresult rv = NS_OK;
          int32_t valueAttrVal = valueAttr.ToInteger(&rv);
          if (NS_SUCCEEDED(rv)) {
            mOLStack.LastElement() = valueAttrVal;
          }
        }
        // This is what nsBulletFrame does for OLs:
        mCurrentLine.mIndentation.mHeader.AppendInt(mOLStack.LastElement(), 10);
        mOLStack.LastElement()++;
      } else {
        mCurrentLine.mIndentation.mHeader.Append(char16_t('#'));
      }

      mCurrentLine.mIndentation.mHeader.Append(char16_t('.'));

    } else {
      // Unordered item: the bullet cycles through "*o+#" with the <ul>
      // nesting depth; outside of any <ul> the last one ('#') is used.
      static const char bulletCharArray[] = "*o+#";
      uint32_t index = mULCount > 0 ? (mULCount - 1) : 3;
      char bulletChar = bulletCharArray[index % 4];
      mCurrentLine.mIndentation.mHeader.Append(char16_t(bulletChar));
    }

    mCurrentLine.mIndentation.mHeader.Append(char16_t(' '));
  } else if (aTag == nsGkAtoms::dl) {
    EnsureVerticalSpace(1);
  } else if (aTag == nsGkAtoms::dt) {
    EnsureVerticalSpace(0);
  } else if (aTag == nsGkAtoms::dd) {
    EnsureVerticalSpace(0);
    mCurrentLine.mIndentation.mLength += kIndentSizeDD;
  } else if (aTag == nsGkAtoms::span) {
    ++mSpanLevel;
  } else if (aTag == nsGkAtoms::blockquote) {
    // Push
    PushBool(mIsInCiteBlockquote, isInCiteBlockquote);
    if (isInCiteBlockquote) {
      // Cite blockquotes raise the quote level instead of the indentation.
      EnsureVerticalSpace(0);
      mCurrentLine.mCiteQuoteLevel++;
    } else {
      EnsureVerticalSpace(1);
      mCurrentLine.mIndentation.mLength +=
          kTabSize;  // Check for some maximum value?
    }
  } else if (aTag == nsGkAtoms::q) {
    Write(u"\""_ns);
  }

  // Else make sure we'll separate block level tags,
  // even if we're about to leave, before doing any other formatting.
  else if (IsCssBlockLevelElement(aElement)) {
    EnsureVerticalSpace(0);
  }

  if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatted)) {
    OpenContainerForOutputFormatted(aElement, aTag);
  }
  return NS_OK;
}

// Start-tag handling that only applies to formatted output: heading
// indentation/numbering and the marker characters written for inline markup.
// The markers are written only when the structs setting is enabled and the
// node is not reported as converted by IsCurrentNodeConverted().
// Always leaves mInWhitespace set to true.
//
// @param aElement Element being opened; must not be null.
// @param aTag     GetIdForContent(aElement); must not be a void HTML tag.
void nsPlainTextSerializer::OpenContainerForOutputFormatted(
    Element* aElement, const nsAtom* aTag) {
  MOZ_ASSERT(aElement);
  MOZ_ASSERT(GetIdForContent(aElement) == aTag);
  MOZ_ASSERT(!FragmentOrElement::IsHTMLVoid(aTag));

  const bool currentNodeIsConverted = IsCurrentNodeConverted(aElement);

  if (aTag == nsGkAtoms::h1 || aTag == nsGkAtoms::h2 || aTag == nsGkAtoms::h3 ||
      aTag == nsGkAtoms::h4 || aTag == nsGkAtoms::h5 || aTag == nsGkAtoms::h6) {
    EnsureVerticalSpace(2);
    if (mSettings.GetHeaderStrategy() ==
        Settings::HeaderStrategy::kNumberHeadingsAndIndentSlightly) {
      mCurrentLine.mIndentation.mLength += kIndentSizeHeaders;
      // Caching
      int32_t level = HeaderLevel(aTag);
      // Increase counter for current level
      mHeaderCounter[level]++;
      // Reset all lower levels
      int32_t i;

      for (i = level + 1; i <= 6; i++) {
        mHeaderCounter[i] = 0;
      }

      // Construct numbers
      // The counters of levels 1..level, each followed by '.', and a final
      // space.
      nsAutoString leadup;
      for (i = 1; i <= level; i++) {
        leadup.AppendInt(mHeaderCounter[i]);
        leadup.Append(char16_t('.'));
      }
      leadup.Append(char16_t(' '));
      Write(leadup);
    } else if (mSettings.GetHeaderStrategy() ==
               Settings::HeaderStrategy::kIndentIncreasedWithHeaderLevel) {
      // Total indentation: kIndentSizeHeaders plus kIndentIncrementHeaders
      // for every level below h1.
      mCurrentLine.mIndentation.mLength += kIndentSizeHeaders;
      for (int32_t i = HeaderLevel(aTag); i > 1; i--) {
        // for h(x), run x-1 times
        mCurrentLine.mIndentation.mLength += kIndentIncrementHeaders;
      }
    }
  } else if (aTag == nsGkAtoms::sup && mSettings.GetStructs() &&
             !currentNodeIsConverted) {
    Write(u"^"_ns);
  } else if (aTag == nsGkAtoms::sub && mSettings.GetStructs() &&
             !currentNodeIsConverted) {
    Write(u"_"_ns);
  } else if (aTag ==
nsGkAtoms::code && mSettings.GetStructs() &&
             !currentNodeIsConverted) {
    Write(u"|"_ns);
  } else if ((aTag == nsGkAtoms::strong || aTag == nsGkAtoms::b) &&
             mSettings.GetStructs() && !currentNodeIsConverted) {
    Write(u"*"_ns);
  } else if ((aTag == nsGkAtoms::em || aTag == nsGkAtoms::i) &&
             mSettings.GetStructs() && !currentNodeIsConverted) {
    Write(u"/"_ns);
  } else if (aTag == nsGkAtoms::u && mSettings.GetStructs() &&
             !currentNodeIsConverted) {
    Write(u"_"_ns);
  }

  /* Container elements are always block elements, so we shouldn't
     output any whitespace immediately after the container tag even if
     there's extra whitespace there because the HTML is pretty-printed
     or something. To ensure that happens, tell the serializer we're
     already in whitespace so it won't output more. */
  mInWhitespace = true;
}

// Handles the end tag of a non-void element; the counterpart of
// DoOpenContainer.
//
// In order, this:
//  1. undoes the mIgnoredChildNodeLevel bookkeeping for ignorable ruby
//     annotations and non-displayed <script>/<style> elements;
//  2. for plain-text clipboard copies, remembers that a preformatted block
//     element was closed (mPreformattedBlockBoundary);
//  3. returns immediately for OutputRaw;
//  4. pops the tag stack and, when the tag that started an ignored subtree
//     is closed, stops ignoring;
//  5. finishes the output for <body>/<html>;
//  6. undoes the indentation and list/quote state set up by DoOpenContainer
//     and schedules the vertical space that follows aTag (mFloatingLines
//     together with mLineBreakDue);
//  7. for formatted output, delegates the remaining decoration to
//     CloseContainerForOutputFormatted.
//
// @param aElement Element being closed; must not be null.
// @param aTag     GetIdForContent(aElement); must not be a void HTML tag.
// @return Always NS_OK.
nsresult nsPlainTextSerializer::DoCloseContainer(Element* aElement,
                                                 const nsAtom* aTag) {
  MOZ_ASSERT(aElement);
  MOZ_ASSERT(GetIdForContent(aElement) == aTag);
  MOZ_ASSERT(!FragmentOrElement::IsHTMLVoid(aTag));

  if (IsIgnorableRubyAnnotation(aTag)) {
    mIgnoredChildNodeLevel--;
    return NS_OK;
  }
  if (IsIgnorableScriptOrStyle(aElement)) {
    mIgnoredChildNodeLevel--;
    return NS_OK;
  }

  if (mSettings.HasFlag(nsIDocumentEncoder::OutputForPlainTextClipboardCopy)) {
    if (DoOutput() && IsElementPreformatted() &&
        IsCssBlockLevelElement(aElement)) {
      // If we're closing a preformatted block element, output a line break
      // when we find a new container.
      mPreformattedBlockBoundary = true;
    }
  }

  if (mSettings.HasFlag(nsIDocumentEncoder::OutputRaw)) {
    // Raw means raw.  Don't even think about doing anything fancy
    // here like indenting, adding line breaks or any other
    // characters such as list item bullets, quote characters
    // around <q>, etc.

    return NS_OK;
  }

  if (mTagStackIndex > 0) {
    --mTagStackIndex;
  }

  // While nothing is being ignored mIgnoreAboveIndex is (uint32_t)kNotFound;
  // this comparison relies on the tag stack index never reaching that value.
  if (mTagStackIndex >= mIgnoreAboveIndex) {
    if (mTagStackIndex == mIgnoreAboveIndex) {
      // We're dealing with the close tag whose matching
      // open tag had set the mIgnoreAboveIndex value.
      // Reset mIgnoreAboveIndex before discarding this tag.
      mIgnoreAboveIndex = (uint32_t)kNotFound;
    }
    return NS_OK;
  }

  MOZ_ASSERT(mOutputManager);

  // End current line if we're ending a block level tag
  if ((aTag == nsGkAtoms::body) || (aTag == nsGkAtoms::html)) {
    // We want the output to end with a new line,
    // but in preformatted areas like text fields,
    // we can't emit newlines that weren't there.
    // So add the newline only in the case of formatted output.
    if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatted)) {
      EnsureVerticalSpace(0);
    } else {
      mOutputManager->Flush(mCurrentLine);
    }
    // We won't want to do anything with these in formatted mode either,
    // so just return now:
    return NS_OK;
  }

  // Keep this in sync with DoOpenContainer!
  if (!DoOutput()) {
    return NS_OK;
  }

  if (aTag == nsGkAtoms::tr) {
    PopBool(mHasWrittenCellsForRow);
    // Should always end a line, but get no more whitespace
    if (mFloatingLines < 0) mFloatingLines = 0;
    mLineBreakDue = true;
  } else if (((aTag == nsGkAtoms::li) || (aTag == nsGkAtoms::dt)) &&
             mSettings.HasFlag(nsIDocumentEncoder::OutputFormatted)) {
    // Items that should always end a line, but get no more whitespace
    if (mFloatingLines < 0) mFloatingLines = 0;
    mLineBreakDue = true;
  } else if (aTag == nsGkAtoms::pre) {
    // One blank line after <pre>, none when inside a cite blockquote.
    mFloatingLines = GetLastBool(mIsInCiteBlockquote) ? 0 : 1;
    mLineBreakDue = true;
  } else if (aTag == nsGkAtoms::ul) {
    // Flush with the list's indentation still in place, then undo it.
    mOutputManager->Flush(mCurrentLine);
    mCurrentLine.mIndentation.mLength -= kIndentSizeList;
    --mULCount;
    // Only the outermost list is followed by a blank line.
    if (!IsInOlOrUl()) {
      mFloatingLines = 1;
      mLineBreakDue = true;
    }
  } else if (aTag == nsGkAtoms::ol) {
    mOutputManager->Flush(mCurrentLine);  // Doing this after decreasing
                                          // OLStackIndex would be wrong.
    mCurrentLine.mIndentation.mLength -= kIndentSizeList;
    MOZ_ASSERT(!mOLStack.IsEmpty(), "Wrong OLStack level!");
    mOLStack.RemoveLastElement();
    if (!IsInOlOrUl()) {
      mFloatingLines = 1;
      mLineBreakDue = true;
    }
  } else if (aTag == nsGkAtoms::dl) {
    mFloatingLines = 1;
    mLineBreakDue = true;
  } else if (aTag == nsGkAtoms::dd) {
    mOutputManager->Flush(mCurrentLine);
    mCurrentLine.mIndentation.mLength -= kIndentSizeDD;
  } else if (aTag == nsGkAtoms::span) {
    NS_ASSERTION(mSpanLevel, "Span level will be negative!");
    --mSpanLevel;
  } else if (aTag == nsGkAtoms::div) {
    if (mFloatingLines < 0) mFloatingLines = 0;
    mLineBreakDue = true;
  } else if (aTag == nsGkAtoms::blockquote) {
    mOutputManager->Flush(mCurrentLine);  // Is this needed?

    // Pop
    bool isInCiteBlockquote = PopBool(mIsInCiteBlockquote);

    if (isInCiteBlockquote) {
      NS_ASSERTION(mCurrentLine.mCiteQuoteLevel,
                   "CiteQuote level will be negative!");
      mCurrentLine.mCiteQuoteLevel--;
      mFloatingLines = 0;
      // Lets a directly following <pre> skip its leading blank line (see
      // DoOpenContainer).
      mHasWrittenCiteBlockquote = true;
    } else {
      mCurrentLine.mIndentation.mLength -= kTabSize;
      mFloatingLines = 1;
    }
    mLineBreakDue = true;
  } else if (aTag == nsGkAtoms::q) {
    Write(u"\""_ns);
  } else if (IsCssBlockLevelElement(aElement)) {
    // All other blocks get 1 vertical space after them
    // in formatted mode, otherwise 0.
    // This is hard. Sometimes 0 is a better number, but
    // how to know?
    if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatted)) {
      EnsureVerticalSpace(1);
    } else {
      if (mFloatingLines < 0) mFloatingLines = 0;
      mLineBreakDue = true;
    }
  }

  if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatted)) {
    CloseContainerForOutputFormatted(aElement, aTag);
  }

  return NS_OK;
}

// End-tag handling that only applies to formatted output; the counterpart of
// OpenContainerForOutputFormatted. It removes the heading indentation again
// and writes the text that follows inline markup.
//
// @param aElement Element being closed; must not be null.
// @param aTag     GetIdForContent(aElement); must not be a void HTML tag.
void nsPlainTextSerializer::CloseContainerForOutputFormatted(
    Element* aElement, const nsAtom* aTag) {
  MOZ_ASSERT(aElement);
  MOZ_ASSERT(GetIdForContent(aElement) == aTag);
  MOZ_ASSERT(!FragmentOrElement::IsHTMLVoid(aTag));

  const bool currentNodeIsConverted = IsCurrentNodeConverted(aElement);

  if (aTag == nsGkAtoms::h1 || aTag == nsGkAtoms::h2 || aTag == nsGkAtoms::h3 ||
      aTag == nsGkAtoms::h4 || aTag == nsGkAtoms::h5 || aTag == nsGkAtoms::h6) {
    using HeaderStrategy = Settings::HeaderStrategy;
    // Both indenting strategies added kIndentSizeHeaders on open.
    if ((mSettings.GetHeaderStrategy() ==
         HeaderStrategy::kIndentIncreasedWithHeaderLevel) ||
        (mSettings.GetHeaderStrategy() ==
         HeaderStrategy::kNumberHeadingsAndIndentSlightly)) {
      mCurrentLine.mIndentation.mLength -= kIndentSizeHeaders;
    }
    if (mSettings.GetHeaderStrategy() ==
        HeaderStrategy::kIndentIncreasedWithHeaderLevel) {
      for (int32_t i = HeaderLevel(aTag); i > 1; i--) {
        // for h(x), run x-1 times
        mCurrentLine.mIndentation.mLength -= kIndentIncrementHeaders;
      }
    }
    EnsureVerticalSpace(1);
  } else if (aTag == nsGkAtoms::a && !currentNodeIsConverted) {
    // Append the link target as " <url>" after the link text.
    nsAutoString url;
    if (NS_SUCCEEDED(GetAttributeValue(aElement, nsGkAtoms::href, url)) &&
        !url.IsEmpty()) {
      nsAutoString temp;
      temp.AssignLiteral(" <");
      temp += url;
      temp.Append(char16_t('>'));
      Write(temp);
    }
  } else if ((aTag == nsGkAtoms::sup || aTag == nsGkAtoms::sub) &&
             mSettings.GetStructs() && !currentNodeIsConverted) {
    Write(kSpace);
  } else if (aTag == nsGkAtoms::code &&
mSettings.GetStructs() && 1099 !currentNodeIsConverted) { 1100 Write(u"|"_ns); 1101 } else if ((aTag == nsGkAtoms::strong || aTag == nsGkAtoms::b) && 1102 mSettings.GetStructs() && !currentNodeIsConverted) { 1103 Write(u"*"_ns); 1104 } else if ((aTag == nsGkAtoms::em || aTag == nsGkAtoms::i) && 1105 mSettings.GetStructs() && !currentNodeIsConverted) { 1106 Write(u"/"_ns); 1107 } else if (aTag == nsGkAtoms::u && mSettings.GetStructs() && 1108 !currentNodeIsConverted) { 1109 Write(u"_"_ns); 1110 } 1111 } 1112 1113 bool nsPlainTextSerializer::MustSuppressLeaf() const { 1114 if (mIgnoredChildNodeLevel > 0) { 1115 return true; 1116 } 1117 1118 if ((mTagStackIndex > 1 && 1119 mTagStack[mTagStackIndex - 2] == nsGkAtoms::select) || 1120 (mTagStackIndex > 0 && 1121 mTagStack[mTagStackIndex - 1] == nsGkAtoms::select)) { 1122 // Don't output the contents of SELECT elements; 1123 // Might be nice, eventually, to output just the selected element. 1124 // Read more in bug 31994. 1125 return true; 1126 } 1127 1128 return false; 1129 } 1130 1131 void nsPlainTextSerializer::DoAddLineBreak() { 1132 MOZ_ASSERT(DoOutput()); 1133 MOZ_ASSERT(!mLineBreakDue); 1134 MOZ_ASSERT(mIgnoreAboveIndex == (uint32_t)kNotFound); 1135 MOZ_ASSERT(!MustSuppressLeaf()); 1136 1137 // The only times we want to pass along whitespace from the original 1138 // html source are if we're forced into preformatted mode via flags, 1139 // or if we're prettyprinting and we're inside a <pre>. 1140 // Otherwise, either we're collapsing to minimal text, or we're 1141 // prettyprinting to mimic the html format, and in neither case 1142 // does the formatting of the html source help us. 
1143 if (mSettings.HasFlag(nsIDocumentEncoder::OutputPreformatted) || 1144 (mPreFormattedMail && !mSettings.GetWrapColumn()) || 1145 IsElementPreformatted()) { 1146 EnsureVerticalSpace(mEmptyLines + 1); 1147 } else if (!mInWhitespace) { 1148 Write(kSpace); 1149 mInWhitespace = true; 1150 } 1151 } 1152 1153 void nsPlainTextSerializer::DoAddText(const nsAString& aText) { 1154 MOZ_ASSERT(DoOutput()); 1155 MOZ_ASSERT(!mLineBreakDue); 1156 MOZ_ASSERT(mIgnoreAboveIndex == (uint32_t)kNotFound); 1157 MOZ_ASSERT(!MustSuppressLeaf()); 1158 1159 // Reset this, as it’s no longer true after serializing texts, so the next 1160 // <pre> element will get a leading newline. 1161 mHasWrittenCiteBlockquote = false; 1162 1163 Write(aText); 1164 } 1165 1166 void CreateLineOfDashes(nsAString& aResult, const uint32_t aWrapColumn) { 1167 MOZ_ASSERT(aResult.IsEmpty()); 1168 1169 const uint32_t width = (aWrapColumn > 0 ? aWrapColumn : 25); 1170 while (aResult.Length() < width) { 1171 aResult.Append(char16_t('-')); 1172 } 1173 } 1174 1175 nsresult nsPlainTextSerializer::DoAddLeaf(Element* aElement, 1176 const nsAtom* aTag) { 1177 MOZ_ASSERT(aElement); 1178 MOZ_ASSERT(GetIdForContent(aElement) == aTag); 1179 MOZ_ASSERT(FragmentOrElement::IsHTMLVoid(aTag)); 1180 1181 mPreformattedBlockBoundary = false; 1182 1183 if (!DoOutput()) { 1184 return NS_OK; 1185 } 1186 1187 if (mLineBreakDue) EnsureVerticalSpace(mFloatingLines); 1188 1189 if (MustSuppressLeaf()) { 1190 return NS_OK; 1191 } 1192 1193 if (aTag == nsGkAtoms::br) { 1194 // Another egregious editor workaround, see bug 38194: 1195 // ignore the bogus br tags that the editor sticks here and there. 1196 // FYI: `brElement` may be `nullptr` if the element is <br> element 1197 // of non-HTML element. 1198 // XXX Do we need to call `EnsureVerticalSpace()` when the <br> element 1199 // is not an HTML element? 
1200 HTMLBRElement* brElement = HTMLBRElement::FromNodeOrNull(aElement); 1201 if (!brElement || !brElement->IsPaddingForEmptyLastLine()) { 1202 EnsureVerticalSpace(mEmptyLines + 1); 1203 } 1204 } else if (aTag == nsGkAtoms::hr && 1205 mSettings.HasFlag(nsIDocumentEncoder::OutputFormatted)) { 1206 EnsureVerticalSpace(0); 1207 1208 // Make a line of dashes as wide as the wrap width 1209 // XXX honoring percentage would be nice 1210 nsAutoString line; 1211 CreateLineOfDashes(line, mSettings.GetWrapColumn()); 1212 Write(line); 1213 1214 EnsureVerticalSpace(0); 1215 } else if (aTag == nsGkAtoms::img) { 1216 /* Output (in decreasing order of preference) 1217 alt, title or nothing */ 1218 // See <http://www.w3.org/TR/REC-html40/struct/objects.html#edef-IMG> 1219 nsAutoString imageDescription; 1220 if (NS_SUCCEEDED( 1221 GetAttributeValue(aElement, nsGkAtoms::alt, imageDescription))) { 1222 // If the alt attribute has an empty value (|alt=""|), output nothing 1223 } else if (NS_SUCCEEDED(GetAttributeValue(aElement, nsGkAtoms::title, 1224 imageDescription)) && 1225 !imageDescription.IsEmpty()) { 1226 imageDescription = u" ["_ns + imageDescription + u"] "_ns; 1227 } 1228 1229 Write(imageDescription); 1230 } 1231 1232 return NS_OK; 1233 } 1234 1235 /** 1236 * Adds as many newline as necessary to get |aNumberOfRows| empty lines 1237 * 1238 * aNumberOfRows = -1 : Being in the middle of some line of text 1239 * aNumberOfRows = 0 : Being at the start of a line 1240 * aNumberOfRows = n>0 : Having n empty lines before the current line. 1241 */ 1242 void nsPlainTextSerializer::EnsureVerticalSpace(const int32_t aNumberOfRows) { 1243 // If we have something in the indent we probably want to output 1244 // it and it's not included in the count for empty lines so we don't 1245 // realize that we should start a new line. 
1246 if (aNumberOfRows >= 0 && !mCurrentLine.mIndentation.mHeader.IsEmpty()) { 1247 EndHardBreakLine(); 1248 mInWhitespace = true; 1249 } 1250 1251 while (mEmptyLines < aNumberOfRows) { 1252 EndHardBreakLine(); 1253 mInWhitespace = true; 1254 } 1255 mLineBreakDue = false; 1256 mFloatingLines = -1; 1257 } 1258 1259 void nsPlainTextSerializer::OutputManager::Flush(CurrentLine& aLine) { 1260 if (!aLine.mContent.IsEmpty()) { 1261 aLine.MaybeReplaceNbspsInContent(mFlags); 1262 1263 Append(aLine, StripTrailingWhitespaces::kNo); 1264 1265 aLine.ResetContentAndIndentationHeader(); 1266 } 1267 } 1268 1269 static bool IsSpaceStuffable(const char16_t* s) { 1270 return (s[0] == '>' || s[0] == ' ' || s[0] == kNBSP || 1271 NS_strncmp(s, u"From ", 5) == 0); 1272 } 1273 1274 void nsPlainTextSerializer::PerformWrapAndOutputCompleteLines( 1275 const Settings& aSettings, CurrentLine& aLine, OutputManager& aOutput, 1276 bool aUseLineBreaker, nsPlainTextSerializer* aSerializer) { 1277 if (!aSettings.MayWrap()) { 1278 return; 1279 } 1280 1281 // Yes, wrap! 1282 // The "+4" is to avoid wrap lines that only would be a couple 1283 // of letters too long. We give this bonus only if the 1284 // wrapcolumn is more than 20. 1285 const uint32_t wrapColumn = aSettings.GetWrapColumn(); 1286 uint32_t bonuswidth = (wrapColumn > 20) ? 4 : 0; 1287 while (!aLine.mContent.IsEmpty()) { 1288 const uint32_t prefixwidth = aLine.DeterminePrefixWidth(); 1289 // The width of the line as it will appear on the screen (approx.). 1290 const uint32_t currentLineContentWidth = 1291 GetUnicharStringWidth(aLine.mContent); 1292 if (currentLineContentWidth + prefixwidth <= wrapColumn + bonuswidth) { 1293 break; 1294 } 1295 1296 const int32_t goodSpace = 1297 aLine.FindWrapIndexForContent(wrapColumn, aUseLineBreaker); 1298 1299 const int32_t contentLength = aLine.mContent.Length(); 1300 if (goodSpace <= 0 || goodSpace >= contentLength) { 1301 // Nothing to do. 
Hopefully we get more data later to use for a place to 1302 // break line. 1303 break; 1304 } 1305 // Found a place to break 1306 // -1 (trim a char at the break position) only if the line break was a 1307 // space. 1308 nsAutoString restOfContent; 1309 if (nsCRT::IsAsciiSpace(aLine.mContent.CharAt(goodSpace))) { 1310 aLine.mContent.Right(restOfContent, contentLength - goodSpace - 1); 1311 } else { 1312 aLine.mContent.Right(restOfContent, contentLength - goodSpace); 1313 } 1314 // if breaker was U+0020, it has to consider for delsp=yes support 1315 const bool breakBySpace = aLine.mContent.CharAt(goodSpace) == ' '; 1316 aLine.mContent.Truncate(goodSpace); 1317 // Append the line to the output. 1318 if (!aLine.mContent.IsEmpty()) { 1319 // Trim _one_ potential trailing newline. 1320 if (aLine.mContent.Last() == '\n') { 1321 aLine.mContent.Truncate(goodSpace - 1); 1322 } 1323 if (!aSettings.HasFlag(nsIDocumentEncoder::OutputPreformatted)) { 1324 aLine.mContent.Trim(" ", false, true, false); 1325 } 1326 if (aSettings.HasFlag(nsIDocumentEncoder::OutputFormatFlowed) && 1327 !aLine.mIndentation.mLength) { 1328 // Add the soft part of the soft linebreak (RFC 2646 4.1) 1329 // We only do this when there is no indentation since format=flowed 1330 // lines and indentation doesn't work well together. 1331 1332 // If breaker character is ASCII space with RFC 3676 support 1333 // (delsp=yes), add twice space. 
1334 if (aSettings.HasFlag(nsIDocumentEncoder::OutputFormatDelSp) && 1335 breakBySpace) { 1336 aLine.mContent.AppendLiteral(" "); 1337 } else { 1338 aLine.mContent.Append(char16_t(' ')); 1339 } 1340 } 1341 AppendLineToOutput(aSettings, aLine, aOutput); 1342 if (aSerializer) { 1343 aSerializer->ResetStateAfterLine(); 1344 aSerializer->mEmptyLines = -1; 1345 } 1346 } 1347 aLine.mContent.Truncate(); 1348 // Space stuffing a la RFC 2646 (format=flowed) 1349 if (aSettings.HasFlag(nsIDocumentEncoder::OutputFormatFlowed)) { 1350 aLine.mSpaceStuffed = !restOfContent.IsEmpty() && 1351 IsSpaceStuffable(restOfContent.get()) && 1352 // We space-stuff quoted lines anyway 1353 aLine.mCiteQuoteLevel == 0; 1354 } 1355 aLine.mContent.Append(restOfContent); 1356 } 1357 } 1358 1359 void nsPlainTextSerializer::MaybeWrapAndOutputCompleteLines() { 1360 PerformWrapAndOutputCompleteLines(mSettings, mCurrentLine, *mOutputManager, 1361 mUseLineBreaker, this); 1362 } 1363 1364 /** 1365 * This function adds a piece of text to the current stored line. If we are 1366 * wrapping text and the stored line will become too long, a suitable 1367 * location to wrap will be found and the line that's complete will be 1368 * output. 1369 */ 1370 void nsPlainTextSerializer::AddToLine(const char16_t* aLineFragment, 1371 int32_t aLineFragmentLength) { 1372 if (mLineBreakDue) EnsureVerticalSpace(mFloatingLines); 1373 1374 if (mCurrentLine.mContent.IsEmpty()) { 1375 if (0 == aLineFragmentLength) { 1376 return; 1377 } 1378 1379 if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatFlowed)) { 1380 // Space stuffing a la RFC 2646 (format=flowed). 1381 // We space-stuff quoted lines anyway 1382 mCurrentLine.mSpaceStuffed = 1383 IsSpaceStuffable(aLineFragment) && mCurrentLine.mCiteQuoteLevel == 0; 1384 } 1385 mEmptyLines = -1; 1386 } 1387 1388 mCurrentLine.mContent.Append(aLineFragment, aLineFragmentLength); 1389 1390 MaybeWrapAndOutputCompleteLines(); 1391 } 1392 1393 // The signature separator (RFC 2646). 
1394 const char kSignatureSeparator[] = "-- "; 1395 1396 // The OpenPGP dash-escaped signature separator in inline 1397 // signed messages according to the OpenPGP standard (RFC 2440). 1398 const char kDashEscapedSignatureSeparator[] = "- -- "; 1399 1400 static bool IsSignatureSeparator(const nsAString& aString) { 1401 return aString.EqualsLiteral(kSignatureSeparator) || 1402 aString.EqualsLiteral(kDashEscapedSignatureSeparator); 1403 } 1404 1405 void nsPlainTextSerializer::AppendLineToOutput(const Settings& aSettings, 1406 CurrentLine& aLine, 1407 OutputManager& aOutput) { 1408 aLine.MaybeReplaceNbspsInContent(aSettings.GetFlags()); 1409 // If we don't have anything "real" to output we have to 1410 // make sure the indent doesn't end in a space since that 1411 // would trick a format=flowed-aware receiver. 1412 aOutput.Append(aLine, OutputManager::StripTrailingWhitespaces::kMaybe); 1413 aOutput.AppendLineBreak(); 1414 aLine.ResetContentAndIndentationHeader(); 1415 } 1416 1417 /** 1418 * Outputs the contents of mCurrentLine.mContent, and resets line 1419 * specific variables. Also adds an indentation and prefix if there is one 1420 * specified. Strips ending spaces from the line if it isn't preformatted. 1421 */ 1422 void nsPlainTextSerializer::EndHardBreakLine() { 1423 /* In non-preformatted mode, remove spaces from the end of the line for 1424 * format=flowed compatibility. Don't do this for these special cases: 1425 * "-- ", the signature separator (RFC 2646) shouldn't be touched and 1426 * "- -- ", the OpenPGP dash-escaped signature separator in inline 1427 * signed messages according to the OpenPGP standard (RFC 2440). 
1428 */ 1429 if (!mSettings.HasFlag(nsIDocumentEncoder::OutputPreformatted) && 1430 !IsSignatureSeparator(mCurrentLine.mContent)) { 1431 mCurrentLine.mContent.Trim(" ", false, true, false); 1432 } 1433 1434 // Hard break 1435 if (mCurrentLine.HasContentOrIndentationHeader()) { 1436 mEmptyLines = 0; 1437 } else { 1438 mEmptyLines++; 1439 } 1440 1441 MOZ_ASSERT(mOutputManager); 1442 AppendLineToOutput(mSettings, mCurrentLine, *mOutputManager); 1443 ResetStateAfterLine(); 1444 } 1445 1446 /** 1447 * Creates the calculated and stored indent and text in the indentation. That is 1448 * quote chars and numbers for numbered lists and such. 1449 */ 1450 void nsPlainTextSerializer::CurrentLine::CreateQuotesAndIndent( 1451 nsAString& aResult) const { 1452 // Put the mail quote "> " chars in, if appropriate: 1453 if (mCiteQuoteLevel > 0) { 1454 nsAutoString quotes; 1455 for (int i = 0; i < mCiteQuoteLevel; i++) { 1456 quotes.Append(char16_t('>')); 1457 } 1458 if (!mContent.IsEmpty()) { 1459 /* Better don't output a space here, if the line is empty, 1460 in case a receiving format=flowed-aware UA thinks, this were a flowed 1461 line, which it isn't - it's just empty. (Flowed lines may be joined 1462 with the following one, so the empty line may be lost completely.) 
*/ 1463 quotes.Append(char16_t(' ')); 1464 } 1465 aResult = quotes; 1466 } 1467 1468 // Indent if necessary 1469 int32_t indentwidth = mIndentation.mLength - mIndentation.mHeader.Length(); 1470 if (mSpaceStuffed) { 1471 indentwidth += 1; 1472 } 1473 1474 // Don't make empty lines look flowed 1475 if (indentwidth > 0 && HasContentOrIndentationHeader()) { 1476 nsAutoString spaces; 1477 for (int i = 0; i < indentwidth; ++i) { 1478 spaces.Append(char16_t(' ')); 1479 } 1480 aResult += spaces; 1481 } 1482 1483 if (!mIndentation.mHeader.IsEmpty()) { 1484 aResult += mIndentation.mHeader; 1485 } 1486 } 1487 1488 static bool IsLineFeedCarriageReturnBlankOrTab(char16_t c) { 1489 return ('\n' == c || '\r' == c || ' ' == c || '\t' == c); 1490 } 1491 1492 static void ReplaceVisiblyTrailingNbsps(nsAString& aString) { 1493 const int32_t totLen = aString.Length(); 1494 for (int32_t i = totLen - 1; i >= 0; i--) { 1495 char16_t c = aString[i]; 1496 if (IsLineFeedCarriageReturnBlankOrTab(c)) { 1497 continue; 1498 } 1499 if (kNBSP == c) { 1500 aString.Replace(i, 1, ' '); 1501 } else { 1502 break; 1503 } 1504 } 1505 } 1506 1507 void nsPlainTextSerializer::ConvertToLinesAndOutput(const nsAString& aString) { 1508 nsAString::const_iterator iter; 1509 aString.BeginReading(iter); 1510 nsAString::const_iterator done_searching; 1511 aString.EndReading(done_searching); 1512 1513 // Put the mail quote "> " chars in, if appropriate. 1514 // Have to put it in before every line. 1515 while (iter != done_searching) { 1516 nsAString::const_iterator bol = iter; 1517 nsAString::const_iterator newline = done_searching; 1518 1519 // Find one of '\n' or '\r' using iterators since nsAString 1520 // doesn't have the old FindCharInSet function. 
1521 bool spacesOnly = true; 1522 while (iter != done_searching) { 1523 if ('\n' == *iter || '\r' == *iter) { 1524 newline = iter; 1525 break; 1526 } 1527 if (' ' != *iter) { 1528 spacesOnly = false; 1529 } 1530 ++iter; 1531 } 1532 1533 // Done searching 1534 nsAutoString stringpart; 1535 bool outputLineBreak = false; 1536 bool isNewLineCRLF = false; 1537 if (newline == done_searching) { 1538 // No new lines. 1539 stringpart.Assign(Substring(bol, newline)); 1540 if (!stringpart.IsEmpty()) { 1541 char16_t lastchar = stringpart.Last(); 1542 mInWhitespace = IsLineFeedCarriageReturnBlankOrTab(lastchar); 1543 } 1544 mEmptyLines = -1; 1545 } else { 1546 // There is a newline 1547 stringpart.Assign(Substring(bol, newline)); 1548 mInWhitespace = true; 1549 outputLineBreak = true; 1550 if ('\r' == *iter++ && '\n' == *iter) { 1551 // There was a CRLF in the input. This used to be illegal and 1552 // stripped by the parser. Apparently not anymore. Let's skip 1553 // over the LF. 1554 newline = iter++; 1555 isNewLineCRLF = true; 1556 } 1557 } 1558 1559 if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatFlowed)) { 1560 if ((outputLineBreak || !spacesOnly) && // bugs 261467,125928 1561 !IsQuotedLine(stringpart) && !IsSignatureSeparator(stringpart)) { 1562 stringpart.Trim(" ", false, true, true); 1563 } 1564 mCurrentLine.mSpaceStuffed = 1565 IsSpaceStuffable(stringpart.get()) && !IsQuotedLine(stringpart); 1566 } 1567 mCurrentLine.mContent.Append(stringpart); 1568 1569 mCurrentLine.MaybeReplaceNbspsInContent(mSettings.GetFlags()); 1570 1571 mOutputManager->Append(mCurrentLine, 1572 OutputManager::StripTrailingWhitespaces::kNo); 1573 if (outputLineBreak) { 1574 if (mSettings.HasFlag( 1575 nsIDocumentEncoder::OutputForPlainTextClipboardCopy)) { 1576 // This is aligned with other browsers that they don't convert CRLF to 1577 // the platform line break. 
1578 if ('\n' == *newline) { 1579 mOutputManager->AppendLineBreak(isNewLineCRLF); 1580 // If there is preceding text, we are starting a new line, so reset 1581 // mEmptyLines. If there is no preceding text, we are outputting 1582 // multiple line breaks, so we count them toward mEmptyLines. 1583 mEmptyLines = stringpart.IsEmpty() ? mEmptyLines + 1 : 0; 1584 } else { 1585 mOutputManager->Append(u"\r"_ns); 1586 // `\r` isn’t treated as a line break here, so we’re now in the middle 1587 // of the line. 1588 mEmptyLines = -1; 1589 } 1590 } else { 1591 mOutputManager->AppendLineBreak(); 1592 mEmptyLines = 0; 1593 } 1594 } 1595 1596 mCurrentLine.ResetContentAndIndentationHeader(); 1597 } 1598 } 1599 1600 /** 1601 * Write a string. This is the highlevel function to use to get text output. 1602 * By using AddToLine, Output, EndHardBreakLine and other functions it handles 1603 * quotation, line wrapping, indentation, whitespace compression and other 1604 * things. 1605 */ 1606 void nsPlainTextSerializer::Write(const nsAString& aStr) { 1607 // XXX Copy necessary to use nsString methods and gain 1608 // access to underlying buffer 1609 nsAutoString str(aStr); 1610 1611 #ifdef DEBUG_wrapping 1612 printf("Write(%s): wrap col = %d\n", NS_ConvertUTF16toUTF8(str).get(), 1613 mSettings.GetWrapColumn()); 1614 #endif 1615 1616 const int32_t totLen = str.Length(); 1617 1618 // If the string is empty, do nothing: 1619 if (totLen <= 0) return; 1620 1621 // For Flowed text change nbsp-ses to spaces at end of lines to allow them 1622 // to be cut off along with usual spaces if required. (bug #125928) 1623 if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatFlowed)) { 1624 ReplaceVisiblyTrailingNbsps(str); 1625 } 1626 1627 // We have two major codepaths here. One that does preformatted text and one 1628 // that does normal formatted text. The one for preformatted text calls 1629 // Output directly while the other code path goes through AddToLine. 
1630 if ((mPreFormattedMail && !mSettings.GetWrapColumn()) || 1631 (IsElementPreformatted() && !mPreFormattedMail) || 1632 (mSpanLevel > 0 && mEmptyLines >= 0 && IsQuotedLine(str))) { 1633 // No intelligent wrapping. 1634 1635 // This mustn't be mixed with intelligent wrapping without clearing 1636 // the mCurrentLine.mContent buffer before!!! 1637 NS_ASSERTION(mCurrentLine.mContent.IsEmpty() || 1638 (IsElementPreformatted() && !mPreFormattedMail), 1639 "Mixed wrapping data and nonwrapping data on the same line"); 1640 MOZ_ASSERT(mOutputManager); 1641 1642 if (!mCurrentLine.mContent.IsEmpty()) { 1643 mOutputManager->Flush(mCurrentLine); 1644 } 1645 1646 ConvertToLinesAndOutput(str); 1647 return; 1648 } 1649 1650 // Intelligent handling of text 1651 // If needed, strip out all "end of lines" 1652 // and multiple whitespace between words 1653 int32_t nextpos; 1654 const char16_t* offsetIntoBuffer = nullptr; 1655 1656 int32_t bol = 0; 1657 while (bol < totLen) { // Loop over lines 1658 // Find a place where we may have to do whitespace compression 1659 nextpos = str.FindCharInSet(u" \t\n\r", bol); 1660 #ifdef DEBUG_wrapping 1661 nsAutoString remaining; 1662 str.Right(remaining, totLen - bol); 1663 foo = ToNewCString(remaining); 1664 // printf("Next line: bol = %d, newlinepos = %d, totLen = %d, " 1665 // "string = '%s'\n", bol, nextpos, totLen, foo); 1666 free(foo); 1667 #endif 1668 1669 if (nextpos == kNotFound) { 1670 // The rest of the string 1671 offsetIntoBuffer = str.get() + bol; 1672 AddToLine(offsetIntoBuffer, totLen - bol); 1673 bol = totLen; 1674 mInWhitespace = false; 1675 } else { 1676 // There's still whitespace left in the string 1677 if (nextpos != 0 && (nextpos + 1) < totLen) { 1678 offsetIntoBuffer = str.get() + nextpos; 1679 // skip '\n' if it is between CJ chars 1680 if (offsetIntoBuffer[0] == '\n' && IS_CJ_CHAR(offsetIntoBuffer[-1]) && 1681 IS_CJ_CHAR(offsetIntoBuffer[1])) { 1682 offsetIntoBuffer = str.get() + bol; 1683 AddToLine(offsetIntoBuffer, 
nextpos - bol); 1684 bol = nextpos + 1; 1685 continue; 1686 } 1687 } 1688 // If we're already in whitespace and not preformatted, just skip it: 1689 if (mInWhitespace && (nextpos == bol) && !mPreFormattedMail && 1690 !mSettings.HasFlag(nsIDocumentEncoder::OutputPreformatted)) { 1691 // Skip whitespace 1692 bol++; 1693 continue; 1694 } 1695 1696 if (nextpos == bol && 1697 !mSettings.HasFlag( 1698 nsIDocumentEncoder::OutputForPlainTextClipboardCopy)) { 1699 // Note that we are in whitespace. 1700 mInWhitespace = true; 1701 offsetIntoBuffer = str.get() + nextpos; 1702 // XXX Why do we need to keep the very first character when compressing 1703 // the reset? 1704 AddToLine(offsetIntoBuffer, 1); 1705 bol++; 1706 continue; 1707 } 1708 1709 mInWhitespace = true; 1710 1711 offsetIntoBuffer = str.get() + bol; 1712 if (mPreFormattedMail || 1713 mSettings.HasFlag(nsIDocumentEncoder::OutputPreformatted)) { 1714 // Preserve the real whitespace character 1715 nextpos++; 1716 AddToLine(offsetIntoBuffer, nextpos - bol); 1717 bol = nextpos; 1718 } else { 1719 // Replace the whitespace with a space 1720 AddToLine(offsetIntoBuffer, nextpos - bol); 1721 AddToLine(kSpace.get(), 1); 1722 bol = nextpos + 1; // Let's eat the whitespace 1723 } 1724 } 1725 } // Continue looping over the string 1726 } 1727 1728 /** 1729 * Gets the value of an attribute in a string. If the function returns 1730 * NS_ERROR_NOT_AVAILABLE, there was none such attribute specified. 1731 */ 1732 nsresult nsPlainTextSerializer::GetAttributeValue(Element* aElement, 1733 const nsAtom* aName, 1734 nsString& aValueRet) const { 1735 MOZ_ASSERT(aElement); 1736 MOZ_ASSERT(aName); 1737 1738 if (aElement->GetAttr(aName, aValueRet)) { 1739 return NS_OK; 1740 } 1741 1742 return NS_ERROR_NOT_AVAILABLE; 1743 } 1744 1745 /** 1746 * Returns true, if the element was inserted by Moz' TXT->HTML converter. 1747 * In this case, we should ignore it. 
1748 */ 1749 bool nsPlainTextSerializer::IsCurrentNodeConverted(Element* aElement) const { 1750 MOZ_ASSERT(aElement); 1751 1752 nsAutoString value; 1753 nsresult rv = GetAttributeValue(aElement, nsGkAtoms::_class, value); 1754 return (NS_SUCCEEDED(rv) && 1755 (StringBeginsWith(value, u"moz-txt"_ns, 1756 nsASCIICaseInsensitiveStringComparator) || 1757 StringBeginsWith(value, u"\"moz-txt"_ns, 1758 nsASCIICaseInsensitiveStringComparator))); 1759 } 1760 1761 // static 1762 nsAtom* nsPlainTextSerializer::GetIdForContent(nsIContent* aContent) { 1763 if (!aContent->IsHTMLElement()) { 1764 return nullptr; 1765 } 1766 1767 nsAtom* localName = aContent->NodeInfo()->NameAtom(); 1768 return localName->IsStatic() ? localName : nullptr; 1769 } 1770 1771 bool nsPlainTextSerializer::IsElementPreformatted() const { 1772 return !mPreformatStack.empty() && mPreformatStack.top(); 1773 } 1774 1775 bool nsPlainTextSerializer::IsElementPreformatted(Element* aElement) { 1776 RefPtr<const ComputedStyle> computedStyle = 1777 nsComputedDOMStyle::GetComputedStyleNoFlush(aElement); 1778 if (computedStyle) { 1779 const nsStyleText* textStyle = computedStyle->StyleText(); 1780 return textStyle->WhiteSpaceOrNewlineIsSignificant(); 1781 } 1782 // Fall back to looking at the tag, in case there is no style information. 1783 return GetIdForContent(aElement) == nsGkAtoms::pre; 1784 } 1785 1786 bool nsPlainTextSerializer::IsCssBlockLevelElement(Element* aElement) { 1787 RefPtr<const ComputedStyle> computedStyle = 1788 nsComputedDOMStyle::GetComputedStyleNoFlush(aElement); 1789 if (computedStyle) { 1790 const nsStyleDisplay* displayStyle = computedStyle->StyleDisplay(); 1791 return displayStyle->IsBlockOutsideStyle(); 1792 } 1793 // Fall back to looking at the tag, in case there is no style information. 1794 return nsContentUtils::IsHTMLBlockLevelElement(aElement); 1795 } 1796 1797 /** 1798 * This method is required only to identify LI's inside OL. 
1799 * Returns TRUE if we are inside an OL tag and FALSE otherwise. 1800 */ 1801 bool nsPlainTextSerializer::IsInOL() const { 1802 int32_t i = mTagStackIndex; 1803 while (--i >= 0) { 1804 if (mTagStack[i] == nsGkAtoms::ol) return true; 1805 if (mTagStack[i] == nsGkAtoms::ul) { 1806 // If a UL is reached first, LI belongs the UL nested in OL. 1807 return false; 1808 } 1809 } 1810 // We may reach here for orphan LI's. 1811 return false; 1812 } 1813 1814 bool nsPlainTextSerializer::IsInOlOrUl() const { 1815 return (mULCount > 0) || !mOLStack.IsEmpty(); 1816 } 1817 1818 /* 1819 @return 0 = no header, 1 = h1, ..., 6 = h6 1820 */ 1821 int32_t HeaderLevel(const nsAtom* aTag) { 1822 if (aTag == nsGkAtoms::h1) { 1823 return 1; 1824 } 1825 if (aTag == nsGkAtoms::h2) { 1826 return 2; 1827 } 1828 if (aTag == nsGkAtoms::h3) { 1829 return 3; 1830 } 1831 if (aTag == nsGkAtoms::h4) { 1832 return 4; 1833 } 1834 if (aTag == nsGkAtoms::h5) { 1835 return 5; 1836 } 1837 if (aTag == nsGkAtoms::h6) { 1838 return 6; 1839 } 1840 return 0; 1841 } 1842 1843 /* These functions define the column width of an ISO 10646 character 1844 * as follows: 1845 * 1846 * - The null character (U+0000) has a column width of 0. 1847 * 1848 * - Other C0/C1 control characters and DEL will lead to a return 1849 * value of -1. 1850 * 1851 * - Non-spacing and enclosing combining characters (general 1852 * category code Mn or Me in the Unicode database) have a 1853 * column width of 0. 1854 * 1855 * - Spacing characters in the East Asian Wide (W) or East Asian 1856 * FullWidth (F) category as defined in Unicode Technical 1857 * Report #11 have a column width of 2. 1858 * 1859 * - All remaining characters (including all printable 1860 * ISO 8859-1 and WGL4 characters, Unicode control characters, 1861 * etc.) have a column width of 1. 
1862 */ 1863 1864 int32_t GetUnicharWidth(char32_t aCh) { 1865 /* test for 8-bit control characters */ 1866 if (aCh == 0) { 1867 return 0; 1868 } 1869 if (aCh < 32 || (aCh >= 0x7f && aCh < 0xa0)) { 1870 return -1; 1871 } 1872 1873 /* The first combining char in Unicode is U+0300 */ 1874 if (aCh < 0x0300) { 1875 return 1; 1876 } 1877 1878 auto gc = unicode::GetGeneralCategory(aCh); 1879 if (gc == HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK || 1880 gc == HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK) { 1881 return 0; 1882 } 1883 1884 /* if we arrive here, ucs is not a combining or C0/C1 control character */ 1885 1886 /* fast test for majority of non-wide scripts */ 1887 if (aCh < 0x1100) { 1888 return 1; 1889 } 1890 1891 return intl::UnicodeProperties::IsEastAsianWidthFW(aCh) ? 2 : 1; 1892 } 1893 1894 int32_t GetUnicharStringWidth(Span<const char16_t> aString) { 1895 int32_t width = 0; 1896 for (auto iter = aString.begin(); iter != aString.end(); ++iter) { 1897 char32_t c = *iter; 1898 if (NS_IS_HIGH_SURROGATE(c) && (iter + 1) != aString.end() && 1899 NS_IS_LOW_SURROGATE(*(iter + 1))) { 1900 c = SURROGATE_TO_UCS4(c, *++iter); 1901 } 1902 const int32_t w = GetUnicharWidth(c); 1903 // Taking 1 as the width of non-printable character, for bug 94475. 1904 width += (w < 0 ? 1 : w); 1905 } 1906 return width; 1907 }