tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

nsPlainTextSerializer.cpp (63420B)


      1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
      2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
      3 /* This Source Code Form is subject to the terms of the Mozilla Public
      4 * License, v. 2.0. If a copy of the MPL was not distributed with this
      5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      6 
      7 /*
      8 * nsIContentSerializer implementation that can be used with an
      9 * nsIDocumentEncoder to convert a DOM into plaintext in a nice way
     10 * (eg for copy/paste as plaintext).
     11 */
     12 
     13 #include "nsPlainTextSerializer.h"
     14 
     15 #include "mozilla/Casting.h"
     16 #include "mozilla/Preferences.h"
     17 #include "mozilla/Span.h"
     18 #include "mozilla/StaticPrefs_converter.h"
     19 #include "mozilla/TextEditor.h"
     20 #include "mozilla/dom/AbstractRange.h"
     21 #include "mozilla/dom/CharacterData.h"
     22 #include "mozilla/dom/CharacterDataBuffer.h"
     23 #include "mozilla/dom/Element.h"
     24 #include "mozilla/dom/HTMLBRElement.h"
     25 #include "mozilla/dom/Text.h"
     26 #include "mozilla/intl/Segmenter.h"
     27 #include "mozilla/intl/UnicodeProperties.h"
     28 #include "nsCRT.h"
     29 #include "nsComputedDOMStyle.h"
     30 #include "nsContentUtils.h"
     31 #include "nsDebug.h"
     32 #include "nsGkAtoms.h"
     33 #include "nsIDocumentEncoder.h"
     34 #include "nsNameSpaceManager.h"
     35 #include "nsPrintfCString.h"
     36 #include "nsReadableUtils.h"
     37 #include "nsUnicharUtils.h"
     38 #include "nsUnicodeProperties.h"
     39 
     40 namespace mozilla {
     41 class Encoding;
     42 }
     43 
     44 using namespace mozilla;
     45 using namespace mozilla::dom;
     46 
     47 #define PREF_STRUCTS "converter.html2txt.structs"
     48 #define PREF_HEADER_STRATEGY "converter.html2txt.header_strategy"
     49 
     50 static const int32_t kTabSize = 4;
     51 static const int32_t kIndentSizeHeaders =
     52    2; /* Indention of h1, if
     53        mHeaderStrategy = kIndentIncreasedWithHeaderLevel
     54        or = kNumberHeadingsAndIndentSlightly. Indention of
     55        other headers is derived from that. */
     56 static const int32_t kIndentIncrementHeaders =
     57    2; /* If mHeaderStrategy = kIndentIncreasedWithHeaderLevel,
     58   indent h(x+1) this many
     59   columns more than h(x) */
     60 static const int32_t kIndentSizeList = kTabSize;
     61 // Indention of non-first lines of ul and ol
     62 static const int32_t kIndentSizeDD = kTabSize;  // Indention of <dd>
     63 static const char16_t kNBSP = 160;
     64 static const char16_t kSPACE = ' ';
     65 
     66 static int32_t HeaderLevel(const nsAtom* aTag);
     67 static int32_t GetUnicharWidth(char32_t ucs);
     68 static int32_t GetUnicharStringWidth(Span<const char16_t> aString);
     69 
     70 // Someday may want to make this non-const:
     71 static const uint32_t TagStackSize = 500;
     72 
     73 NS_IMPL_CYCLE_COLLECTING_ADDREF(nsPlainTextSerializer)
     74 NS_IMPL_CYCLE_COLLECTING_RELEASE(nsPlainTextSerializer)
     75 
        // Interface map: the serializer answers QueryInterface for
        // nsIContentSerializer and nsISupports.
     76 NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsPlainTextSerializer)
     77  NS_INTERFACE_MAP_ENTRY(nsIContentSerializer)
     78  NS_INTERFACE_MAP_ENTRY(nsISupports)
     79 NS_INTERFACE_MAP_END
     80 
        // No members are listed for the cycle collector to traverse.
     81 NS_IMPL_CYCLE_COLLECTION(nsPlainTextSerializer)
     82 
     83 nsresult NS_NewPlainTextSerializer(nsIContentSerializer** aSerializer) {
     84  RefPtr<nsPlainTextSerializer> it = new nsPlainTextSerializer();
     85  it.forget(aSerializer);
     86  return NS_OK;
     87 }
     88 
     89 // @param aFlags As defined in nsIDocumentEncoder.idl.
     90 static void DetermineLineBreak(const int32_t aFlags, nsAString& aLineBreak) {
     91  // Set the line break character:
     92  if ((aFlags & nsIDocumentEncoder::OutputCRLineBreak) &&
     93      (aFlags & nsIDocumentEncoder::OutputLFLineBreak)) {
     94    // Windows
     95    aLineBreak.AssignLiteral(u"\r\n");
     96  } else if (aFlags & nsIDocumentEncoder::OutputCRLineBreak) {
     97    // Mac
     98    aLineBreak.AssignLiteral(u"\r");
     99  } else if (aFlags & nsIDocumentEncoder::OutputLFLineBreak) {
    100    // Unix/DOM
    101    aLineBreak.AssignLiteral(u"\n");
    102  } else {
    103    // Platform/default
    104    aLineBreak.AssignLiteral(NS_ULINEBREAK);
    105  }
    106 }
    107 
    108 void nsPlainTextSerializer::CurrentLine::MaybeReplaceNbspsInContent(
    109    const int32_t aFlags) {
    110  if (!(aFlags & nsIDocumentEncoder::OutputPersistNBSP)) {
    111    // First, replace all nbsp characters with spaces,
    112    // which the unicode encoder won't do for us.
    113    mContent.ReplaceChar(kNBSP, kSPACE);
    114  }
    115 }
    116 
    117 void nsPlainTextSerializer::CurrentLine::ResetContentAndIndentationHeader() {
    118  mContent.Truncate();
    119  mIndentation.mHeader.Truncate();
    120 }
    121 
    122 int32_t nsPlainTextSerializer::CurrentLine::FindWrapIndexForContent(
    123    const uint32_t aWrapColumn, bool aUseLineBreaker) const {
    124  MOZ_ASSERT(!mContent.IsEmpty());
    125 
    126  const uint32_t prefixwidth = DeterminePrefixWidth();
    127  int32_t goodSpace = 0;
    128 
    129  if (aUseLineBreaker) {
    130    // We advance one line break point at a time from the beginning of the
    131    // mContent until we find a width less than or equal to wrap column.
    132    uint32_t width = 0;
    133    intl::LineBreakIteratorUtf16 lineBreakIter(mContent);
    134    while (Maybe<uint32_t> nextGoodSpace = lineBreakIter.Next()) {
    135      // Trim space at the tail. UAX#14 doesn't have break opportunity for
    136      // ASCII space at the tail.
    137      const Maybe<uint32_t> originalNextGoodSpace = nextGoodSpace;
    138      while (*nextGoodSpace > 0 &&
    139             mContent.CharAt(*nextGoodSpace - 1) == 0x20) {
    140        nextGoodSpace = Some(*nextGoodSpace - 1);
    141      }
    142      if (*nextGoodSpace == 0) {
    143        // Restore the original nextGoodSpace.
    144        nextGoodSpace = originalNextGoodSpace;
    145      }
    146 
    147      width += GetUnicharStringWidth(Span<const char16_t>(
    148          mContent.get() + goodSpace, *nextGoodSpace - goodSpace));
    149      if (prefixwidth + width > aWrapColumn) {
    150        // The next break point makes the width exceeding the wrap column, so
    151        // goodSpace is what we want.
    152        break;
    153      }
    154      goodSpace = AssertedCast<int32_t>(*nextGoodSpace);
    155    }
    156 
    157    return goodSpace;
    158  }
    159 
    160  // In this case we don't want strings, especially CJK-ones, to be split. See
    161  // bug 333064 for more information. We break only at ASCII spaces.
    162  if (aWrapColumn >= prefixwidth) {
    163    // Search backward from the adjusted wrap column or from the text end.
    164    goodSpace =
    165        std::min<int32_t>(aWrapColumn - prefixwidth, mContent.Length() - 1);
    166    while (goodSpace >= 0) {
    167      if (nsCRT::IsAsciiSpace(mContent.CharAt(goodSpace))) {
    168        return goodSpace;
    169      }
    170      goodSpace--;
    171    }
    172  }
    173 
    174  // Search forward from the adjusted wrap column.
    175  goodSpace = (prefixwidth > aWrapColumn) ? 1 : aWrapColumn - prefixwidth;
    176  const int32_t contentLength = mContent.Length();
    177  while (goodSpace < contentLength &&
    178         !nsCRT::IsAsciiSpace(mContent.CharAt(goodSpace))) {
    179    goodSpace++;
    180  }
    181 
    182  return goodSpace;
    183 }
    184 
    185 nsPlainTextSerializer::OutputManager::OutputManager(const int32_t aFlags,
    186                                                    nsAString& aOutput)
    187    : mFlags{aFlags}, mOutput{aOutput}, mAtFirstColumn{true} {
    188  MOZ_ASSERT(aOutput.IsEmpty());
    189 
    190  DetermineLineBreak(mFlags, mLineBreak);
    191 }
    192 
    193 void nsPlainTextSerializer::OutputManager::Append(
    194    const CurrentLine& aLine,
    195    const StripTrailingWhitespaces aStripTrailingWhitespaces) {
    196  if (IsAtFirstColumn()) {
    197    nsAutoString quotesAndIndent;
    198    aLine.CreateQuotesAndIndent(quotesAndIndent);
    199 
    200    if ((aStripTrailingWhitespaces == StripTrailingWhitespaces::kMaybe)) {
    201      const bool stripTrailingSpaces = aLine.mContent.IsEmpty();
    202      if (stripTrailingSpaces) {
    203        quotesAndIndent.Trim(" ", false, true, false);
    204      }
    205    }
    206 
    207    Append(quotesAndIndent);
    208  }
    209 
    210  Append(aLine.mContent);
    211 }
    212 
    213 void nsPlainTextSerializer::OutputManager::Append(const nsAString& aString) {
    214  if (!aString.IsEmpty()) {
    215    mOutput.Append(aString);
    216    mAtFirstColumn = false;
    217  }
    218 }
    219 
    220 void nsPlainTextSerializer::OutputManager::AppendLineBreak(bool aForceCRLF) {
    221  mOutput.Append(aForceCRLF ? u"\r\n"_ns : mLineBreak);
    222  mAtFirstColumn = true;
    223 }
    224 
    225 uint32_t nsPlainTextSerializer::OutputManager::GetOutputLength() const {
    226  return mOutput.Length();
    227 }
    228 
    229 nsPlainTextSerializer::nsPlainTextSerializer()
    230    : mFloatingLines(-1),
    231      kSpace(u" "_ns)  // Init of "constant"
    232 {
    233  mSpanLevel = 0;
    234  for (int32_t i = 0; i <= 6; i++) {
    235    mHeaderCounter[i] = 0;
    236  }
    237 
    238  // Flow
    239  mEmptyLines = 1;  // The start of the document is an "empty line" in itself,
    240  mInWhitespace = false;
    241  mPreFormattedMail = false;
    242 
    243  mPreformattedBlockBoundary = false;
    244 
    245  // initialize the tag stack to zero:
    246  // The stack only ever contains pointers to static atoms, so they don't
    247  // need refcounting.
    248  mTagStack = new const nsAtom*[TagStackSize];
    249  mTagStackIndex = 0;
    250  mIgnoreAboveIndex = (uint32_t)kNotFound;
    251 
    252  mULCount = 0;
    253 }
    254 
    255 nsPlainTextSerializer::~nsPlainTextSerializer() {
    256  delete[] mTagStack;
    257  NS_WARNING_ASSERTION(mHeadLevel == 0, "Wrong head level!");
    258 }
    259 
    260 nsPlainTextSerializer::Settings::HeaderStrategy
    261 nsPlainTextSerializer::Settings::Convert(const int32_t aPrefHeaderStrategy) {
    262  HeaderStrategy result{HeaderStrategy::kIndentIncreasedWithHeaderLevel};
    263 
    264  switch (aPrefHeaderStrategy) {
    265    case 0: {
    266      result = HeaderStrategy::kNoIndentation;
    267      break;
    268    }
    269    case 1: {
    270      result = HeaderStrategy::kIndentIncreasedWithHeaderLevel;
    271      break;
    272    }
    273    case 2: {
    274      result = HeaderStrategy::kNumberHeadingsAndIndentSlightly;
    275      break;
    276    }
    277    default: {
    278      NS_WARNING(
    279          nsPrintfCString("Header strategy pref contains undefined value: %i",
    280                          aPrefHeaderStrategy)
    281              .get());
    282    }
    283  }
    284 
    285  return result;
    286 }
    287 
    288 const int32_t kDefaultHeaderStrategy = 1;
    289 
    290 void nsPlainTextSerializer::Settings::Init(const int32_t aFlags,
    291                                           const uint32_t aWrapColumn) {
    292  mFlags = aFlags;
    293 
    294  if (mFlags & nsIDocumentEncoder::OutputFormatted) {
    295    // Get some prefs that controls how we do formatted output
    296    mStructs = Preferences::GetBool(PREF_STRUCTS, mStructs);
    297 
    298    int32_t headerStrategy =
    299        Preferences::GetInt(PREF_HEADER_STRATEGY, kDefaultHeaderStrategy);
    300    mHeaderStrategy = Convert(headerStrategy);
    301  }
    302 
    303  mWithRubyAnnotation = StaticPrefs::converter_html2txt_always_include_ruby() ||
    304                        (mFlags & nsIDocumentEncoder::OutputRubyAnnotation);
    305 
    306  // XXX We should let the caller decide whether to do this or not
    307  mFlags &= ~nsIDocumentEncoder::OutputNoFramesContent;
    308 
    309  mWrapColumn = aWrapColumn;
    310 }
    311 
    312 void nsPlainTextSerializer::HardWrapString(nsAString& aString,
    313                                           uint32_t aWrapColumn,
    314                                           int32_t aFlags) {
    315  MOZ_ASSERT(aFlags & nsIDocumentEncoder::OutputWrap, "Why?");
    316  MOZ_ASSERT(aWrapColumn, "Why?");
    317 
    318  Settings settings;
    319  settings.Init(aFlags, aWrapColumn);
    320 
    321  // Line breaker will do the right thing, no need to split manually.
    322  CurrentLine line;
    323  line.mContent.Assign(aString);
    324 
    325  nsAutoString output;
    326  {
    327    OutputManager manager(aFlags, output);
    328    PerformWrapAndOutputCompleteLines(settings, line, manager,
    329                                      /* aUseLineBreaker = */ true, nullptr);
    330    manager.Flush(line);
    331  }
    332  aString.Assign(output);
    333 }
    334 
    335 NS_IMETHODIMP
    336 nsPlainTextSerializer::Init(const uint32_t aFlags, uint32_t aWrapColumn,
    337                            const Encoding* aEncoding, bool aIsCopying,
    338                            bool aIsWholeDocument,
    339                            bool* aNeedsPreformatScanning, nsAString& aOutput) {
    340 #ifdef DEBUG
    341  // Check if the major control flags are set correctly.
    342  if (aFlags & nsIDocumentEncoder::OutputFormatFlowed) {
    343    // One of OutputFormatted or OutputWrap must be set, but not both.
    344    NS_ASSERTION((aFlags & nsIDocumentEncoder::OutputFormatted) !=
    345                     (aFlags & nsIDocumentEncoder::OutputWrap),
    346                 "If you want format=flowed, you must combine it "
    347                 "with either nsIDocumentEncoder::OutputFormatted "
    348                 "or nsIDocumentEncoder::OutputWrap");
    349  }
    350 
    351  if (aFlags & nsIDocumentEncoder::OutputFormatted) {
    352    NS_ASSERTION(
    353        !(aFlags & nsIDocumentEncoder::OutputPreformatted),
    354        "Can't do formatted and preformatted output at the same time!");
    355  }
    356 #endif
    357  MOZ_ASSERT(!(aFlags & nsIDocumentEncoder::OutputFormatDelSp) ||
    358             (aFlags & nsIDocumentEncoder::OutputFormatFlowed));
    359 
    360  *aNeedsPreformatScanning = true;
    361  mSettings.Init(aFlags, aWrapColumn);
    362  mOutputManager.emplace(mSettings.GetFlags(), aOutput);
    363 
    364  mUseLineBreaker = mSettings.MayWrap() && mSettings.MayBreakLines();
    365 
    366  mLineBreakDue = false;
    367  mFloatingLines = -1;
    368 
    369  mPreformattedBlockBoundary = false;
    370 
    371  MOZ_ASSERT(mOLStack.IsEmpty());
    372 
    373  return NS_OK;
    374 }
    375 
    376 bool nsPlainTextSerializer::GetLastBool(const nsTArray<bool>& aStack) {
    377  uint32_t size = aStack.Length();
    378  if (size == 0) {
    379    return false;
    380  }
    381  return aStack.ElementAt(size - 1);
    382 }
    383 
    384 void nsPlainTextSerializer::SetLastBool(nsTArray<bool>& aStack, bool aValue) {
    385  uint32_t size = aStack.Length();
    386  if (size > 0) {
    387    aStack.ElementAt(size - 1) = aValue;
    388  } else {
    389    NS_ERROR("There is no \"Last\" value");
    390  }
    391 }
    392 
    393 void nsPlainTextSerializer::PushBool(nsTArray<bool>& aStack, bool aValue) {
    394  aStack.AppendElement(bool(aValue));
    395 }
    396 
    397 bool nsPlainTextSerializer::PopBool(nsTArray<bool>& aStack) {
    398  return aStack.Length() ? aStack.PopLastElement() : false;
    399 }
    400 
    401 bool nsPlainTextSerializer::IsIgnorableRubyAnnotation(
    402    const nsAtom* aTag) const {
    403  if (mSettings.GetWithRubyAnnotation()) {
    404    return false;
    405  }
    406 
    407  return aTag == nsGkAtoms::rp || aTag == nsGkAtoms::rt ||
    408         aTag == nsGkAtoms::rtc;
    409 }
    410 
    411 // Return true if aElement has 'display:none' or if we just don't know.
    412 static bool IsDisplayNone(Element* aElement) {
    413  RefPtr<const ComputedStyle> computedStyle =
    414      nsComputedDOMStyle::GetComputedStyleNoFlush(aElement);
    415  return !computedStyle ||
    416         computedStyle->StyleDisplay()->mDisplay == StyleDisplay::None;
    417 }
    418 
    419 static bool IsIgnorableScriptOrStyle(Element* aElement) {
    420  return aElement->IsAnyOfHTMLElements(nsGkAtoms::script, nsGkAtoms::style) &&
    421         IsDisplayNone(aElement);
    422 }
    423 
    424 NS_IMETHODIMP
    425 nsPlainTextSerializer::AppendText(Text* aText, int32_t aStartOffset,
    426                                  int32_t aEndOffset) {
    427  if (mIgnoreAboveIndex != (uint32_t)kNotFound) {
    428    return NS_OK;
    429  }
    430 
    431  NS_ASSERTION(aStartOffset >= 0, "Negative start offset for text fragment!");
    432  if (aStartOffset < 0) return NS_ERROR_INVALID_ARG;
    433 
    434  NS_ENSURE_ARG(aText);
    435 
    436  nsresult rv = NS_OK;
    437 
    438  const CharacterDataBuffer* characterDataBuffer = nullptr;
    439  if (!(characterDataBuffer = aText->GetCharacterDataBuffer())) {
    440    return NS_ERROR_FAILURE;
    441  }
    442 
    443  int32_t fragLength = characterDataBuffer->GetLength();
    444  int32_t endoffset =
    445      (aEndOffset == -1) ? fragLength : std::min(aEndOffset, fragLength);
    446  NS_ASSERTION(aStartOffset <= endoffset,
    447               "A start offset is beyond the end of the text fragment!");
    448 
    449  int32_t length = endoffset - aStartOffset;
    450  if (length <= 0) {
    451    return NS_OK;
    452  }
    453 
    454  // If we don't want any output, just return.
    455  if (!DoOutput()) {
    456    return NS_OK;
    457  }
    458 
    459  if (mLineBreakDue) {
    460    EnsureVerticalSpace(mFloatingLines);
    461  }
    462 
    463  // Check whether this text node is under an element that doesn’t need  to be
    464  // serialized. If so, we can return early here.
    465  if (MustSuppressLeaf()) {
    466    return NS_OK;
    467  }
    468 
    469  nsAutoString textstr;
    470  if (characterDataBuffer->Is2b()) {
    471    textstr.Assign(characterDataBuffer->Get2b() + aStartOffset, length);
    472  } else {
    473    // AssignASCII is for 7-bit character only, so don't use it
    474    const char* data = characterDataBuffer->Get1b();
    475    CopyASCIItoUTF16(Substring(data + aStartOffset, data + endoffset), textstr);
    476  }
    477 
    478  // Mask the text if the text node is in a password field.
    479  if (aText->HasFlag(NS_MAYBE_MASKED)) {
    480    TextEditor::MaskString(textstr, *aText, 0, aStartOffset);
    481  }
    482 
    483  if (mSettings.HasFlag(nsIDocumentEncoder::OutputForPlainTextClipboardCopy)) {
    484    // XXX it would be nice if we could just use the Write() to handle the line
    485    // breaks for all cases (bug 1993406).
    486    Write(textstr);
    487    return rv;
    488  }
    489 
    490  // We have to split the string across newlines
    491  // to match parser behavior
    492  int32_t start = 0;
    493  int32_t offset = textstr.FindCharInSet(u"\n\r");
    494  while (offset != kNotFound) {
    495    if (offset > start) {
    496      // Pass in the line
    497      DoAddText(Substring(textstr, start, offset - start));
    498    }
    499 
    500    // Pass in a newline
    501    DoAddLineBreak();
    502 
    503    start = offset + 1;
    504    offset = textstr.FindCharInSet(u"\n\r", start);
    505  }
    506 
    507  // Consume the last bit of the string if there's any left
    508  if (start < length) {
    509    if (start) {
    510      DoAddText(Substring(textstr, start, length - start));
    511    } else {
    512      DoAddText(textstr);
    513    }
    514  }
    515 
    516  return rv;
    517 }
    518 
    519 NS_IMETHODIMP
    520 nsPlainTextSerializer::AppendCDATASection(Text* aCDATASection,
    521                                          int32_t aStartOffset,
    522                                          int32_t aEndOffset) {
    523  MOZ_ASSERT(!aCDATASection ||
    524             aCDATASection->NodeType() == nsINode::CDATA_SECTION_NODE);
    525  return AppendText(aCDATASection, aStartOffset, aEndOffset);
    526 }
    527 
    528 NS_IMETHODIMP
    529 nsPlainTextSerializer::ScanElementForPreformat(Element* aElement) {
    530  mPreformatStack.push(IsElementPreformatted(aElement));
    531  return NS_OK;
    532 }
    533 
    534 NS_IMETHODIMP
    535 nsPlainTextSerializer::ForgetElementForPreformat(Element* aElement) {
    536  MOZ_RELEASE_ASSERT(!mPreformatStack.empty(),
    537                     "Tried to pop without previous push.");
    538  mPreformatStack.pop();
    539  return NS_OK;
    540 }
    541 
    542 NS_IMETHODIMP
    543 nsPlainTextSerializer::AppendElementStart(Element* aElement,
    544                                          Element* aOriginalElement) {
    545  NS_ENSURE_ARG(aElement);
    546 
    547  nsresult rv = NS_OK;
    548  nsAtom* id = GetIdForContent(aElement);
    549  if (!FragmentOrElement::IsHTMLVoid(id)) {
    550    rv = DoOpenContainer(aElement, id);
    551  } else {
    552    rv = DoAddLeaf(aElement, id);
    553  }
    554 
    555  if (id == nsGkAtoms::head) {
    556    ++mHeadLevel;
    557  }
    558 
    559  return rv;
    560 }
    561 
    562 NS_IMETHODIMP
    563 nsPlainTextSerializer::AppendElementEnd(Element* aElement,
    564                                        Element* aOriginalElement) {
    565  NS_ENSURE_ARG(aElement);
    566 
    567  nsresult rv = NS_OK;
    568  nsAtom* id = GetIdForContent(aElement);
    569  if (!FragmentOrElement::IsHTMLVoid(id)) {
    570    rv = DoCloseContainer(aElement, id);
    571  }
    572 
    573  if (id == nsGkAtoms::head) {
    574    NS_ASSERTION(mHeadLevel != 0, "mHeadLevel being decremented below 0");
    575    --mHeadLevel;
    576  }
    577 
    578  return rv;
    579 }
    580 
    581 NS_IMETHODIMP
    582 nsPlainTextSerializer::FlushAndFinish() {
    583  MOZ_ASSERT(mOutputManager);
    584 
    585  mOutputManager->Flush(mCurrentLine);
    586  return Finish();
    587 }
    588 
    589 NS_IMETHODIMP
    590 nsPlainTextSerializer::Finish() {
    591  mOutputManager.reset();
    592 
    593  return NS_OK;
    594 }
    595 
    596 NS_IMETHODIMP
    597 nsPlainTextSerializer::GetOutputLength(uint32_t& aLength) const {
    598  MOZ_ASSERT(mOutputManager);
    599 
    600  aLength = mOutputManager->GetOutputLength();
    601 
    602  return NS_OK;
    603 }
    604 
    605 NS_IMETHODIMP
    606 nsPlainTextSerializer::AppendDocumentStart(Document* aDocument) {
    607  return NS_OK;
    608 }
    609 
    610 constexpr int32_t kOlStackDummyValue = 0;  // pushed on mOLStack for <ol> when output is not formatted
    611 
        // Handles the start of the non-void element aElement whose tag is aTag.
        //
        // In order, this:
        //  - counts ignorable ruby annotations and ignorable <script>/<style>
        //    elements in mIgnoredChildNodeLevel and returns;
        //  - returns without any formatting for OutputRaw;
        //  - records the tag on the tag stack (while there is room);
        //  - switches output off below <noscript>, <iframe> and <noframes> when
        //    their content is not requested;
        //  - detects preformatted mail on <body>;
        //  - otherwise emits the vertical space, indentation, list markers, cell
        //    separators or quote characters that belong to the tag.
        //
        // Always returns NS_OK.
    612 nsresult nsPlainTextSerializer::DoOpenContainer(Element* aElement,
    613                                                const nsAtom* aTag) {
    614  MOZ_ASSERT(aElement);
    615  MOZ_ASSERT(GetIdForContent(aElement) == aTag);
    616  MOZ_ASSERT(!FragmentOrElement::IsHTMLVoid(aTag));
    617 
    618  if (IsIgnorableRubyAnnotation(aTag)) {
    619    // Ignorable ruby annotation shouldn't be replaced by a placeholder
    620    // character, neither any of its descendants.
    621    mIgnoredChildNodeLevel++;
    622    return NS_OK;
    623  }
    624  if (IsIgnorableScriptOrStyle(aElement)) {
    625    mIgnoredChildNodeLevel++;
    626    return NS_OK;
    627  }
    628 
    629  if (mSettings.HasFlag(nsIDocumentEncoder::OutputForPlainTextClipboardCopy)) {
    630    if (mPreformattedBlockBoundary && DoOutput()) {
    631      // Should always end a line, but get no more whitespace
    632      if (mFloatingLines < 0) mFloatingLines = 0;
    633      mLineBreakDue = true;
    634    }
    635    mPreformattedBlockBoundary = false;
    636  }
    637 
    638  if (mSettings.HasFlag(nsIDocumentEncoder::OutputRaw)) {
    639    // Raw means raw.  Don't even think about doing anything fancy
    640    // here like indenting, adding line breaks or any other
    641    // characters such as list item bullets, quote characters
    642    // around <q>, etc.
    643 
    644    return NS_OK;
    645  }
    646 
         // The tag is only recorded while the stack has room (TagStackSize
         // entries); deeper nesting is not tracked.
    647  if (mTagStackIndex < TagStackSize) {
    648    mTagStack[mTagStackIndex++] = aTag;
    649  }
    650 
         // mIgnoreAboveIndex is set further down when entering an element whose
         // content is not output. While it is set, nothing else is done here.
    651  if (mIgnoreAboveIndex != (uint32_t)kNotFound) {
    652    return NS_OK;
    653  }
    654 
    655  // Reset this so that <blockquote type=cite> doesn't affect the whitespace
    656  // above random <pre>s below it.
    657  mHasWrittenCiteBlockquote =
    658      mHasWrittenCiteBlockquote && aTag == nsGkAtoms::pre;
    659 
    660  bool isInCiteBlockquote = false;
    661 
    662  // XXX special-case <blockquote type=cite> so that we don't add additional
    663  // newlines before the text.
    664  if (aTag == nsGkAtoms::blockquote) {
    665    nsAutoString value;
    666    nsresult rv = GetAttributeValue(aElement, nsGkAtoms::type, value);
    667    isInCiteBlockquote = NS_SUCCEEDED(rv) && value.EqualsIgnoreCase("cite");
    668  }
    669 
    670  if (mLineBreakDue && !isInCiteBlockquote) EnsureVerticalSpace(mFloatingLines);
    671 
    672  // Check if this tag's content that should not be output
    673  if ((aTag == nsGkAtoms::noscript &&
    674       !mSettings.HasFlag(nsIDocumentEncoder::OutputNoScriptContent)) ||
    675      ((aTag == nsGkAtoms::iframe || aTag == nsGkAtoms::noframes) &&
    676       !mSettings.HasFlag(nsIDocumentEncoder::OutputNoFramesContent))) {
    677    // Ignore everything that follows the current tag in
    678    // question until a matching end tag is encountered.
    679    mIgnoreAboveIndex = mTagStackIndex - 1;
    680    return NS_OK;
    681  }
    682 
    683  if (aTag == nsGkAtoms::body) {
    684    // Try to figure out here whether we have a
    685    // preformatted style attribute set by Thunderbird.
    686    //
    687    // Trigger on the presence of a "pre-wrap" in the
    688    // style attribute. That's a very simplistic way to do
    689    // it, but better than nothing.
    690    nsAutoString style;
    691    int32_t whitespace;
    692    if (NS_SUCCEEDED(GetAttributeValue(aElement, nsGkAtoms::style, style)) &&
    693        (kNotFound != (whitespace = style.Find(u"white-space:")))) {
    694      if (kNotFound != style.LowerCaseFindASCII("pre-wrap", whitespace)) {
    695 #ifdef DEBUG_preformatted
    696        printf("Set mPreFormattedMail based on style pre-wrap\n");
    697 #endif
    698        mPreFormattedMail = true;
    699      } else if (kNotFound != style.LowerCaseFindASCII("pre", whitespace)) {
    700 #ifdef DEBUG_preformatted
    701        printf("Set mPreFormattedMail based on style pre\n");
    702 #endif
    703        mPreFormattedMail = true;
    704      }
    705    } else {
    706      /* See comment at end of function. */
               // NOTE(review): no such comment is present at the end of this
               // function.
    707      mInWhitespace = true;
    708      mPreFormattedMail = false;
    709    }
    710 
    711    return NS_OK;
    712  }
    713 
    714  // Keep this in sync with DoCloseContainer!
    715  if (!DoOutput()) {
    716    return NS_OK;
    717  }
    718 
         // Per-tag handling. Only one branch of the chain below runs; the final
         // fallback covers any other CSS block-level element.
    719  if (aTag == nsGkAtoms::p)
    720    EnsureVerticalSpace(1);
    721  else if (aTag == nsGkAtoms::pre) {
    722    if (GetLastBool(mIsInCiteBlockquote))
    723      EnsureVerticalSpace(0);
    724    else if (mHasWrittenCiteBlockquote) {
    725      EnsureVerticalSpace(0);
    726      mHasWrittenCiteBlockquote = false;
    727    } else
    728      EnsureVerticalSpace(1);
    729  } else if (aTag == nsGkAtoms::tr) {
    730    PushBool(mHasWrittenCellsForRow, false);
    731  } else if (aTag == nsGkAtoms::td || aTag == nsGkAtoms::th) {
    732    // We must make sure that the content of two table cells get a
    733    // space between them.
    734 
    735    // To make the separation between cells most obvious and
    736    // importable, we use a TAB.
    737    if (mHasWrittenCellsForRow.IsEmpty()) {
    738      // We don't always see a <tr> (nor a <table>) before the <td> if we're
    739      // copying part of a table
    740      PushBool(mHasWrittenCellsForRow, true);  // will never be popped
    741    } else if (GetLastBool(mHasWrittenCellsForRow)) {
    742      // Bypass |Write| so that the TAB isn't compressed away.
    743      AddToLine(u"\t", 1);
    744      mInWhitespace = true;
    745    } else {
    746      SetLastBool(mHasWrittenCellsForRow, true);
    747    }
    748  } else if (aTag == nsGkAtoms::ul) {
    749    // Indent here to support nested lists, which aren't included in li :-(
    750    EnsureVerticalSpace(IsInOlOrUl() ? 0 : 1);
    751    // Must end the current line before we change indention
    752    mCurrentLine.mIndentation.mLength += kIndentSizeList;
    753    mULCount++;
    754  } else if (aTag == nsGkAtoms::ol) {
    755    EnsureVerticalSpace(IsInOlOrUl() ? 0 : 1);
    756    if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatted)) {
    757      // Must end the current line before we change indention
             // Numbering starts at the "start" attribute if it parses as an
             // integer, and at 1 otherwise.
    758      nsAutoString startAttr;
    759      int32_t startVal = 1;
    760      if (NS_SUCCEEDED(
    761              GetAttributeValue(aElement, nsGkAtoms::start, startAttr))) {
    762        nsresult rv = NS_OK;
    763        startVal = startAttr.ToInteger(&rv);
    764        if (NS_FAILED(rv)) {
    765          startVal = 1;
    766        }
    767      }
    768      mOLStack.AppendElement(startVal);
    769    } else {
    770      mOLStack.AppendElement(kOlStackDummyValue);
    771    }
    772    mCurrentLine.mIndentation.mLength += kIndentSizeList;  // see ul
    773  } else if (aTag == nsGkAtoms::li &&
    774             mSettings.HasFlag(nsIDocumentEncoder::OutputFormatted)) {
    775    if (mTagStackIndex > 1 && IsInOL()) {
    776      if (!mOLStack.IsEmpty()) {
               // A parseable "value" attribute overrides the running number of
               // the innermost <ol>.
    777        nsAutoString valueAttr;
    778        if (NS_SUCCEEDED(
    779                GetAttributeValue(aElement, nsGkAtoms::value, valueAttr))) {
    780          nsresult rv = NS_OK;
    781          int32_t valueAttrVal = valueAttr.ToInteger(&rv);
    782          if (NS_SUCCEEDED(rv)) {
    783            mOLStack.LastElement() = valueAttrVal;
    784          }
    785        }
    786        // This is what nsBulletFrame does for OLs:
    787        mCurrentLine.mIndentation.mHeader.AppendInt(mOLStack.LastElement(), 10);
    788        mOLStack.LastElement()++;
    789      } else {
    790        mCurrentLine.mIndentation.mHeader.Append(char16_t('#'));
    791      }
    792 
    793      mCurrentLine.mIndentation.mHeader.Append(char16_t('.'));
    794 
    795    } else {
             // The bullet cycles through '*', 'o', '+', '#' with mULCount; '#' is
             // used when mULCount is 0.
    796      static const char bulletCharArray[] = "*o+#";
    797      uint32_t index = mULCount > 0 ? (mULCount - 1) : 3;
    798      char bulletChar = bulletCharArray[index % 4];
    799      mCurrentLine.mIndentation.mHeader.Append(char16_t(bulletChar));
    800    }
    801 
    802    mCurrentLine.mIndentation.mHeader.Append(char16_t(' '));
    803  } else if (aTag == nsGkAtoms::dl) {
    804    EnsureVerticalSpace(1);
    805  } else if (aTag == nsGkAtoms::dt) {
    806    EnsureVerticalSpace(0);
    807  } else if (aTag == nsGkAtoms::dd) {
    808    EnsureVerticalSpace(0);
    809    mCurrentLine.mIndentation.mLength += kIndentSizeDD;
    810  } else if (aTag == nsGkAtoms::span) {
    811    ++mSpanLevel;
    812  } else if (aTag == nsGkAtoms::blockquote) {
    813    // Push
    814    PushBool(mIsInCiteBlockquote, isInCiteBlockquote);
    815    if (isInCiteBlockquote) {
    816      EnsureVerticalSpace(0);
    817      mCurrentLine.mCiteQuoteLevel++;
    818    } else {
    819      EnsureVerticalSpace(1);
    820      mCurrentLine.mIndentation.mLength +=
    821          kTabSize;  // Check for some maximum value?
    822    }
    823  } else if (aTag == nsGkAtoms::q) {
    824    Write(u"\""_ns);
    825  }
    826 
    827  // Else make sure we'll separate block level tags,
    828  // even if we're about to leave, before doing any other formatting.
    829  else if (IsCssBlockLevelElement(aElement)) {
    830    EnsureVerticalSpace(0);
    831  }
    832 
    833  if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatted)) {
    834    OpenContainerForOutputFormatted(aElement, aTag);
    835  }
    836  return NS_OK;
    837 }
    838 
    839 void nsPlainTextSerializer::OpenContainerForOutputFormatted(
    840    Element* aElement, const nsAtom* aTag) {
    841  MOZ_ASSERT(aElement);
    842  MOZ_ASSERT(GetIdForContent(aElement) == aTag);
    843  MOZ_ASSERT(!FragmentOrElement::IsHTMLVoid(aTag));
    844 
    845  const bool currentNodeIsConverted = IsCurrentNodeConverted(aElement);
    846 
    847  if (aTag == nsGkAtoms::h1 || aTag == nsGkAtoms::h2 || aTag == nsGkAtoms::h3 ||
    848      aTag == nsGkAtoms::h4 || aTag == nsGkAtoms::h5 || aTag == nsGkAtoms::h6) {
    849    EnsureVerticalSpace(2);
    850    if (mSettings.GetHeaderStrategy() ==
    851        Settings::HeaderStrategy::kNumberHeadingsAndIndentSlightly) {
    852      mCurrentLine.mIndentation.mLength += kIndentSizeHeaders;
    853      // Caching
    854      int32_t level = HeaderLevel(aTag);
    855      // Increase counter for current level
    856      mHeaderCounter[level]++;
    857      // Reset all lower levels
    858      int32_t i;
    859 
    860      for (i = level + 1; i <= 6; i++) {
    861        mHeaderCounter[i] = 0;
    862      }
    863 
    864      // Construct numbers
    865      nsAutoString leadup;
    866      for (i = 1; i <= level; i++) {
    867        leadup.AppendInt(mHeaderCounter[i]);
    868        leadup.Append(char16_t('.'));
    869      }
    870      leadup.Append(char16_t(' '));
    871      Write(leadup);
    872    } else if (mSettings.GetHeaderStrategy() ==
    873               Settings::HeaderStrategy::kIndentIncreasedWithHeaderLevel) {
    874      mCurrentLine.mIndentation.mLength += kIndentSizeHeaders;
    875      for (int32_t i = HeaderLevel(aTag); i > 1; i--) {
    876        // for h(x), run x-1 times
    877        mCurrentLine.mIndentation.mLength += kIndentIncrementHeaders;
    878      }
    879    }
    880  } else if (aTag == nsGkAtoms::sup && mSettings.GetStructs() &&
    881             !currentNodeIsConverted) {
    882    Write(u"^"_ns);
    883  } else if (aTag == nsGkAtoms::sub && mSettings.GetStructs() &&
    884             !currentNodeIsConverted) {
    885    Write(u"_"_ns);
    886  } else if (aTag == nsGkAtoms::code && mSettings.GetStructs() &&
    887             !currentNodeIsConverted) {
    888    Write(u"|"_ns);
    889  } else if ((aTag == nsGkAtoms::strong || aTag == nsGkAtoms::b) &&
    890             mSettings.GetStructs() && !currentNodeIsConverted) {
    891    Write(u"*"_ns);
    892  } else if ((aTag == nsGkAtoms::em || aTag == nsGkAtoms::i) &&
    893             mSettings.GetStructs() && !currentNodeIsConverted) {
    894    Write(u"/"_ns);
    895  } else if (aTag == nsGkAtoms::u && mSettings.GetStructs() &&
    896             !currentNodeIsConverted) {
    897    Write(u"_"_ns);
    898  }
    899 
    900  /* Container elements are always block elements, so we shouldn't
    901     output any whitespace immediately after the container tag even if
    902     there's extra whitespace there because the HTML is pretty-printed
    903     or something. To ensure that happens, tell the serializer we're
    904     already in whitespace so it won't output more. */
    905  mInWhitespace = true;
    906 }
    907 
    908 nsresult nsPlainTextSerializer::DoCloseContainer(Element* aElement,
    909                                                 const nsAtom* aTag) {
    910  MOZ_ASSERT(aElement);
    911  MOZ_ASSERT(GetIdForContent(aElement) == aTag);
    912  MOZ_ASSERT(!FragmentOrElement::IsHTMLVoid(aTag));
    913 
    914  if (IsIgnorableRubyAnnotation(aTag)) {
    915    mIgnoredChildNodeLevel--;
    916    return NS_OK;
    917  }
    918  if (IsIgnorableScriptOrStyle(aElement)) {
    919    mIgnoredChildNodeLevel--;
    920    return NS_OK;
    921  }
    922 
    923  if (mSettings.HasFlag(nsIDocumentEncoder::OutputForPlainTextClipboardCopy)) {
    924    if (DoOutput() && IsElementPreformatted() &&
    925        IsCssBlockLevelElement(aElement)) {
    926      // If we're closing a preformatted block element, output a line break
    927      // when we find a new container.
    928      mPreformattedBlockBoundary = true;
    929    }
    930  }
    931 
    932  if (mSettings.HasFlag(nsIDocumentEncoder::OutputRaw)) {
    933    // Raw means raw.  Don't even think about doing anything fancy
    934    // here like indenting, adding line breaks or any other
    935    // characters such as list item bullets, quote characters
    936    // around <q>, etc.
    937 
    938    return NS_OK;
    939  }
    940 
    941  if (mTagStackIndex > 0) {
    942    --mTagStackIndex;
    943  }
    944 
    945  if (mTagStackIndex >= mIgnoreAboveIndex) {
    946    if (mTagStackIndex == mIgnoreAboveIndex) {
    947      // We're dealing with the close tag whose matching
    948      // open tag had set the mIgnoreAboveIndex value.
    949      // Reset mIgnoreAboveIndex before discarding this tag.
    950      mIgnoreAboveIndex = (uint32_t)kNotFound;
    951    }
    952    return NS_OK;
    953  }
    954 
    955  MOZ_ASSERT(mOutputManager);
    956 
    957  // End current line if we're ending a block level tag
    958  if ((aTag == nsGkAtoms::body) || (aTag == nsGkAtoms::html)) {
    959    // We want the output to end with a new line,
    960    // but in preformatted areas like text fields,
    961    // we can't emit newlines that weren't there.
    962    // So add the newline only in the case of formatted output.
    963    if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatted)) {
    964      EnsureVerticalSpace(0);
    965    } else {
    966      mOutputManager->Flush(mCurrentLine);
    967    }
    968    // We won't want to do anything with these in formatted mode either,
    969    // so just return now:
    970    return NS_OK;
    971  }
    972 
    973  // Keep this in sync with DoOpenContainer!
    974  if (!DoOutput()) {
    975    return NS_OK;
    976  }
    977 
    978  if (aTag == nsGkAtoms::tr) {
    979    PopBool(mHasWrittenCellsForRow);
    980    // Should always end a line, but get no more whitespace
    981    if (mFloatingLines < 0) mFloatingLines = 0;
    982    mLineBreakDue = true;
    983  } else if (((aTag == nsGkAtoms::li) || (aTag == nsGkAtoms::dt)) &&
    984             mSettings.HasFlag(nsIDocumentEncoder::OutputFormatted)) {
    985    // Items that should always end a line, but get no more whitespace
    986    if (mFloatingLines < 0) mFloatingLines = 0;
    987    mLineBreakDue = true;
    988  } else if (aTag == nsGkAtoms::pre) {
    989    mFloatingLines = GetLastBool(mIsInCiteBlockquote) ? 0 : 1;
    990    mLineBreakDue = true;
    991  } else if (aTag == nsGkAtoms::ul) {
    992    mOutputManager->Flush(mCurrentLine);
    993    mCurrentLine.mIndentation.mLength -= kIndentSizeList;
    994    --mULCount;
    995    if (!IsInOlOrUl()) {
    996      mFloatingLines = 1;
    997      mLineBreakDue = true;
    998    }
    999  } else if (aTag == nsGkAtoms::ol) {
   1000    mOutputManager->Flush(mCurrentLine);  // Doing this after decreasing
   1001                                          // OLStackIndex would be wrong.
   1002    mCurrentLine.mIndentation.mLength -= kIndentSizeList;
   1003    MOZ_ASSERT(!mOLStack.IsEmpty(), "Wrong OLStack level!");
   1004    mOLStack.RemoveLastElement();
   1005    if (!IsInOlOrUl()) {
   1006      mFloatingLines = 1;
   1007      mLineBreakDue = true;
   1008    }
   1009  } else if (aTag == nsGkAtoms::dl) {
   1010    mFloatingLines = 1;
   1011    mLineBreakDue = true;
   1012  } else if (aTag == nsGkAtoms::dd) {
   1013    mOutputManager->Flush(mCurrentLine);
   1014    mCurrentLine.mIndentation.mLength -= kIndentSizeDD;
   1015  } else if (aTag == nsGkAtoms::span) {
   1016    NS_ASSERTION(mSpanLevel, "Span level will be negative!");
   1017    --mSpanLevel;
   1018  } else if (aTag == nsGkAtoms::div) {
   1019    if (mFloatingLines < 0) mFloatingLines = 0;
   1020    mLineBreakDue = true;
   1021  } else if (aTag == nsGkAtoms::blockquote) {
   1022    mOutputManager->Flush(mCurrentLine);  // Is this needed?
   1023 
   1024    // Pop
   1025    bool isInCiteBlockquote = PopBool(mIsInCiteBlockquote);
   1026 
   1027    if (isInCiteBlockquote) {
   1028      NS_ASSERTION(mCurrentLine.mCiteQuoteLevel,
   1029                   "CiteQuote level will be negative!");
   1030      mCurrentLine.mCiteQuoteLevel--;
   1031      mFloatingLines = 0;
   1032      mHasWrittenCiteBlockquote = true;
   1033    } else {
   1034      mCurrentLine.mIndentation.mLength -= kTabSize;
   1035      mFloatingLines = 1;
   1036    }
   1037    mLineBreakDue = true;
   1038  } else if (aTag == nsGkAtoms::q) {
   1039    Write(u"\""_ns);
   1040  } else if (IsCssBlockLevelElement(aElement)) {
   1041    // All other blocks get 1 vertical space after them
   1042    // in formatted mode, otherwise 0.
   1043    // This is hard. Sometimes 0 is a better number, but
   1044    // how to know?
   1045    if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatted)) {
   1046      EnsureVerticalSpace(1);
   1047    } else {
   1048      if (mFloatingLines < 0) mFloatingLines = 0;
   1049      mLineBreakDue = true;
   1050    }
   1051  }
   1052 
   1053  if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatted)) {
   1054    CloseContainerForOutputFormatted(aElement, aTag);
   1055  }
   1056 
   1057  return NS_OK;
   1058 }
   1059 
   1060 void nsPlainTextSerializer::CloseContainerForOutputFormatted(
   1061    Element* aElement, const nsAtom* aTag) {
   1062  MOZ_ASSERT(aElement);
   1063  MOZ_ASSERT(GetIdForContent(aElement) == aTag);
   1064  MOZ_ASSERT(!FragmentOrElement::IsHTMLVoid(aTag));
   1065 
   1066  const bool currentNodeIsConverted = IsCurrentNodeConverted(aElement);
   1067 
   1068  if (aTag == nsGkAtoms::h1 || aTag == nsGkAtoms::h2 || aTag == nsGkAtoms::h3 ||
   1069      aTag == nsGkAtoms::h4 || aTag == nsGkAtoms::h5 || aTag == nsGkAtoms::h6) {
   1070    using HeaderStrategy = Settings::HeaderStrategy;
   1071    if ((mSettings.GetHeaderStrategy() ==
   1072         HeaderStrategy::kIndentIncreasedWithHeaderLevel) ||
   1073        (mSettings.GetHeaderStrategy() ==
   1074         HeaderStrategy::kNumberHeadingsAndIndentSlightly)) {
   1075      mCurrentLine.mIndentation.mLength -= kIndentSizeHeaders;
   1076    }
   1077    if (mSettings.GetHeaderStrategy() ==
   1078        HeaderStrategy::kIndentIncreasedWithHeaderLevel) {
   1079      for (int32_t i = HeaderLevel(aTag); i > 1; i--) {
   1080        // for h(x), run x-1 times
   1081        mCurrentLine.mIndentation.mLength -= kIndentIncrementHeaders;
   1082      }
   1083    }
   1084    EnsureVerticalSpace(1);
   1085  } else if (aTag == nsGkAtoms::a && !currentNodeIsConverted) {
   1086    nsAutoString url;
   1087    if (NS_SUCCEEDED(GetAttributeValue(aElement, nsGkAtoms::href, url)) &&
   1088        !url.IsEmpty()) {
   1089      nsAutoString temp;
   1090      temp.AssignLiteral(" <");
   1091      temp += url;
   1092      temp.Append(char16_t('>'));
   1093      Write(temp);
   1094    }
   1095  } else if ((aTag == nsGkAtoms::sup || aTag == nsGkAtoms::sub) &&
   1096             mSettings.GetStructs() && !currentNodeIsConverted) {
   1097    Write(kSpace);
   1098  } else if (aTag == nsGkAtoms::code && mSettings.GetStructs() &&
   1099             !currentNodeIsConverted) {
   1100    Write(u"|"_ns);
   1101  } else if ((aTag == nsGkAtoms::strong || aTag == nsGkAtoms::b) &&
   1102             mSettings.GetStructs() && !currentNodeIsConverted) {
   1103    Write(u"*"_ns);
   1104  } else if ((aTag == nsGkAtoms::em || aTag == nsGkAtoms::i) &&
   1105             mSettings.GetStructs() && !currentNodeIsConverted) {
   1106    Write(u"/"_ns);
   1107  } else if (aTag == nsGkAtoms::u && mSettings.GetStructs() &&
   1108             !currentNodeIsConverted) {
   1109    Write(u"_"_ns);
   1110  }
   1111 }
   1112 
   1113 bool nsPlainTextSerializer::MustSuppressLeaf() const {
   1114  if (mIgnoredChildNodeLevel > 0) {
   1115    return true;
   1116  }
   1117 
   1118  if ((mTagStackIndex > 1 &&
   1119       mTagStack[mTagStackIndex - 2] == nsGkAtoms::select) ||
   1120      (mTagStackIndex > 0 &&
   1121       mTagStack[mTagStackIndex - 1] == nsGkAtoms::select)) {
   1122    // Don't output the contents of SELECT elements;
   1123    // Might be nice, eventually, to output just the selected element.
   1124    // Read more in bug 31994.
   1125    return true;
   1126  }
   1127 
   1128  return false;
   1129 }
   1130 
   1131 void nsPlainTextSerializer::DoAddLineBreak() {
   1132  MOZ_ASSERT(DoOutput());
   1133  MOZ_ASSERT(!mLineBreakDue);
   1134  MOZ_ASSERT(mIgnoreAboveIndex == (uint32_t)kNotFound);
   1135  MOZ_ASSERT(!MustSuppressLeaf());
   1136 
   1137  // The only times we want to pass along whitespace from the original
   1138  // html source are if we're forced into preformatted mode via flags,
   1139  // or if we're prettyprinting and we're inside a <pre>.
   1140  // Otherwise, either we're collapsing to minimal text, or we're
   1141  // prettyprinting to mimic the html format, and in neither case
   1142  // does the formatting of the html source help us.
   1143  if (mSettings.HasFlag(nsIDocumentEncoder::OutputPreformatted) ||
   1144      (mPreFormattedMail && !mSettings.GetWrapColumn()) ||
   1145      IsElementPreformatted()) {
   1146    EnsureVerticalSpace(mEmptyLines + 1);
   1147  } else if (!mInWhitespace) {
   1148    Write(kSpace);
   1149    mInWhitespace = true;
   1150  }
   1151 }
   1152 
   1153 void nsPlainTextSerializer::DoAddText(const nsAString& aText) {
   1154  MOZ_ASSERT(DoOutput());
   1155  MOZ_ASSERT(!mLineBreakDue);
   1156  MOZ_ASSERT(mIgnoreAboveIndex == (uint32_t)kNotFound);
   1157  MOZ_ASSERT(!MustSuppressLeaf());
   1158 
   1159  // Reset this, as it’s no longer true after serializing texts, so the next
   1160  // <pre> element will get a leading newline.
   1161  mHasWrittenCiteBlockquote = false;
   1162 
   1163  Write(aText);
   1164 }
   1165 
   1166 void CreateLineOfDashes(nsAString& aResult, const uint32_t aWrapColumn) {
   1167  MOZ_ASSERT(aResult.IsEmpty());
   1168 
   1169  const uint32_t width = (aWrapColumn > 0 ? aWrapColumn : 25);
   1170  while (aResult.Length() < width) {
   1171    aResult.Append(char16_t('-'));
   1172  }
   1173 }
   1174 
   1175 nsresult nsPlainTextSerializer::DoAddLeaf(Element* aElement,
   1176                                          const nsAtom* aTag) {
   1177  MOZ_ASSERT(aElement);
   1178  MOZ_ASSERT(GetIdForContent(aElement) == aTag);
   1179  MOZ_ASSERT(FragmentOrElement::IsHTMLVoid(aTag));
   1180 
   1181  mPreformattedBlockBoundary = false;
   1182 
   1183  if (!DoOutput()) {
   1184    return NS_OK;
   1185  }
   1186 
   1187  if (mLineBreakDue) EnsureVerticalSpace(mFloatingLines);
   1188 
   1189  if (MustSuppressLeaf()) {
   1190    return NS_OK;
   1191  }
   1192 
   1193  if (aTag == nsGkAtoms::br) {
   1194    // Another egregious editor workaround, see bug 38194:
   1195    // ignore the bogus br tags that the editor sticks here and there.
   1196    // FYI: `brElement` may be `nullptr` if the element is <br> element
   1197    //      of non-HTML element.
   1198    // XXX Do we need to call `EnsureVerticalSpace()` when the <br> element
   1199    //     is not an HTML element?
   1200    HTMLBRElement* brElement = HTMLBRElement::FromNodeOrNull(aElement);
   1201    if (!brElement || !brElement->IsPaddingForEmptyLastLine()) {
   1202      EnsureVerticalSpace(mEmptyLines + 1);
   1203    }
   1204  } else if (aTag == nsGkAtoms::hr &&
   1205             mSettings.HasFlag(nsIDocumentEncoder::OutputFormatted)) {
   1206    EnsureVerticalSpace(0);
   1207 
   1208    // Make a line of dashes as wide as the wrap width
   1209    // XXX honoring percentage would be nice
   1210    nsAutoString line;
   1211    CreateLineOfDashes(line, mSettings.GetWrapColumn());
   1212    Write(line);
   1213 
   1214    EnsureVerticalSpace(0);
   1215  } else if (aTag == nsGkAtoms::img) {
   1216    /* Output (in decreasing order of preference)
   1217       alt, title or nothing */
   1218    // See <http://www.w3.org/TR/REC-html40/struct/objects.html#edef-IMG>
   1219    nsAutoString imageDescription;
   1220    if (NS_SUCCEEDED(
   1221            GetAttributeValue(aElement, nsGkAtoms::alt, imageDescription))) {
   1222      // If the alt attribute has an empty value (|alt=""|), output nothing
   1223    } else if (NS_SUCCEEDED(GetAttributeValue(aElement, nsGkAtoms::title,
   1224                                              imageDescription)) &&
   1225               !imageDescription.IsEmpty()) {
   1226      imageDescription = u" ["_ns + imageDescription + u"] "_ns;
   1227    }
   1228 
   1229    Write(imageDescription);
   1230  }
   1231 
   1232  return NS_OK;
   1233 }
   1234 
   1235 /**
   1236 * Adds as many newline as necessary to get |aNumberOfRows| empty lines
   1237 *
   1238 * aNumberOfRows = -1    :   Being in the middle of some line of text
   1239 * aNumberOfRows =  0    :   Being at the start of a line
   1240 * aNumberOfRows =  n>0  :   Having n empty lines before the current line.
   1241 */
   1242 void nsPlainTextSerializer::EnsureVerticalSpace(const int32_t aNumberOfRows) {
   1243  // If we have something in the indent we probably want to output
   1244  // it and it's not included in the count for empty lines so we don't
   1245  // realize that we should start a new line.
   1246  if (aNumberOfRows >= 0 && !mCurrentLine.mIndentation.mHeader.IsEmpty()) {
   1247    EndHardBreakLine();
   1248    mInWhitespace = true;
   1249  }
   1250 
   1251  while (mEmptyLines < aNumberOfRows) {
   1252    EndHardBreakLine();
   1253    mInWhitespace = true;
   1254  }
   1255  mLineBreakDue = false;
   1256  mFloatingLines = -1;
   1257 }
   1258 
   1259 void nsPlainTextSerializer::OutputManager::Flush(CurrentLine& aLine) {
   1260  if (!aLine.mContent.IsEmpty()) {
   1261    aLine.MaybeReplaceNbspsInContent(mFlags);
   1262 
   1263    Append(aLine, StripTrailingWhitespaces::kNo);
   1264 
   1265    aLine.ResetContentAndIndentationHeader();
   1266  }
   1267 }
   1268 
   1269 static bool IsSpaceStuffable(const char16_t* s) {
   1270  return (s[0] == '>' || s[0] == ' ' || s[0] == kNBSP ||
   1271          NS_strncmp(s, u"From ", 5) == 0);
   1272 }
   1273 
   1274 void nsPlainTextSerializer::PerformWrapAndOutputCompleteLines(
   1275    const Settings& aSettings, CurrentLine& aLine, OutputManager& aOutput,
   1276    bool aUseLineBreaker, nsPlainTextSerializer* aSerializer) {
   1277  if (!aSettings.MayWrap()) {
   1278    return;
   1279  }
   1280 
   1281  // Yes, wrap!
   1282  // The "+4" is to avoid wrap lines that only would be a couple
   1283  // of letters too long. We give this bonus only if the
   1284  // wrapcolumn is more than 20.
   1285  const uint32_t wrapColumn = aSettings.GetWrapColumn();
   1286  uint32_t bonuswidth = (wrapColumn > 20) ? 4 : 0;
   1287  while (!aLine.mContent.IsEmpty()) {
   1288    const uint32_t prefixwidth = aLine.DeterminePrefixWidth();
   1289    // The width of the line as it will appear on the screen (approx.).
   1290    const uint32_t currentLineContentWidth =
   1291        GetUnicharStringWidth(aLine.mContent);
   1292    if (currentLineContentWidth + prefixwidth <= wrapColumn + bonuswidth) {
   1293      break;
   1294    }
   1295 
   1296    const int32_t goodSpace =
   1297        aLine.FindWrapIndexForContent(wrapColumn, aUseLineBreaker);
   1298 
   1299    const int32_t contentLength = aLine.mContent.Length();
   1300    if (goodSpace <= 0 || goodSpace >= contentLength) {
   1301      // Nothing to do. Hopefully we get more data later to use for a place to
   1302      // break line.
   1303      break;
   1304    }
   1305    // Found a place to break
   1306    // -1 (trim a char at the break position) only if the line break was a
   1307    // space.
   1308    nsAutoString restOfContent;
   1309    if (nsCRT::IsAsciiSpace(aLine.mContent.CharAt(goodSpace))) {
   1310      aLine.mContent.Right(restOfContent, contentLength - goodSpace - 1);
   1311    } else {
   1312      aLine.mContent.Right(restOfContent, contentLength - goodSpace);
   1313    }
   1314    // if breaker was U+0020, it has to consider for delsp=yes support
   1315    const bool breakBySpace = aLine.mContent.CharAt(goodSpace) == ' ';
   1316    aLine.mContent.Truncate(goodSpace);
   1317    // Append the line to the output.
   1318    if (!aLine.mContent.IsEmpty()) {
   1319      // Trim _one_ potential trailing newline.
   1320      if (aLine.mContent.Last() == '\n') {
   1321        aLine.mContent.Truncate(goodSpace - 1);
   1322      }
   1323      if (!aSettings.HasFlag(nsIDocumentEncoder::OutputPreformatted)) {
   1324        aLine.mContent.Trim(" ", false, true, false);
   1325      }
   1326      if (aSettings.HasFlag(nsIDocumentEncoder::OutputFormatFlowed) &&
   1327          !aLine.mIndentation.mLength) {
   1328        // Add the soft part of the soft linebreak (RFC 2646 4.1)
   1329        // We only do this when there is no indentation since format=flowed
   1330        // lines and indentation doesn't work well together.
   1331 
   1332        // If breaker character is ASCII space with RFC 3676 support
   1333        // (delsp=yes), add twice space.
   1334        if (aSettings.HasFlag(nsIDocumentEncoder::OutputFormatDelSp) &&
   1335            breakBySpace) {
   1336          aLine.mContent.AppendLiteral("  ");
   1337        } else {
   1338          aLine.mContent.Append(char16_t(' '));
   1339        }
   1340      }
   1341      AppendLineToOutput(aSettings, aLine, aOutput);
   1342      if (aSerializer) {
   1343        aSerializer->ResetStateAfterLine();
   1344        aSerializer->mEmptyLines = -1;
   1345      }
   1346    }
   1347    aLine.mContent.Truncate();
   1348    // Space stuffing a la RFC 2646 (format=flowed)
   1349    if (aSettings.HasFlag(nsIDocumentEncoder::OutputFormatFlowed)) {
   1350      aLine.mSpaceStuffed = !restOfContent.IsEmpty() &&
   1351                            IsSpaceStuffable(restOfContent.get()) &&
   1352                            // We space-stuff quoted lines anyway
   1353                            aLine.mCiteQuoteLevel == 0;
   1354    }
   1355    aLine.mContent.Append(restOfContent);
   1356  }
   1357 }
   1358 
   1359 void nsPlainTextSerializer::MaybeWrapAndOutputCompleteLines() {
   1360  PerformWrapAndOutputCompleteLines(mSettings, mCurrentLine, *mOutputManager,
   1361                                    mUseLineBreaker, this);
   1362 }
   1363 
   1364 /**
   1365 * This function adds a piece of text to the current stored line. If we are
   1366 * wrapping text and the stored line will become too long, a suitable
   1367 * location to wrap will be found and the line that's complete will be
   1368 * output.
   1369 */
   1370 void nsPlainTextSerializer::AddToLine(const char16_t* aLineFragment,
   1371                                      int32_t aLineFragmentLength) {
   1372  if (mLineBreakDue) EnsureVerticalSpace(mFloatingLines);
   1373 
   1374  if (mCurrentLine.mContent.IsEmpty()) {
   1375    if (0 == aLineFragmentLength) {
   1376      return;
   1377    }
   1378 
   1379    if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatFlowed)) {
   1380      // Space stuffing a la RFC 2646 (format=flowed).
   1381      // We space-stuff quoted lines anyway
   1382      mCurrentLine.mSpaceStuffed =
   1383          IsSpaceStuffable(aLineFragment) && mCurrentLine.mCiteQuoteLevel == 0;
   1384    }
   1385    mEmptyLines = -1;
   1386  }
   1387 
   1388  mCurrentLine.mContent.Append(aLineFragment, aLineFragmentLength);
   1389 
   1390  MaybeWrapAndOutputCompleteLines();
   1391 }
   1392 
   1393 // The signature separator (RFC 2646).
   1394 const char kSignatureSeparator[] = "-- ";
   1395 
   1396 // The OpenPGP dash-escaped signature separator in inline
   1397 // signed messages according to the OpenPGP standard (RFC 2440).
   1398 const char kDashEscapedSignatureSeparator[] = "- -- ";
   1399 
   1400 static bool IsSignatureSeparator(const nsAString& aString) {
   1401  return aString.EqualsLiteral(kSignatureSeparator) ||
   1402         aString.EqualsLiteral(kDashEscapedSignatureSeparator);
   1403 }
   1404 
   1405 void nsPlainTextSerializer::AppendLineToOutput(const Settings& aSettings,
   1406                                               CurrentLine& aLine,
   1407                                               OutputManager& aOutput) {
   1408  aLine.MaybeReplaceNbspsInContent(aSettings.GetFlags());
   1409  // If we don't have anything "real" to output we have to
   1410  // make sure the indent doesn't end in a space since that
   1411  // would trick a format=flowed-aware receiver.
   1412  aOutput.Append(aLine, OutputManager::StripTrailingWhitespaces::kMaybe);
   1413  aOutput.AppendLineBreak();
   1414  aLine.ResetContentAndIndentationHeader();
   1415 }
   1416 
   1417 /**
   1418 * Outputs the contents of mCurrentLine.mContent, and resets line
   1419 * specific variables. Also adds an indentation and prefix if there is one
   1420 * specified. Strips ending spaces from the line if it isn't preformatted.
   1421 */
   1422 void nsPlainTextSerializer::EndHardBreakLine() {
   1423  /* In non-preformatted mode, remove spaces from the end of the line for
   1424   * format=flowed compatibility. Don't do this for these special cases:
   1425   * "-- ", the signature separator (RFC 2646) shouldn't be touched and
   1426   * "- -- ", the OpenPGP dash-escaped signature separator in inline
   1427   * signed messages according to the OpenPGP standard (RFC 2440).
   1428   */
   1429  if (!mSettings.HasFlag(nsIDocumentEncoder::OutputPreformatted) &&
   1430      !IsSignatureSeparator(mCurrentLine.mContent)) {
   1431    mCurrentLine.mContent.Trim(" ", false, true, false);
   1432  }
   1433 
   1434  // Hard break
   1435  if (mCurrentLine.HasContentOrIndentationHeader()) {
   1436    mEmptyLines = 0;
   1437  } else {
   1438    mEmptyLines++;
   1439  }
   1440 
   1441  MOZ_ASSERT(mOutputManager);
   1442  AppendLineToOutput(mSettings, mCurrentLine, *mOutputManager);
   1443  ResetStateAfterLine();
   1444 }
   1445 
   1446 /**
   1447 * Creates the calculated and stored indent and text in the indentation. That is
   1448 * quote chars and numbers for numbered lists and such.
   1449 */
   1450 void nsPlainTextSerializer::CurrentLine::CreateQuotesAndIndent(
   1451    nsAString& aResult) const {
   1452  // Put the mail quote "> " chars in, if appropriate:
   1453  if (mCiteQuoteLevel > 0) {
   1454    nsAutoString quotes;
   1455    for (int i = 0; i < mCiteQuoteLevel; i++) {
   1456      quotes.Append(char16_t('>'));
   1457    }
   1458    if (!mContent.IsEmpty()) {
   1459      /* Better don't output a space here, if the line is empty,
   1460         in case a receiving format=flowed-aware UA thinks, this were a flowed
   1461         line, which it isn't - it's just empty. (Flowed lines may be joined
   1462         with the following one, so the empty line may be lost completely.) */
   1463      quotes.Append(char16_t(' '));
   1464    }
   1465    aResult = quotes;
   1466  }
   1467 
   1468  // Indent if necessary
   1469  int32_t indentwidth = mIndentation.mLength - mIndentation.mHeader.Length();
   1470  if (mSpaceStuffed) {
   1471    indentwidth += 1;
   1472  }
   1473 
   1474  // Don't make empty lines look flowed
   1475  if (indentwidth > 0 && HasContentOrIndentationHeader()) {
   1476    nsAutoString spaces;
   1477    for (int i = 0; i < indentwidth; ++i) {
   1478      spaces.Append(char16_t(' '));
   1479    }
   1480    aResult += spaces;
   1481  }
   1482 
   1483  if (!mIndentation.mHeader.IsEmpty()) {
   1484    aResult += mIndentation.mHeader;
   1485  }
   1486 }
   1487 
   /**
    * Tells whether |c| is a line feed, a carriage return, a space or a tab.
    */
   static bool IsLineFeedCarriageReturnBlankOrTab(char16_t c) {
     switch (c) {
       case u'\n':
       case u'\r':
       case u' ':
       case u'\t':
         return true;
       default:
         return false;
     }
   }
   1491 
   1492 static void ReplaceVisiblyTrailingNbsps(nsAString& aString) {
   1493  const int32_t totLen = aString.Length();
   1494  for (int32_t i = totLen - 1; i >= 0; i--) {
   1495    char16_t c = aString[i];
   1496    if (IsLineFeedCarriageReturnBlankOrTab(c)) {
   1497      continue;
   1498    }
   1499    if (kNBSP == c) {
   1500      aString.Replace(i, 1, ' ');
   1501    } else {
   1502      break;
   1503    }
   1504  }
   1505 }
   1506 
   1507 void nsPlainTextSerializer::ConvertToLinesAndOutput(const nsAString& aString) {
   1508  nsAString::const_iterator iter;
   1509  aString.BeginReading(iter);
   1510  nsAString::const_iterator done_searching;
   1511  aString.EndReading(done_searching);
   1512 
   1513  // Put the mail quote "> " chars in, if appropriate.
   1514  // Have to put it in before every line.
   1515  while (iter != done_searching) {
   1516    nsAString::const_iterator bol = iter;
   1517    nsAString::const_iterator newline = done_searching;
   1518 
   1519    // Find one of '\n' or '\r' using iterators since nsAString
   1520    // doesn't have the old FindCharInSet function.
   1521    bool spacesOnly = true;
   1522    while (iter != done_searching) {
   1523      if ('\n' == *iter || '\r' == *iter) {
   1524        newline = iter;
   1525        break;
   1526      }
   1527      if (' ' != *iter) {
   1528        spacesOnly = false;
   1529      }
   1530      ++iter;
   1531    }
   1532 
   1533    // Done searching
   1534    nsAutoString stringpart;
   1535    bool outputLineBreak = false;
   1536    bool isNewLineCRLF = false;
   1537    if (newline == done_searching) {
   1538      // No new lines.
   1539      stringpart.Assign(Substring(bol, newline));
   1540      if (!stringpart.IsEmpty()) {
   1541        char16_t lastchar = stringpart.Last();
   1542        mInWhitespace = IsLineFeedCarriageReturnBlankOrTab(lastchar);
   1543      }
   1544      mEmptyLines = -1;
   1545    } else {
   1546      // There is a newline
   1547      stringpart.Assign(Substring(bol, newline));
   1548      mInWhitespace = true;
   1549      outputLineBreak = true;
   1550      if ('\r' == *iter++ && '\n' == *iter) {
   1551        // There was a CRLF in the input. This used to be illegal and
   1552        // stripped by the parser. Apparently not anymore. Let's skip
   1553        // over the LF.
   1554        newline = iter++;
   1555        isNewLineCRLF = true;
   1556      }
   1557    }
   1558 
   1559    if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatFlowed)) {
   1560      if ((outputLineBreak || !spacesOnly) &&  // bugs 261467,125928
   1561          !IsQuotedLine(stringpart) && !IsSignatureSeparator(stringpart)) {
   1562        stringpart.Trim(" ", false, true, true);
   1563      }
   1564      mCurrentLine.mSpaceStuffed =
   1565          IsSpaceStuffable(stringpart.get()) && !IsQuotedLine(stringpart);
   1566    }
   1567    mCurrentLine.mContent.Append(stringpart);
   1568 
   1569    mCurrentLine.MaybeReplaceNbspsInContent(mSettings.GetFlags());
   1570 
   1571    mOutputManager->Append(mCurrentLine,
   1572                           OutputManager::StripTrailingWhitespaces::kNo);
   1573    if (outputLineBreak) {
   1574      if (mSettings.HasFlag(
   1575              nsIDocumentEncoder::OutputForPlainTextClipboardCopy)) {
   1576        // This is aligned with other browsers that they don't convert CRLF to
   1577        // the platform line break.
   1578        if ('\n' == *newline) {
   1579          mOutputManager->AppendLineBreak(isNewLineCRLF);
   1580          // If there is preceding text, we are starting a new line, so reset
   1581          // mEmptyLines. If there is no preceding text, we are outputting
   1582          // multiple line breaks, so we count them toward mEmptyLines.
   1583          mEmptyLines = stringpart.IsEmpty() ? mEmptyLines + 1 : 0;
   1584        } else {
   1585          mOutputManager->Append(u"\r"_ns);
   1586          // `\r` isn’t treated as a line break here, so we’re now in the middle
   1587          // of the line.
   1588          mEmptyLines = -1;
   1589        }
   1590      } else {
   1591        mOutputManager->AppendLineBreak();
   1592        mEmptyLines = 0;
   1593      }
   1594    }
   1595 
   1596    mCurrentLine.ResetContentAndIndentationHeader();
   1597  }
   1598 }
   1599 
   1600 /**
   1601 * Write a string. This is the highlevel function to use to get text output.
   1602 * By using AddToLine, Output, EndHardBreakLine and other functions it handles
   1603 * quotation, line wrapping, indentation, whitespace compression and other
   1604 * things.
   1605 */
   1606 void nsPlainTextSerializer::Write(const nsAString& aStr) {
   1607  // XXX Copy necessary to use nsString methods and gain
   1608  // access to underlying buffer
   1609  nsAutoString str(aStr);
   1610 
   1611 #ifdef DEBUG_wrapping
   1612  printf("Write(%s): wrap col = %d\n", NS_ConvertUTF16toUTF8(str).get(),
   1613         mSettings.GetWrapColumn());
   1614 #endif
   1615 
   1616  const int32_t totLen = str.Length();
   1617 
   1618  // If the string is empty, do nothing:
   1619  if (totLen <= 0) return;
   1620 
   1621  // For Flowed text change nbsp-ses to spaces at end of lines to allow them
   1622  // to be cut off along with usual spaces if required. (bug #125928)
   1623  if (mSettings.HasFlag(nsIDocumentEncoder::OutputFormatFlowed)) {
   1624    ReplaceVisiblyTrailingNbsps(str);
   1625  }
   1626 
   1627  // We have two major codepaths here. One that does preformatted text and one
   1628  // that does normal formatted text. The one for preformatted text calls
   1629  // Output directly while the other code path goes through AddToLine.
   1630  if ((mPreFormattedMail && !mSettings.GetWrapColumn()) ||
   1631      (IsElementPreformatted() && !mPreFormattedMail) ||
   1632      (mSpanLevel > 0 && mEmptyLines >= 0 && IsQuotedLine(str))) {
   1633    // No intelligent wrapping.
   1634 
   1635    // This mustn't be mixed with intelligent wrapping without clearing
   1636    // the mCurrentLine.mContent buffer before!!!
   1637    NS_ASSERTION(mCurrentLine.mContent.IsEmpty() ||
   1638                     (IsElementPreformatted() && !mPreFormattedMail),
   1639                 "Mixed wrapping data and nonwrapping data on the same line");
   1640    MOZ_ASSERT(mOutputManager);
   1641 
   1642    if (!mCurrentLine.mContent.IsEmpty()) {
   1643      mOutputManager->Flush(mCurrentLine);
   1644    }
   1645 
   1646    ConvertToLinesAndOutput(str);
   1647    return;
   1648  }
   1649 
   1650  // Intelligent handling of text
   1651  // If needed, strip out all "end of lines"
   1652  // and multiple whitespace between words
   1653  int32_t nextpos;
   1654  const char16_t* offsetIntoBuffer = nullptr;
   1655 
   1656  int32_t bol = 0;
   1657  while (bol < totLen) {  // Loop over lines
   1658    // Find a place where we may have to do whitespace compression
   1659    nextpos = str.FindCharInSet(u" \t\n\r", bol);
   1660 #ifdef DEBUG_wrapping
   1661    nsAutoString remaining;
   1662    str.Right(remaining, totLen - bol);
   1663    foo = ToNewCString(remaining);
   1664    // printf("Next line: bol = %d, newlinepos = %d, totLen = %d, "
   1665    //        "string = '%s'\n", bol, nextpos, totLen, foo);
   1666    free(foo);
   1667 #endif
   1668 
   1669    if (nextpos == kNotFound) {
   1670      // The rest of the string
   1671      offsetIntoBuffer = str.get() + bol;
   1672      AddToLine(offsetIntoBuffer, totLen - bol);
   1673      bol = totLen;
   1674      mInWhitespace = false;
   1675    } else {
   1676      // There's still whitespace left in the string
   1677      if (nextpos != 0 && (nextpos + 1) < totLen) {
   1678        offsetIntoBuffer = str.get() + nextpos;
   1679        // skip '\n' if it is between CJ chars
   1680        if (offsetIntoBuffer[0] == '\n' && IS_CJ_CHAR(offsetIntoBuffer[-1]) &&
   1681            IS_CJ_CHAR(offsetIntoBuffer[1])) {
   1682          offsetIntoBuffer = str.get() + bol;
   1683          AddToLine(offsetIntoBuffer, nextpos - bol);
   1684          bol = nextpos + 1;
   1685          continue;
   1686        }
   1687      }
   1688      // If we're already in whitespace and not preformatted, just skip it:
   1689      if (mInWhitespace && (nextpos == bol) && !mPreFormattedMail &&
   1690          !mSettings.HasFlag(nsIDocumentEncoder::OutputPreformatted)) {
   1691        // Skip whitespace
   1692        bol++;
   1693        continue;
   1694      }
   1695 
   1696      if (nextpos == bol &&
   1697          !mSettings.HasFlag(
   1698              nsIDocumentEncoder::OutputForPlainTextClipboardCopy)) {
   1699        // Note that we are in whitespace.
   1700        mInWhitespace = true;
   1701        offsetIntoBuffer = str.get() + nextpos;
   1702        // XXX Why do we need to keep the very first character when compressing
   1703        // the reset?
   1704        AddToLine(offsetIntoBuffer, 1);
   1705        bol++;
   1706        continue;
   1707      }
   1708 
   1709      mInWhitespace = true;
   1710 
   1711      offsetIntoBuffer = str.get() + bol;
   1712      if (mPreFormattedMail ||
   1713          mSettings.HasFlag(nsIDocumentEncoder::OutputPreformatted)) {
   1714        // Preserve the real whitespace character
   1715        nextpos++;
   1716        AddToLine(offsetIntoBuffer, nextpos - bol);
   1717        bol = nextpos;
   1718      } else {
   1719        // Replace the whitespace with a space
   1720        AddToLine(offsetIntoBuffer, nextpos - bol);
   1721        AddToLine(kSpace.get(), 1);
   1722        bol = nextpos + 1;  // Let's eat the whitespace
   1723      }
   1724    }
   1725  }  // Continue looping over the string
   1726 }
   1727 
   1728 /**
   1729 * Gets the value of an attribute in a string. If the function returns
   1730 * NS_ERROR_NOT_AVAILABLE, there was none such attribute specified.
   1731 */
   1732 nsresult nsPlainTextSerializer::GetAttributeValue(Element* aElement,
   1733                                                  const nsAtom* aName,
   1734                                                  nsString& aValueRet) const {
   1735  MOZ_ASSERT(aElement);
   1736  MOZ_ASSERT(aName);
   1737 
   1738  if (aElement->GetAttr(aName, aValueRet)) {
   1739    return NS_OK;
   1740  }
   1741 
   1742  return NS_ERROR_NOT_AVAILABLE;
   1743 }
   1744 
   1745 /**
   1746 * Returns true, if the element was inserted by Moz' TXT->HTML converter.
   1747 * In this case, we should ignore it.
   1748 */
   1749 bool nsPlainTextSerializer::IsCurrentNodeConverted(Element* aElement) const {
   1750  MOZ_ASSERT(aElement);
   1751 
   1752  nsAutoString value;
   1753  nsresult rv = GetAttributeValue(aElement, nsGkAtoms::_class, value);
   1754  return (NS_SUCCEEDED(rv) &&
   1755          (StringBeginsWith(value, u"moz-txt"_ns,
   1756                            nsASCIICaseInsensitiveStringComparator) ||
   1757           StringBeginsWith(value, u"\"moz-txt"_ns,
   1758                            nsASCIICaseInsensitiveStringComparator)));
   1759 }
   1760 
   1761 // static
   1762 nsAtom* nsPlainTextSerializer::GetIdForContent(nsIContent* aContent) {
   1763  if (!aContent->IsHTMLElement()) {
   1764    return nullptr;
   1765  }
   1766 
   1767  nsAtom* localName = aContent->NodeInfo()->NameAtom();
   1768  return localName->IsStatic() ? localName : nullptr;
   1769 }
   1770 
   1771 bool nsPlainTextSerializer::IsElementPreformatted() const {
   1772  return !mPreformatStack.empty() && mPreformatStack.top();
   1773 }
   1774 
   1775 bool nsPlainTextSerializer::IsElementPreformatted(Element* aElement) {
   1776  RefPtr<const ComputedStyle> computedStyle =
   1777      nsComputedDOMStyle::GetComputedStyleNoFlush(aElement);
   1778  if (computedStyle) {
   1779    const nsStyleText* textStyle = computedStyle->StyleText();
   1780    return textStyle->WhiteSpaceOrNewlineIsSignificant();
   1781  }
   1782  // Fall back to looking at the tag, in case there is no style information.
   1783  return GetIdForContent(aElement) == nsGkAtoms::pre;
   1784 }
   1785 
   1786 bool nsPlainTextSerializer::IsCssBlockLevelElement(Element* aElement) {
   1787  RefPtr<const ComputedStyle> computedStyle =
   1788      nsComputedDOMStyle::GetComputedStyleNoFlush(aElement);
   1789  if (computedStyle) {
   1790    const nsStyleDisplay* displayStyle = computedStyle->StyleDisplay();
   1791    return displayStyle->IsBlockOutsideStyle();
   1792  }
   1793  // Fall back to looking at the tag, in case there is no style information.
   1794  return nsContentUtils::IsHTMLBlockLevelElement(aElement);
   1795 }
   1796 
   1797 /**
   1798 * This method is required only to identify LI's inside OL.
   1799 * Returns TRUE if we are inside an OL tag and FALSE otherwise.
   1800 */
   1801 bool nsPlainTextSerializer::IsInOL() const {
   1802  int32_t i = mTagStackIndex;
   1803  while (--i >= 0) {
   1804    if (mTagStack[i] == nsGkAtoms::ol) return true;
   1805    if (mTagStack[i] == nsGkAtoms::ul) {
   1806      // If a UL is reached first, LI belongs the UL nested in OL.
   1807      return false;
   1808    }
   1809  }
   1810  // We may reach here for orphan LI's.
   1811  return false;
   1812 }
   1813 
   1814 bool nsPlainTextSerializer::IsInOlOrUl() const {
   1815  return (mULCount > 0) || !mOLStack.IsEmpty();
   1816 }
   1817 
   1818 /*
   1819  @return 0 = no header, 1 = h1, ..., 6 = h6
   1820 */
   1821 int32_t HeaderLevel(const nsAtom* aTag) {
   1822  if (aTag == nsGkAtoms::h1) {
   1823    return 1;
   1824  }
   1825  if (aTag == nsGkAtoms::h2) {
   1826    return 2;
   1827  }
   1828  if (aTag == nsGkAtoms::h3) {
   1829    return 3;
   1830  }
   1831  if (aTag == nsGkAtoms::h4) {
   1832    return 4;
   1833  }
   1834  if (aTag == nsGkAtoms::h5) {
   1835    return 5;
   1836  }
   1837  if (aTag == nsGkAtoms::h6) {
   1838    return 6;
   1839  }
   1840  return 0;
   1841 }
   1842 
   1843 /* These functions define the column width of an ISO 10646 character
   1844 * as follows:
   1845 *
   1846 *    - The null character (U+0000) has a column width of 0.
   1847 *
   1848 *    - Other C0/C1 control characters and DEL will lead to a return
   1849 *      value of -1.
   1850 *
   1851 *    - Non-spacing and enclosing combining characters (general
   1852 *      category code Mn or Me in the Unicode database) have a
   1853 *      column width of 0.
   1854 *
   1855 *    - Spacing characters in the East Asian Wide (W) or East Asian
   1856 *      FullWidth (F) category as defined in Unicode Technical
   1857 *      Report #11 have a column width of 2.
   1858 *
   1859 *    - All remaining characters (including all printable
   1860 *      ISO 8859-1 and WGL4 characters, Unicode control characters,
   1861 *      etc.) have a column width of 1.
   1862 */
   1863 
   1864 int32_t GetUnicharWidth(char32_t aCh) {
   1865  /* test for 8-bit control characters */
   1866  if (aCh == 0) {
   1867    return 0;
   1868  }
   1869  if (aCh < 32 || (aCh >= 0x7f && aCh < 0xa0)) {
   1870    return -1;
   1871  }
   1872 
   1873  /* The first combining char in Unicode is U+0300 */
   1874  if (aCh < 0x0300) {
   1875    return 1;
   1876  }
   1877 
   1878  auto gc = unicode::GetGeneralCategory(aCh);
   1879  if (gc == HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK ||
   1880      gc == HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK) {
   1881    return 0;
   1882  }
   1883 
   1884  /* if we arrive here, ucs is not a combining or C0/C1 control character */
   1885 
   1886  /* fast test for majority of non-wide scripts */
   1887  if (aCh < 0x1100) {
   1888    return 1;
   1889  }
   1890 
   1891  return intl::UnicodeProperties::IsEastAsianWidthFW(aCh) ? 2 : 1;
   1892 }
   1893 
   1894 int32_t GetUnicharStringWidth(Span<const char16_t> aString) {
   1895  int32_t width = 0;
   1896  for (auto iter = aString.begin(); iter != aString.end(); ++iter) {
   1897    char32_t c = *iter;
   1898    if (NS_IS_HIGH_SURROGATE(c) && (iter + 1) != aString.end() &&
   1899        NS_IS_LOW_SURROGATE(*(iter + 1))) {
   1900      c = SURROGATE_TO_UCS4(c, *++iter);
   1901    }
   1902    const int32_t w = GetUnicharWidth(c);
   1903    // Taking 1 as the width of non-printable character, for bug 94475.
   1904    width += (w < 0 ? 1 : w);
   1905  }
   1906  return width;
   1907 }