tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

TestPlainTextSerializer.cpp (13112B)


      1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
      2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
      3 /* This Source Code Form is subject to the terms of the Mozilla Public
      4 * License, v. 2.0. If a copy of the MPL was not distributed with this
      5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      6 
      7 #include "Common.h"
      8 #include "gtest/gtest.h"
      9 #include "nsCRT.h"
     10 #include "nsIDocumentEncoder.h"
     11 #include "nsIParserUtils.h"
     12 #include "nsServiceManagerUtils.h"
     13 #include "nsString.h"
     14 
     15 // Test for ASCII with format=flowed; delsp=yes
     16 TEST(PlainTextSerializer, ASCIIWithFlowedDelSp)
     17 {
     18  nsString test;
     19  nsString result;
     20 
     21  test.AssignLiteral(
     22      "<html><body>"
     23      "Firefox Firefox Firefox Firefox "
     24      "Firefox Firefox Firefox Firefox "
     25      "Firefox Firefox Firefox Firefox"
     26      "</body></html>");
     27 
     28  ConvertBufToPlainText(test,
     29                        nsIDocumentEncoder::OutputFormatted |
     30                            nsIDocumentEncoder::OutputCRLineBreak |
     31                            nsIDocumentEncoder::OutputLFLineBreak |
     32                            nsIDocumentEncoder::OutputFormatFlowed |
     33                            nsIDocumentEncoder::OutputFormatDelSp,
     34                        kDefaultWrapColumn);
     35 
     36  // create result case
     37  result.AssignLiteral(
     38      "Firefox Firefox Firefox Firefox "
     39      "Firefox Firefox Firefox Firefox "
     40      "Firefox  \r\nFirefox Firefox Firefox\r\n");
     41 
     42  ASSERT_EQ(test, result)
     43      << "Wrong HTML to ASCII text serialization with format=flowed; delsp=yes";
     44 }
     45 
     46 TEST(PlainTextSerializer, Bug1864820)
     47 {
     48  nsString test(
     49      uR"#(
     50 <html><body>
     51 &gt;&nbsp;&nbsp;label=master&amp;label=experimental&amp;product=chrome&amp;product=firefox&amp;product=safari&amp;aligned&amp;view=interop&amp;q=label%3Ainterop-2023-property
     52 <blockquote>
     53 &gt;&nbsp;&nbsp;label=master&amp;label=experimental&amp;product=chrome&amp;product=firefox&amp;product=safari&amp;aligned&amp;view=interop&amp;q=label%3Ainterop-2023-property
     54 </blockquote>
     55 </body></html>
     56 )#");
     57 
     58  ConvertBufToPlainText(test,
     59                        nsIDocumentEncoder::OutputFormatted |
     60                            nsIDocumentEncoder::OutputPersistNBSP |
     61                            nsIDocumentEncoder::OutputLFLineBreak |
     62                            nsIDocumentEncoder::OutputFormatFlowed,
     63                        kDefaultWrapColumn);
     64 
     65  nsString result(
     66      uR"#(
     67 >  label=master&label=experimental&product=chrome&product=firefox&product=safari&aligned&view=interop&q=label%3Ainterop-2023-property
     68 
     69     >  label=master&label=experimental&product=chrome&product=firefox&product=safari&aligned&view=interop&q=label%3Ainterop-2023-property
     70 )#");
     71  result.Trim(" \n");
     72  test.Trim(" \n");
     73  ASSERT_EQ(test, result) << "Shouldn't hang with format=flowed";
     74 }
     75 
     76 // Test for CJK with format=flowed; delsp=yes
     77 TEST(PlainTextSerializer, CJKWithFlowedDelSp)
     78 {
     79  nsString test;
     80  nsString result;
     81 
     82  test.AssignLiteral("<html><body>");
     83  for (uint32_t i = 0; i < 40; i++) {
     84    // Insert Kanji (U+5341)
     85    test.Append(0x5341);
     86  }
     87  test.AppendLiteral("</body></html>");
     88 
     89  ConvertBufToPlainText(test,
     90                        nsIDocumentEncoder::OutputFormatted |
     91                            nsIDocumentEncoder::OutputCRLineBreak |
     92                            nsIDocumentEncoder::OutputLFLineBreak |
     93                            nsIDocumentEncoder::OutputFormatFlowed |
     94                            nsIDocumentEncoder::OutputFormatDelSp,
     95                        kDefaultWrapColumn);
     96 
     97  // create result case
     98  for (uint32_t i = 0; i < 36; i++) {
     99    result.Append(0x5341);
    100  }
    101  result.AppendLiteral(" \r\n");
    102  for (uint32_t i = 0; i < 4; i++) {
    103    result.Append(0x5341);
    104  }
    105  result.AppendLiteral("\r\n");
    106 
    107  ASSERT_EQ(test, result)
    108      << "Wrong HTML to CJK text serialization with format=flowed; delsp=yes";
    109 }
    110 
    111 // Test for CJK with DisallowLineBreaking
    112 TEST(PlainTextSerializer, CJKWithDisallowLineBreaking)
    113 {
    114  nsString test;
    115  nsString result;
    116 
    117  test.AssignLiteral("<html><body>");
    118  for (uint32_t i = 0; i < 400; i++) {
    119    // Insert Kanji (U+5341)
    120    test.Append(0x5341);
    121  }
    122  test.AppendLiteral("</body></html>");
    123 
    124  ConvertBufToPlainText(test,
    125                        nsIDocumentEncoder::OutputFormatted |
    126                            nsIDocumentEncoder::OutputCRLineBreak |
    127                            nsIDocumentEncoder::OutputLFLineBreak |
    128                            nsIDocumentEncoder::OutputFormatFlowed |
    129                            nsIDocumentEncoder::OutputDisallowLineBreaking,
    130                        kDefaultWrapColumn);
    131 
    132  // create result case
    133  for (uint32_t i = 0; i < 400; i++) {
    134    result.Append(0x5341);
    135  }
    136  result.AppendLiteral("\r\n");
    137 
    138  ASSERT_EQ(test, result)
    139      << "Wrong HTML to CJK text serialization with OutputDisallowLineBreaking";
    140 }
    141 
    142 // Test for Latin with DisallowLineBreaking
    143 TEST(PlainTextSerializer, LatinWithDisallowLineBreaking)
    144 {
    145  nsString test;
    146  test.AssignLiteral("<html><body>");
    147  for (uint32_t i = 0; i < 400; i++) {
    148    // Insert á (Latin Small Letter a with Acute) (U+00E1)
    149    test.Append(0x00E1);
    150  }
    151  test.AppendLiteral("</body></html>\r\n");
    152 
    153  ConvertBufToPlainText(test,
    154                        nsIDocumentEncoder::OutputFormatted |
    155                            nsIDocumentEncoder::OutputCRLineBreak |
    156                            nsIDocumentEncoder::OutputLFLineBreak |
    157                            nsIDocumentEncoder::OutputFormatFlowed |
    158                            nsIDocumentEncoder::OutputDisallowLineBreaking,
    159                        kDefaultWrapColumn);
    160 
    161  // Create expect case.
    162  nsString expect;
    163  for (uint32_t i = 0; i < 400; i++) {
    164    expect.Append(0x00E1);
    165  }
    166  expect.AppendLiteral(" \r\n\r\n");
    167 
    168  ASSERT_EQ(test, expect) << "Wrong HTML to Latin text serialization with "
    169                             "OutputDisallowLineBreaking";
    170 }
    171 
    172 // Test for ASCII with format=flowed; and quoted lines in preformatted span.
    173 TEST(PlainTextSerializer, PreformatFlowedQuotes)
    174 {
    175  nsString test;
    176  nsString result;
    177 
    178  test.AssignLiteral(
    179      "<html><body>"
    180      "<span style=\"white-space: pre-wrap;\" _moz_quote=\"true\">"
    181      "&gt; Firefox Firefox Firefox Firefox <br>"
    182      "&gt; Firefox Firefox Firefox <b>Firefox</b><br>"
    183      "&gt;<br>"
    184      "&gt;&gt; Firefox Firefox Firefox Firefox <br>"
    185      "&gt;&gt; Firefox Firefox Firefox Firefox<br>"
    186      "</span></body></html>");
    187 
    188  ConvertBufToPlainText(test,
    189                        nsIDocumentEncoder::OutputFormatted |
    190                            nsIDocumentEncoder::OutputCRLineBreak |
    191                            nsIDocumentEncoder::OutputLFLineBreak |
    192                            nsIDocumentEncoder::OutputFormatFlowed,
    193                        kDefaultWrapColumn);
    194 
    195  // create result case
    196  result.AssignLiteral(
    197      "> Firefox Firefox Firefox Firefox \r\n"
    198      "> Firefox Firefox Firefox *Firefox*\r\n"
    199      ">\r\n"
    200      ">> Firefox Firefox Firefox Firefox \r\n"
    201      ">> Firefox Firefox Firefox Firefox\r\n");
    202 
    203  ASSERT_EQ(test, result) << "Wrong HTML to ASCII text serialization "
    204                             "with format=flowed; and quoted "
    205                             "lines using OutputFormatted";
    206 }
    207 
    208 // Test for ASCII with format=flowed; and not using OutputFormatted.
    209 TEST(PlainTextSerializer, OutputFormatFlowedAndWrapped)
    210 {
    211  nsString test;
    212  nsString result;
    213 
    214  test.AssignLiteral(
    215      "<html><body>"
    216      "<span style=\"white-space: pre-wrap;\" _moz_quote=\"true\">"
    217      "&gt; Firefox Firefox Firefox Firefox <br>"
    218      "&gt; Firefox Firefox Firefox <b>Firefox</b><br>"
    219      "&gt;<br>"
    220      "&gt;&gt; Firefox Firefox Firefox Firefox <br>"
    221      "&gt;&gt; Firefox Firefox Firefox Firefox<br>"
    222      "</span></body></html>");
    223 
    224  ConvertBufToPlainText(test,
    225                        nsIDocumentEncoder::OutputWrap |
    226                            nsIDocumentEncoder::OutputCRLineBreak |
    227                            nsIDocumentEncoder::OutputLFLineBreak |
    228                            nsIDocumentEncoder::OutputFormatFlowed,
    229                        kDefaultWrapColumn);
    230 
    231  // create result case
    232  result.AssignLiteral(
    233      "> Firefox Firefox Firefox Firefox \r\n"
    234      "> Firefox Firefox Firefox Firefox\r\n"
    235      ">\r\n"
    236      ">> Firefox Firefox Firefox Firefox \r\n"
    237      ">> Firefox Firefox Firefox Firefox\r\n");
    238 
    239  ASSERT_EQ(test, result) << "Wrong HTML to ASCII text serialization "
    240                             "with format=flowed; and quoted "
    241                             "lines using OutputWrap";
    242 }
    243 
    244 TEST(PlainTextSerializer, PrettyPrintedHtml)
    245 {
    246  nsString test;
    247  test.AppendLiteral("<html>" NS_LINEBREAK "<body>" NS_LINEBREAK
    248                     "  first<br>" NS_LINEBREAK "  second<br>" NS_LINEBREAK
    249                     "</body>" NS_LINEBREAK "</html>");
    250 
    251  ConvertBufToPlainText(test, 0, kDefaultWrapColumn);
    252 
    253  nsAutoString expect;
    254  expect.AppendLiteral("first" NS_LINEBREAK "second" NS_LINEBREAK);
    255 
    256  ASSERT_EQ(test, expect) << "Wrong prettyprinted html to text serialization";
    257 }
    258 
    259 TEST(PlainTextSerializer, PreElement)
    260 {
    261  nsString test;
    262  test.AppendLiteral("<html>" NS_LINEBREAK "<body>" NS_LINEBREAK
    263                     "<pre>" NS_LINEBREAK "  first" NS_LINEBREAK
    264                     "  second" NS_LINEBREAK "</pre>" NS_LINEBREAK
    265                     "</body>" NS_LINEBREAK "</html>");
    266 
    267  ConvertBufToPlainText(test, 0, kDefaultWrapColumn);
    268 
    269  nsAutoString expect;
    270  expect.AppendLiteral("  first" NS_LINEBREAK
    271                       "  second" NS_LINEBREAK NS_LINEBREAK);
    272 
    273  ASSERT_EQ(test, expect) << "Wrong prettyprinted html to text serialization";
    274 }
    275 
    276 TEST(PlainTextSerializer, BlockElement)
    277 {
    278  nsString test;
    279  test.AppendLiteral("<html>" NS_LINEBREAK "<body>" NS_LINEBREAK
    280                     "<div>" NS_LINEBREAK "  first" NS_LINEBREAK
    281                     "</div>" NS_LINEBREAK "<div>" NS_LINEBREAK
    282                     "  second" NS_LINEBREAK "</div>" NS_LINEBREAK
    283                     "</body>" NS_LINEBREAK "</html>");
    284 
    285  ConvertBufToPlainText(test, 0, kDefaultWrapColumn);
    286 
    287  nsAutoString expect;
    288  expect.AppendLiteral("first" NS_LINEBREAK "second" NS_LINEBREAK);
    289 
    290  ASSERT_EQ(test, expect) << "Wrong prettyprinted html to text serialization";
    291 }
    292 
    293 TEST(PlainTextSerializer, PreWrapElementForThunderbird)
    294 {
    295  // This test examines the magic pre-wrap setup that Thunderbird relies on.
    296  nsString test;
    297  test.AppendLiteral("<html>" NS_LINEBREAK
    298                     "<body style=\"white-space: pre-wrap;\">" NS_LINEBREAK
    299                     "<pre>" NS_LINEBREAK
    300                     "  first line is too long" NS_LINEBREAK
    301                     "  second line is even loooonger  " NS_LINEBREAK
    302                     "</pre>" NS_LINEBREAK "</body>" NS_LINEBREAK "</html>");
    303 
    304  const uint32_t wrapColumn = 10;
    305  ConvertBufToPlainText(test, nsIDocumentEncoder::OutputWrap, wrapColumn);
    306 
    307  // "\n\n  first\nline is\ntoo long\n  second\nline is\neven\nloooonger\n\n\n"
    308  nsAutoString expect;
    309  expect.AppendLiteral(NS_LINEBREAK NS_LINEBREAK
    310                       "  first" NS_LINEBREAK "line is" NS_LINEBREAK
    311                       "too long" NS_LINEBREAK "  second" NS_LINEBREAK
    312                       "line is" NS_LINEBREAK "even" NS_LINEBREAK
    313                       "loooonger" NS_LINEBREAK NS_LINEBREAK NS_LINEBREAK);
    314 
    315  ASSERT_EQ(test, expect) << "Wrong prettyprinted html to text serialization";
    316 }
    317 
    318 TEST(PlainTextSerializer, Simple)
    319 {
    320  nsString test;
    321  test.AppendLiteral(
    322      "<html><base>base</base><head><span>span</span></head>"
    323      "<body>body</body></html>");
    324  ConvertBufToPlainText(test, 0, kDefaultWrapColumn);
    325 
    326  nsAutoString expect;
    327  expect.AppendLiteral("basespanbody");
    328 
    329  ASSERT_EQ(test, expect) << "Wrong html to text serialization";
    330 }
    331 
    332 TEST(PlainTextSerializer, OneHundredAndOneOL)
    333 {
    334  nsAutoString test;
    335  test.AppendLiteral(
    336      "<html>"
    337      "<body>"
    338      "<ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><"
    339      "ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><"
    340      "ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><"
    341      "ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><"
    342      "ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><"
    343      "ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol></ol></ol></ol></"
    344      "ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></"
    345      "ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></"
    346      "ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></"
    347      "ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></"
    348      "ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></"
    349      "ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></"
    350      "ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></"
    351      "ol></ol><li>X</li></ol>"
    352      "</body>"
    353      "</html>");
    354 
    355  ConvertBufToPlainText(test, nsIDocumentEncoder::OutputFormatted,
    356                        kDefaultWrapColumn);
    357 
    358  nsAutoString expected;
    359  expected.AppendLiteral(" 1. X" NS_LINEBREAK);
    360  ASSERT_EQ(test, expected);
    361 }