TestPlainTextSerializer.cpp (13112B)
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */ 3 /* This Source Code Form is subject to the terms of the Mozilla Public 4 * License, v. 2.0. If a copy of the MPL was not distributed with this 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 6 7 #include "Common.h" 8 #include "gtest/gtest.h" 9 #include "nsCRT.h" 10 #include "nsIDocumentEncoder.h" 11 #include "nsIParserUtils.h" 12 #include "nsServiceManagerUtils.h" 13 #include "nsString.h" 14 15 // Test for ASCII with format=flowed; delsp=yes 16 TEST(PlainTextSerializer, ASCIIWithFlowedDelSp) 17 { 18 nsString test; 19 nsString result; 20 21 test.AssignLiteral( 22 "<html><body>" 23 "Firefox Firefox Firefox Firefox " 24 "Firefox Firefox Firefox Firefox " 25 "Firefox Firefox Firefox Firefox" 26 "</body></html>"); 27 28 ConvertBufToPlainText(test, 29 nsIDocumentEncoder::OutputFormatted | 30 nsIDocumentEncoder::OutputCRLineBreak | 31 nsIDocumentEncoder::OutputLFLineBreak | 32 nsIDocumentEncoder::OutputFormatFlowed | 33 nsIDocumentEncoder::OutputFormatDelSp, 34 kDefaultWrapColumn); 35 36 // create result case 37 result.AssignLiteral( 38 "Firefox Firefox Firefox Firefox " 39 "Firefox Firefox Firefox Firefox " 40 "Firefox \r\nFirefox Firefox Firefox\r\n"); 41 42 ASSERT_EQ(test, result) 43 << "Wrong HTML to ASCII text serialization with format=flowed; delsp=yes"; 44 } 45 46 TEST(PlainTextSerializer, Bug1864820) 47 { 48 nsString test( 49 uR"#( 50 <html><body> 51 > label=master&label=experimental&product=chrome&product=firefox&product=safari&aligned&view=interop&q=label%3Ainterop-2023-property 52 <blockquote> 53 > label=master&label=experimental&product=chrome&product=firefox&product=safari&aligned&view=interop&q=label%3Ainterop-2023-property 54 </blockquote> 55 </body></html> 56 )#"); 57 58 ConvertBufToPlainText(test, 59 nsIDocumentEncoder::OutputFormatted | 60 nsIDocumentEncoder::OutputPersistNBSP | 61 nsIDocumentEncoder::OutputLFLineBreak | 62 nsIDocumentEncoder::OutputFormatFlowed, 63 kDefaultWrapColumn); 64 65 nsString result( 66 uR"#( 67 > label=master&label=experimental&product=chrome&product=firefox&product=safari&aligned&view=interop&q=label%3Ainterop-2023-property 68 69 > label=master&label=experimental&product=chrome&product=firefox&product=safari&aligned&view=interop&q=label%3Ainterop-2023-property 70 )#"); 71 result.Trim(" \n"); 72 test.Trim(" \n"); 73 ASSERT_EQ(test, result) << "Shouldn't hang with format=flowed"; 74 } 75 76 // Test for CJK with format=flowed; delsp=yes 77 TEST(PlainTextSerializer, CJKWithFlowedDelSp) 78 { 79 nsString test; 80 nsString result; 81 82 test.AssignLiteral("<html><body>"); 83 for (uint32_t i = 0; i < 40; i++) { 84 // Insert Kanji (U+5341) 85 test.Append(0x5341); 86 } 87 test.AppendLiteral("</body></html>"); 88 89 ConvertBufToPlainText(test, 90 nsIDocumentEncoder::OutputFormatted | 91 nsIDocumentEncoder::OutputCRLineBreak | 92 nsIDocumentEncoder::OutputLFLineBreak | 93 nsIDocumentEncoder::OutputFormatFlowed | 94 nsIDocumentEncoder::OutputFormatDelSp, 95 kDefaultWrapColumn); 96 97 // create result case 98 for (uint32_t i = 0; i < 36; i++) { 99 result.Append(0x5341); 100 } 101 result.AppendLiteral(" \r\n"); 102 for (uint32_t i = 0; i < 4; i++) { 103 result.Append(0x5341); 104 } 105 result.AppendLiteral("\r\n"); 106 107 ASSERT_EQ(test, result) 108 << "Wrong HTML to CJK text serialization with format=flowed; delsp=yes"; 109 } 110 111 // Test for CJK with DisallowLineBreaking 112 TEST(PlainTextSerializer, CJKWithDisallowLineBreaking) 113 { 114 nsString test; 115 nsString result; 116 117 test.AssignLiteral("<html><body>"); 118 for (uint32_t i = 0; i < 400; i++) { 119 // Insert Kanji (U+5341) 120 test.Append(0x5341); 121 } 122 test.AppendLiteral("</body></html>"); 123 124 ConvertBufToPlainText(test, 125 nsIDocumentEncoder::OutputFormatted | 126 nsIDocumentEncoder::OutputCRLineBreak | 127 nsIDocumentEncoder::OutputLFLineBreak | 128 nsIDocumentEncoder::OutputFormatFlowed | 129 nsIDocumentEncoder::OutputDisallowLineBreaking, 130 kDefaultWrapColumn); 131 132 // create result case 133 for (uint32_t i = 0; i < 400; i++) { 134 result.Append(0x5341); 135 } 136 result.AppendLiteral("\r\n"); 137 138 ASSERT_EQ(test, result) 139 << "Wrong HTML to CJK text serialization with OutputDisallowLineBreaking"; 140 } 141 142 // Test for Latin with DisallowLineBreaking 143 TEST(PlainTextSerializer, LatinWithDisallowLineBreaking) 144 { 145 nsString test; 146 test.AssignLiteral("<html><body>"); 147 for (uint32_t i = 0; i < 400; i++) { 148 // Insert á (Latin Small Letter a with Acute) (U+00E1) 149 test.Append(0x00E1); 150 } 151 test.AppendLiteral("</body></html>\r\n"); 152 153 ConvertBufToPlainText(test, 154 nsIDocumentEncoder::OutputFormatted | 155 nsIDocumentEncoder::OutputCRLineBreak | 156 nsIDocumentEncoder::OutputLFLineBreak | 157 nsIDocumentEncoder::OutputFormatFlowed | 158 nsIDocumentEncoder::OutputDisallowLineBreaking, 159 kDefaultWrapColumn); 160 161 // Create expect case. 162 nsString expect; 163 for (uint32_t i = 0; i < 400; i++) { 164 expect.Append(0x00E1); 165 } 166 expect.AppendLiteral(" \r\n\r\n"); 167 168 ASSERT_EQ(test, expect) << "Wrong HTML to Latin text serialization with " 169 "OutputDisallowLineBreaking"; 170 } 171 172 // Test for ASCII with format=flowed; and quoted lines in preformatted span. 173 TEST(PlainTextSerializer, PreformatFlowedQuotes) 174 { 175 nsString test; 176 nsString result; 177 178 test.AssignLiteral( 179 "<html><body>" 180 "<span style=\"white-space: pre-wrap;\" _moz_quote=\"true\">" 181 "> Firefox Firefox Firefox Firefox <br>" 182 "> Firefox Firefox Firefox <b>Firefox</b><br>" 183 "><br>" 184 ">> Firefox Firefox Firefox Firefox <br>" 185 ">> Firefox Firefox Firefox Firefox<br>" 186 "</span></body></html>"); 187 188 ConvertBufToPlainText(test, 189 nsIDocumentEncoder::OutputFormatted | 190 nsIDocumentEncoder::OutputCRLineBreak | 191 nsIDocumentEncoder::OutputLFLineBreak | 192 nsIDocumentEncoder::OutputFormatFlowed, 193 kDefaultWrapColumn); 194 195 // create result case 196 result.AssignLiteral( 197 "> Firefox Firefox Firefox Firefox \r\n" 198 "> Firefox Firefox Firefox *Firefox*\r\n" 199 ">\r\n" 200 ">> Firefox Firefox Firefox Firefox \r\n" 201 ">> Firefox Firefox Firefox Firefox\r\n"); 202 203 ASSERT_EQ(test, result) << "Wrong HTML to ASCII text serialization " 204 "with format=flowed; and quoted " 205 "lines using OutputFormatted"; 206 } 207 208 // Test for ASCII with format=flowed; and not using OutputFormatted. 209 TEST(PlainTextSerializer, OutputFormatFlowedAndWrapped) 210 { 211 nsString test; 212 nsString result; 213 214 test.AssignLiteral( 215 "<html><body>" 216 "<span style=\"white-space: pre-wrap;\" _moz_quote=\"true\">" 217 "> Firefox Firefox Firefox Firefox <br>" 218 "> Firefox Firefox Firefox <b>Firefox</b><br>" 219 "><br>" 220 ">> Firefox Firefox Firefox Firefox <br>" 221 ">> Firefox Firefox Firefox Firefox<br>" 222 "</span></body></html>"); 223 224 ConvertBufToPlainText(test, 225 nsIDocumentEncoder::OutputWrap | 226 nsIDocumentEncoder::OutputCRLineBreak | 227 nsIDocumentEncoder::OutputLFLineBreak | 228 nsIDocumentEncoder::OutputFormatFlowed, 229 kDefaultWrapColumn); 230 231 // create result case 232 result.AssignLiteral( 233 "> Firefox Firefox Firefox Firefox \r\n" 234 "> Firefox Firefox Firefox Firefox\r\n" 235 ">\r\n" 236 ">> Firefox Firefox Firefox Firefox \r\n" 237 ">> Firefox Firefox Firefox Firefox\r\n"); 238 239 ASSERT_EQ(test, result) << "Wrong HTML to ASCII text serialization " 240 "with format=flowed; and quoted " 241 "lines using OutputWrap"; 242 } 243 244 TEST(PlainTextSerializer, PrettyPrintedHtml) 245 { 246 nsString test; 247 test.AppendLiteral("<html>" NS_LINEBREAK "<body>" NS_LINEBREAK 248 " first<br>" NS_LINEBREAK " second<br>" NS_LINEBREAK 249 "</body>" NS_LINEBREAK "</html>"); 250 251 ConvertBufToPlainText(test, 0, kDefaultWrapColumn); 252 253 nsAutoString expect; 254 expect.AppendLiteral("first" NS_LINEBREAK "second" NS_LINEBREAK); 255 256 ASSERT_EQ(test, expect) << "Wrong prettyprinted html to text serialization"; 257 } 258 259 TEST(PlainTextSerializer, PreElement) 260 { 261 nsString test; 262 test.AppendLiteral("<html>" NS_LINEBREAK "<body>" NS_LINEBREAK 263 "<pre>" NS_LINEBREAK " first" NS_LINEBREAK 264 " second" NS_LINEBREAK "</pre>" NS_LINEBREAK 265 "</body>" NS_LINEBREAK "</html>"); 266 267 ConvertBufToPlainText(test, 0, kDefaultWrapColumn); 268 269 nsAutoString expect; 270 expect.AppendLiteral(" first" NS_LINEBREAK 271 " second" NS_LINEBREAK NS_LINEBREAK); 272 273 ASSERT_EQ(test, expect) << "Wrong prettyprinted html to text serialization"; 274 } 275 276 TEST(PlainTextSerializer, BlockElement) 277 { 278 nsString test; 279 test.AppendLiteral("<html>" NS_LINEBREAK "<body>" NS_LINEBREAK 280 "<div>" NS_LINEBREAK " first" NS_LINEBREAK 281 "</div>" NS_LINEBREAK "<div>" NS_LINEBREAK 282 " second" NS_LINEBREAK "</div>" NS_LINEBREAK 283 "</body>" NS_LINEBREAK "</html>"); 284 285 ConvertBufToPlainText(test, 0, kDefaultWrapColumn); 286 287 nsAutoString expect; 288 expect.AppendLiteral("first" NS_LINEBREAK "second" NS_LINEBREAK); 289 290 ASSERT_EQ(test, expect) << "Wrong prettyprinted html to text serialization"; 291 } 292 293 TEST(PlainTextSerializer, PreWrapElementForThunderbird) 294 { 295 // This test examines the magic pre-wrap setup that Thunderbird relies on. 296 nsString test; 297 test.AppendLiteral("<html>" NS_LINEBREAK 298 "<body style=\"white-space: pre-wrap;\">" NS_LINEBREAK 299 "<pre>" NS_LINEBREAK 300 " first line is too long" NS_LINEBREAK 301 " second line is even loooonger " NS_LINEBREAK 302 "</pre>" NS_LINEBREAK "</body>" NS_LINEBREAK "</html>"); 303 304 const uint32_t wrapColumn = 10; 305 ConvertBufToPlainText(test, nsIDocumentEncoder::OutputWrap, wrapColumn); 306 307 // "\n\n first\nline is\ntoo long\n second\nline is\neven\nloooonger\n\n\n" 308 nsAutoString expect; 309 expect.AppendLiteral(NS_LINEBREAK NS_LINEBREAK 310 " first" NS_LINEBREAK "line is" NS_LINEBREAK 311 "too long" NS_LINEBREAK " second" NS_LINEBREAK 312 "line is" NS_LINEBREAK "even" NS_LINEBREAK 313 "loooonger" NS_LINEBREAK NS_LINEBREAK NS_LINEBREAK); 314 315 ASSERT_EQ(test, expect) << "Wrong prettyprinted html to text serialization"; 316 } 317 318 TEST(PlainTextSerializer, Simple) 319 { 320 nsString test; 321 test.AppendLiteral( 322 "<html><base>base</base><head><span>span</span></head>" 323 "<body>body</body></html>"); 324 ConvertBufToPlainText(test, 0, kDefaultWrapColumn); 325 326 nsAutoString expect; 327 expect.AppendLiteral("basespanbody"); 328 329 ASSERT_EQ(test, expect) << "Wrong html to text serialization"; 330 } 331 332 TEST(PlainTextSerializer, OneHundredAndOneOL) 333 { 334 nsAutoString test; 335 test.AppendLiteral( 336 "<html>" 337 "<body>" 338 "<ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><" 339 "ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><" 340 "ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><" 341 "ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><" 342 "ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><" 343 "ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol><ol></ol></ol></ol></" 344 "ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></" 345 "ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></" 346 "ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></" 347 "ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></" 348 "ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></" 349 "ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></" 350 "ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></ol></" 351 "ol></ol><li>X</li></ol>" 352 "</body>" 353 "</html>"); 354 355 ConvertBufToPlainText(test, nsIDocumentEncoder::OutputFormatted, 356 kDefaultWrapColumn); 357 358 nsAutoString expected; 359 expected.AppendLiteral(" 1. X" NS_LINEBREAK); 360 ASSERT_EQ(test, expected); 361 }