tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

mozTXTToHTMLConv.cpp (46008B)


      1 /* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
      2 /* This Source Code Form is subject to the terms of the Mozilla Public
      3 * License, v. 2.0. If a copy of the MPL was not distributed with this
      4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      5 
      6 #include "mozilla/TextUtils.h"
      7 #include "mozTXTToHTMLConv.h"
      8 #include "mozilla/intl/Segmenter.h"
      9 #include "mozilla/Maybe.h"
     10 #include "nsIThreadRetargetableStreamListener.h"
     11 #include "nsNetUtil.h"
     12 #include "nsUnicharUtils.h"
     13 #include "nsUnicodeProperties.h"
     14 #include "nsCRT.h"
     15 #include "nsIExternalProtocolHandler.h"
     16 #include "nsIURI.h"
     17 
     18 #include <algorithm>
     19 
     20 #ifdef DEBUG_BenB_Perf
     21 #  include "prtime.h"
     22 #  include "prinrval.h"
     23 #endif
     24 
     25 using mozilla::IsAscii;
     26 using mozilla::IsAsciiAlpha;
     27 using mozilla::IsAsciiDigit;
     28 using mozilla::Maybe;
     29 using mozilla::Some;
     30 using mozilla::Span;
     31 using mozilla::intl::GraphemeClusterBreakIteratorUtf16;
     32 using mozilla::intl::GraphemeClusterBreakReverseIteratorUtf16;
     33 
     // Multiplicative growth factor (1.2). It is not referenced anywhere in the
     // visible part of this file; presumably it is used to over-allocate output
     // buffers relative to the input length -- TODO confirm at the use sites.
     34 const double growthRate = 1.2;
     35 
     36 // Bug 183111, editor now replaces multiple spaces with leading
     37 // 0xA0's and a single ending space, so need to treat 0xA0's as spaces.
     38 // 0xA0 is the Latin1/Unicode character for "non-breaking space (nbsp)"
     39 // Also recognize the Japanese ideographic space 0x3000 as a space.
     40 static inline bool IsSpace(const char16_t aChar) {
     41  return (nsCRT::IsAsciiSpace(aChar) || aChar == 0xA0 || aChar == 0x3000);
     42 }
     43 
     44 // Escape Char will take ch, escape it and append the result to
     45 // aStringToAppendTo
     46 void mozTXTToHTMLConv::EscapeChar(const char16_t ch,
     47                                  nsAString& aStringToAppendTo,
     48                                  bool inAttribute) {
     49  switch (ch) {
     50    case '<':
     51      aStringToAppendTo.AppendLiteral("&lt;");
     52      break;
     53    case '>':
     54      aStringToAppendTo.AppendLiteral("&gt;");
     55      break;
     56    case '&':
     57      aStringToAppendTo.AppendLiteral("&amp;");
     58      break;
     59    case '"':
     60      if (inAttribute) {
     61        aStringToAppendTo.AppendLiteral("&quot;");
     62        break;
     63      }
     64      // else fall through
     65      [[fallthrough]];
     66    default:
     67      aStringToAppendTo += ch;
     68  }
     69 }
     70 
     71 // EscapeStr takes the passed in string and
     72 // escapes it IN PLACE.
     73 void mozTXTToHTMLConv::EscapeStr(nsString& aInString, bool inAttribute) {
     74  // the replace substring routines
     75  // don't seem to work if you have a character
     76  // in the in string that is also in the replacement
     77  // string! =(
     78  // aInString.ReplaceSubstring("&", "&amp;");
     79  // aInString.ReplaceSubstring("<", "&lt;");
     80  // aInString.ReplaceSubstring(">", "&gt;");
     81  for (uint32_t i = 0; i < aInString.Length();) {
     82    switch (aInString[i]) {
     83      case '<':
     84        aInString.Cut(i, 1);
     85        aInString.InsertLiteral(u"&lt;", i);
     86        i += 4;  // skip past the integers we just added
     87        break;
     88      case '>':
     89        aInString.Cut(i, 1);
     90        aInString.InsertLiteral(u"&gt;", i);
     91        i += 4;  // skip past the integers we just added
     92        break;
     93      case '&':
     94        aInString.Cut(i, 1);
     95        aInString.InsertLiteral(u"&amp;", i);
     96        i += 5;  // skip past the integers we just added
     97        break;
     98      case '"':
     99        if (inAttribute) {
    100          aInString.Cut(i, 1);
    101          aInString.InsertLiteral(u"&quot;", i);
    102          i += 6;
    103          break;
    104        }
    105        // else fall through
    106        [[fallthrough]];
    107      default:
    108        i++;
    109    }
    110  }
    111 }
    112 
    113 void mozTXTToHTMLConv::UnescapeStr(const char16_t* aInString, int32_t aStartPos,
    114                                   int32_t aLength, nsString& aOutString) {
    115  const char16_t* subString = nullptr;
    116  for (uint32_t i = aStartPos; int32_t(i) - aStartPos < aLength;) {
    117    int32_t remainingChars = i - aStartPos;
    118    if (aInString[i] == '&') {
    119      subString = &aInString[i];
    120      if (!NS_strncmp(subString, u"&lt;",
    121                      std::min(4, aLength - remainingChars))) {
    122        aOutString.Append(char16_t('<'));
    123        i += 4;
    124      } else if (!NS_strncmp(subString, u"&gt;",
    125                             std::min(4, aLength - remainingChars))) {
    126        aOutString.Append(char16_t('>'));
    127        i += 4;
    128      } else if (!NS_strncmp(subString, u"&amp;",
    129                             std::min(5, aLength - remainingChars))) {
    130        aOutString.Append(char16_t('&'));
    131        i += 5;
    132      } else if (!NS_strncmp(subString, u"&quot;",
    133                             std::min(6, aLength - remainingChars))) {
    134        aOutString.Append(char16_t('"'));
    135        i += 6;
    136      } else {
    137        aOutString += aInString[i];
    138        i++;
    139      }
    140    } else {
    141      aOutString += aInString[i];
    142      i++;
    143    }
    144  }
    145 }
    146 
    147 void mozTXTToHTMLConv::CompleteAbbreviatedURL(const char16_t* aInString,
    148                                              int32_t aInLength,
    149                                              const uint32_t pos,
    150                                              nsString& aOutString) {
    151  NS_ASSERTION(int32_t(pos) < aInLength,
    152               "bad args to CompleteAbbreviatedURL, see bug #190851");
    153  if (int32_t(pos) >= aInLength) return;
    154 
    155  if (aInString[pos] == '@') {
    156    // only pre-pend a mailto url if the string contains a .domain in it..
    157    // i.e. we want to linkify johndoe@foo.com but not "let's meet @8pm"
    158    nsDependentString inString(aInString, aInLength);
    159    if (inString.FindChar('.', pos) !=
    160        kNotFound)  // if we have a '.' after the @ sign....
    161    {
    162      aOutString.AssignLiteral("mailto:");
    163      aOutString += aInString;
    164    }
    165  } else if (aInString[pos] == '.') {
    166    if (ItMatchesDelimited(aInString, aInLength, u"www.", 4, LT_IGNORE,
    167                           LT_IGNORE)) {
    168      aOutString.AssignLiteral("http://");
    169      aOutString += aInString;
    170    }
    171  }
    172 }
    173 
    174 bool mozTXTToHTMLConv::FindURLStart(const char16_t* aInString,
    175                                    int32_t aInLength, const uint32_t pos,
    176                                    const modetype check, uint32_t& start) {
    177  switch (check) {  // no breaks, because end of blocks is never reached
    178    case RFC1738: {
    179      if (!NS_strncmp(&aInString[std::max(int32_t(pos - 4), 0)], u"<URL:", 5)) {
    180        start = pos + 1;
    181        return true;
    182      }
    183      return false;
    184    }
    185    case RFC2396E: {
    186      nsDependentSubstring temp(aInString, aInLength);
    187      int32_t i = pos <= 0 ? kNotFound : temp.RFindCharInSet(u"<>\"", pos - 1);
    188      if (i != kNotFound &&
    189          (temp[uint32_t(i)] == '<' || temp[uint32_t(i)] == '"')) {
    190        start = uint32_t(++i);
    191        return start < pos;
    192      }
    193      return false;
    194    }
    195    case freetext: {
    196      int32_t i = pos - 1;
    197      for (; i >= 0 &&
    198             (IsAsciiAlpha(aInString[uint32_t(i)]) ||
    199              IsAsciiDigit(aInString[uint32_t(i)]) ||
    200              aInString[uint32_t(i)] == '+' || aInString[uint32_t(i)] == '-' ||
    201              aInString[uint32_t(i)] == '.');
    202           i--) {
    203        ;
    204      }
    205      if (++i >= 0 && uint32_t(i) < pos &&
    206          IsAsciiAlpha(aInString[uint32_t(i)])) {
    207        start = uint32_t(i);
    208        return true;
    209      }
    210      return false;
    211    }
    212    case abbreviated: {
    213      int32_t i = pos - 1;
    214      // This disallows non-ascii-characters for email.
    215      // Currently correct, but revisit later after standards changed.
    216      bool isEmail = aInString[pos] == (char16_t)'@';
    217      // These chars mark the start of the URL
    218      for (; i >= 0 && aInString[uint32_t(i)] != '>' &&
    219             aInString[uint32_t(i)] != '<' && aInString[uint32_t(i)] != '"' &&
    220             aInString[uint32_t(i)] != '\'' && aInString[uint32_t(i)] != '`' &&
    221             aInString[uint32_t(i)] != ',' && aInString[uint32_t(i)] != '{' &&
    222             aInString[uint32_t(i)] != '[' && aInString[uint32_t(i)] != '(' &&
    223             aInString[uint32_t(i)] != '|' && aInString[uint32_t(i)] != '\\' &&
    224             !IsSpace(aInString[uint32_t(i)]) &&
    225             (!isEmail || IsAscii(aInString[uint32_t(i)])) &&
    226             (!isEmail || aInString[uint32_t(i)] != ')');
    227           i--) {
    228        ;
    229      }
    230      if (++i >= 0 && uint32_t(i) < pos &&
    231          (IsAsciiAlpha(aInString[uint32_t(i)]) ||
    232           IsAsciiDigit(aInString[uint32_t(i)]))) {
    233        start = uint32_t(i);
    234        return true;
    235      }
    236      return false;
    237    }
    238    default:
    239      return false;
    240  }  // switch
    241 }
    242 
    243 bool mozTXTToHTMLConv::FindURLEnd(const char16_t* aInString,
    244                                  int32_t aInStringLength, const uint32_t pos,
    245                                  const modetype check, const uint32_t start,
    246                                  uint32_t& end) {
    247  switch (check) {  // no breaks, because end of blocks is never reached
    248    case RFC1738:
    249    case RFC2396E: {
    250      nsDependentSubstring temp(aInString, aInStringLength);
    251 
    252      int32_t i = temp.FindCharInSet(u"<>\"", pos + 1);
    253      if (i != kNotFound &&
    254          temp[uint32_t(i--)] ==
    255              (check == RFC1738 || temp[start - 1] == '<' ? '>' : '"')) {
    256        end = uint32_t(i);
    257        return end > pos;
    258      }
    259      return false;
    260    }
    261    case freetext:
    262    case abbreviated: {
    263      uint32_t i = pos + 1;
    264      bool isEmail = aInString[pos] == (char16_t)'@';
    265      bool seenOpeningParenthesis = false;  // there is a '(' earlier in the URL
    266      bool seenOpeningSquareBracket =
    267          false;  // there is a '[' earlier in the URL
    268      for (; int32_t(i) < aInStringLength; i++) {
    269        // These chars mark the end of the URL
    270        if (aInString[i] == '>' || aInString[i] == '<' || aInString[i] == '"' ||
    271            aInString[i] == '`' || aInString[i] == '}' || aInString[i] == '{' ||
    272            (aInString[i] == ')' && !seenOpeningParenthesis) ||
    273            (aInString[i] == ']' && !seenOpeningSquareBracket) ||
    274            // Allow IPv6 adresses like http://[1080::8:800:200C:417A]/foo.
    275            (aInString[i] == '[' && i > 2 &&
    276             (aInString[i - 1] != '/' || aInString[i - 2] != '/')) ||
    277            IsSpace(aInString[i])) {
    278          break;
    279        }
    280        // Disallow non-ascii-characters for email.
    281        // Currently correct, but revisit later after standards changed.
    282        if (isEmail && (aInString[i] == '(' || aInString[i] == '\'' ||
    283                        !IsAscii(aInString[i]))) {
    284          break;
    285        }
    286        if (aInString[i] == '(') seenOpeningParenthesis = true;
    287        if (aInString[i] == '[') seenOpeningSquareBracket = true;
    288      }
    289      // These chars are allowed in the middle of the URL, but not at end.
    290      // Technically they are, but are used in normal text after the URL.
    291      while (--i > pos && (aInString[i] == '.' || aInString[i] == ',' ||
    292                           aInString[i] == ';' || aInString[i] == '!' ||
    293                           aInString[i] == '?' || aInString[i] == '-' ||
    294                           aInString[i] == ':' || aInString[i] == '\'')) {
    295        ;
    296      }
    297      if (i > pos) {
    298        end = i;
    299        return true;
    300      }
    301      return false;
    302    }
    303    default:
    304      return false;
    305  }  // switch
    306 }
    307 
    308 void mozTXTToHTMLConv::CalculateURLBoundaries(
    309    const char16_t* aInString, int32_t aInStringLength, const uint32_t pos,
    310    const uint32_t whathasbeendone, const modetype check, const uint32_t start,
    311    const uint32_t end, nsString& txtURL, nsString& desc,
    312    int32_t& replaceBefore, int32_t& replaceAfter) {
    313  uint32_t descstart = start;
    314  switch (check) {
    315    case RFC1738: {
    316      descstart = start - 5;
    317      desc.Append(&aInString[descstart],
    318                  end - descstart + 2);  // include "<URL:" and ">"
    319      replaceAfter = end - pos + 1;
    320    } break;
    321    case RFC2396E: {
    322      descstart = start - 1;
    323      desc.Append(&aInString[descstart],
    324                  end - descstart + 2);  // include brackets
    325      replaceAfter = end - pos + 1;
    326    } break;
    327    case freetext:
    328    case abbreviated: {
    329      descstart = start;
    330      desc.Append(&aInString[descstart],
    331                  end - start + 1);  // don't include brackets
    332      replaceAfter = end - pos;
    333    } break;
    334    default:
    335      break;
    336  }  // switch
    337 
    338  EscapeStr(desc, false);
    339 
    340  txtURL.Append(&aInString[start], end - start + 1);
    341  txtURL.StripWhitespace();
    342 
    343  // FIX ME
    344  nsAutoString temp2;
    345  ScanTXT(nsDependentSubstring(&aInString[descstart], pos - descstart),
    346          ~kURLs /*prevents loop*/ & whathasbeendone, temp2);
    347  replaceBefore = temp2.Length();
    348 }
    349 
    350 bool mozTXTToHTMLConv::ShouldLinkify(const nsCString& aURL) {
    351  if (!mIOService) return false;
    352 
    353  nsAutoCString scheme;
    354  nsresult rv = mIOService->ExtractScheme(aURL, scheme);
    355  if (NS_FAILED(rv)) return false;
    356 
    357  if (scheme == "http" || scheme == "https" || scheme == "mailto") {
    358    return true;
    359  }
    360 
    361  // Get the handler for this scheme.
    362  nsCOMPtr<nsIProtocolHandler> handler;
    363  rv = mIOService->GetProtocolHandler(scheme.get(), getter_AddRefs(handler));
    364  if (NS_FAILED(rv)) return false;
    365 
    366  // Is it an external protocol handler? If not, linkify it.
    367  nsCOMPtr<nsIExternalProtocolHandler> externalHandler =
    368      do_QueryInterface(handler);
    369  if (!externalHandler) return true;  // handler is built-in, linkify it!
    370 
    371  // If external app exists for the scheme then linkify it.
    372  bool exists;
    373  rv = externalHandler->ExternalAppExistsForScheme(scheme, &exists);
    374  return (NS_SUCCEEDED(rv) && exists);
    375 }
    376 
    377 bool mozTXTToHTMLConv::CheckURLAndCreateHTML(const nsString& txtURL,
    378                                             const nsString& desc,
    379                                             const modetype mode,
    380                                             nsString& outputHTML) {
    381  // Create *uri from txtURL
    382  nsCOMPtr<nsIURI> uri;
    383  nsresult rv;
    384  // Lazily initialize mIOService
    385  if (!mIOService) {
    386    mIOService = do_GetIOService();
    387 
    388    if (!mIOService) return false;
    389  }
    390 
    391  // See if the url should be linkified.
    392  NS_ConvertUTF16toUTF8 utf8URL(txtURL);
    393  if (!ShouldLinkify(utf8URL)) return false;
    394 
    395  // it would be faster if we could just check to see if there is a protocol
    396  // handler for the url and return instead of actually trying to create a
    397  // url...
    398  rv = mIOService->NewURI(utf8URL, nullptr, nullptr, getter_AddRefs(uri));
    399 
    400  // Real work
    401  if (NS_SUCCEEDED(rv) && uri) {
    402    outputHTML.AssignLiteral("<a class=\"moz-txt-link-");
    403    switch (mode) {
    404      case RFC1738:
    405        outputHTML.AppendLiteral("rfc1738");
    406        break;
    407      case RFC2396E:
    408        outputHTML.AppendLiteral("rfc2396E");
    409        break;
    410      case freetext:
    411        outputHTML.AppendLiteral("freetext");
    412        break;
    413      case abbreviated:
    414        outputHTML.AppendLiteral("abbreviated");
    415        break;
    416      default:
    417        break;
    418    }
    419    nsAutoString escapedURL(txtURL);
    420    EscapeStr(escapedURL, true);
    421 
    422    outputHTML.AppendLiteral("\" href=\"");
    423    outputHTML += escapedURL;
    424    outputHTML.AppendLiteral("\">");
    425    outputHTML += desc;
    426    outputHTML.AppendLiteral("</a>");
    427    return true;
    428  }
    429  return false;
    430 }
    431 
    432 NS_IMETHODIMP mozTXTToHTMLConv::FindURLInPlaintext(const char16_t* aInString,
    433                                                   int32_t aInLength,
    434                                                   int32_t aPos,
    435                                                   int32_t* aStartPos,
    436                                                   int32_t* aEndPos) {
    437  // call FindURL on the passed in string
    438  nsAutoString outputHTML;  // we'll ignore the generated output HTML
    439 
    440  *aStartPos = -1;
    441  *aEndPos = -1;
    442 
    443  FindURL(aInString, aInLength, aPos, kURLs, outputHTML, *aStartPos, *aEndPos);
    444 
    445  return NS_OK;
    446 }
    447 
    448 bool mozTXTToHTMLConv::FindURL(const char16_t* aInString, int32_t aInLength,
    449                               const uint32_t pos,
    450                               const uint32_t whathasbeendone,
    451                               nsString& outputHTML, int32_t& replaceBefore,
    452                               int32_t& replaceAfter) {
    453  enum statetype { unchecked, invalid, startok, endok, success };
    454  static const modetype ranking[] = {RFC1738, RFC2396E, freetext, abbreviated};
    455 
    456  statetype state[mozTXTToHTMLConv_lastMode + 1];  // 0(=unknown)..lastMode
    457  /* I don't like this abuse of enums as index for the array,
    458     but I don't know a better method */
    459 
    460  // Define, which modes to check
    461  /* all modes but abbreviated are checked for text[pos] == ':',
    462     only abbreviated for '.', RFC2396E and abbreviated for '@' */
    463  for (modetype iState = unknown; iState <= mozTXTToHTMLConv_lastMode;
    464       iState = modetype(iState + 1)) {
    465    state[iState] = aInString[pos] == ':' ? unchecked : invalid;
    466  }
    467  switch (aInString[pos]) {
    468    case '@':
    469      state[RFC2396E] = unchecked;
    470      [[fallthrough]];
    471    case '.':
    472      state[abbreviated] = unchecked;
    473      break;
    474    case ':':
    475      state[abbreviated] = invalid;
    476      break;
    477    default:
    478      break;
    479  }
    480 
    481  // Test, first successful mode wins, sequence defined by |ranking|
    482  int32_t iCheck = 0;  // the currently tested modetype
    483  modetype check = ranking[iCheck];
    484  for (; iCheck < mozTXTToHTMLConv_numberOfModes && state[check] != success;
    485       iCheck++)
    486  /* check state from last run.
    487     If this is the first, check this one, which isn't = success yet */
    488  {
    489    check = ranking[iCheck];
    490 
    491    uint32_t start, end;
    492 
    493    if (state[check] == unchecked) {
    494      if (FindURLStart(aInString, aInLength, pos, check, start)) {
    495        state[check] = startok;
    496      }
    497    }
    498 
    499    if (state[check] == startok) {
    500      if (FindURLEnd(aInString, aInLength, pos, check, start, end)) {
    501        state[check] = endok;
    502      }
    503    }
    504 
    505    if (state[check] == endok) {
    506      nsAutoString txtURL, desc;
    507      int32_t resultReplaceBefore, resultReplaceAfter;
    508 
    509      CalculateURLBoundaries(aInString, aInLength, pos, whathasbeendone, check,
    510                             start, end, txtURL, desc, resultReplaceBefore,
    511                             resultReplaceAfter);
    512 
    513      if (aInString[pos] != ':') {
    514        // CalculateURLBoundaries removes whitespace, so a new pos is needed
    515        uint32_t urlPos = std::max(txtURL.FindChar(aInString[pos]), 0);
    516        nsAutoString temp = txtURL;
    517        txtURL.SetLength(0);
    518        CompleteAbbreviatedURL(temp.get(), temp.Length(), urlPos, txtURL);
    519      }
    520 
    521      if (!txtURL.IsEmpty() &&
    522          CheckURLAndCreateHTML(txtURL, desc, check, outputHTML)) {
    523        replaceBefore = resultReplaceBefore;
    524        replaceAfter = resultReplaceAfter;
    525        state[check] = success;
    526      }
    527    }  // if
    528  }  // for
    529  return state[check] == success;
    530 }
    531 
    532 static inline bool IsAlpha(const uint32_t aChar) {
    533  return mozilla::unicode::GetGenCategory(aChar) == nsUGenCategory::kLetter;
    534 }
    535 
    536 static inline bool IsDigit(const uint32_t aChar) {
    537  return mozilla::unicode::GetGenCategory(aChar) == nsUGenCategory::kNumber;
    538 }
    539 
    540 bool mozTXTToHTMLConv::ItMatchesDelimited(const char16_t* aInString,
    541                                          int32_t aInLength,
    542                                          const char16_t* rep, int32_t aRepLen,
    543                                          LIMTYPE before, LIMTYPE after) {
    544  // this little method gets called a LOT. I found we were spending a
    545  // lot of time just calculating the length of the variable "rep"
    546  // over and over again every time we called it. So we're now passing
    547  // an integer in here.
    548  int32_t textLen = aInLength;
    549 
    550  if (((before == LT_IGNORE && (after == LT_IGNORE || after == LT_DELIMITER)) &&
    551       textLen < aRepLen) ||
    552      ((before != LT_IGNORE || (after != LT_IGNORE && after != LT_DELIMITER)) &&
    553       textLen < aRepLen + 1) ||
    554      (before != LT_IGNORE && after != LT_IGNORE && after != LT_DELIMITER &&
    555       textLen < aRepLen + 2)) {
    556    return false;
    557  }
    558 
    559  uint32_t text0 = aInString[0];
    560  if (aInLength > 1 && NS_IS_SURROGATE_PAIR(text0, aInString[1])) {
    561    text0 = SURROGATE_TO_UCS4(text0, aInString[1]);
    562  }
    563  // find length of the char/cluster to be ignored
    564  int32_t ignoreLen = before == LT_IGNORE ? 0 : 1;
    565  if (ignoreLen) {
    566    GraphemeClusterBreakIteratorUtf16 ci(
    567        Span<const char16_t>(aInString, aInLength));
    568    ignoreLen = *ci.Next();
    569  }
    570 
    571  int32_t afterIndex = aRepLen + ignoreLen;
    572  uint32_t textAfterPos = aInString[afterIndex];
    573  if (aInLength > afterIndex + 1 &&
    574      NS_IS_SURROGATE_PAIR(textAfterPos, aInString[afterIndex + 1])) {
    575    textAfterPos = SURROGATE_TO_UCS4(textAfterPos, aInString[afterIndex + 1]);
    576  }
    577 
    578  return !((before == LT_ALPHA && !IsAlpha(text0)) ||
    579           (before == LT_DIGIT && !IsDigit(text0)) ||
    580           (before == LT_DELIMITER &&
    581            (IsAlpha(text0) || IsDigit(text0) || text0 == *rep)) ||
    582           (after == LT_ALPHA && !IsAlpha(textAfterPos)) ||
    583           (after == LT_DIGIT && !IsDigit(textAfterPos)) ||
    584           (after == LT_DELIMITER &&
    585            (IsAlpha(textAfterPos) || IsDigit(textAfterPos) ||
    586             textAfterPos == *rep)) ||
    587           !Substring(Substring(aInString, aInString + aInLength), ignoreLen,
    588                      aRepLen)
    589                .Equals(Substring(rep, rep + aRepLen),
    590                        nsCaseInsensitiveStringComparator));
    591 }
    592 
    593 uint32_t mozTXTToHTMLConv::NumberOfMatches(const char16_t* aInString,
    594                                           int32_t aInStringLength,
    595                                           const char16_t* rep, int32_t aRepLen,
    596                                           LIMTYPE before, LIMTYPE after) {
    597  uint32_t result = 0;
    598 
    599  // Limit lookahead length to avoid pathological O(n^2) behavior; looking so
    600  // far ahead is unlikely to be important for cases where styling marked-up
    601  // fragments is actually useful anyhow.
    602  const uint32_t len =
    603      std::min(2000u, mozilla::AssertedCast<uint32_t>(aInStringLength));
    604  GraphemeClusterBreakIteratorUtf16 ci(Span<const char16_t>(aInString, len));
    605  for (uint32_t pos = 0; pos < len; pos = *ci.Next()) {
    606    if (ItMatchesDelimited(aInString + pos, aInStringLength - pos, rep, aRepLen,
    607                           before, after)) {
    608      result++;
    609    }
    610  }
    611  return result;
    612 }
    613 
    614 // NOTE: the converted html for the phrase is appended to aOutString
    615 // tagHTML and attributeHTML are plain ASCII (literal strings, in fact)
    616 bool mozTXTToHTMLConv::StructPhraseHit(
    617    const char16_t* aInString, int32_t aInStringLength, bool col0,
    618    const char16_t* tagTXT, int32_t aTagTXTLen, const char* tagHTML,
    619    const char* attributeHTML, nsAString& aOutString, uint32_t& openTags) {
    620  /* We're searching for the following pattern:
    621     LT_DELIMITER - "*" - ALPHA -
    622     [ some text (maybe more "*"-pairs) - ALPHA ] "*" - LT_DELIMITER.
    623     <strong> is only inserted, if existence of a pair could be verified
    624     We use the first opening/closing tag, if we can choose */
    625 
    626  const char16_t* newOffset = aInString;
    627  int32_t newLength = aInStringLength;
    628  if (!col0)  // skip the first element?
    629  {
    630    newOffset = &aInString[1];
    631    newLength = aInStringLength - 1;
    632  }
    633 
    634  // opening tag
    635  if (ItMatchesDelimited(aInString, aInStringLength, tagTXT, aTagTXTLen,
    636                         (col0 ? LT_IGNORE : LT_DELIMITER),
    637                         LT_ALPHA)  // is opening tag
    638      && NumberOfMatches(newOffset, newLength, tagTXT, aTagTXTLen, LT_ALPHA,
    639                         LT_DELIMITER)  // remaining closing tags
    640             > openTags) {
    641    openTags++;
    642    aOutString.Append('<');
    643    aOutString.AppendASCII(tagHTML);
    644    aOutString.Append(char16_t(' '));
    645    aOutString.AppendASCII(attributeHTML);
    646    aOutString.AppendLiteral("><span class=\"moz-txt-tag\">");
    647    aOutString.Append(tagTXT);
    648    aOutString.AppendLiteral("</span>");
    649    return true;
    650  }
    651 
    652  // closing tag
    653  if (openTags > 0 && ItMatchesDelimited(aInString, aInStringLength, tagTXT,
    654                                         aTagTXTLen, LT_ALPHA, LT_DELIMITER)) {
    655    openTags--;
    656    aOutString.AppendLiteral("<span class=\"moz-txt-tag\">");
    657    aOutString.Append(tagTXT);
    658    aOutString.AppendLiteral("</span></");
    659    aOutString.AppendASCII(tagHTML);
    660    aOutString.Append(char16_t('>'));
    661    return true;
    662  }
    663 
    664  return false;
    665 }
    666 
    667 bool mozTXTToHTMLConv::SmilyHit(const char16_t* aInString, int32_t aLength,
    668                                bool col0, const char* tagTXT,
    669                                const nsString& imageName, nsString& outputHTML,
    670                                int32_t& glyphTextLen) {
    671  if (!aInString || !tagTXT || imageName.IsEmpty()) return false;
    672 
    673  int32_t tagLen = strlen(tagTXT);
    674 
    675  uint32_t delim = (col0 ? 0 : 1) + tagLen;
    676 
    677  if ((col0 || IsSpace(aInString[0])) &&
    678      (aLength <= int32_t(delim) || IsSpace(aInString[delim]) ||
    679       (aLength > int32_t(delim + 1) &&
    680        (aInString[delim] == '.' || aInString[delim] == ',' ||
    681         aInString[delim] == ';' || aInString[delim] == '8' ||
    682         aInString[delim] == '>' || aInString[delim] == '!' ||
    683         aInString[delim] == '?') &&
    684        IsSpace(aInString[delim + 1]))) &&
    685      ItMatchesDelimited(aInString, aLength,
    686                         NS_ConvertASCIItoUTF16(tagTXT).get(), tagLen,
    687                         col0 ? LT_IGNORE : LT_DELIMITER, LT_IGNORE)
    688      // Note: tests at different pos for LT_IGNORE and LT_DELIMITER
    689  ) {
    690    if (!col0) {
    691      outputHTML.Truncate();
    692      outputHTML.Append(char16_t(' '));
    693    }
    694 
    695    outputHTML.Append(imageName);  // emoji unicode
    696    glyphTextLen = (col0 ? 0 : 1) + tagLen;
    697    return true;
    698  }
    699 
    700  return false;
    701 }
    702 
    703 // the glyph is appended to aOutputString instead of the original string...
    704 bool mozTXTToHTMLConv::GlyphHit(const char16_t* aInString, int32_t aInLength,
    705                                bool col0, nsAString& aOutputString,
    706                                int32_t& glyphTextLen) {
    707  char16_t text0 = aInString[0];
    708  char16_t text1 = aInString[1];
    709  char16_t firstChar = (col0 ? text0 : text1);
    710 
    711  // temporary variable used to store the glyph html text
    712  nsAutoString outputHTML;
    713  bool bTestSmilie;
    714  bool bArg = false;
    715  int i;
    716 
    717  // refactor some of this mess to avoid code duplication and speed execution a
    718  // bit there are two cases that need to be tried one after another. To avoid a
    719  // lot of duplicate code, rolling into a loop
    720 
    721  i = 0;
    722  while (i < 2) {
    723    bTestSmilie = false;
    724    if (!i && (firstChar == ':' || firstChar == ';' || firstChar == '=' ||
    725               firstChar == '>' || firstChar == '8' || firstChar == 'O')) {
    726      // first test passed
    727 
    728      bTestSmilie = true;
    729      bArg = col0;
    730    }
    731    if (i && col0 &&
    732        (text1 == ':' || text1 == ';' || text1 == '=' || text1 == '>' ||
    733         text1 == '8' || text1 == 'O')) {
    734      // second test passed
    735 
    736      bTestSmilie = true;
    737      bArg = false;
    738    }
    739    if (bTestSmilie && (SmilyHit(aInString, aInLength, bArg, ":-)",
    740                                 u"🙂"_ns,  // smile, U+1F642
    741                                 outputHTML, glyphTextLen) ||
    742 
    743                        SmilyHit(aInString, aInLength, bArg, ":)",
    744                                 u"🙂"_ns,  // smile, U+1F642
    745                                 outputHTML, glyphTextLen) ||
    746 
    747                        SmilyHit(aInString, aInLength, bArg, ":-D",
    748                                 u"😂"_ns,  // laughing, U+1F602
    749                                 outputHTML, glyphTextLen) ||
    750 
    751                        SmilyHit(aInString, aInLength, bArg, ":-(",
    752                                 u"🙁"_ns,  // frown, U+1F641
    753                                 outputHTML, glyphTextLen) ||
    754 
    755                        SmilyHit(aInString, aInLength, bArg, ":(",
    756                                 u"🙁"_ns,  // frown, U+1F641
    757                                 outputHTML, glyphTextLen) ||
    758 
    759                        SmilyHit(aInString, aInLength, bArg, ":$",
    760                                 u"😳"_ns,  // embarassed, U+1F633
    761                                 outputHTML, glyphTextLen) ||
    762 
    763                        SmilyHit(aInString, aInLength, bArg, ";-)",
    764                                 u"😉"_ns,  // wink, U+1F609
    765                                 outputHTML, glyphTextLen) ||
    766 
    767                        SmilyHit(aInString, aInLength, col0, ";)",
    768                                 u"😉"_ns,  // wink, U+1F609
    769                                 outputHTML, glyphTextLen) ||
    770 
    771                        SmilyHit(aInString, aInLength, bArg, ":-\\",
    772                                 u"😕"_ns,  // undecided, U+1F615
    773                                 outputHTML, glyphTextLen) ||
    774 
    775                        SmilyHit(aInString, aInLength, bArg, ":-P",
    776                                 u"😛"_ns,  // tongue, U+1F61B
    777                                 outputHTML, glyphTextLen) ||
    778 
    779                        SmilyHit(aInString, aInLength, bArg, ";-P",
    780                                 u"😜"_ns,  // winking face with tongue, U+1F61C
    781                                 outputHTML, glyphTextLen) ||
    782 
    783                        SmilyHit(aInString, aInLength, bArg, "=-O",
    784                                 u"😮"_ns,  // surprise, U+1F62E
    785                                 outputHTML, glyphTextLen) ||
    786 
    787                        SmilyHit(aInString, aInLength, bArg, ":-*",
    788                                 u"😘"_ns,  // kiss, U+1F618
    789                                 outputHTML, glyphTextLen) ||
    790 
    791                        SmilyHit(aInString, aInLength, bArg, ">:o",
    792                                 u"🤬"_ns,  // swearing, U+1F92C
    793                                 outputHTML, glyphTextLen) ||
    794 
    795                        SmilyHit(aInString, aInLength, bArg, ">:-o",
    796                                 u"🤬"_ns,  // swearing, U+1F92C
    797                                 outputHTML, glyphTextLen) ||
    798 
    799                        SmilyHit(aInString, aInLength, bArg, ">:(",
    800                                 u"😠"_ns,  // angry, U+1F620
    801                                 outputHTML, glyphTextLen) ||
    802 
    803                        SmilyHit(aInString, aInLength, bArg, ">:-(",
    804                                 u"😠"_ns,  // angry, U+1F620
    805                                 outputHTML, glyphTextLen) ||
    806 
    807                        SmilyHit(aInString, aInLength, bArg, "8-)",
    808                                 u"😎"_ns,  // cool, U+1F60E
    809                                 outputHTML, glyphTextLen) ||
    810 
    811                        SmilyHit(aInString, aInLength, bArg, ":-$",
    812                                 u"🤑"_ns,  // money, U+1F911
    813                                 outputHTML, glyphTextLen) ||
    814 
    815                        SmilyHit(aInString, aInLength, bArg, ":-!",
    816                                 u"😬"_ns,  // foot, U+1F62C
    817                                 outputHTML, glyphTextLen) ||
    818 
    819                        SmilyHit(aInString, aInLength, bArg, "O:-)",
    820                                 u"😇"_ns,  // innocent, U+1F607
    821                                 outputHTML, glyphTextLen) ||
    822 
    823                        SmilyHit(aInString, aInLength, bArg, ":'(",
    824                                 u"😭"_ns,  // cry, U+1F62D
    825                                 outputHTML, glyphTextLen) ||
    826 
    827                        SmilyHit(aInString, aInLength, bArg, ":-X",
    828                                 u"🤐"_ns,  // sealed, U+1F910
    829                                 outputHTML, glyphTextLen))) {
    830      aOutputString.Append(outputHTML);
    831      return true;
    832    }
    833    i++;
    834  }
    835  if (text0 == '\f') {
    836    aOutputString.AppendLiteral("<span class='moz-txt-formfeed'></span>");
    837    glyphTextLen = 1;
    838    return true;
    839  }
    840  if (text0 == '+' || text1 == '+') {
    841    if (ItMatchesDelimited(aInString, aInLength, u" +/-", 4, LT_IGNORE,
    842                           LT_IGNORE)) {
    843      aOutputString.AppendLiteral(" &plusmn;");
    844      glyphTextLen = 4;
    845      return true;
    846    }
    847    if (col0 && ItMatchesDelimited(aInString, aInLength, u"+/-", 3, LT_IGNORE,
    848                                   LT_IGNORE)) {
    849      aOutputString.AppendLiteral("&plusmn;");
    850      glyphTextLen = 3;
    851      return true;
    852    }
    853  }
    854 
    855  // x^2  =>  x<sup>2</sup>,   also handle powers x^-2,  x^0.5
    856  // implement regular expression /[\dA-Za-z\)\]}]\^-?\d+(\.\d+)*[^\dA-Za-z]/
    857  if (text1 == '^' &&
    858      (IsAsciiDigit(text0) || IsAsciiAlpha(text0) || text0 == ')' ||
    859       text0 == ']' || text0 == '}') &&
    860      ((2 < aInLength && IsAsciiDigit(aInString[2])) ||
    861       (3 < aInLength && aInString[2] == '-' && IsAsciiDigit(aInString[3])))) {
    862    // Find first non-digit
    863    int32_t delimPos = 3;  // skip "^" and first digit (or '-')
    864    for (; delimPos < aInLength &&
    865           (IsAsciiDigit(aInString[delimPos]) ||
    866            (aInString[delimPos] == '.' && delimPos + 1 < aInLength &&
    867             IsAsciiDigit(aInString[delimPos + 1])));
    868         delimPos++) {
    869      ;
    870    }
    871 
    872    if (delimPos < aInLength && IsAsciiAlpha(aInString[delimPos])) {
    873      return false;
    874    }
    875 
    876    outputHTML.Truncate();
    877    outputHTML += text0;
    878    outputHTML.AppendLiteral(
    879        "<sup class=\"moz-txt-sup\">"
    880        "<span style=\"display:inline-block;width:0;height:0;overflow:hidden\">"
    881        "^</span>");
    882 
    883    aOutputString.Append(outputHTML);
    884    aOutputString.Append(&aInString[2], delimPos - 2);
    885    aOutputString.AppendLiteral("</sup>");
    886 
    887    glyphTextLen = delimPos /* - 1 + 1 */;
    888    return true;
    889  }
    890  /*
    891   The following strings are not substituted:
    892   |TXT   |HTML     |Reason
    893   +------+---------+----------
    894    ->     &larr;    Bug #454
    895    =>     &lArr;    dito
    896    <-     &rarr;    dito
    897    <=     &rArr;    dito
    898    (tm)   &trade;   dito
    899    1/4    &frac14;  is triggered by 1/4 Part 1, 2/4 Part 2, ...
    900    3/4    &frac34;  dito
    901    1/2    &frac12;  similar
    902  */
    903  return false;
    904 }
    905 
    906 /***************************************************************************
    907  Library-internal Interface
    908 ****************************************************************************/
    909 
    // XPCOM nsISupports boilerplate for mozTXTToHTMLConv; the arguments after the
    // class name are the interfaces listed for it.
    910 NS_IMPL_ISUPPORTS(mozTXTToHTMLConv, mozITXTToHTMLConv, nsIStreamConverter,
    911                  nsIThreadRetargetableStreamListener, nsIStreamListener,
    912                  nsIRequestObserver)
    913 
    914 int32_t mozTXTToHTMLConv::CiteLevelTXT(const char16_t* line,
    915                                       uint32_t& logLineStart) {
    916  int32_t result = 0;
    917  int32_t lineLength = NS_strlen(line);
    918 
    919  bool moreCites = true;
    920  while (moreCites) {
    921    /* E.g. the following lines count as quote:
    922 
    923       > text
    924       //#ifdef QUOTE_RECOGNITION_AGGRESSIVE
    925       >text
    926       //#ifdef QUOTE_RECOGNITION_AGGRESSIVE
    927           > text
    928       ] text
    929       USER> text
    930       USER] text
    931       //#endif
    932 
    933       logLineStart is the position of "t" in this example
    934    */
    935    uint32_t i = logLineStart;
    936 
    937 #ifdef QUOTE_RECOGNITION_AGGRESSIVE
    938    for (; int32_t(i) < lineLength && IsSpace(line[i]); i++);
    939    for (; int32_t(i) < lineLength && IsAsciiAlpha(line[i]) &&
    940           nsCRT::IsUpper(line[i]);
    941         i++);
    942    if (int32_t(i) < lineLength && (line[i] == '>' || line[i] == ']'))
    943 #else
    944    if (int32_t(i) < lineLength && line[i] == '>')
    945 #endif
    946    {
    947      i++;
    948      if (int32_t(i) < lineLength && line[i] == ' ') i++;
    949      // sendmail/mbox
    950      // Placed here for performance increase
    951      const char16_t* indexString = &line[logLineStart];
    952      // here, |logLineStart < lineLength| is always true
    953      uint32_t minlength = std::min(uint32_t(6), NS_strlen(indexString));
    954      if (Substring(indexString, indexString + minlength)
    955              .Equals(Substring(u">From "_ns, 0, minlength),
    956                      nsCaseInsensitiveStringComparator)) {
    957        // XXX RFC2646
    958        moreCites = false;
    959      } else {
    960        result++;
    961        logLineStart = i;
    962      }
    963    } else {
    964      moreCites = false;
    965    }
    966  }
    967 
    968  return result;
    969 }
    970 
    971 NS_IMETHODIMP
    972 mozTXTToHTMLConv::ScanTXT(const nsAString& aInString, uint32_t whattodo,
    973                          nsAString& aOutString) {
    974  if (aInString.Length() == 0) {
    975    aOutString.Truncate();
    976    return NS_OK;
    977  }
    978 
    979  if (!aOutString.SetCapacity(uint32_t(aInString.Length() * growthRate),
    980                              mozilla::fallible)) {
    981    return NS_ERROR_OUT_OF_MEMORY;
    982  }
    983 
    984  bool doURLs = 0 != (whattodo & kURLs);
    985  bool doGlyphSubstitution = 0 != (whattodo & kGlyphSubstitution);
    986  bool doStructPhrase = 0 != (whattodo & kStructPhrase);
    987 
    988  uint32_t structPhrase_strong = 0;  // Number of currently open tags
    989  uint32_t structPhrase_underline = 0;
    990  uint32_t structPhrase_italic = 0;
    991  uint32_t structPhrase_code = 0;
    992 
    993  uint32_t endOfLastURLOutput = 0;
    994 
    995  nsAutoString outputHTML;  // moved here for performance increase
    996 
    997  const char16_t* rawInputString = aInString.BeginReading();
    998  uint32_t inLength = aInString.Length();
    999 
   1000  const Span<const char16_t> inString(aInString);
   1001  GraphemeClusterBreakIteratorUtf16 ci(inString);
   1002  uint32_t i = 0;
   1003  while (i < inLength) {
   1004    if (doGlyphSubstitution) {
   1005      int32_t glyphTextLen;
   1006      if (GlyphHit(&rawInputString[i], inLength - i, i == 0, aOutString,
   1007                   glyphTextLen)) {
   1008        i = *ci.Seek(i + glyphTextLen - 1);
   1009        continue;
   1010      }
   1011    }
   1012 
   1013    if (doStructPhrase) {
   1014      const char16_t* newOffset = rawInputString;
   1015      int32_t newLength = aInString.Length();
   1016      if (i > 0)  // skip the first element?
   1017      {
   1018        GraphemeClusterBreakReverseIteratorUtf16 ri(
   1019            Span<const char16_t>(rawInputString, i));
   1020        Maybe<uint32_t> nextPos = ri.Next();
   1021        newOffset += *nextPos;
   1022        newLength -= *nextPos;
   1023      }
   1024 
   1025      switch (aInString[i])  // Performance increase
   1026      {
   1027        case '*':
   1028          if (StructPhraseHit(newOffset, newLength, i == 0, u"*", 1, "b",
   1029                              "class=\"moz-txt-star\"", aOutString,
   1030                              structPhrase_strong)) {
   1031            i = *ci.Next();
   1032            continue;
   1033          }
   1034          break;
   1035        case '/':
   1036          if (StructPhraseHit(newOffset, newLength, i == 0, u"/", 1, "i",
   1037                              "class=\"moz-txt-slash\"", aOutString,
   1038                              structPhrase_italic)) {
   1039            i = *ci.Next();
   1040            continue;
   1041          }
   1042          break;
   1043        case '_':
   1044          if (StructPhraseHit(newOffset, newLength, i == 0, u"_", 1,
   1045                              "span" /* <u> is deprecated */,
   1046                              "class=\"moz-txt-underscore\"", aOutString,
   1047                              structPhrase_underline)) {
   1048            i = *ci.Next();
   1049            continue;
   1050          }
   1051          break;
   1052        case '|':
   1053          if (StructPhraseHit(newOffset, newLength, i == 0, u"|", 1, "code",
   1054                              "class=\"moz-txt-verticalline\"", aOutString,
   1055                              structPhrase_code)) {
   1056            i = *ci.Next();
   1057            continue;
   1058          }
   1059          break;
   1060      }
   1061    }
   1062 
   1063    if (doURLs) {
   1064      switch (aInString[i]) {
   1065        case ':':
   1066        case '@':
   1067        case '.':
   1068          if ((i == 0 || ((i > 0) && aInString[i - 1] != ' ')) &&
   1069              ((i == aInString.Length() - 1) ||
   1070               (aInString[i + 1] != ' ')))  // Performance increase
   1071          {
   1072            int32_t replaceBefore;
   1073            int32_t replaceAfter;
   1074            if (FindURL(rawInputString, aInString.Length(), i, whattodo,
   1075                        outputHTML, replaceBefore, replaceAfter) &&
   1076                structPhrase_strong + structPhrase_italic +
   1077                        structPhrase_underline + structPhrase_code ==
   1078                    0
   1079                /* workaround for bug #19445 */) {
   1080              // Don't cut into previously inserted HTML (bug 1509493)
   1081              if (aOutString.Length() - replaceBefore < endOfLastURLOutput) {
   1082                break;
   1083              }
   1084              aOutString.Cut(aOutString.Length() - replaceBefore,
   1085                             replaceBefore);
   1086              aOutString += outputHTML;
   1087              endOfLastURLOutput = aOutString.Length();
   1088              i = *ci.Seek(i + replaceAfter);
   1089              continue;
   1090            }
   1091          }
   1092          break;
   1093      }  // switch
   1094    }
   1095 
   1096    switch (aInString[i]) {
   1097      // Special symbols
   1098      case '<':
   1099      case '>':
   1100      case '&':
   1101        EscapeChar(aInString[i], aOutString, false);
   1102        i = *ci.Next();
   1103        break;
   1104      // Normal characters
   1105      default: {
   1106        const uint32_t oldIdx = i;
   1107        i = *ci.Next();
   1108        aOutString.Append(inString.FromTo(oldIdx, i));
   1109        break;
   1110      }
   1111    }
   1112  }
   1113  return NS_OK;
   1114 }
   1115 
   1116 NS_IMETHODIMP
   1117 mozTXTToHTMLConv::ScanHTML(const nsAString& input, uint32_t whattodo,
   1118                           nsAString& aOutString) {
   1119  const nsPromiseFlatString& aInString = PromiseFlatString(input);
   1120  if (!aOutString.SetCapacity(uint32_t(aInString.Length() * growthRate),
   1121                              mozilla::fallible)) {
   1122    return NS_ERROR_OUT_OF_MEMORY;
   1123  }
   1124 
   1125  // some common variables we were recalculating
   1126  // every time inside the for loop...
   1127  int32_t lengthOfInString = aInString.Length();
   1128  const char16_t* uniBuffer = aInString.get();
   1129 
   1130 #ifdef DEBUG_BenB_Perf
   1131  PRTime parsing_start = PR_IntervalNow();
   1132 #endif
   1133 
   1134  // Look for simple entities not included in a tags and scan them.
   1135  // Skip all tags ("<[...]>") and content in an a link tag ("<a [...]</a>"),
   1136  // comment tag ("<!--[...]-->"), style tag, script tag or head tag.
   1137  // Unescape the rest (text between tags) and pass it to ScanTXT.
   1138  nsAutoCString canFollow(" \f\n\r\t>");
   1139  for (int32_t i = 0; i < lengthOfInString;) {
   1140    if (aInString[i] == '<')  // html tag
   1141    {
   1142      int32_t start = i;
   1143      if (i + 2 < lengthOfInString && nsCRT::ToLower(aInString[i + 1]) == 'a' &&
   1144          canFollow.FindChar(aInString[i + 2]) != kNotFound)
   1145      // if a tag, skip until </a>.
   1146      // Make sure there's a white-space character after, not to match "abbr".
   1147      {
   1148        i = aInString.LowerCaseFindASCII("</a>", i);
   1149        if (i == kNotFound) {
   1150          i = lengthOfInString;
   1151        } else {
   1152          i += 4;
   1153        }
   1154      } else if (Substring(aInString, i + 1, 3).LowerCaseEqualsASCII("!--"))
   1155      // if out-commended code, skip until -->
   1156      {
   1157        i = aInString.Find(u"-->", i);
   1158        if (i == kNotFound) {
   1159          i = lengthOfInString;
   1160        } else {
   1161          i += 3;
   1162        }
   1163      } else if (i + 6 < lengthOfInString &&
   1164                 Substring(aInString, i + 1, 5).LowerCaseEqualsASCII("style") &&
   1165                 canFollow.FindChar(aInString[i + 6]) != kNotFound)
   1166      // if style tag, skip until </style>
   1167      {
   1168        i = aInString.LowerCaseFindASCII("</style>", i);
   1169        if (i == kNotFound) {
   1170          i = lengthOfInString;
   1171        } else {
   1172          i += 8;
   1173        }
   1174      } else if (i + 7 < lengthOfInString &&
   1175                 Substring(aInString, i + 1, 6)
   1176                     .LowerCaseEqualsASCII("script") &&
   1177                 canFollow.FindChar(aInString[i + 7]) != kNotFound)
   1178      // if script tag, skip until </script>
   1179      {
   1180        i = aInString.LowerCaseFindASCII("</script>", i);
   1181        if (i == kNotFound) {
   1182          i = lengthOfInString;
   1183        } else {
   1184          i += 9;
   1185        }
   1186      } else if (i + 5 < lengthOfInString &&
   1187                 Substring(aInString, i + 1, 4).LowerCaseEqualsASCII("head") &&
   1188                 canFollow.FindChar(aInString[i + 5]) != kNotFound)
   1189      // if head tag, skip until </head>
   1190      // Make sure not to match <header>.
   1191      {
   1192        i = aInString.LowerCaseFindASCII("</head>", i);
   1193        if (i == kNotFound) {
   1194          i = lengthOfInString;
   1195        } else {
   1196          i += 7;
   1197        }
   1198      } else  // just skip tag (attributes etc.)
   1199      {
   1200        i = aInString.FindChar('>', i);
   1201        if (i == kNotFound) {
   1202          i = lengthOfInString;
   1203        } else {
   1204          i++;
   1205        }
   1206      }
   1207      aOutString.Append(&uniBuffer[start], i - start);
   1208    } else {
   1209      uint32_t start = uint32_t(i);
   1210      i = aInString.FindChar('<', i);
   1211      if (i == kNotFound) i = lengthOfInString;
   1212 
   1213      nsAutoStringN<256> tempString;
   1214      tempString.SetCapacity(uint32_t((uint32_t(i) - start) * growthRate));
   1215      UnescapeStr(uniBuffer, start, uint32_t(i) - start, tempString);
   1216      ScanTXT(tempString, whattodo, aOutString);
   1217    }
   1218  }
   1219 
   1220 #ifdef DEBUG_BenB_Perf
   1221  printf("ScanHTML time:    %d ms\n",
   1222         PR_IntervalToMilliseconds(PR_IntervalNow() - parsing_start));
   1223 #endif
   1224  return NS_OK;
   1225 }
   1226 
   1227 /****************************************************************************
   1228  XPCOM Interface
   1229 *****************************************************************************/
   1230 
   // Stub: not implemented. Ignores every argument and always returns
   // NS_ERROR_NOT_IMPLEMENTED.
   1231 NS_IMETHODIMP
   1232 mozTXTToHTMLConv::Convert(nsIInputStream* aFromStream, const char* aFromType,
   1233                          const char* aToType, nsISupports* aCtxt,
   1234                          nsIInputStream** _retval) {
   1235  return NS_ERROR_NOT_IMPLEMENTED;
   1236 }
   1237 
   // Stub: not implemented. Ignores every argument and always returns
   // NS_ERROR_NOT_IMPLEMENTED.
   1238 NS_IMETHODIMP
   1239 mozTXTToHTMLConv::AsyncConvertData(const char* aFromType, const char* aToType,
   1240                                   nsIStreamListener* aListener,
   1241                                   nsISupports* aCtxt) {
   1242  return NS_ERROR_NOT_IMPLEMENTED;
   1243 }
   1244 
   // Stub: not implemented. aToType is left untouched and the call always
   // returns NS_ERROR_NOT_IMPLEMENTED.
   1245 NS_IMETHODIMP
   1246 mozTXTToHTMLConv::GetConvertedType(const nsACString& aFromType,
   1247                                   nsIChannel* aChannel, nsACString& aToType) {
   1248  return NS_ERROR_NOT_IMPLEMENTED;
   1249 }
   1250 
   // Stub: not implemented. Nothing is read from inStr; the call always returns
   // NS_ERROR_NOT_IMPLEMENTED.
   1251 NS_IMETHODIMP
   1252 mozTXTToHTMLConv::OnDataAvailable(nsIRequest* request, nsIInputStream* inStr,
   1253                                  uint64_t sourceOffset, uint32_t count) {
   1254  return NS_ERROR_NOT_IMPLEMENTED;
   1255 }
   1256 
   // Stub: not implemented. Ignores aStatus and always returns
   // NS_ERROR_NOT_IMPLEMENTED.
   1257 NS_IMETHODIMP
   1258 mozTXTToHTMLConv::OnDataFinished(nsresult aStatus) {
   1259  return NS_ERROR_NOT_IMPLEMENTED;
   1260 }
   1261 
   // Stub: not implemented. Always returns NS_ERROR_NOT_IMPLEMENTED.
   1262 NS_IMETHODIMP
   1263 mozTXTToHTMLConv::CheckListenerChain() { return NS_ERROR_NOT_IMPLEMENTED; }
   1264 
   // Stub: not implemented. Ignores the request and always returns
   // NS_ERROR_NOT_IMPLEMENTED.
   1265 NS_IMETHODIMP
   1266 mozTXTToHTMLConv::MaybeRetarget(nsIRequest* request) {
   1267  return NS_ERROR_NOT_IMPLEMENTED;
   1268 }
   1269 
   // Stub: not implemented. Ignores the request and always returns
   // NS_ERROR_NOT_IMPLEMENTED.
   1270 NS_IMETHODIMP
   1271 mozTXTToHTMLConv::OnStartRequest(nsIRequest* request) {
   1272  return NS_ERROR_NOT_IMPLEMENTED;
   1273 }
   1274 
   // Stub: not implemented. Ignores the request and aStatus and always returns
   // NS_ERROR_NOT_IMPLEMENTED.
   1275 NS_IMETHODIMP
   1276 mozTXTToHTMLConv::OnStopRequest(nsIRequest* request, nsresult aStatus) {
   1277  return NS_ERROR_NOT_IMPLEMENTED;
   1278 }
   1279 
   1280 NS_IMETHODIMP
   1281 mozTXTToHTMLConv::CiteLevelTXT(const char16_t* line, uint32_t* logLineStart,
   1282                               uint32_t* _retval) {
   1283  if (!logLineStart || !_retval || !line) return NS_ERROR_NULL_POINTER;
   1284  *_retval = CiteLevelTXT(line, *logLineStart);
   1285  return NS_OK;
   1286 }
   1287 
   1288 nsresult MOZ_NewTXTToHTMLConv(mozTXTToHTMLConv** aConv) {
   1289  MOZ_ASSERT(aConv != nullptr, "null ptr");
   1290  if (!aConv) return NS_ERROR_NULL_POINTER;
   1291 
   1292  RefPtr<mozTXTToHTMLConv> conv = new mozTXTToHTMLConv();
   1293  conv.forget(aConv);
   1294  //    return (*aConv)->Init();
   1295  return NS_OK;
   1296 }