tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

mozTXTToHTMLConv.h (12295B)


      1 /* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
      2 /* This Source Code Form is subject to the terms of the Mozilla Public
      3 * License, v. 2.0. If a copy of the MPL was not distributed with this
      4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      5 
      6 /**
      7  Description: Currently only functions to enhance plain text with HTML tags.
      8  See mozITXTToHTMLConv. Stream conversion is defunct.
      9 */
     10 
     11 #ifndef _mozTXTToHTMLConv_h__
     12 #define _mozTXTToHTMLConv_h__
     13 
     14 #include "mozITXTToHTMLConv.h"
     15 #include "nsIThreadRetargetableStreamListener.h"
     16 #include "nsString.h"
     17 #include "nsCOMPtr.h"
     18 
     19 class nsIIOService;
     20 
     21 class mozTXTToHTMLConv : public mozITXTToHTMLConv {
     22  virtual ~mozTXTToHTMLConv() = default;  // private (declared before "public:")
     23 
     24  //////////////////////////////////////////////////////////
     25 public:
     26  //////////////////////////////////////////////////////////
     27 
     28  mozTXTToHTMLConv() = default;
     29  NS_DECL_ISUPPORTS
     30 
     31  NS_DECL_MOZITXTTOHTMLCONV
     32  NS_DECL_NSIREQUESTOBSERVER
     33  NS_DECL_NSISTREAMLISTENER
     34  NS_DECL_NSITHREADRETARGETABLESTREAMLISTENER
     35  NS_DECL_NSISTREAMCONVERTER
     36 
     37  /**
     38    See mozITXTToHTMLConv::CiteLevelTXT for the contract.
     39   */
     40  int32_t CiteLevelTXT(const char16_t* line, uint32_t& logLineStart);
     41 
     42  //////////////////////////////////////////////////////////
     43 protected:
     44  //////////////////////////////////////////////////////////
     45  nsCOMPtr<nsIIOService>
     46      mIOService;  // for performance reasons, cache the netwerk (IO) service...
     47                   /**
     48                     Documents CompleteAbbreviatedURL (declared below). Completes<ul>
     49                     <li>Case 1: mailto: "mozilla@bucksch.org" -> "mailto:mozilla@bucksch.org"
     50                     <li>Case 2: http:   "www.mozilla.org"     -> "http://www.mozilla.org"
     51                     <li>Case 3: ftp:    "ftp.mozilla.org"     -> "ftp://ftp.mozilla.org"
     52                     </ul>
     53                     It does not check whether the resulting URL is valid.
     54                     @param aInString (in): abbreviated URL
     55                     @param pos (in): position of "@" (case 1) or first "." (case 2 and 3)
     56                     @param aOutString (out): Completed URL at success and empty string at failure
     57                    */
     58  void CompleteAbbreviatedURL(const char16_t* aInString, int32_t aInLength,
     59                              const uint32_t pos, nsString& aOutString);
     60 
     61  //////////////////////////////////////////////////////////
     62 private:
     63  //////////////////////////////////////////////////////////
     64 
     65  enum LIMTYPE {
     66    LT_IGNORE,     // limitation not checked
     67    LT_DELIMITER,  // not alphanumeric and not rep[0]. End of text is also ok.
     68    LT_ALPHA,      // alpha char
     69    LT_DIGIT       // presumably: digit char (undocumented in the original; confirm)
     70  };
     71 
     72  /**
     73    @param aInString (in): the string to search through.<p>
     74           If before = LT_IGNORE,<br>
     75             rep is compared starting at the 1st char of aInString ([0]),<br>
     76             else starting at the 2nd char of aInString ([1]).
     77           Chars after "after"-delimiter are ignored.
     78    @param rep (in): the string to look for
     79    @param aRepLen (in): the length of rep (doc said "bytes"; TODO confirm the unit)
     80    @param before (in): limitation before rep
     81    @param after (in): limitation after rep
     82    @return true, if rep is found and limitation spec is met or rep is empty
     83  */
     84  bool ItMatchesDelimited(const char16_t* aInString, int32_t aInLength,
     85                          const char16_t* rep, int32_t aRepLen, LIMTYPE before,
     86                          LIMTYPE after);
     87 
     88  /**
     89    @param see ItMatchesDelimited
     90    @return Number of ItMatchesDelimited matches in aInString
     91  */
     92  uint32_t NumberOfMatches(const char16_t* aInString, int32_t aInStringLength,
     93                           const char16_t* rep, int32_t aRepLen, LIMTYPE before,
     94                           LIMTYPE after);
     95 
     96  /**
     97    Currently only changes "<", ">" and "&". All others stay as they are.<p>
     98    "Char" in function name to avoid side effects with nsString(ch)
     99    constructors.
    100    @param ch (in) - the character to escape
    101    @param aStringToAppendto (out) - the string to append the escaped
    102                                     string to.
    103    @param inAttribute (in) - will escape quotes, too (which is
    104                              only needed for attribute values)
    105  */
    106  void EscapeChar(const char16_t ch, nsAString& aStringToAppendto,
    107                  bool inAttribute);
    108 
    109  /**
    110    See EscapeChar. Escapes the string aInString in place.
    111  */
    112  void EscapeStr(nsString& aInString, bool inAttribute);
    113 
    114  /**
    115    Currently only reverts "<", ">" and "&". All others stay as they are.<p>
    116    @param aInString (in) HTML string
    117    @param aStartPos (in) start index into the buffer
    118    @param aLength (in) length of the buffer
    119    @param aOutString (out) unescaped buffer
    120  */
    121  void UnescapeStr(const char16_t* aInString, int32_t aStartPos,
    122                   int32_t aLength, nsString& aOutString);
    123 
    124  /**
    125    <em>Note</em>: I use different strategies to pass context between the
    126    functions (full text and pos vs. cut text and col0, glyphTextLen vs.
    127    replaceBefore/-After). It makes some sense, but is hard to understand
    128    (maintain) :-(.
    129  */
    130 
    131  /**
    132    <p><em>Note:</em> replaceBefore + replaceAfter + 1 (for char at pos) chars
    133    in aInString should be replaced by outputHTML.</p>
    134    <p><em>Note:</em> This function should be able to process a URL on multiple
    135    lines, but currently, ScanForURLs is called for every line, so it can't.</p>
    136    @param aInString (in): text that possibly includes a URL
    137    @param pos (in): position in aInString, where either ":", "." or "@" are found
    138    @param whathasbeendone (in): What the calling ScanTXT did/has to do with the
    139                (not-linkified) text, i.e. usually the "whattodo" parameter.
    140                (Needed to calculate replaceBefore.) NOT what will be done with
    141                the content of the link.
    142    @param outputHTML (out): URL with HTML-a tag
    143    @param replaceBefore (out): Number of chars of URL before pos
    144    @param replaceAfter (out): Number of chars of URL after pos
    145    @return true if a URL was found
    146  */
    147  bool FindURL(const char16_t* aInString, int32_t aInLength, const uint32_t pos,
    148               const uint32_t whathasbeendone, nsString& outputHTML,
    149               int32_t& replaceBefore, int32_t& replaceAfter);
    150 
    151  enum modetype {
    152    unknown,
    153    RFC1738,    /* Check, if RFC1738, APPENDIX compliant,
    154                   like "<URL:http://www.mozilla.org>". */
    155    RFC2396E,   /* RFC2396, APPENDIX E allows angle brackets (like
    156                   "<http://www.mozilla.org>") (without "URL:") or
    157                   quotation marks (like ""http://www.mozilla.org"").
    158                   Also allow email addresses without scheme,
    159                   e.g. "<mozilla@bucksch.org>" */
    160    freetext,   /* assume a leading scheme
    161                   with "[a-zA-Z][a-zA-Z0-9+\-\.]*:" like "news:"
    162                   (see RFC2396, Section 3.1).
    163                   Certain characters (see code) or any whitespace
    164                   (including linebreaks) end the URL.
    165                   Certain other (punctuation) characters (see code)
    166                   at the end are stripped off. */
    167    abbreviated /* Similar to freetext, but without scheme, e.g.
    168                   "www.mozilla.org", "ftp.mozilla.org" and
    169                   "mozilla@bucksch.org". */
    170                /* RFC1738 and RFC2396E type URLs may use multiple lines,
    171                   whitespace is stripped. Special characters like ")" stay intact.*/
    172  };
    173 
    174  /**
    175   * @param aInString (in), pos (in): see FindURL
    176   * @param check (in): Start must conform to this mode
    177   * @param start (out): Position in aInString, where URL (including brackets or
    178   *             similar) starts
    179   * @return true if a start conforming to |check| has been found
    180   */
    181  bool FindURLStart(const char16_t* aInString, int32_t aInLength,
    182                    const uint32_t pos, const modetype check, uint32_t& start);
    183 
    184  /**
    185   * @param aInString (in), pos (in): see FindURL
    186   * @param check (in): End must conform to this mode
    187   * @param start (in): see FindURLStart
    188   * @param end (out): Similar to |start| param of FindURLStart
    189   * @return true if an end conforming to |check| has been found
    190   */
    191  bool FindURLEnd(const char16_t* aInString, int32_t aInStringLength,
    192                  const uint32_t pos, const modetype check,
    193                  const uint32_t start, uint32_t& end);
    194 
    195  /**
    196   * @param aInString (in), pos (in), whathasbeendone (in): see FindURL
    197   * @param check (in): Current mode
    198   * @param start (in), end (in): see FindURLEnd
    199   * @param txtURL (out): Guessed (raw) URL.
    200   *             Without whitespace, but not completed.
    201   * @param desc (out): Link as shown to the user, but already escaped.
    202   *             Should be placed between the <a> and </a> tags.
    203   * @param replaceBefore (out), replaceAfter (out): see FindURL
    204   */
    205  void CalculateURLBoundaries(const char16_t* aInString,
    206                              int32_t aInStringLength, const uint32_t pos,
    207                              const uint32_t whathasbeendone,
    208                              const modetype check, const uint32_t start,
    209                              const uint32_t end, nsString& txtURL,
    210                              nsString& desc, int32_t& replaceBefore,
    211                              int32_t& replaceAfter);
    212 
    213  /**
    214   * @param txtURL (in), desc (in): see CalculateURLBoundaries
    215   * @param outputHTML (out): see FindURL
    216   * @return true if a valid URL could be found (and creation of HTML successful)
    217   */
    218  bool CheckURLAndCreateHTML(const nsString& txtURL, const nsString& desc,
    219                             const modetype mode, nsString& outputHTML);
    220 
    221  /**
    222    @param aInString (in): line of text possibly with tagTXT.<p>
    223                if col0 is true,
    224                  starting with tagTXT<br>
    225                else
    226                  starting one char before tagTXT
    227    @param col0 (in): tagTXT is on the beginning of the line (or paragraph).
    228                openTags must be 0 then.
    229    @param tagTXT (in): Tag in plaintext to search for, e.g. "*"
    230    @param aTagTxtLen (in): length of tagTXT.
    231    @param tagHTML (in): HTML-Tag to replace tagTXT with,
    232                without "<" and ">", e.g. "strong"
    233    @param attributeHTML (in): HTML-attribute to add to opening tagHTML,
    234                e.g. "class=txt_star"
    235    @param aOutString: string to APPEND the converted html into
    236    @param openTags (in/out): Number of currently open tags of type tagHTML
    237    @return Conversion succeeded
    238  */
    239  bool StructPhraseHit(const char16_t* aInString, int32_t aInStringLength,
    240                       bool col0, const char16_t* tagTXT, int32_t aTagTxtLen,
    241                       const char* tagHTML, const char* attributeHTML,
    242                       nsAString& aOutString, uint32_t& openTags);
    243 
    244  /**
    245    @param aInString (in), col0 (in): see GlyphHit
    246    @param tagTXT (in): Smiley, see also StructPhraseHit
    247    @param imageName (in): the basename of the file that contains the image for
    248                           this smiley
    249    @param outputHTML (out): new string containing the html for the smiley
    250    @param glyphTextLen (out): see GlyphHit
    251  */
    252  bool SmilyHit(const char16_t* aInString, int32_t aLength, bool col0,
    253                const char* tagTXT, const nsString& imageName,
    254                nsString& outputHTML, int32_t& glyphTextLen);
    255 
    256  /**
    257    Checks, if we can replace some chars at the start of line with prettier HTML
    258    code.<p>
    259    If success is reported, replace the first glyphTextLen chars with outputHTML
    260 
    261    @param aInString (in): line of text possibly with Glyph.<p>
    262                If col0 is true,
    263                  starting with Glyph <br><!-- (br not part of text) -->
    264                else
    265                  starting one char before Glyph
    266    @param col0 (in): aInString starts at the beginning of the line (or paragraph)
    267    @param aOutputString (out): APPENDS html for the glyph to this string
    268    @param glyphTextLen (out): Length of original text to replace
    269    @return see StructPhraseHit
    270  */
    271  bool GlyphHit(const char16_t* aInString, int32_t aInLength, bool col0,
    272                nsAString& aOutputString, int32_t& glyphTextLen);
    273 
    274  /**
    275    Check if a given url should be linkified.
    276    @param aURL (in): url to be checked on.
    277  */
    278  bool ShouldLinkify(const nsCString& aURL);
    279 };
    280 
    281 // It's said that Win32 and Mac don't like static const members
    282 const int32_t mozTXTToHTMLConv_lastMode = 4;  // presumably the value of modetype "abbreviated" -- confirm
    283 // Needed (only) by mozTXTToHTMLConv::FindURL
    284 const int32_t mozTXTToHTMLConv_numberOfModes = 4;  // ditto; unknown not counted
    285 
    286 #endif