mozTXTToHTMLConv.h (12295B)
/* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/**
  Description: Currently only functions to enhance plain text with HTML tags.
  See mozITXTToHTMLConv. Stream conversion is defunct.
*/

#ifndef _mozTXTToHTMLConv_h__
#define _mozTXTToHTMLConv_h__

#include "mozITXTToHTMLConv.h"
#include "nsIThreadRetargetableStreamListener.h"
#include "nsString.h"
#include "nsCOMPtr.h"

class nsIIOService;

/**
  Enhances plain text with HTML tags (see mozITXTToHTMLConv).<p>
  Besides the mozITXTToHTMLConv methods, this class declares the
  request-observer, stream-listener, thread-retargetable-stream-listener and
  stream-converter interface methods through the NS_DECL_* macros below.
  Per the file description above, stream conversion is defunct.
*/
class mozTXTToHTMLConv : public mozITXTToHTMLConv {
  // Declared before any access specifier, so the destructor is private.
  virtual ~mozTXTToHTMLConv() = default;

  //////////////////////////////////////////////////////////
 public:
  //////////////////////////////////////////////////////////

  mozTXTToHTMLConv() = default;
  NS_DECL_ISUPPORTS

  NS_DECL_MOZITXTTOHTMLCONV
  NS_DECL_NSIREQUESTOBSERVER
  NS_DECL_NSISTREAMLISTENER
  NS_DECL_NSITHREADRETARGETABLESTREAMLISTENER
  NS_DECL_NSISTREAMCONVERTER

  /**
    see mozITXTToHTMLConv::CiteLevelTXT
   */
  int32_t CiteLevelTXT(const char16_t* line, uint32_t& logLineStart);

  //////////////////////////////////////////////////////////
 protected:
  //////////////////////////////////////////////////////////
  nsCOMPtr<nsIIOService>
      mIOService;  // for performance reasons, cache the netwerk service...
  /**
    Completes<ul>
    <li>Case 1: mailto: "mozilla@bucksch.org" -> "mailto:mozilla@bucksch.org"
    <li>Case 2: http:   "www.mozilla.org"     -> "http://www.mozilla.org"
    <li>Case 3: ftp:    "ftp.mozilla.org"     -> "ftp://ftp.mozilla.org"
    </ul>
    It does not check whether the resulting URL is valid.
    @param aInString (in): abbreviated URL
    @param aInLength (in): length of aInString
    @param pos (in): position of "@" (case 1) or first "." (case 2 and 3)
    @param aOutString (out): completed URL on success, empty string on
              failure (the function itself returns nothing)
  */
  void CompleteAbbreviatedURL(const char16_t* aInString, int32_t aInLength,
                              const uint32_t pos, nsString& aOutString);

  //////////////////////////////////////////////////////////
 private:
  //////////////////////////////////////////////////////////

  /**
    Kind of limitation that ItMatchesDelimited / NumberOfMatches check
    directly before or after the string being searched for.
  */
  enum LIMTYPE {
    LT_IGNORE,     // limitation not checked
    LT_DELIMITER,  // not alphanumeric and not rep[0]. End of text is also ok.
    LT_ALPHA,      // alpha char
    LT_DIGIT       // presumably a digit char (undocumented in the original)
  };

  /**
    @param aInString (in): the string to search through.<p>
           If before = LT_IGNORE,<br>
             rep is compared starting at the first char of aInString
             (aInString[0]),<br>
             else starting at the second char of aInString (aInString[1]).
           Chars after "after"-delimiter are ignored.
    @param aInLength (in): length of aInString
    @param rep (in): the string to look for
    @param aRepLen (in): the length of the string to look for. NOTE(review):
           the original comment said "number of bytes", but rep is a
           char16_t*, so this is presumably a count of char16_t units -
           confirm against the implementation.
    @param before (in): limitation before rep
    @param after (in): limitation after rep
    @return true, if rep is found and limitation spec is met or rep is empty
  */
  bool ItMatchesDelimited(const char16_t* aInString, int32_t aInLength,
                          const char16_t* rep, int32_t aRepLen, LIMTYPE before,
                          LIMTYPE after);

  /**
    @param aInString, aInStringLength, rep, aRepLen, before, after:
           see ItMatchesDelimited
    @return Number of ItMatchesDelimited hits in aInString
  */
  uint32_t NumberOfMatches(const char16_t* aInString, int32_t aInStringLength,
                           const char16_t* rep, int32_t aRepLen, LIMTYPE before,
                           LIMTYPE after);

  /**
    Currently only changes "<", ">" and "&". All others stay as they are.<p>
    "Char" in function name to avoid side effects with nsString(ch)
    constructors.
    @param ch (in): the character to escape
    @param aStringToAppendto (out) - the string to append the escaped
                                     string to.
    @param inAttribute (in) - will escape quotes, too (which is
                              only needed for attribute values)
  */
  void EscapeChar(const char16_t ch, nsAString& aStringToAppendto,
                  bool inAttribute);

  /**
    See EscapeChar. Escapes the string in place.
  */
  void EscapeStr(nsString& aInString, bool inAttribute);

  /**
    Currently only reverts "<", ">" and "&". All others stay as they are.<p>
    @param aInString (in) HTML string
    @param aStartPos (in) start index into the buffer
    @param aLength (in) length of the buffer
    @param aOutString (out) unescaped buffer
  */
  void UnescapeStr(const char16_t* aInString, int32_t aStartPos,
                   int32_t aLength, nsString& aOutString);

  /**
    <em>Note</em>: I use different strategies to pass context between the
    functions (full text and pos vs. cut text and col0, glyphTextLen vs.
    replaceBefore/-After). It makes some sense, but is hard to understand
    (maintain) :-(.
  */

  /**
    <p><em>Note:</em> replaceBefore + replaceAfter + 1 (for char at pos) chars
    in the text should be replaced by outputHTML.</p>
    <p><em>Note:</em> This function should be able to process a URL on multiple
    lines, but currently, ScanForURLs is called for every line, so it can't.</p>
    @param aInString (in): includes possibly a URL
    @param aInLength (in): length of aInString
    @param pos (in): position in aInString, where either ":", "." or "@" are
              found
    @param whathasbeendone (in): What the calling ScanTXT did/has to do with the
              (not-linkified) text, i.e. usually the "whattodo" parameter.
              (Needed to calculate replaceBefore.) NOT what will be done with
              the content of the link.
    @param outputHTML (out): URL with HTML-a tag
    @param replaceBefore (out): Number of chars of URL before pos
    @param replaceAfter (out): Number of chars of URL after pos
    @return URL found
  */
  bool FindURL(const char16_t* aInString, int32_t aInLength, const uint32_t pos,
               const uint32_t whathasbeendone, nsString& outputHTML,
               int32_t& replaceBefore, int32_t& replaceAfter);

  /**
    The ways a URL can be marked up in plain text. The enumerators have no
    explicit values, so they are numbered 0 (unknown) through 4 (abbreviated).
  */
  enum modetype {
    unknown,
    RFC1738,    /* Check, if RFC1738, APPENDIX compliant,
                   like "<URL:http://www.mozilla.org>". */
    RFC2396E,   /* RFC2396, APPENDIX E allows anglebrackets (like
                   "<http://www.mozilla.org>") (without "URL:") or
                   quotation marks(like ""http://www.mozilla.org"").
                   Also allow email addresses without scheme,
                   e.g. "<mozilla@bucksch.org>" */
    freetext,   /* assume heading scheme
                   with "[a-zA-Z][a-zA-Z0-9+\-\.]*:" like "news:"
                   (see RFC2396, Section 3.1).
                   Certain characters (see code) or any whitespace
                   (including linebreaks) end the URL.
                   Other certain (punctuation) characters (see code)
                   at the end are stripped off. */
    abbreviated /* Similar to freetext, but without scheme, e.g.
                   "www.mozilla.org", "ftp.mozilla.org" and
                   "mozilla@bucksch.org". */
    /* RFC1738 and RFC2396E type URLs may use multiple lines,
       whitespace is stripped. Special characters like ")" stay intact.*/
  };

  /**
   * @param aInString (in), aInLength (in), pos (in): see FindURL
   * @param check (in): Start must conform to this mode
   * @param start (out): Position in aInString, where URL (including brackets
   *           or similar) starts
   * @return |check|-conforming start has been found
   */
  bool FindURLStart(const char16_t* aInString, int32_t aInLength,
                    const uint32_t pos, const modetype check, uint32_t& start);

  /**
   * @param aInString (in), aInStringLength (in), pos (in): see FindURL
   * @param check (in): End must conform to this mode
   * @param start (in): see FindURLStart
   * @param end (out): Similar to |start| param of FindURLStart
   * @return |check|-conforming end has been found
   */
  bool FindURLEnd(const char16_t* aInString, int32_t aInStringLength,
                  const uint32_t pos, const modetype check,
                  const uint32_t start, uint32_t& end);

  /**
   * @param aInString (in), aInStringLength (in), pos (in),
   *           whathasbeendone (in): see FindURL
   * @param check (in): Current mode
   * @param start (in), end (in): see FindURLEnd
   * @param txtURL (out): Guessed (raw) URL.
   *           Without whitespace, but not completed.
   * @param desc (out): Link as shown to the user, but already escaped.
   *           Should be placed between the <a> and </a> tags.
   * @param replaceBefore(out), replaceAfter (out): see FindURL
   */
  void CalculateURLBoundaries(const char16_t* aInString,
                              int32_t aInStringLength, const uint32_t pos,
                              const uint32_t whathasbeendone,
                              const modetype check, const uint32_t start,
                              const uint32_t end, nsString& txtURL,
                              nsString& desc, int32_t& replaceBefore,
                              int32_t& replaceAfter);

  /**
   * @param txtURL (in), desc (in): see CalculateURLBoundaries
   * @param mode (in): the modetype the URL was found with
   * @param outputHTML (out): see FindURL
   * @return A valid URL could be found (and creation of HTML successful)
   */
  bool CheckURLAndCreateHTML(const nsString& txtURL, const nsString& desc,
                             const modetype mode, nsString& outputHTML);

  /**
    @param aInString (in): line of text possibly with tagTXT.<p>
              if col0 is true,
                starting with tagTXT<br>
              else
                starting one char before tagTXT
    @param aInStringLength (in): length of aInString
    @param col0 (in): tagTXT is on the beginning of the line (or paragraph).
              openTags must be 0 then.
    @param tagTXT (in): Tag in plaintext to search for, e.g. "*"
    @param aTagTxtLen (in): length of tagTXT.
    @param tagHTML (in): HTML-Tag to replace tagTXT with,
              without "<" and ">", e.g. "strong"
    @param attributeHTML (in): HTML-attribute to add to opening tagHTML,
              e.g. "class=txt_star"
    @param aOutString: string to APPEND the converted html into
    @param openTags (in/out): Number of currently open tags of type tagHTML
    @return Conversion succeeded
  */
  bool StructPhraseHit(const char16_t* aInString, int32_t aInStringLength,
                       bool col0, const char16_t* tagTXT, int32_t aTagTxtLen,
                       const char* tagHTML, const char* attributeHTML,
                       nsAString& aOutString, uint32_t& openTags);

  /**
    @param aInString (in), aLength (in), col0 (in): see GlyphHit
    @param tagTXT (in): Smiley, see also StructPhraseHit
    @param imageName (in): the basename of the file that contains the image for
                           this smiley
    @param outputHTML (out): new string containing the html for the smiley
    @param glyphTextLen (out): see GlyphHit
  */
  bool SmilyHit(const char16_t* aInString, int32_t aLength, bool col0,
                const char* tagTXT, const nsString& imageName,
                nsString& outputHTML, int32_t& glyphTextLen);

  /**
    Checks, if we can replace some chars at the start of line with prettier HTML
    code.<p>
    If success is reported, replace the first glyphTextLen chars with the HTML
    appended to aOutputString

    @param aInString (in): line of text possibly with Glyph.<p>
              If col0 is true,
                starting with Glyph <br><!-- (br not part of text) -->
              else
                starting one char before Glyph
    @param aInLength (in): length of aInString
    @param col0 (in): text starts at the beginning of the line (or paragraph)
    @param aOutputString (out): APPENDS html for the glyph to this string
    @param glyphTextLen (out): Length of original text to replace
    @return see StructPhraseHit
  */
  bool GlyphHit(const char16_t* aInString, int32_t aInLength, bool col0,
                nsAString& aOutputString, int32_t& glyphTextLen);

  /**
    Check if a given url should be linkified.
    @param aURL (in): url to be checked on.
    @return presumably true if aURL should be linkified (the original comment
            does not document the return value - confirm)
  */
  bool ShouldLinkify(const nsCString& aURL);
};

// These are namespace-scope constants rather than static const class members:
// it's said that Win32 and Mac don't like static const members.
// Both are needed (only) by mozTXTToHTMLConv::FindURL.
// NOTE(review): 4 is also the implicit value of the last modetype enumerator
// (abbreviated); presumably these must be kept in sync with that enum.
const int32_t mozTXTToHTMLConv_lastMode = 4;
// Number of modes; unknown not counted.
const int32_t mozTXTToHTMLConv_numberOfModes = 4;

#endif