nsHtml5Highlighter.h (10827B)
1 /* This Source Code Form is subject to the terms of the Mozilla Public 2 * License, v. 2.0. If a copy of the MPL was not distributed with this 3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 4 #ifndef nsHtml5Highlighter_h 5 #define nsHtml5Highlighter_h 6 7 #include "nsCOMPtr.h" 8 #include "nsHtml5TreeOperation.h" 9 #include "nsHtml5UTF16Buffer.h" 10 #include "nsHtml5TreeOperation.h" 11 #include "nsAHtml5TreeOpSink.h" 12 13 #define NS_HTML5_HIGHLIGHTER_HANDLE_ARRAY_LENGTH 512 14 15 /** 16 * A state machine for generating HTML for display in View Source based on 17 * the transitions the tokenizer makes on the source being viewed. 18 */ 19 class nsHtml5Highlighter { 20 public: 21 /** 22 * The constructor. 23 * 24 * @param aOpSink the sink for the tree ops generated by this highlighter 25 */ 26 explicit nsHtml5Highlighter(nsAHtml5TreeOpSink* aOpSink); 27 28 /** 29 * The destructor. 30 */ 31 ~nsHtml5Highlighter(); 32 33 /** 34 * Set the op sink (for speculation). 35 */ 36 void SetOpSink(nsAHtml5TreeOpSink* aOpSink); 37 38 /** 39 * Reset state to after generated head but before processing any of the input 40 * stream. 41 */ 42 void Rewind(); 43 44 /** 45 * Starts the generated document. 46 */ 47 void Start(const nsAutoString& aTitle); 48 49 /** 50 * Updates the charset source via the op queue. 51 */ 52 void UpdateCharsetSource(nsCharsetSource aCharsetSource); 53 54 /** 55 * Report a tokenizer state transition. 56 * 57 * @param aState the state being transitioned to 58 * @param aReconsume whether this is a reconsuming transition 59 * @param aPos the tokenizer's current position into the buffer 60 */ 61 int32_t Transition(int32_t aState, bool aReconsume, int32_t aPos); 62 63 /** 64 * Report end of file. 65 * 66 * Returns `true` normally and `false` on OOM. 67 */ 68 [[nodiscard]] bool End(); 69 70 /** 71 * Set the current buffer being tokenized 72 */ 73 void SetBuffer(nsHtml5UTF16Buffer* aBuffer); 74 75 /** 76 * Let go of the buffer being tokenized but first, flush text from it. 77 * 78 * @param aPos the first UTF-16 code unit not to flush 79 */ 80 void DropBuffer(int32_t aPos); 81 82 /** 83 * Query whether there are some many ops in the queue 84 * that they should be flushed now. 85 * 86 * @return true if FlushOps() should be called now 87 */ 88 bool ShouldFlushOps(); 89 90 /** 91 * Flush the tree ops into the sink. 92 * 93 * @return Ok(true) if there were ops to flush, Ok(false) 94 * if there were no ops to flush and Err() on OOM. 95 */ 96 mozilla::Result<bool, nsresult> FlushOps(); 97 98 /** 99 * Linkify the current attribute value if the attribute name is one of 100 * known URL attributes. (When executing tree ops, javascript: URLs will 101 * not be linkified, though.) 102 * 103 * @param aName the name of the attribute 104 * @param aValue the value of the attribute 105 */ 106 void MaybeLinkifyAttributeValue(nsHtml5AttributeName* aName, 107 nsHtml5String aValue); 108 109 /** 110 * Inform the highlighter that the tokenizer successfully completed a 111 * named character reference. 112 */ 113 void CompletedNamedCharacterReference(); 114 115 /** 116 * Adds an error annotation to the node that's currently on top of 117 * mStack. 118 * 119 * @param aMsgId the id of the message in the property file 120 */ 121 void AddErrorToCurrentNode(const char* aMsgId); 122 123 /** 124 * Adds an error annotation to the node that corresponds to the most 125 * recently opened markup declaration/tag span, character reference or 126 * run of text. 127 * 128 * @param aMsgId the id of the message in the property file 129 */ 130 void AddErrorToCurrentRun(const char* aMsgId); 131 132 /** 133 * Adds an error annotation to the node that corresponds to the most 134 * recently opened markup declaration/tag span, character reference or 135 * run of text with one atom to use when formatting the message. 136 * 137 * @param aMsgId the id of the message in the property file 138 * @param aName the atom 139 */ 140 void AddErrorToCurrentRun(const char* aMsgId, nsAtom* aName); 141 142 /** 143 * Adds an error annotation to the node that corresponds to the most 144 * recently opened markup declaration/tag span, character reference or 145 * run of text with two atoms to use when formatting the message. 146 * 147 * @param aMsgId the id of the message in the property file 148 * @param aName the first atom 149 * @param aOther the second atom 150 */ 151 void AddErrorToCurrentRun(const char* aMsgId, nsAtom* aName, nsAtom* aOther); 152 153 /** 154 * Adds an error annotation to the node that corresponds to the most 155 * recent potentially character reference-starting ampersand. 156 * 157 * @param aMsgId the id of the message in the property file 158 */ 159 void AddErrorToCurrentAmpersand(const char* aMsgId); 160 161 /** 162 * Adds an error annotation to the node that corresponds to the most 163 * recent potentially self-closing slash. 164 * 165 * @param aMsgId the id of the message in the property file 166 */ 167 void AddErrorToCurrentSlash(const char* aMsgId); 168 169 /** 170 * Enqueues a tree op for adding base to the urls with the view-source: 171 * 172 * @param aValue the base URL to add 173 */ 174 void AddBase(nsHtml5String aValue); 175 176 /** Starts the body */ 177 void StartBodyContents(); 178 179 private: 180 /** 181 * Starts a wrapper around a run of characters. 182 */ 183 void StartCharacters(); 184 185 /** 186 * Starts a span with no class. 187 */ 188 void StartSpan(); 189 190 /** 191 * Starts a <span> and sets the class attribute on it. 192 * 193 * @param aClass the class to set (MUST be a static string that does not 194 * need to be released!) 195 */ 196 void StartSpan(const char16_t* aClass); 197 198 /** 199 * End the current <span> or <a> in the highlighter output. 200 */ 201 void EndSpanOrA(); 202 203 /** Ends a wrapper around a run of characters. */ 204 void EndCharactersAndStartMarkupRun(); 205 206 /** 207 * Starts an <a>. 208 */ 209 void StartA(); 210 211 /** 212 * Flushes characters up to but not including the current one. 213 */ 214 void FlushChars(); 215 216 /** 217 * Flushes characters up to and including the current one. 218 */ 219 void FlushCurrent(); 220 221 /** 222 * Finishes highlighting a tag in the input data by closing the open 223 * <span> and <a> elements in the highlighter output and then starts 224 * another <span> for potentially highlighting characters potentially 225 * appearing next. 226 */ 227 void FinishTag(); 228 229 /** 230 * Adds a class attribute to the current node. 231 * 232 * @param aClass the class to set (MUST be a static string that does not 233 * need to be released!) 234 */ 235 void AddClass(const char16_t* aClass); 236 237 /** 238 * Allocates a handle for an element. 239 * 240 * See the documentation for nsHtml5TreeBuilder::AllocateContentHandle() 241 * in nsHtml5TreeBuilderHSupplement.h. 242 * 243 * @return the handle 244 */ 245 nsIContent** AllocateContentHandle(); 246 247 /** 248 * Enqueues an element creation tree operation. 249 * 250 * @param aName the name of the element 251 * @param aAttributes the attribute holder (ownership will be taken) or 252 * nullptr for no attributes 253 * @param aIntendedParent the intended parent node for the created element 254 * @param aCreator the content creator function 255 * @return the handle for the element that will be created 256 */ 257 nsIContent** CreateElement(nsAtom* aName, nsHtml5HtmlAttributes* aAttributes, 258 nsIContent** aIntendedParent, 259 mozilla::dom::HTMLContentCreatorFunction aCreator); 260 261 /** 262 * Gets the handle for the current node. May be called only after the 263 * root element has been set. 264 * 265 * @return the handle for the current node 266 */ 267 nsIContent** CurrentNode(); 268 269 /** 270 * Create an element and push it (its handle) on the stack. 271 * 272 * @param aName the name of the element 273 * @param aAttributes the attribute holder (ownership will be taken) or 274 * nullptr for no attributes 275 * @param aCreator the content creator function 276 */ 277 void Push(nsAtom* aName, nsHtml5HtmlAttributes* aAttributes, 278 mozilla::dom::HTMLContentCreatorFunction aCreator); 279 280 /** Pushes a <span id="line<lineno>"> */ 281 void PushCurrentLineContainer(); 282 283 /** 284 * Pops all inlines from the stack, pushes a pre, and pushes all inlines back 285 * with the same attributes. 286 */ 287 void NewLine(); 288 289 /** 290 * Pops the current node off the stack. 291 */ 292 void Pop(); 293 294 /** 295 * Appends text content to the current node. 296 * 297 * @param aBuffer the buffer to copy from 298 * @param aStart the index of the first code unit to copy 299 * @param aLength the number of code units to copy 300 */ 301 void AppendCharacters(const char16_t* aBuffer, int32_t aStart, 302 int32_t aLength); 303 304 /** 305 * Enqueues a tree op for adding an href attribute with the view-source: 306 * URL scheme to the current node. 307 * 308 * @param aValue the (potentially relative) URL to link to 309 */ 310 void AddViewSourceHref(nsHtml5String aValue); 311 312 /** 313 * The state we are transitioning away from. 314 */ 315 int32_t mState; 316 317 /** 318 * The index of the first UTF-16 code unit in mBuffer that hasn't been 319 * flushed yet. 320 */ 321 int32_t mCStart; 322 323 /** 324 * The position of the code unit in mBuffer that caused the current 325 * transition. 326 */ 327 int32_t mPos; 328 329 /** 330 * The current line number. 331 */ 332 int32_t mLineNumber; 333 334 /** 335 * The number of inline elements open inside the <pre> excluding the 336 * span potentially wrapping a run of characters. 337 */ 338 int32_t mInlinesOpen; 339 340 /** 341 * Whether there's a span wrapping a run of characters (excluding CDATA 342 * section) open. 343 */ 344 bool mInCharacters; 345 346 /** 347 * The current buffer being tokenized. 348 */ 349 nsHtml5UTF16Buffer* mBuffer; 350 351 /** 352 * The outgoing tree op queue. 353 */ 354 nsTArray<nsHtml5TreeOperation> mOpQueue; 355 356 /** 357 * The tree op stage for the tree op executor or a speculation when looking 358 * for meta charset. 359 * 360 * The op sink is owned by the nsHtml5TreeOpExecutor, which outlives this 361 * object, because this object is owned by the nsHtml5Tokenizer instance that 362 * is owned by the nsHtml5StreamParser, which keeps the executor alive via 363 * nsHtml5Streamparser::mExecutorFlusher. 364 */ 365 nsAHtml5TreeOpSink* mOpSink; 366 367 /** 368 * The most recently opened markup declaration/tag or run of characters. 369 */ 370 nsIContent** mCurrentRun; 371 372 /** 373 * The most recent ampersand in a place where character references were 374 * allowed. 375 */ 376 nsIContent** mAmpersand; 377 378 /** 379 * The most recent slash that might become a self-closing slash. 380 */ 381 nsIContent** mSlash; 382 383 /** 384 * Memory for element handles. 385 */ 386 mozilla::UniquePtr<nsIContent*[]> mHandles; 387 388 /** 389 * Number of handles used in mHandles 390 */ 391 int32_t mHandlesUsed; 392 393 /** 394 * A holder for old contents of mHandles 395 */ 396 nsTArray<mozilla::UniquePtr<nsIContent*[]>> mOldHandles; 397 398 /** 399 * The element stack. 400 */ 401 nsTArray<nsIContent**> mStack; 402 403 /** 404 * Whether base is already visited once. 405 */ 406 bool mSeenBase; 407 }; 408 409 #endif // nsHtml5Highlighter_h