tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

commit aec3d65abbe8a0bea7f7bdd1f033e1148e87e6b0
parent 703157f328c5b800349a2b9b13a5150e556ae55e
Author: Atila Butkovits <abutkovits@mozilla.com>
Date:   Mon, 20 Oct 2025 15:37:49 +0300

Revert "Bug 1499682 - SIMD-accelerate the data state in the HTML tokenizer. r=smaug,sergesanspaille" for causing failures at test_html5_tree_construction.html.

This reverts commit 96fdec23a214937120ab575f6bd9e41a96706d40.

Diffstat:
M build/moz.configure/toolchain.configure | 29 -----------------------------
M parser/html/javasrc/Tokenizer.java | 378 +++++++++++++++++++++++++------------------------------------------------------
M parser/html/moz.build | 32 --------------------------------
M parser/html/nsHtml5Tokenizer.cpp | 4351 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
M parser/html/nsHtml5Tokenizer.h | 4238 +------------------------------------------------------------------------------
D parser/html/nsHtml5TokenizerALU.cpp | 33 ---------------------------------
D parser/html/nsHtml5TokenizerALUStubs.cpp | 32 --------------------------------
M parser/html/nsHtml5TokenizerHSupplement.h | 40 +++-------------------------------------
A parser/html/nsHtml5TokenizerLoopPolicies.h | 123 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
D parser/html/nsHtml5TokenizerLoopPoliciesALU.h | 150 -------------------------------------------------------------------------------
D parser/html/nsHtml5TokenizerLoopPoliciesSIMD.h | 211 -------------------------------------------------------------------------------
D parser/html/nsHtml5TokenizerSIMD.cpp | 33 ---------------------------------
D parser/html/nsHtml5TokenizerSIMDStubs.cpp | 32 --------------------------------
D parser/htmlaccel/gtest/TestHtmlSimd.cpp | 62 --------------------------------------------------------------
D parser/htmlaccel/gtest/moz.build | 15 ---------------
D parser/htmlaccel/htmlaccel.h | 322 -------------------------------------------------------------------------------
D parser/htmlaccel/htmlaccelEnabled.h | 30 ------------------------------
D parser/htmlaccel/htmlaccelNotInline.cpp | 30 ------------------------------
D parser/htmlaccel/htmlaccelNotInline.h | 34 ----------------------------------
D parser/htmlaccel/moz.build | 29 -----------------------------
M parser/moz.build | 2 +-
21 files changed, 4483 insertions(+), 5723 deletions(-)

diff --git a/build/moz.configure/toolchain.configure b/build/moz.configure/toolchain.configure @@ -3893,35 +3893,6 @@ set_config( ), ) - -@depends(target, c_compiler) -def htmlaccel_config(target, c_compiler): - # Keep this is sync with the mozilla::htmlaccel::htmlaccelEnabled function. - # - # The code compiles on SSSE3, but AVX+BMI generates better code - # and has been available for 12 years at the time of landing this, - # so let's give the best code to users with reasonably recent hardware. - # - # Not enabled on 32-bit x86, due to lack of insight into what hardware is - # representative at this point in time and due to lack of such hardware - # for testing to see what config would actually be an optimization. - # - # aarch64 does not need extra flags. - # - # clang-cl doesn't tolerate -flax-vector-conversions but GCC requires it. - # - # -mavx2 doesn't change codegen vs. -mavx. AVX2 and BMI always co-occur - # in Intel CPUs, but there are AMD CPUs that have AVX and BMI without - # AVX2. 
- if target.cpu != "x86_64": - return [] - if c_compiler.type == "gcc": - return ["-mavx", "-mbmi", "-flax-vector-conversions"] - return ["-mavx", "-mbmi"] - - -set_config("HTML_ACCEL_FLAGS", htmlaccel_config) - # dtrace support ## option("--enable-dtrace", help="Build with dtrace support") diff --git a/parser/html/javasrc/Tokenizer.java b/parser/html/javasrc/Tokenizer.java @@ -932,7 +932,7 @@ public class Tokenizer implements Locator, Locator2 { // ]NOCPP] - @Inline HtmlAttributes emptyAttributes() { + HtmlAttributes emptyAttributes() { // [NOCPP[ if (newAttributesEachTime) { return new HtmlAttributes(mappingLangToXmlLang); @@ -944,7 +944,7 @@ public class Tokenizer implements Locator, Locator2 { // ]NOCPP] } - private void appendCharRefBuf(char c) { + @Inline private void appendCharRefBuf(char c) { // CPPONLY: assert charRefBufLen < charRefBuf.length: // CPPONLY: "RELEASE: Attempted to overrun charRefBuf!"; charRefBuf[charRefBufLen++] = c; @@ -982,7 +982,7 @@ public class Tokenizer implements Locator, Locator2 { * @param c * the UTF-16 code unit to append */ - private void appendStrBuf(char c) { + @Inline private void appendStrBuf(char c) { // CPPONLY: assert strBufLen < strBuf.length: "Previous buffer length insufficient."; // CPPONLY: if (strBufLen == strBuf.length) { // CPPONLY: if (!EnsureBufferSpace(1)) { @@ -1000,7 +1000,7 @@ public class Tokenizer implements Locator, Locator2 { * * @return the buffer as a string */ - @Inline protected String strBufToString() { + protected String strBufToString() { String str = Portability.newStringFromBuffer(strBuf, 0, strBufLen // CPPONLY: , tokenHandler, !newAttributesEachTime && attributeName == AttributeName.CLASS ); @@ -1014,7 +1014,7 @@ public class Tokenizer implements Locator, Locator2 { * * @return the buffer as local name */ - @Inline private void strBufToDoctypeName() { + private void strBufToDoctypeName() { doctypeName = Portability.newLocalNameFromBuffer(strBuf, strBufLen, interner); clearStrBufAfterUse(); } 
@@ -1025,7 +1025,7 @@ public class Tokenizer implements Locator, Locator2 { * @throws SAXException * if the token handler threw */ - @Inline private void emitStrBuf() throws SAXException { + private void emitStrBuf() throws SAXException { if (strBufLen > 0) { tokenHandler.characters(strBuf, 0, strBufLen); clearStrBufAfterUse(); @@ -1455,6 +1455,12 @@ public class Tokenizer implements Locator, Locator2 { */ int pos = start - 1; + /** + * The index of the first <code>char</code> in <code>buf</code> that is + * part of a coalesced run of character tokens or + * <code>Integer.MAX_VALUE</code> if there is not a current run being + * coalesced. + */ switch (state) { case DATA: case RCDATA: @@ -1480,24 +1486,19 @@ public class Tokenizer implements Locator, Locator2 { break; } + /** + * The number of <code>char</code>s in <code>buf</code> that have + * meaning. (The rest of the array is garbage and should not be + * examined.) + */ // CPPONLY: if (mViewSource) { // CPPONLY: mViewSource.SetBuffer(buffer); - // CPPONLY: if (htmlaccelEnabled()) { - // CPPONLY: pos = StateLoopViewSourceSIMD(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd()); - // CPPONLY: } else { - // CPPONLY: pos = StateLoopViewSourceALU(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd()); - // CPPONLY: } + // CPPONLY: pos = stateLoop(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd()); // CPPONLY: mViewSource.DropBuffer((pos == buffer.getEnd()) ? 
pos : pos + 1); // CPPONLY: } else if (tokenHandler.WantsLineAndColumn()) { - // CPPONLY: if (htmlaccelEnabled()) { - // CPPONLY: pos = StateLoopLineColSIMD(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd()); - // CPPONLY: } else { - // CPPONLY: pos = StateLoopLineColALU(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd()); - // CPPONLY: } - // CPPONLY: } else if (htmlaccelEnabled()) { - // CPPONLY: pos = StateLoopFastestSIMD(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd()); + // CPPONLY: pos = stateLoop(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd()); // CPPONLY: } else { - // CPPONLY: pos = StateLoopFastestALU(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd()); + // CPPONLY: pos = stateLoop(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd()); // CPPONLY: } // [NOCPP[ pos = stateLoop(state, c, pos, buffer.getBuffer(), false, returnState, @@ -1546,7 +1547,7 @@ public class Tokenizer implements Locator, Locator2 { } // ]NOCPP] - @SuppressWarnings("unused") @Inline private int stateLoop(int state, char c, + @SuppressWarnings("unused") private int stateLoop(int state, char c, int pos, @NoLength char[] buf, boolean reconsume, int returnState, int endPos) throws SAXException { boolean reportedConsecutiveHyphens = false; @@ -1622,127 +1623,54 @@ public class Tokenizer implements Locator, Locator2 { switch (state) { case DATA: dataloop: for (;;) { - // Ideally this reconsume block would be a separate state, DATA_RECONSUME above this one - // with fallthrough into this state. However, such a change would be disruptive to - // TransitionHandler and everything that works with returnState. if (reconsume) { reconsume = false; - // This is a manual copy of the switch below with break/continue - // adjusted as relevant. Make sure to keep in sync with the switch below! 
- switch (c) { - case '&': - /* - * U+0026 AMPERSAND (&) Switch to the character - * reference in data state. - */ - flushChars(buf, pos); - assert charRefBufLen == 0: "charRefBufLen not reset after previous use!"; - appendCharRefBuf(c); - setAdditionalAndRememberAmpersandLocation('\u0000'); - returnState = state; - state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos); - continue stateloop; - case '<': - /* - * U+003C LESS-THAN SIGN (<) Switch to the tag - * open state. - */ - flushChars(buf, pos); - - state = transition(state, Tokenizer.TAG_OPEN, reconsume, pos); - // `break` optimizes; `continue stateloop;` would be valid - break dataloop; - case '\u0000': - maybeEmitReplacementCharacter(buf, pos); - break; - case '\r': - emitCarriageReturn(buf, pos); - break stateloop; - case '\n': - silentLineFeed(); - // CPPONLY: MOZ_FALLTHROUGH; - default: - /* - * Anything else Emit the input character as a - * character token. - * - * Stay in the data state. - */ - break; + } else { + if (++pos == endPos) { + break stateloop; } + c = checkChar(buf, pos); } - datamiddle: for (;;) { - ++pos; - // Perhaps at some point, it will be appropriate to do SIMD in Java, but not today. - // The line below advances pos by some number of code units that this state is indifferent to. - // CPPONLY: pos += accelerateAdvancementData(buf, pos, endPos); - for (;;) { - if (pos == endPos) { - break stateloop; - } - c = checkChar(buf, pos); - // Make sure to keep in sync with the switch above in the reconsume block! - switch (c) { - case '&': - /* - * U+0026 AMPERSAND (&) Switch to the character - * reference in data state. 
- */ - flushChars(buf, pos); - assert charRefBufLen == 0: "charRefBufLen not reset after previous use!"; - appendCharRefBuf(c); - setAdditionalAndRememberAmpersandLocation('\u0000'); - returnState = state; - state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos); - continue stateloop; - case '<': - /* - * U+003C LESS-THAN SIGN (<) Switch to the tag - * open state. - */ - flushChars(buf, pos); - - state = transition(state, Tokenizer.TAG_OPEN, reconsume, pos); - // `break` optimizes; `continue stateloop;` would be valid - break dataloop; - case '\u0000': - maybeEmitReplacementCharacter(buf, pos); - // Continue from above the accelerateAdvancementData call. - continue datamiddle; - case '\r': - emitCarriageReturn(buf, pos); - break stateloop; - case '\n': - silentLineFeed(); - // Continue from above the accelerateAdvancementData call. - continue datamiddle; - default: - /* - * Anything else Emit the input character as a - * character token. - * - * Stay in the data state. - */ - // Don't go back to accelerateAdvancementData to avoid - // bouncing back and forth in a way that doesn't make good - // use of SIMD when we have less than a SIMD stride to go - // or when we come here due to a non-BMP characters. - // The SIMD code doesn't have ALU handling for the remainder - // that is shorter than a SIMD stride, because this case - // in this switch has to exist anyway (for SIMD-unavailable - // and for non-BMP cases) and this innermost loop can serve - // that purpose, too. In the non-BMP case we stay on the - // ALU path until we end up in one of the other cases in this - // switch (e.g. end of line) in order to avoid bouncing back - // and forth when we have text in a non-BMP script instead - // of an isolated emoji. - // - // We need to increment pos when staying in this innermost - // loop! - ++pos; - continue; - } - } + switch (c) { + case '&': + /* + * U+0026 AMPERSAND (&) Switch to the character + * reference in data state. 
+ */ + flushChars(buf, pos); + assert charRefBufLen == 0: "charRefBufLen not reset after previous use!"; + appendCharRefBuf(c); + setAdditionalAndRememberAmpersandLocation('\u0000'); + returnState = state; + state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos); + continue stateloop; + case '<': + /* + * U+003C LESS-THAN SIGN (<) Switch to the tag + * open state. + */ + flushChars(buf, pos); + + state = transition(state, Tokenizer.TAG_OPEN, reconsume, pos); + // `break` optimizes; `continue stateloop;` would be valid + break dataloop; + case '\u0000': + maybeEmitReplacementCharacter(buf, pos); + continue; + case '\r': + emitCarriageReturn(buf, pos); + break stateloop; + case '\n': + silentLineFeed(); + // CPPONLY: MOZ_FALLTHROUGH; + default: + /* + * Anything else Emit the input character as a + * character token. + * + * Stay in the data state. + */ + continue; } } // CPPONLY: MOZ_FALLTHROUGH; @@ -4074,122 +4002,52 @@ public class Tokenizer implements Locator, Locator2 { // no fallthrough, reordering opportunity case RCDATA: rcdataloop: for (;;) { - // Ideally this reconsume block would be a separate state, RCDATA_RECONSUME above this one - // with fallthrough into this state. However, such a change would be disruptive to - // TransitionHandler and everything that works with returnState. if (reconsume) { reconsume = false; - // This is a manual copy of the switch below with break/continue - // adjusted as relevant. Make sure to keep in sync with the switch below! - switch (c) { - case '&': - /* - * U+0026 AMPERSAND (&) Switch to the character - * reference in RCDATA state. 
- */ - flushChars(buf, pos); - assert charRefBufLen == 0: "charRefBufLen not reset after previous use!"; - appendCharRefBuf(c); - setAdditionalAndRememberAmpersandLocation('\u0000'); - returnState = state; - state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos); - continue stateloop; - case '<': - /* - * U+003C LESS-THAN SIGN (<) Switch to the - * RCDATA less-than sign state. - */ - flushChars(buf, pos); - - state = transition(state, Tokenizer.RAWTEXT_RCDATA_LESS_THAN_SIGN, reconsume, pos); - continue stateloop; - case '\u0000': - maybeEmitReplacementCharacter(buf, pos); - break; - case '\r': - emitCarriageReturn(buf, pos); - break stateloop; - case '\n': - silentLineFeed(); - // CPPONLY: MOZ_FALLTHROUGH; - default: - /* - * Emit the current input character as a - * character token. Stay in the RCDATA state. - */ - break; + } else { + if (++pos == endPos) { + break stateloop; } + c = checkChar(buf, pos); } - rcdatamiddle: for (;;) { - ++pos; - // Perhaps at some point, it will be appropriate to do SIMD in Java, but not today. - // The line below advances pos by some number of code units that this state is indifferent to. - // RCDATA and DATA have the same set of characters that they are indifferent to, hence accelerateData. - // CPPONLY: pos += accelerateAdvancementData(buf, pos, endPos); - for (;;) { - if (pos == endPos) { - break stateloop; - } - c = checkChar(buf, pos); - // Make sure to keep in sync with the switch above in the reconsume block! - switch (c) { - case '&': - /* - * U+0026 AMPERSAND (&) Switch to the character - * reference in RCDATA state. 
- */ - flushChars(buf, pos); - assert charRefBufLen == 0: "charRefBufLen not reset after previous use!"; - appendCharRefBuf(c); - setAdditionalAndRememberAmpersandLocation('\u0000'); - returnState = state; - state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos); - continue stateloop; - case '<': - /* - * U+003C LESS-THAN SIGN (<) Switch to the - * RCDATA less-than sign state. - */ - flushChars(buf, pos); - - state = transition(state, Tokenizer.RAWTEXT_RCDATA_LESS_THAN_SIGN, reconsume, pos); - continue stateloop; - case '\u0000': - maybeEmitReplacementCharacter(buf, pos); - // Continue from above the accelerateAdvancementData call. - continue rcdatamiddle; - case '\r': - emitCarriageReturn(buf, pos); - break stateloop; - case '\n': - silentLineFeed(); - // Continue from above the accelerateAdvancementData call. - continue rcdatamiddle; - default: - /* - * Emit the current input character as a - * character token. Stay in the RCDATA state. - */ - // Don't go back to accelerateAdvancementData to avoid - // bouncing back and forth in a way that doesn't make good - // use of SIMD when we have less than a SIMD stride to go - // or when we come here due to a non-BMP characters. - // The SIMD code doesn't have ALU handling for the remainder - // that is shorter than a SIMD stride, because this case - // in this switch has to exist anyway (for SIMD-unavailable - // and for non-BMP cases) and this innermost loop can serve - // that purpose, too. In the non-BMP case we stay on the - // ALU path until we end up in one of the other cases in this - // switch (e.g. end of line) in order to avoid bouncing back - // and forth when we have text in a non-BMP script instead - // of an isolated emoji. - // - // We need to increment pos when staying in this innermost - // loop! - ++pos; - continue; - } - } + switch (c) { + case '&': + /* + * U+0026 AMPERSAND (&) Switch to the character + * reference in RCDATA state. 
+ */ + flushChars(buf, pos); + assert charRefBufLen == 0: "charRefBufLen not reset after previous use!"; + appendCharRefBuf(c); + setAdditionalAndRememberAmpersandLocation('\u0000'); + returnState = state; + state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos); + continue stateloop; + case '<': + /* + * U+003C LESS-THAN SIGN (<) Switch to the + * RCDATA less-than sign state. + */ + flushChars(buf, pos); + + returnState = state; + state = transition(state, Tokenizer.RAWTEXT_RCDATA_LESS_THAN_SIGN, reconsume, pos); + continue stateloop; + case '\u0000': + emitReplacementCharacter(buf, pos); + continue; + case '\r': + emitCarriageReturn(buf, pos); + break stateloop; + case '\n': + silentLineFeed(); + // CPPONLY: MOZ_FALLTHROUGH; + default: + /* + * Emit the current input character as a + * character token. Stay in the RCDATA state. + */ + continue; } } // no fallthrough, reordering opportunity @@ -6490,24 +6348,24 @@ public class Tokenizer implements Locator, Locator2 { forceQuirks = false; } - @Inline private void adjustDoubleHyphenAndAppendToStrBufCarriageReturn() + private void adjustDoubleHyphenAndAppendToStrBufCarriageReturn() throws SAXException { silentCarriageReturn(); adjustDoubleHyphenAndAppendToStrBufAndErr('\n', false); } - @Inline private void adjustDoubleHyphenAndAppendToStrBufLineFeed() + private void adjustDoubleHyphenAndAppendToStrBufLineFeed() throws SAXException { silentLineFeed(); adjustDoubleHyphenAndAppendToStrBufAndErr('\n', false); } - @Inline private void appendStrBufLineFeed() { + private void appendStrBufLineFeed() { silentLineFeed(); appendStrBuf('\n'); } - @Inline private void appendStrBufCarriageReturn() { + private void appendStrBufCarriageReturn() { silentCarriageReturn(); appendStrBuf('\n'); } @@ -6525,7 +6383,7 @@ public class Tokenizer implements Locator, Locator2 { // ]NOCPP] - @Inline private void emitCarriageReturn(@NoLength char[] buf, int pos) + private void emitCarriageReturn(@NoLength char[] buf, int 
pos) throws SAXException { silentCarriageReturn(); flushChars(buf, pos); @@ -6554,7 +6412,7 @@ public class Tokenizer implements Locator, Locator2 { cstart = pos + 1; } - @Inline private void setAdditionalAndRememberAmpersandLocation(char add) { + private void setAdditionalAndRememberAmpersandLocation(char add) { additional = add; // [NOCPP[ ampersandLocation = new LocatorImpl(this); @@ -7219,7 +7077,7 @@ public class Tokenizer implements Locator, Locator2 { * happened in a non-text context, this method turns that deferred suspension * request into an immediately-pending suspension request. */ - @Inline private void suspendIfRequestedAfterCurrentNonTextToken() { + private void suspendIfRequestedAfterCurrentNonTextToken() { if (suspendAfterCurrentNonTextToken) { suspendAfterCurrentNonTextToken = false; shouldSuspend = true; @@ -7363,7 +7221,7 @@ public class Tokenizer implements Locator, Locator2 { * @param val * @throws SAXException */ - @Inline private void emitOrAppendTwo(@Const @NoLength char[] val, int returnState) + private void emitOrAppendTwo(@Const @NoLength char[] val, int returnState) throws SAXException { if ((returnState & DATA_AND_RCDATA_MASK) != 0) { appendStrBuf(val[0]); @@ -7373,7 +7231,7 @@ public class Tokenizer implements Locator, Locator2 { } } - @Inline private void emitOrAppendOne(@Const @NoLength char[] val, int returnState) + private void emitOrAppendOne(@Const @NoLength char[] val, int returnState) throws SAXException { if ((returnState & DATA_AND_RCDATA_MASK) != 0) { appendStrBuf(val[0]); @@ -7410,7 +7268,7 @@ public class Tokenizer implements Locator, Locator2 { } } - @Inline public void requestSuspension() { + public void requestSuspension() { shouldSuspend = true; } @@ -7453,7 +7311,7 @@ public class Tokenizer implements Locator, Locator2 { // ]NOCPP] - @Inline public boolean isInDataState() { + public boolean isInDataState() { return (stateSave == DATA); } diff --git a/parser/html/moz.build b/parser/html/moz.build @@ -85,38 +85,6 @@ 
UNIFIED_SOURCES += [ "nsParserUtils.cpp", ] -# Each target needs to compile: -# (nsHtml5TokenizerALU.cpp XOR nsHtml5TokenizerALUStubs.cpp) -# AND -# (nsHtml5TokenizerSIMD.cpp XOR nsHtml5TokenizerSIMDStubs.cpp) -# AND -# (nsHtml5TokenizerALU.cpp OR nsHtml5TokenizerSIMD.cpp) -# -# Make sure the result is consistent with mozilla::htmlaccel::htmlaccelEnabled(). -# -# Due to https://github.com/llvm/llvm-project/issues/160886, none of the -# code here actually ends up with SIMD instructions, and SIMD stays in -# htmlaccelNotInline.cpp instead. Once the LLVM bug is fixed, the functions -# in htmlaccelNotInline.cpp should becomed always inlined and -# nsHtml5TokenizerSIMD.cpp should be built with HTML_ACCEL_FLAGS. - -if CONFIG["TARGET_CPU"] == "x86_64": - UNIFIED_SOURCES += [ - "nsHtml5TokenizerALU.cpp", - "nsHtml5TokenizerSIMD.cpp", - ] -elif CONFIG["TARGET_CPU"] == "aarch64" and CONFIG["TARGET_ENDIANNESS"] == "little": - # aarch64 doesn't need special flags for SIMD. - UNIFIED_SOURCES += [ - "nsHtml5TokenizerALUStubs.cpp", - "nsHtml5TokenizerSIMD.cpp", - ] -else: - UNIFIED_SOURCES += [ - "nsHtml5TokenizerALU.cpp", - "nsHtml5TokenizerSIMDStubs.cpp", - ] - FINAL_LIBRARY = "xul" LOCAL_INCLUDES += [ diff --git a/parser/html/nsHtml5Tokenizer.cpp b/parser/html/nsHtml5Tokenizer.cpp @@ -40,6 +40,8 @@ #include "nsHtml5Tokenizer.h" +#include "nsHtml5TokenizerLoopPolicies.h" + char16_t nsHtml5Tokenizer::LT_GT[] = {'<', '>'}; char16_t nsHtml5Tokenizer::LT_SOLIDUS[] = {'<', '/'}; char16_t nsHtml5Tokenizer::RSQB_RSQB[] = {']', ']'}; @@ -224,10 +226,8 @@ void nsHtml5Tokenizer::setLineNumber(int32_t line) { this->line = line; } -void nsHtml5Tokenizer::appendCharRefBuf(char16_t c) { - MOZ_RELEASE_ASSERT(charRefBufLen < charRefBuf.length, - "Attempted to overrun charRefBuf!"); - charRefBuf[charRefBufLen++] = c; +nsHtml5HtmlAttributes* nsHtml5Tokenizer::emptyAttributes() { + return nsHtml5HtmlAttributes::EMPTY_ATTRIBUTES; } void nsHtml5Tokenizer::emitOrAppendCharRefBuf(int32_t returnState) 
{ @@ -241,14 +241,26 @@ void nsHtml5Tokenizer::emitOrAppendCharRefBuf(int32_t returnState) { } } -void nsHtml5Tokenizer::appendStrBuf(char16_t c) { - MOZ_ASSERT(strBufLen < strBuf.length, "Previous buffer length insufficient."); - if (MOZ_UNLIKELY(strBufLen == strBuf.length)) { - if (MOZ_UNLIKELY(!EnsureBufferSpace(1))) { - MOZ_CRASH("Unable to recover from buffer reallocation failure"); - } +nsHtml5String nsHtml5Tokenizer::strBufToString() { + nsHtml5String str = nsHtml5Portability::newStringFromBuffer( + strBuf, 0, strBufLen, tokenHandler, + !newAttributesEachTime && + attributeName == nsHtml5AttributeName::ATTR_CLASS); + clearStrBufAfterUse(); + return str; +} + +void nsHtml5Tokenizer::strBufToDoctypeName() { + doctypeName = + nsHtml5Portability::newLocalNameFromBuffer(strBuf, strBufLen, interner); + clearStrBufAfterUse(); +} + +void nsHtml5Tokenizer::emitStrBuf() { + if (strBufLen > 0) { + tokenHandler->characters(strBuf, 0, strBufLen); + clearStrBufAfterUse(); } - strBuf[strBufLen++] = c; } void nsHtml5Tokenizer::appendStrBuf(char16_t* buffer, int32_t offset, @@ -331,156 +343,4169 @@ int32_t nsHtml5Tokenizer::emitCurrentTagToken(bool selfClosing, int32_t pos) { tokenHandler->startTag(tagName, attrs, selfClosing); } } - tagName = nullptr; - if (newAttributesEachTime) { - attributes = nullptr; - } else { - attributes->clear(0); - } - suspendIfRequestedAfterCurrentNonTextToken(); - return stateSave; + tagName = nullptr; + if (newAttributesEachTime) { + attributes = nullptr; + } else { + attributes->clear(0); + } + suspendIfRequestedAfterCurrentNonTextToken(); + return stateSave; +} + +void nsHtml5Tokenizer::attributeNameComplete() { + attributeName = + nsHtml5AttributeName::nameByBuffer(strBuf, strBufLen, interner); + if (!attributeName) { + nonInternedAttributeName->setNameForNonInterned( + nsHtml5Portability::newLocalNameFromBuffer(strBuf, strBufLen, + interner)); + attributeName = nonInternedAttributeName; + } + clearStrBufAfterUse(); + if (!attributes) { + 
attributes = new nsHtml5HtmlAttributes(0); + } + if (attributes->contains(attributeName)) { + errDuplicateAttribute(); + attributeName = nullptr; + } +} + +void nsHtml5Tokenizer::addAttributeWithoutValue() { + if (attributeName) { + attributes->addAttribute( + attributeName, nsHtml5Portability::newEmptyString(), attributeLine); + attributeName = nullptr; + } else { + clearStrBufAfterUse(); + } +} + +void nsHtml5Tokenizer::addAttributeWithValue() { + if (attributeName) { + nsHtml5String val = strBufToString(); + if (mViewSource) { + mViewSource->MaybeLinkifyAttributeValue(attributeName, val); + } + attributes->addAttribute(attributeName, val, attributeLine); + attributeName = nullptr; + } else { + clearStrBufAfterUse(); + } +} + +void nsHtml5Tokenizer::start() { + initializeWithoutStarting(); + tokenHandler->startTokenization(this); + if (mViewSource) { + line = 1; + col = -1; + nextCharOnNewLine = false; + } else if (tokenHandler->WantsLineAndColumn()) { + line = 0; + col = 1; + nextCharOnNewLine = true; + } else { + line = -1; + col = -1; + nextCharOnNewLine = false; + } +} + +bool nsHtml5Tokenizer::tokenizeBuffer(nsHtml5UTF16Buffer* buffer) { + int32_t state = stateSave; + int32_t returnState = returnStateSave; + char16_t c = '\0'; + shouldSuspend = false; + lastCR = false; + int32_t start = buffer->getStart(); + int32_t end = buffer->getEnd(); + int32_t pos = start - 1; + switch (state) { + case DATA: + case RCDATA: + case SCRIPT_DATA: + case PLAINTEXT: + case RAWTEXT: + case CDATA_SECTION: + case SCRIPT_DATA_ESCAPED: + case SCRIPT_DATA_ESCAPE_START: + case SCRIPT_DATA_ESCAPE_START_DASH: + case SCRIPT_DATA_ESCAPED_DASH: + case SCRIPT_DATA_ESCAPED_DASH_DASH: + case SCRIPT_DATA_DOUBLE_ESCAPE_START: + case SCRIPT_DATA_DOUBLE_ESCAPED: + case SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN: + case SCRIPT_DATA_DOUBLE_ESCAPED_DASH: + case SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH: + case SCRIPT_DATA_DOUBLE_ESCAPE_END: { + cstart = start; + break; + } + default: { + cstart = 
INT32_MAX; + break; + } + } + if (mViewSource) { + mViewSource->SetBuffer(buffer); + pos = stateLoop<nsHtml5ViewSourcePolicy>(state, c, pos, buffer->getBuffer(), + false, returnState, + buffer->getEnd()); + mViewSource->DropBuffer((pos == buffer->getEnd()) ? pos : pos + 1); + } else if (tokenHandler->WantsLineAndColumn()) { + pos = stateLoop<nsHtml5LineColPolicy>(state, c, pos, buffer->getBuffer(), + false, returnState, buffer->getEnd()); + } else { + pos = stateLoop<nsHtml5FastestPolicy>(state, c, pos, buffer->getBuffer(), + false, returnState, buffer->getEnd()); + } + if (pos == end) { + buffer->setStart(pos); + } else { + buffer->setStart(pos + 1); + } + return lastCR; +} + +template <class P> +int32_t nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, + char16_t* buf, bool reconsume, + int32_t returnState, int32_t endPos) { + bool reportedConsecutiveHyphens = false; +stateloop: + for (;;) { + switch (state) { + case DATA: { + for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + } + switch (c) { + case '&': { + flushChars(buf, pos); + MOZ_ASSERT(!charRefBufLen, + "charRefBufLen not reset after previous use!"); + appendCharRefBuf(c); + setAdditionalAndRememberAmpersandLocation('\0'); + returnState = state; + state = + P::transition(mViewSource.get(), + nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '<': { + flushChars(buf, pos); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::TAG_OPEN, reconsume, pos); + NS_HTML5_BREAK(dataloop); + } + case '\0': { + maybeEmitReplacementCharacter(buf, pos); + continue; + } + case '\r': { + emitCarriageReturn<P>(buf, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + default: { + continue; + } + } + } + dataloop_end:; + [[fallthrough]]; + } + case TAG_OPEN: { + for (;;) { + if 
(++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + if (c >= 'A' && c <= 'Z') { + endTag = false; + clearStrBufBeforeUse(); + appendStrBuf((char16_t)(c + 0x20)); + containsHyphen = false; + state = P::transition(mViewSource.get(), nsHtml5Tokenizer::TAG_NAME, + reconsume, pos); + NS_HTML5_BREAK(tagopenloop); + } else if (c >= 'a' && c <= 'z') { + endTag = false; + clearStrBufBeforeUse(); + appendStrBuf(c); + containsHyphen = false; + state = P::transition(mViewSource.get(), nsHtml5Tokenizer::TAG_NAME, + reconsume, pos); + NS_HTML5_BREAK(tagopenloop); + } + switch (c) { + case '!': { + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::MARKUP_DECLARATION_OPEN, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '/': { + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::CLOSE_TAG_OPEN, reconsume, + pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\?': { + if (viewingXmlSource) { + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::PROCESSING_INSTRUCTION, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + if (P::reportErrors) { + errProcessingInstruction(); + } + clearStrBufBeforeUse(); + appendStrBuf(c); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BOGUS_COMMENT, reconsume, + pos); + NS_HTML5_CONTINUE(stateloop); + } + case '>': { + if (P::reportErrors) { + errLtGt(); + } + tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 2); + cstart = pos + 1; + state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + default: { + if (P::reportErrors) { + errBadCharAfterLt(c); + } + tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 1); + cstart = pos; + reconsume = true; + state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + tagopenloop_end:; + [[fallthrough]]; + } + case TAG_NAME: { + for (;;) { + if (++pos == endPos) { + 
NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '\r': { + P::silentCarriageReturn(this); + strBufToElementNameString(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME, + reconsume, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + case ' ': + case '\t': + case '\f': { + strBufToElementNameString(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME, + reconsume, pos); + NS_HTML5_BREAK(tagnameloop); + } + case '/': { + strBufToElementNameString(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::SELF_CLOSING_START_TAG, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '>': { + strBufToElementNameString(); + state = P::transition(mViewSource.get(), + emitCurrentTagToken(false, pos), reconsume, + pos); + if (shouldSuspend) { + NS_HTML5_BREAK(stateloop); + } + NS_HTML5_CONTINUE(stateloop); + } + case '\0': { + c = 0xfffd; + [[fallthrough]]; + } + default: { + if (c >= 'A' && c <= 'Z') { + c += 0x20; + } else if (c == '-') { + containsHyphen = true; + } + appendStrBuf(c); + continue; + } + } + } + tagnameloop_end:; + [[fallthrough]]; + } + case BEFORE_ATTRIBUTE_NAME: { + for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + } + switch (c) { + case '\r': { + P::silentCarriageReturn(this); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + case ' ': + case '\t': + case '\f': { + continue; + } + case '/': { + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::SELF_CLOSING_START_TAG, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '>': { + state = P::transition(mViewSource.get(), + emitCurrentTagToken(false, pos), reconsume, + pos); + if (shouldSuspend) { + NS_HTML5_BREAK(stateloop); + } + 
NS_HTML5_CONTINUE(stateloop); + } + case '\0': { + c = 0xfffd; + [[fallthrough]]; + } + case '\"': + case '\'': + case '<': + case '=': { + if (P::reportErrors) { + errBadCharBeforeAttributeNameOrNull(c); + } + [[fallthrough]]; + } + default: { + if (c >= 'A' && c <= 'Z') { + c += 0x20; + } + attributeLine = line; + clearStrBufBeforeUse(); + appendStrBuf(c); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::ATTRIBUTE_NAME, reconsume, + pos); + NS_HTML5_BREAK(beforeattributenameloop); + } + } + } + beforeattributenameloop_end:; + [[fallthrough]]; + } + case ATTRIBUTE_NAME: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '\r': { + P::silentCarriageReturn(this); + attributeNameComplete(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::AFTER_ATTRIBUTE_NAME, + reconsume, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + case ' ': + case '\t': + case '\f': { + attributeNameComplete(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::AFTER_ATTRIBUTE_NAME, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '/': { + attributeNameComplete(); + addAttributeWithoutValue(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::SELF_CLOSING_START_TAG, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '=': { + attributeNameComplete(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BEFORE_ATTRIBUTE_VALUE, + reconsume, pos); + NS_HTML5_BREAK(attributenameloop); + } + case '>': { + attributeNameComplete(); + addAttributeWithoutValue(); + state = P::transition(mViewSource.get(), + emitCurrentTagToken(false, pos), reconsume, + pos); + if (shouldSuspend) { + NS_HTML5_BREAK(stateloop); + } + NS_HTML5_CONTINUE(stateloop); + } + case '\0': { + c = 0xfffd; + [[fallthrough]]; + } + case '\"': + case '\'': + case '<': { + if (P::reportErrors) { + 
errQuoteOrLtInAttributeNameOrNull(c); + } + [[fallthrough]]; + } + default: { + if (c >= 'A' && c <= 'Z') { + c += 0x20; + } + appendStrBuf(c); + continue; + } + } + } + attributenameloop_end:; + [[fallthrough]]; + } + case BEFORE_ATTRIBUTE_VALUE: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '\r': { + P::silentCarriageReturn(this); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + case ' ': + case '\t': + case '\f': { + continue; + } + case '\"': { + attributeLine = line; + clearStrBufBeforeUse(); + state = + P::transition(mViewSource.get(), + nsHtml5Tokenizer::ATTRIBUTE_VALUE_DOUBLE_QUOTED, + reconsume, pos); + NS_HTML5_BREAK(beforeattributevalueloop); + } + case '&': { + attributeLine = line; + clearStrBufBeforeUse(); + reconsume = true; + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::ATTRIBUTE_VALUE_UNQUOTED, + reconsume, pos); + + NS_HTML5_CONTINUE(stateloop); + } + case '\'': { + attributeLine = line; + clearStrBufBeforeUse(); + state = + P::transition(mViewSource.get(), + nsHtml5Tokenizer::ATTRIBUTE_VALUE_SINGLE_QUOTED, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '>': { + if (P::reportErrors) { + errAttributeValueMissing(); + } + addAttributeWithoutValue(); + state = P::transition(mViewSource.get(), + emitCurrentTagToken(false, pos), reconsume, + pos); + if (shouldSuspend) { + NS_HTML5_BREAK(stateloop); + } + NS_HTML5_CONTINUE(stateloop); + } + case '\0': { + c = 0xfffd; + [[fallthrough]]; + } + case '<': + case '=': + case '`': { + if (P::reportErrors) { + errLtOrEqualsOrGraveInUnquotedAttributeOrNull(c); + } + [[fallthrough]]; + } + default: { + attributeLine = line; + clearStrBufBeforeUse(); + appendStrBuf(c); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::ATTRIBUTE_VALUE_UNQUOTED, + reconsume, pos); + + NS_HTML5_CONTINUE(stateloop); + } + } + } + 
beforeattributevalueloop_end:; + [[fallthrough]]; + } + case ATTRIBUTE_VALUE_DOUBLE_QUOTED: { + for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + } + switch (c) { + case '\"': { + addAttributeWithValue(); + state = + P::transition(mViewSource.get(), + nsHtml5Tokenizer::AFTER_ATTRIBUTE_VALUE_QUOTED, + reconsume, pos); + NS_HTML5_BREAK(attributevaluedoublequotedloop); + } + case '&': { + MOZ_ASSERT(!charRefBufLen, + "charRefBufLen not reset after previous use!"); + appendCharRefBuf(c); + setAdditionalAndRememberAmpersandLocation('\"'); + returnState = state; + state = + P::transition(mViewSource.get(), + nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\r': { + appendStrBufCarriageReturn<P>(); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + appendStrBufLineFeed<P>(); + continue; + } + case '\0': { + c = 0xfffd; + [[fallthrough]]; + } + default: { + appendStrBuf(c); + continue; + } + } + } + attributevaluedoublequotedloop_end:; + [[fallthrough]]; + } + case AFTER_ATTRIBUTE_VALUE_QUOTED: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '\r': { + P::silentCarriageReturn(this); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME, + reconsume, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + case ' ': + case '\t': + case '\f': { + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '/': { + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::SELF_CLOSING_START_TAG, + reconsume, pos); + NS_HTML5_BREAK(afterattributevaluequotedloop); + } + case '>': { + state = P::transition(mViewSource.get(), + emitCurrentTagToken(false, pos), 
reconsume, + pos); + if (shouldSuspend) { + NS_HTML5_BREAK(stateloop); + } + NS_HTML5_CONTINUE(stateloop); + } + default: { + if (P::reportErrors) { + errNoSpaceBetweenAttributes(); + } + reconsume = true; + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + afterattributevaluequotedloop_end:; + [[fallthrough]]; + } + case SELF_CLOSING_START_TAG: { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '>': { + state = + P::transition(mViewSource.get(), emitCurrentTagToken(true, pos), + reconsume, pos); + if (shouldSuspend) { + NS_HTML5_BREAK(stateloop); + } + NS_HTML5_CONTINUE(stateloop); + } + default: { + if (P::reportErrors) { + errSlashNotFollowedByGt(); + } + reconsume = true; + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + case ATTRIBUTE_VALUE_UNQUOTED: { + for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + } + switch (c) { + case '\r': { + P::silentCarriageReturn(this); + addAttributeWithValue(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME, + reconsume, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + case ' ': + case '\t': + case '\f': { + addAttributeWithValue(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '&': { + MOZ_ASSERT(!charRefBufLen, + "charRefBufLen not reset after previous use!"); + appendCharRefBuf(c); + setAdditionalAndRememberAmpersandLocation('>'); + returnState = state; + state = + P::transition(mViewSource.get(), + nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE, + reconsume, pos); 
+ NS_HTML5_CONTINUE(stateloop); + } + case '>': { + addAttributeWithValue(); + state = P::transition(mViewSource.get(), + emitCurrentTagToken(false, pos), reconsume, + pos); + if (shouldSuspend) { + NS_HTML5_BREAK(stateloop); + } + NS_HTML5_CONTINUE(stateloop); + } + case '\0': { + c = 0xfffd; + [[fallthrough]]; + } + case '<': + case '\"': + case '\'': + case '=': + case '`': { + if (P::reportErrors) { + errUnquotedAttributeValOrNull(c); + } + [[fallthrough]]; + } + default: { + appendStrBuf(c); + continue; + } + } + } + } + case AFTER_ATTRIBUTE_NAME: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '\r': { + P::silentCarriageReturn(this); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + case ' ': + case '\t': + case '\f': { + continue; + } + case '/': { + addAttributeWithoutValue(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::SELF_CLOSING_START_TAG, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '=': { + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BEFORE_ATTRIBUTE_VALUE, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '>': { + addAttributeWithoutValue(); + state = P::transition(mViewSource.get(), + emitCurrentTagToken(false, pos), reconsume, + pos); + if (shouldSuspend) { + NS_HTML5_BREAK(stateloop); + } + NS_HTML5_CONTINUE(stateloop); + } + case '\0': { + c = 0xfffd; + [[fallthrough]]; + } + case '\"': + case '\'': + case '<': { + if (P::reportErrors) { + errQuoteOrLtInAttributeNameOrNull(c); + } + [[fallthrough]]; + } + default: { + addAttributeWithoutValue(); + if (c >= 'A' && c <= 'Z') { + c += 0x20; + } + clearStrBufBeforeUse(); + appendStrBuf(c); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::ATTRIBUTE_NAME, reconsume, + pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + } + case MARKUP_DECLARATION_OPEN: { + for (;;) { + if (++pos == endPos) { + 
NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '-': { + clearStrBufBeforeUse(); + appendStrBuf(c); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::MARKUP_DECLARATION_HYPHEN, + reconsume, pos); + NS_HTML5_BREAK(markupdeclarationopenloop); + } + case 'd': + case 'D': { + clearStrBufBeforeUse(); + appendStrBuf(c); + index = 0; + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::MARKUP_DECLARATION_OCTYPE, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '[': { + if (tokenHandler->cdataSectionAllowed()) { + clearStrBufBeforeUse(); + appendStrBuf(c); + index = 0; + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::CDATA_START, reconsume, + pos); + NS_HTML5_CONTINUE(stateloop); + } + [[fallthrough]]; + } + default: { + if (P::reportErrors) { + errBogusComment(); + } + clearStrBufBeforeUse(); + reconsume = true; + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BOGUS_COMMENT, reconsume, + pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + markupdeclarationopenloop_end:; + [[fallthrough]]; + } + case MARKUP_DECLARATION_HYPHEN: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '-': { + clearStrBufAfterOneHyphen(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::COMMENT_START, reconsume, + pos); + NS_HTML5_BREAK(markupdeclarationhyphenloop); + } + default: { + if (P::reportErrors) { + errBogusComment(); + } + reconsume = true; + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BOGUS_COMMENT, reconsume, + pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + markupdeclarationhyphenloop_end:; + [[fallthrough]]; + } + case COMMENT_START: { + reportedConsecutiveHyphens = false; + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '-': { + appendStrBuf(c); + state = P::transition(mViewSource.get(), 
+ nsHtml5Tokenizer::COMMENT_START_DASH, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '>': { + if (P::reportErrors) { + errPrematureEndOfComment(); + } + emitComment(0, pos); + state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, + reconsume, pos); + if (shouldSuspend) { + NS_HTML5_BREAK(stateloop); + } + NS_HTML5_CONTINUE(stateloop); + } + case '<': { + appendStrBuf(c); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::COMMENT_LESSTHAN, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\r': { + appendStrBufCarriageReturn<P>(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::COMMENT, reconsume, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + appendStrBufLineFeed<P>(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::COMMENT, reconsume, pos); + NS_HTML5_BREAK(commentstartloop); + } + case '\0': { + c = 0xfffd; + [[fallthrough]]; + } + default: { + appendStrBuf(c); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::COMMENT, reconsume, pos); + NS_HTML5_BREAK(commentstartloop); + } + } + } + commentstartloop_end:; + [[fallthrough]]; + } + case COMMENT: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '-': { + appendStrBuf(c); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::COMMENT_END_DASH, + reconsume, pos); + NS_HTML5_BREAK(commentloop); + } + case '<': { + appendStrBuf(c); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::COMMENT_LESSTHAN, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\r': { + appendStrBufCarriageReturn<P>(); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + appendStrBufLineFeed<P>(); + continue; + } + case '\0': { + c = 0xfffd; + [[fallthrough]]; + } + default: { + appendStrBuf(c); + continue; + } + } + } + commentloop_end:; + [[fallthrough]]; + } + case COMMENT_END_DASH: { + for (;;) { + if (++pos == endPos) { + 
NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '-': { + appendStrBuf(c); + state = + P::transition(mViewSource.get(), + nsHtml5Tokenizer::COMMENT_END, reconsume, pos); + NS_HTML5_BREAK(commentenddashloop); + } + case '<': { + appendStrBuf(c); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::COMMENT_LESSTHAN, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\r': { + appendStrBufCarriageReturn<P>(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::COMMENT, reconsume, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + appendStrBufLineFeed<P>(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::COMMENT, reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\0': { + c = 0xfffd; + [[fallthrough]]; + } + default: { + appendStrBuf(c); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::COMMENT, reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + commentenddashloop_end:; + [[fallthrough]]; + } + case COMMENT_END: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '>': { + emitComment(2, pos); + state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, + reconsume, pos); + if (shouldSuspend) { + NS_HTML5_BREAK(stateloop); + } + NS_HTML5_CONTINUE(stateloop); + } + case '-': { + adjustDoubleHyphenAndAppendToStrBufAndErr( + c, reportedConsecutiveHyphens); + reportedConsecutiveHyphens = true; + continue; + } + case '<': { + appendStrBuf(c); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::COMMENT_LESSTHAN, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\r': { + adjustDoubleHyphenAndAppendToStrBufCarriageReturn<P>(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::COMMENT, reconsume, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + adjustDoubleHyphenAndAppendToStrBufLineFeed<P>(); + state = 
P::transition(mViewSource.get(), + nsHtml5Tokenizer::COMMENT, reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '!': { + appendStrBuf(c); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::COMMENT_END_BANG, + reconsume, pos); + NS_HTML5_BREAK(commentendloop); + } + case '\0': { + c = 0xfffd; + [[fallthrough]]; + } + default: { + adjustDoubleHyphenAndAppendToStrBufAndErr( + c, reportedConsecutiveHyphens); + reportedConsecutiveHyphens = true; + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::COMMENT, reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + commentendloop_end:; + [[fallthrough]]; + } + case COMMENT_END_BANG: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '>': { + emitComment(3, pos); + state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, + reconsume, pos); + if (shouldSuspend) { + NS_HTML5_BREAK(stateloop); + } + NS_HTML5_CONTINUE(stateloop); + } + case '-': { + appendStrBuf(c); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::COMMENT_END_DASH, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\r': { + appendStrBufCarriageReturn<P>(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::COMMENT, reconsume, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + appendStrBufLineFeed<P>(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::COMMENT, reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\0': { + c = 0xfffd; + [[fallthrough]]; + } + default: { + appendStrBuf(c); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::COMMENT, reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + } + case COMMENT_LESSTHAN: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '!': { + appendStrBuf(c); + state = P::transition(mViewSource.get(), + 
nsHtml5Tokenizer::COMMENT_LESSTHAN_BANG, + reconsume, pos); + NS_HTML5_BREAK(commentlessthanloop); + } + case '<': { + appendStrBuf(c); + continue; + } + case '-': { + appendStrBuf(c); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::COMMENT_END_DASH, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\r': { + appendStrBufCarriageReturn<P>(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::COMMENT, reconsume, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + appendStrBufLineFeed<P>(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::COMMENT, reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\0': { + c = 0xfffd; + [[fallthrough]]; + } + default: { + appendStrBuf(c); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::COMMENT, reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + commentlessthanloop_end:; + [[fallthrough]]; + } + case COMMENT_LESSTHAN_BANG: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '-': { + appendStrBuf(c); + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::COMMENT_LESSTHAN_BANG_DASH, reconsume, pos); + NS_HTML5_BREAK(commentlessthanbangloop); + } + case '<': { + appendStrBuf(c); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::COMMENT_LESSTHAN, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\r': { + appendStrBufCarriageReturn<P>(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::COMMENT, reconsume, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + appendStrBufLineFeed<P>(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::COMMENT, reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\0': { + c = 0xfffd; + [[fallthrough]]; + } + default: { + appendStrBuf(c); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::COMMENT, reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + } + 
} + commentlessthanbangloop_end:; + [[fallthrough]]; + } + case COMMENT_LESSTHAN_BANG_DASH: { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '-': { + appendStrBuf(c); + state = + P::transition(mViewSource.get(), + nsHtml5Tokenizer::COMMENT_LESSTHAN_BANG_DASH_DASH, + reconsume, pos); + break; + } + case '<': { + appendStrBuf(c); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::COMMENT_LESSTHAN, reconsume, + pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\r': { + appendStrBufCarriageReturn<P>(); + state = P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT, + reconsume, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + appendStrBufLineFeed<P>(); + state = P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\0': { + c = 0xfffd; + [[fallthrough]]; + } + default: { + appendStrBuf(c); + state = P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + } + [[fallthrough]]; + } + case COMMENT_LESSTHAN_BANG_DASH_DASH: { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '>': { + appendStrBuf(c); + emitComment(3, pos); + state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, + reconsume, pos); + if (shouldSuspend) { + NS_HTML5_BREAK(stateloop); + } + NS_HTML5_CONTINUE(stateloop); + } + case '-': { + if (P::reportErrors) { + errNestedComment(); + } + adjustDoubleHyphenAndAppendToStrBufAndErr( + c, reportedConsecutiveHyphens); + reportedConsecutiveHyphens = true; + state = + P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT_END, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\r': { + c = '\n'; + P::silentCarriageReturn(this); + if (P::reportErrors) { + errNestedComment(); + } + adjustDoubleHyphenAndAppendToStrBufAndErr( + c, reportedConsecutiveHyphens); 
+ reportedConsecutiveHyphens = true; + state = P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT, + reconsume, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + if (P::reportErrors) { + errNestedComment(); + } + adjustDoubleHyphenAndAppendToStrBufAndErr( + c, reportedConsecutiveHyphens); + reportedConsecutiveHyphens = true; + state = P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '!': { + if (P::reportErrors) { + errNestedComment(); + } + adjustDoubleHyphenAndAppendToStrBufAndErr( + c, reportedConsecutiveHyphens); + reportedConsecutiveHyphens = true; + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::COMMENT_END_BANG, reconsume, + pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\0': { + c = 0xfffd; + [[fallthrough]]; + } + default: { + if (P::reportErrors) { + errNestedComment(); + } + adjustDoubleHyphenAndAppendToStrBufAndErr( + c, reportedConsecutiveHyphens); + reportedConsecutiveHyphens = true; + state = P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + case COMMENT_START_DASH: { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '-': { + appendStrBuf(c); + state = + P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT_END, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '>': { + if (P::reportErrors) { + errPrematureEndOfComment(); + } + emitComment(1, pos); + state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, + reconsume, pos); + if (shouldSuspend) { + NS_HTML5_BREAK(stateloop); + } + NS_HTML5_CONTINUE(stateloop); + } + case '<': { + appendStrBuf(c); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::COMMENT_LESSTHAN, reconsume, + pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\r': { + appendStrBufCarriageReturn<P>(); + state = 
P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT, + reconsume, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + appendStrBufLineFeed<P>(); + state = P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\0': { + c = 0xfffd; + [[fallthrough]]; + } + default: { + appendStrBuf(c); + state = P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + case CDATA_START: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + if (index < 6) { + if (c == nsHtml5Tokenizer::CDATA_LSQB[index]) { + appendStrBuf(c); + } else { + if (P::reportErrors) { + errBogusComment(); + } + reconsume = true; + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BOGUS_COMMENT, reconsume, + pos); + NS_HTML5_CONTINUE(stateloop); + } + index++; + continue; + } else { + clearStrBufAfterUse(); + cstart = pos; + reconsume = true; + state = + P::transition(mViewSource.get(), + nsHtml5Tokenizer::CDATA_SECTION, reconsume, pos); + break; + } + } + [[fallthrough]]; + } + case CDATA_SECTION: { + for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + } + switch (c) { + case ']': { + flushChars(buf, pos); + state = + P::transition(mViewSource.get(), nsHtml5Tokenizer::CDATA_RSQB, + reconsume, pos); + NS_HTML5_BREAK(cdatasectionloop); + } + case '\0': { + maybeEmitReplacementCharacter(buf, pos); + continue; + } + case '\r': { + emitCarriageReturn<P>(buf, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + default: { + continue; + } + } + } + cdatasectionloop_end:; + [[fallthrough]]; + } + case CDATA_RSQB: { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case ']': { + state = 
P::transition(mViewSource.get(), + nsHtml5Tokenizer::CDATA_RSQB_RSQB, reconsume, + pos); + break; + } + default: { + tokenHandler->characters(nsHtml5Tokenizer::RSQB_RSQB, 0, 1); + cstart = pos; + reconsume = true; + state = + P::transition(mViewSource.get(), + nsHtml5Tokenizer::CDATA_SECTION, reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + } + [[fallthrough]]; + } + case CDATA_RSQB_RSQB: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case ']': { + tokenHandler->characters(nsHtml5Tokenizer::RSQB_RSQB, 0, 1); + continue; + } + case '>': { + cstart = pos + 1; + state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, + reconsume, pos); + suspendIfRequestedAfterCurrentNonTextToken(); + if (shouldSuspend) { + NS_HTML5_BREAK(stateloop); + } + NS_HTML5_CONTINUE(stateloop); + } + default: { + tokenHandler->characters(nsHtml5Tokenizer::RSQB_RSQB, 0, 2); + cstart = pos; + reconsume = true; + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::CDATA_SECTION, reconsume, + pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + } + case ATTRIBUTE_VALUE_SINGLE_QUOTED: { + for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + } + switch (c) { + case '\'': { + addAttributeWithValue(); + state = + P::transition(mViewSource.get(), + nsHtml5Tokenizer::AFTER_ATTRIBUTE_VALUE_QUOTED, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '&': { + MOZ_ASSERT(!charRefBufLen, + "charRefBufLen not reset after previous use!"); + appendCharRefBuf(c); + setAdditionalAndRememberAmpersandLocation('\''); + returnState = state; + state = + P::transition(mViewSource.get(), + nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE, + reconsume, pos); + NS_HTML5_BREAK(attributevaluesinglequotedloop); + } + case '\r': { + appendStrBufCarriageReturn<P>(); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + 
appendStrBufLineFeed<P>(); + continue; + } + case '\0': { + c = 0xfffd; + [[fallthrough]]; + } + default: { + appendStrBuf(c); + continue; + } + } + } + attributevaluesinglequotedloop_end:; + [[fallthrough]]; + } + case CONSUME_CHARACTER_REFERENCE: { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case ' ': + case '\t': + case '\n': + case '\r': + case '\f': + case '<': + case '&': + case '\0': + case ';': { + emitOrAppendCharRefBuf(returnState); + if (!(returnState & DATA_AND_RCDATA_MASK)) { + cstart = pos; + } + reconsume = true; + state = + P::transition(mViewSource.get(), returnState, reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '#': { + appendCharRefBuf('#'); + state = + P::transition(mViewSource.get(), nsHtml5Tokenizer::CONSUME_NCR, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + default: { + if (c == additional) { + emitOrAppendCharRefBuf(returnState); + reconsume = true; + state = + P::transition(mViewSource.get(), returnState, reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + if (c >= 'a' && c <= 'z') { + firstCharKey = c - 'a' + 26; + } else if (c >= 'A' && c <= 'Z') { + firstCharKey = c - 'A'; + } else { + if (c == ';') { + if (P::reportErrors) { + errNoNamedCharacterMatch(); + } + } + emitOrAppendCharRefBuf(returnState); + if (!(returnState & DATA_AND_RCDATA_MASK)) { + cstart = pos; + } + reconsume = true; + state = + P::transition(mViewSource.get(), returnState, reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + appendCharRefBuf(c); + state = + P::transition(mViewSource.get(), + nsHtml5Tokenizer::CHARACTER_REFERENCE_HILO_LOOKUP, + reconsume, pos); + break; + } + } + [[fallthrough]]; + } + case CHARACTER_REFERENCE_HILO_LOOKUP: { + { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + int32_t hilo = 0; + if (c <= 'z') { + const int32_t* row = nsHtml5NamedCharactersAccel::HILO_ACCEL[c]; + if (row) { + hilo = 
row[firstCharKey]; + } + } + if (!hilo) { + if (c == ';') { + if (P::reportErrors) { + errNoNamedCharacterMatch(); + } + } + emitOrAppendCharRefBuf(returnState); + if (!(returnState & DATA_AND_RCDATA_MASK)) { + cstart = pos; + } + reconsume = true; + state = + P::transition(mViewSource.get(), returnState, reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + appendCharRefBuf(c); + lo = hilo & 0xFFFF; + hi = hilo >> 16; + entCol = -1; + candidate = -1; + charRefBufMark = 0; + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::CHARACTER_REFERENCE_TAIL, + reconsume, pos); + } + [[fallthrough]]; + } + case CHARACTER_REFERENCE_TAIL: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + entCol++; + for (;;) { + if (hi < lo) { + NS_HTML5_BREAK(outer); + } + if (entCol == nsHtml5NamedCharacters::NAMES[lo].length()) { + candidate = lo; + charRefBufMark = charRefBufLen; + lo++; + } else if (entCol > nsHtml5NamedCharacters::NAMES[lo].length()) { + NS_HTML5_BREAK(outer); + } else if (c > nsHtml5NamedCharacters::NAMES[lo].charAt(entCol)) { + lo++; + } else { + NS_HTML5_BREAK(loloop); + } + } + loloop_end:; + for (;;) { + if (hi < lo) { + NS_HTML5_BREAK(outer); + } + if (entCol == nsHtml5NamedCharacters::NAMES[hi].length()) { + NS_HTML5_BREAK(hiloop); + } + if (entCol > nsHtml5NamedCharacters::NAMES[hi].length()) { + NS_HTML5_BREAK(outer); + } else if (c < nsHtml5NamedCharacters::NAMES[hi].charAt(entCol)) { + hi--; + } else { + NS_HTML5_BREAK(hiloop); + } + } + hiloop_end:; + if (c == ';') { + if (entCol + 1 == nsHtml5NamedCharacters::NAMES[lo].length()) { + candidate = lo; + charRefBufMark = charRefBufLen; + } + NS_HTML5_BREAK(outer); + } + if (hi < lo) { + NS_HTML5_BREAK(outer); + } + appendCharRefBuf(c); + continue; + } + outer_end:; + if (candidate == -1) { + if (c == ';') { + if (P::reportErrors) { + errNoNamedCharacterMatch(); + } + } + emitOrAppendCharRefBuf(returnState); + if (!(returnState & 
DATA_AND_RCDATA_MASK)) { + cstart = pos; + } + reconsume = true; + state = P::transition(mViewSource.get(), returnState, reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } else { + const nsHtml5CharacterName& candidateName = + nsHtml5NamedCharacters::NAMES[candidate]; + if (!candidateName.length() || + candidateName.charAt(candidateName.length() - 1) != ';') { + if ((returnState & DATA_AND_RCDATA_MASK)) { + char16_t ch; + if (charRefBufMark == charRefBufLen) { + ch = c; + } else { + ch = charRefBuf[charRefBufMark]; + } + if (ch == '=' || (ch >= '0' && ch <= '9') || + (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z')) { + if (c == ';') { + if (P::reportErrors) { + errNoNamedCharacterMatch(); + } + } + appendCharRefBufToStrBuf(); + reconsume = true; + state = P::transition(mViewSource.get(), returnState, reconsume, + pos); + NS_HTML5_CONTINUE(stateloop); + } + } + if ((returnState & DATA_AND_RCDATA_MASK)) { + if (P::reportErrors) { + errUnescapedAmpersandInterpretedAsCharacterReference(); + } + } else { + if (P::reportErrors) { + errNotSemicolonTerminated(); + } + } + } + P::completedNamedCharacterReference(mViewSource.get()); + const char16_t* val = nsHtml5NamedCharacters::VALUES[candidate]; + if (!val[1]) { + emitOrAppendOne(val, returnState); + } else { + emitOrAppendTwo(val, returnState); + } + if (charRefBufMark < charRefBufLen) { + if ((returnState & DATA_AND_RCDATA_MASK)) { + appendStrBuf(charRefBuf, charRefBufMark, + charRefBufLen - charRefBufMark); + } else { + tokenHandler->characters(charRefBuf, charRefBufMark, + charRefBufLen - charRefBufMark); + } + } + bool earlyBreak = (c == ';' && charRefBufMark == charRefBufLen); + charRefBufLen = 0; + if (!(returnState & DATA_AND_RCDATA_MASK)) { + cstart = earlyBreak ? 
pos + 1 : pos; + } + reconsume = !earlyBreak; + state = P::transition(mViewSource.get(), returnState, reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + } + case CONSUME_NCR: { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + value = 0; + seenDigits = false; + switch (c) { + case 'x': + case 'X': { + appendCharRefBuf(c); + state = + P::transition(mViewSource.get(), nsHtml5Tokenizer::HEX_NCR_LOOP, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + default: { + reconsume = true; + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::DECIMAL_NRC_LOOP, reconsume, + pos); + break; + } + } + [[fallthrough]]; + } + case DECIMAL_NRC_LOOP: { + for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + } + MOZ_ASSERT(value >= 0, "value must not become negative."); + if (c >= '0' && c <= '9') { + seenDigits = true; + if (value <= 0x10FFFF) { + value *= 10; + value += c - '0'; + } + continue; + } else if (c == ';') { + if (seenDigits) { + if (!(returnState & DATA_AND_RCDATA_MASK)) { + cstart = pos + 1; + } + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::HANDLE_NCR_VALUE, + reconsume, pos); + NS_HTML5_BREAK(decimalloop); + } else { + if (P::reportErrors) { + errNoDigitsInNCR(); + } + appendCharRefBuf(';'); + emitOrAppendCharRefBuf(returnState); + if (!(returnState & DATA_AND_RCDATA_MASK)) { + cstart = pos + 1; + } + state = + P::transition(mViewSource.get(), returnState, reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + } else { + if (!seenDigits) { + if (P::reportErrors) { + errNoDigitsInNCR(); + } + emitOrAppendCharRefBuf(returnState); + if (!(returnState & DATA_AND_RCDATA_MASK)) { + cstart = pos; + } + reconsume = true; + state = + P::transition(mViewSource.get(), returnState, reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } else { + if (P::reportErrors) { + errCharRefLacksSemicolon(); + } 
+ if (!(returnState & DATA_AND_RCDATA_MASK)) { + cstart = pos; + } + reconsume = true; + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::HANDLE_NCR_VALUE, + reconsume, pos); + NS_HTML5_BREAK(decimalloop); + } + } + } + decimalloop_end:; + [[fallthrough]]; + } + case HANDLE_NCR_VALUE: { + charRefBufLen = 0; + handleNcrValue(returnState); + state = P::transition(mViewSource.get(), returnState, reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case HEX_NCR_LOOP: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + MOZ_ASSERT(value >= 0, "value must not become negative."); + if (c >= '0' && c <= '9') { + seenDigits = true; + if (value <= 0x10FFFF) { + value *= 16; + value += c - '0'; + } + continue; + } else if (c >= 'A' && c <= 'F') { + seenDigits = true; + if (value <= 0x10FFFF) { + value *= 16; + value += c - 'A' + 10; + } + continue; + } else if (c >= 'a' && c <= 'f') { + seenDigits = true; + if (value <= 0x10FFFF) { + value *= 16; + value += c - 'a' + 10; + } + continue; + } else if (c == ';') { + if (seenDigits) { + if (!(returnState & DATA_AND_RCDATA_MASK)) { + cstart = pos + 1; + } + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::HANDLE_NCR_VALUE, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } else { + if (P::reportErrors) { + errNoDigitsInNCR(); + } + appendCharRefBuf(';'); + emitOrAppendCharRefBuf(returnState); + if (!(returnState & DATA_AND_RCDATA_MASK)) { + cstart = pos + 1; + } + state = + P::transition(mViewSource.get(), returnState, reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + } else { + if (!seenDigits) { + if (P::reportErrors) { + errNoDigitsInNCR(); + } + emitOrAppendCharRefBuf(returnState); + if (!(returnState & DATA_AND_RCDATA_MASK)) { + cstart = pos; + } + reconsume = true; + state = + P::transition(mViewSource.get(), returnState, reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } else { + if (P::reportErrors) { + 
errCharRefLacksSemicolon(); + } + if (!(returnState & DATA_AND_RCDATA_MASK)) { + cstart = pos; + } + reconsume = true; + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::HANDLE_NCR_VALUE, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + } + case PLAINTEXT: { + for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + } + switch (c) { + case '\0': { + emitPlaintextReplacementCharacter(buf, pos); + continue; + } + case '\r': { + emitCarriageReturn<P>(buf, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + default: { + continue; + } + } + } + } + case CLOSE_TAG_OPEN: { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '>': { + if (P::reportErrors) { + errLtSlashGt(); + } + cstart = pos + 1; + state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\r': { + P::silentCarriageReturn(this); + if (P::reportErrors) { + errGarbageAfterLtSlash(); + } + clearStrBufBeforeUse(); + appendStrBuf('\n'); + state = + P::transition(mViewSource.get(), + nsHtml5Tokenizer::BOGUS_COMMENT, reconsume, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + if (P::reportErrors) { + errGarbageAfterLtSlash(); + } + clearStrBufBeforeUse(); + appendStrBuf(c); + state = + P::transition(mViewSource.get(), + nsHtml5Tokenizer::BOGUS_COMMENT, reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\0': { + c = 0xfffd; + [[fallthrough]]; + } + default: { + if (c >= 'A' && c <= 'Z') { + c += 0x20; + } + if (c >= 'a' && c <= 'z') { + endTag = true; + clearStrBufBeforeUse(); + appendStrBuf(c); + containsHyphen = false; + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::TAG_NAME, reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } else { + if 
(P::reportErrors) { + errGarbageAfterLtSlash(); + } + clearStrBufBeforeUse(); + appendStrBuf(c); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BOGUS_COMMENT, reconsume, + pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + } + case RCDATA: { + for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + } + switch (c) { + case '&': { + flushChars(buf, pos); + MOZ_ASSERT(!charRefBufLen, + "charRefBufLen not reset after previous use!"); + appendCharRefBuf(c); + setAdditionalAndRememberAmpersandLocation('\0'); + returnState = state; + state = + P::transition(mViewSource.get(), + nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '<': { + flushChars(buf, pos); + returnState = state; + state = + P::transition(mViewSource.get(), + nsHtml5Tokenizer::RAWTEXT_RCDATA_LESS_THAN_SIGN, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\0': { + emitReplacementCharacter(buf, pos); + continue; + } + case '\r': { + emitCarriageReturn<P>(buf, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + default: { + continue; + } + } + } + } + case RAWTEXT: { + for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + } + switch (c) { + case '<': { + flushChars(buf, pos); + returnState = state; + state = + P::transition(mViewSource.get(), + nsHtml5Tokenizer::RAWTEXT_RCDATA_LESS_THAN_SIGN, + reconsume, pos); + NS_HTML5_BREAK(rawtextloop); + } + case '\0': { + emitReplacementCharacter(buf, pos); + continue; + } + case '\r': { + emitCarriageReturn<P>(buf, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + default: { + continue; + } + } + } + rawtextloop_end:; + [[fallthrough]]; + } + case 
RAWTEXT_RCDATA_LESS_THAN_SIGN: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '/': { + index = 0; + clearStrBufBeforeUse(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::NON_DATA_END_TAG_NAME, + reconsume, pos); + NS_HTML5_BREAK(rawtextrcdatalessthansignloop); + } + default: { + tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 1); + cstart = pos; + reconsume = true; + state = + P::transition(mViewSource.get(), returnState, reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + rawtextrcdatalessthansignloop_end:; + [[fallthrough]]; + } + case NON_DATA_END_TAG_NAME: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + if (!endTagExpectationAsArray) { + tokenHandler->characters(nsHtml5Tokenizer::LT_SOLIDUS, 0, 2); + cstart = pos; + reconsume = true; + state = + P::transition(mViewSource.get(), returnState, reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } else if (index < endTagExpectationAsArray.length) { + char16_t e = endTagExpectationAsArray[index]; + char16_t folded = c; + if (c >= 'A' && c <= 'Z') { + folded += 0x20; + } + if (folded != e) { + tokenHandler->characters(nsHtml5Tokenizer::LT_SOLIDUS, 0, 2); + emitStrBuf(); + cstart = pos; + reconsume = true; + state = + P::transition(mViewSource.get(), returnState, reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + appendStrBuf(c); + index++; + continue; + } else { + endTag = true; + tagName = endTagExpectation; + switch (c) { + case '\r': { + P::silentCarriageReturn(this); + clearStrBufAfterUse(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME, + reconsume, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + case ' ': + case '\t': + case '\f': { + clearStrBufAfterUse(); + state = P::transition(mViewSource.get(), + 
nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '/': { + clearStrBufAfterUse(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::SELF_CLOSING_START_TAG, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '>': { + clearStrBufAfterUse(); + state = P::transition(mViewSource.get(), + emitCurrentTagToken(false, pos), + reconsume, pos); + if (shouldSuspend) { + NS_HTML5_BREAK(stateloop); + } + NS_HTML5_CONTINUE(stateloop); + } + default: { + tokenHandler->characters(nsHtml5Tokenizer::LT_SOLIDUS, 0, 2); + emitStrBuf(); + cstart = pos; + reconsume = true; + state = P::transition(mViewSource.get(), returnState, reconsume, + pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + } + } + case BOGUS_COMMENT: { + for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + } + switch (c) { + case '>': { + emitComment(0, pos); + state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, + reconsume, pos); + if (shouldSuspend) { + NS_HTML5_BREAK(stateloop); + } + NS_HTML5_CONTINUE(stateloop); + } + case '-': { + appendStrBuf(c); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BOGUS_COMMENT_HYPHEN, + reconsume, pos); + NS_HTML5_BREAK(boguscommentloop); + } + case '\r': { + appendStrBufCarriageReturn<P>(); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + appendStrBufLineFeed<P>(); + continue; + } + case '\0': { + c = 0xfffd; + [[fallthrough]]; + } + default: { + appendStrBuf(c); + continue; + } + } + } + boguscommentloop_end:; + [[fallthrough]]; + } + case BOGUS_COMMENT_HYPHEN: { + boguscommenthyphenloop: + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '>': { + emitComment(0, pos); + state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, + reconsume, pos); + if (shouldSuspend) { + 
NS_HTML5_BREAK(stateloop); + } + NS_HTML5_CONTINUE(stateloop); + } + case '-': { + appendSecondHyphenToBogusComment(); + NS_HTML5_CONTINUE(boguscommenthyphenloop); + } + case '\r': { + appendStrBufCarriageReturn<P>(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BOGUS_COMMENT, reconsume, + pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + appendStrBufLineFeed<P>(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BOGUS_COMMENT, reconsume, + pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\0': { + c = 0xfffd; + [[fallthrough]]; + } + default: { + appendStrBuf(c); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BOGUS_COMMENT, reconsume, + pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + } + case SCRIPT_DATA: { + for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + } + switch (c) { + case '<': { + flushChars(buf, pos); + returnState = state; + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_LESS_THAN_SIGN, reconsume, pos); + NS_HTML5_BREAK(scriptdataloop); + } + case '\0': { + emitReplacementCharacter(buf, pos); + continue; + } + case '\r': { + emitCarriageReturn<P>(buf, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + default: { + continue; + } + } + } + scriptdataloop_end:; + [[fallthrough]]; + } + case SCRIPT_DATA_LESS_THAN_SIGN: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '/': { + index = 0; + clearStrBufBeforeUse(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::NON_DATA_END_TAG_NAME, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '!': { + tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 1); + cstart = pos; + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_ESCAPE_START, + 
reconsume, pos); + NS_HTML5_BREAK(scriptdatalessthansignloop); + } + default: { + tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 1); + cstart = pos; + reconsume = true; + state = + P::transition(mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA, reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + scriptdatalessthansignloop_end:; + [[fallthrough]]; + } + case SCRIPT_DATA_ESCAPE_START: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '-': { + state = + P::transition(mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_ESCAPE_START_DASH, + reconsume, pos); + NS_HTML5_BREAK(scriptdataescapestartloop); + } + default: { + reconsume = true; + state = + P::transition(mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA, reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + scriptdataescapestartloop_end:; + [[fallthrough]]; + } + case SCRIPT_DATA_ESCAPE_START_DASH: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '-': { + state = + P::transition(mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_DASH_DASH, + reconsume, pos); + NS_HTML5_BREAK(scriptdataescapestartdashloop); + } + default: { + reconsume = true; + state = + P::transition(mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA, reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + scriptdataescapestartdashloop_end:; + [[fallthrough]]; + } + case SCRIPT_DATA_ESCAPED_DASH_DASH: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '-': { + continue; + } + case '<': { + flushChars(buf, pos); + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '>': { + state = + P::transition(mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA, 
reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\0': { + emitReplacementCharacter(buf, pos); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED, + reconsume, pos); + NS_HTML5_BREAK(scriptdataescapeddashdashloop); + } + case '\r': { + emitCarriageReturn<P>(buf, pos); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED, + reconsume, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + default: { + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED, + reconsume, pos); + NS_HTML5_BREAK(scriptdataescapeddashdashloop); + } + } + } + scriptdataescapeddashdashloop_end:; + [[fallthrough]]; + } + case SCRIPT_DATA_ESCAPED: { + for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + } + switch (c) { + case '-': { + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_DASH, + reconsume, pos); + NS_HTML5_BREAK(scriptdataescapedloop); + } + case '<': { + flushChars(buf, pos); + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\0': { + emitReplacementCharacter(buf, pos); + continue; + } + case '\r': { + emitCarriageReturn<P>(buf, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + default: { + continue; + } + } + } + scriptdataescapedloop_end:; + [[fallthrough]]; + } + case SCRIPT_DATA_ESCAPED_DASH: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '-': { + state = + P::transition(mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_DASH_DASH, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '<': { + flushChars(buf, pos); + state = 
P::transition( + mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN, + reconsume, pos); + NS_HTML5_BREAK(scriptdataescapeddashloop); + } + case '\0': { + emitReplacementCharacter(buf, pos); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\r': { + emitCarriageReturn<P>(buf, pos); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED, + reconsume, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + default: { + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + scriptdataescapeddashloop_end:; + [[fallthrough]]; + } + case SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '/': { + index = 0; + clearStrBufBeforeUse(); + returnState = nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED; + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::NON_DATA_END_TAG_NAME, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case 'S': + case 's': { + tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 1); + cstart = pos; + index = 1; + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPE_START, reconsume, + pos); + NS_HTML5_BREAK(scriptdataescapedlessthanloop); + } + default: { + tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 1); + cstart = pos; + reconsume = true; + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + scriptdataescapedlessthanloop_end:; + [[fallthrough]]; + } + case SCRIPT_DATA_DOUBLE_ESCAPE_START: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + 
MOZ_ASSERT(index > 0); + if (index < 6) { + char16_t folded = c; + if (c >= 'A' && c <= 'Z') { + folded += 0x20; + } + if (folded != nsHtml5Tokenizer::SCRIPT_ARR[index]) { + reconsume = true; + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + index++; + continue; + } + switch (c) { + case '\r': { + emitCarriageReturn<P>(buf, pos); + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + case ' ': + case '\t': + case '\f': + case '/': + case '>': { + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos); + NS_HTML5_BREAK(scriptdatadoubleescapestartloop); + } + default: { + reconsume = true; + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + scriptdatadoubleescapestartloop_end:; + [[fallthrough]]; + } + case SCRIPT_DATA_DOUBLE_ESCAPED: { + for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + } + switch (c) { + case '-': { + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED_DASH, reconsume, + pos); + NS_HTML5_BREAK(scriptdatadoubleescapedloop); + } + case '<': { + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\0': { + emitReplacementCharacter(buf, pos); + continue; + } + case '\r': { + emitCarriageReturn<P>(buf, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + default: { + continue; + } + } + } + scriptdatadoubleescapedloop_end:; + [[fallthrough]]; + } + case 
SCRIPT_DATA_DOUBLE_ESCAPED_DASH: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '-': { + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH, + reconsume, pos); + NS_HTML5_BREAK(scriptdatadoubleescapeddashloop); + } + case '<': { + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\0': { + emitReplacementCharacter(buf, pos); + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\r': { + emitCarriageReturn<P>(buf, pos); + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + default: { + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + scriptdatadoubleescapeddashloop_end:; + [[fallthrough]]; + } + case SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '-': { + continue; + } + case '<': { + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN, + reconsume, pos); + NS_HTML5_BREAK(scriptdatadoubleescapeddashdashloop); + } + case '>': { + state = + P::transition(mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA, reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\0': { + emitReplacementCharacter(buf, pos); + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\r': { + 
emitCarriageReturn<P>(buf, pos); + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + default: { + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + scriptdatadoubleescapeddashdashloop_end:; + [[fallthrough]]; + } + case SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '/': { + index = 0; + state = + P::transition(mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPE_END, + reconsume, pos); + NS_HTML5_BREAK(scriptdatadoubleescapedlessthanloop); + } + default: { + reconsume = true; + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + scriptdatadoubleescapedlessthanloop_end:; + [[fallthrough]]; + } + case SCRIPT_DATA_DOUBLE_ESCAPE_END: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + if (index < 6) { + char16_t folded = c; + if (c >= 'A' && c <= 'Z') { + folded += 0x20; + } + if (folded != nsHtml5Tokenizer::SCRIPT_ARR[index]) { + reconsume = true; + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + index++; + continue; + } + switch (c) { + case '\r': { + emitCarriageReturn<P>(buf, pos); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED, + reconsume, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + case ' ': + case '\t': + case '\f': + case '/': + case '>': { + state = P::transition(mViewSource.get(), + 
nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + default: { + reconsume = true; + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + } + case MARKUP_DECLARATION_OCTYPE: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + if (index < 6) { + char16_t folded = c; + if (c >= 'A' && c <= 'Z') { + folded += 0x20; + } + if (folded == nsHtml5Tokenizer::OCTYPE[index]) { + appendStrBuf(c); + } else { + if (P::reportErrors) { + errBogusComment(); + } + reconsume = true; + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BOGUS_COMMENT, reconsume, + pos); + NS_HTML5_CONTINUE(stateloop); + } + index++; + continue; + } else { + reconsume = true; + state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DOCTYPE, + reconsume, pos); + NS_HTML5_BREAK(markupdeclarationdoctypeloop); + } + } + markupdeclarationdoctypeloop_end:; + [[fallthrough]]; + } + case DOCTYPE: { + for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + } + initDoctypeFields(); + switch (c) { + case '\r': { + P::silentCarriageReturn(this); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BEFORE_DOCTYPE_NAME, + reconsume, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + case ' ': + case '\t': + case '\f': { + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BEFORE_DOCTYPE_NAME, + reconsume, pos); + NS_HTML5_BREAK(doctypeloop); + } + default: { + if (P::reportErrors) { + errMissingSpaceBeforeDoctypeName(); + } + reconsume = true; + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BEFORE_DOCTYPE_NAME, + reconsume, pos); + NS_HTML5_BREAK(doctypeloop); + } + } + } + doctypeloop_end:; + [[fallthrough]]; + } 
+ case BEFORE_DOCTYPE_NAME: { + for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + } + switch (c) { + case '\r': { + P::silentCarriageReturn(this); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + case ' ': + case '\t': + case '\f': { + continue; + } + case '>': { + if (P::reportErrors) { + errNamelessDoctype(); + } + forceQuirks = true; + emitDoctypeToken(pos); + state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, + reconsume, pos); + if (shouldSuspend) { + NS_HTML5_BREAK(stateloop); + } + NS_HTML5_CONTINUE(stateloop); + } + case '\0': { + c = 0xfffd; + [[fallthrough]]; + } + default: { + if (c >= 'A' && c <= 'Z') { + c += 0x20; + } + clearStrBufBeforeUse(); + appendStrBuf(c); + state = + P::transition(mViewSource.get(), + nsHtml5Tokenizer::DOCTYPE_NAME, reconsume, pos); + NS_HTML5_BREAK(beforedoctypenameloop); + } + } + } + beforedoctypenameloop_end:; + [[fallthrough]]; + } + case DOCTYPE_NAME: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '\r': { + P::silentCarriageReturn(this); + strBufToDoctypeName(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::AFTER_DOCTYPE_NAME, + reconsume, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + case ' ': + case '\t': + case '\f': { + strBufToDoctypeName(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::AFTER_DOCTYPE_NAME, + reconsume, pos); + NS_HTML5_BREAK(doctypenameloop); + } + case '>': { + strBufToDoctypeName(); + emitDoctypeToken(pos); + state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, + reconsume, pos); + if (shouldSuspend) { + NS_HTML5_BREAK(stateloop); + } + NS_HTML5_CONTINUE(stateloop); + } + case '\0': { + c = 0xfffd; + [[fallthrough]]; + } + default: { + if (c 
>= 'A' && c <= 'Z') { + c += 0x0020; + } + appendStrBuf(c); + continue; + } + } + } + doctypenameloop_end:; + [[fallthrough]]; + } + case AFTER_DOCTYPE_NAME: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '\r': { + P::silentCarriageReturn(this); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + case ' ': + case '\t': + case '\f': { + continue; + } + case '>': { + emitDoctypeToken(pos); + state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, + reconsume, pos); + if (shouldSuspend) { + NS_HTML5_BREAK(stateloop); + } + NS_HTML5_CONTINUE(stateloop); + } + case 'p': + case 'P': { + index = 0; + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::DOCTYPE_UBLIC, reconsume, + pos); + NS_HTML5_BREAK(afterdoctypenameloop); + } + case 's': + case 'S': { + index = 0; + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::DOCTYPE_YSTEM, reconsume, + pos); + NS_HTML5_CONTINUE(stateloop); + } + default: { + bogusDoctype(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BOGUS_DOCTYPE, reconsume, + pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + afterdoctypenameloop_end:; + [[fallthrough]]; + } + case DOCTYPE_UBLIC: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + if (index < 5) { + char16_t folded = c; + if (c >= 'A' && c <= 'Z') { + folded += 0x20; + } + if (folded != nsHtml5Tokenizer::UBLIC[index]) { + bogusDoctype(); + reconsume = true; + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BOGUS_DOCTYPE, reconsume, + pos); + NS_HTML5_CONTINUE(stateloop); + } + index++; + continue; + } else { + reconsume = true; + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::AFTER_DOCTYPE_PUBLIC_KEYWORD, reconsume, pos); + NS_HTML5_BREAK(doctypeublicloop); + } + } + doctypeublicloop_end:; + [[fallthrough]]; + } + case 
AFTER_DOCTYPE_PUBLIC_KEYWORD: { + for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + } + switch (c) { + case '\r': { + P::silentCarriageReturn(this); + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::BEFORE_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, + pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + case ' ': + case '\t': + case '\f': { + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::BEFORE_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, + pos); + NS_HTML5_BREAK(afterdoctypepublickeywordloop); + } + case '\"': { + if (P::reportErrors) { + errNoSpaceBetweenDoctypePublicKeywordAndQuote(); + } + clearStrBufBeforeUse(); + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\'': { + if (P::reportErrors) { + errNoSpaceBetweenDoctypePublicKeywordAndQuote(); + } + clearStrBufBeforeUse(); + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '>': { + if (P::reportErrors) { + errExpectedPublicId(); + } + forceQuirks = true; + emitDoctypeToken(pos); + state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, + reconsume, pos); + if (shouldSuspend) { + NS_HTML5_BREAK(stateloop); + } + NS_HTML5_CONTINUE(stateloop); + } + default: { + bogusDoctype(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BOGUS_DOCTYPE, reconsume, + pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + afterdoctypepublickeywordloop_end:; + [[fallthrough]]; + } + case BEFORE_DOCTYPE_PUBLIC_IDENTIFIER: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '\r': { + P::silentCarriageReturn(this); + 
NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + case ' ': + case '\t': + case '\f': { + continue; + } + case '\"': { + clearStrBufBeforeUse(); + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED, + reconsume, pos); + NS_HTML5_BREAK(beforedoctypepublicidentifierloop); + } + case '\'': { + clearStrBufBeforeUse(); + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '>': { + if (P::reportErrors) { + errExpectedPublicId(); + } + forceQuirks = true; + emitDoctypeToken(pos); + state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, + reconsume, pos); + if (shouldSuspend) { + NS_HTML5_BREAK(stateloop); + } + NS_HTML5_CONTINUE(stateloop); + } + default: { + bogusDoctype(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BOGUS_DOCTYPE, reconsume, + pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + beforedoctypepublicidentifierloop_end:; + [[fallthrough]]; + } + case DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '\"': { + publicIdentifier = strBufToString(); + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::AFTER_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, + pos); + NS_HTML5_BREAK(doctypepublicidentifierdoublequotedloop); + } + case '>': { + if (P::reportErrors) { + errGtInPublicId(); + } + forceQuirks = true; + publicIdentifier = strBufToString(); + emitDoctypeToken(pos); + state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, + reconsume, pos); + if (shouldSuspend) { + NS_HTML5_BREAK(stateloop); + } + NS_HTML5_CONTINUE(stateloop); + } + case '\r': { + appendStrBufCarriageReturn<P>(); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + appendStrBufLineFeed<P>(); + continue; + } + case '\0': 
{ + c = 0xfffd; + [[fallthrough]]; + } + default: { + appendStrBuf(c); + continue; + } + } + } + doctypepublicidentifierdoublequotedloop_end:; + [[fallthrough]]; + } + case AFTER_DOCTYPE_PUBLIC_IDENTIFIER: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '\r': { + P::silentCarriageReturn(this); + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer:: + BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS, + reconsume, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + case ' ': + case '\t': + case '\f': { + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer:: + BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS, + reconsume, pos); + NS_HTML5_BREAK(afterdoctypepublicidentifierloop); + } + case '>': { + emitDoctypeToken(pos); + state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, + reconsume, pos); + if (shouldSuspend) { + NS_HTML5_BREAK(stateloop); + } + NS_HTML5_CONTINUE(stateloop); + } + case '\"': { + if (P::reportErrors) { + errNoSpaceBetweenPublicAndSystemIds(); + } + clearStrBufBeforeUse(); + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\'': { + if (P::reportErrors) { + errNoSpaceBetweenPublicAndSystemIds(); + } + clearStrBufBeforeUse(); + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + default: { + bogusDoctype(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BOGUS_DOCTYPE, reconsume, + pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + afterdoctypepublicidentifierloop_end:; + [[fallthrough]]; + } + case BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, 
pos); + switch (c) { + case '\r': { + P::silentCarriageReturn(this); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + case ' ': + case '\t': + case '\f': { + continue; + } + case '>': { + emitDoctypeToken(pos); + state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, + reconsume, pos); + if (shouldSuspend) { + NS_HTML5_BREAK(stateloop); + } + NS_HTML5_CONTINUE(stateloop); + } + case '\"': { + clearStrBufBeforeUse(); + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, + reconsume, pos); + NS_HTML5_BREAK(betweendoctypepublicandsystemidentifiersloop); + } + case '\'': { + clearStrBufBeforeUse(); + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + default: { + bogusDoctype(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BOGUS_DOCTYPE, reconsume, + pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + betweendoctypepublicandsystemidentifiersloop_end:; + [[fallthrough]]; + } + case DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '\"': { + systemIdentifier = strBufToString(); + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::AFTER_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, + pos); + NS_HTML5_BREAK(doctypesystemidentifierdoublequotedloop); + } + case '>': { + if (P::reportErrors) { + errGtInSystemId(); + } + forceQuirks = true; + systemIdentifier = strBufToString(); + emitDoctypeToken(pos); + state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, + reconsume, pos); + if (shouldSuspend) { + NS_HTML5_BREAK(stateloop); + } + NS_HTML5_CONTINUE(stateloop); + } + case '\r': { + appendStrBufCarriageReturn<P>(); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + appendStrBufLineFeed<P>(); + continue; 
+ } + case '\0': { + c = 0xfffd; + [[fallthrough]]; + } + default: { + appendStrBuf(c); + continue; + } + } + } + doctypesystemidentifierdoublequotedloop_end:; + [[fallthrough]]; + } + case AFTER_DOCTYPE_SYSTEM_IDENTIFIER: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '\r': { + P::silentCarriageReturn(this); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + case ' ': + case '\t': + case '\f': { + continue; + } + case '>': { + emitDoctypeToken(pos); + state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, + reconsume, pos); + if (shouldSuspend) { + NS_HTML5_BREAK(stateloop); + } + NS_HTML5_CONTINUE(stateloop); + } + default: { + bogusDoctypeWithoutQuirks(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BOGUS_DOCTYPE, reconsume, + pos); + NS_HTML5_BREAK(afterdoctypesystemidentifierloop); + } + } + } + afterdoctypesystemidentifierloop_end:; + [[fallthrough]]; + } + case BOGUS_DOCTYPE: { + for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + } + switch (c) { + case '>': { + emitDoctypeToken(pos); + state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, + reconsume, pos); + if (shouldSuspend) { + NS_HTML5_BREAK(stateloop); + } + NS_HTML5_CONTINUE(stateloop); + } + case '\r': { + P::silentCarriageReturn(this); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + default: { + continue; + } + } + } + } + case DOCTYPE_YSTEM: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + if (index < 5) { + char16_t folded = c; + if (c >= 'A' && c <= 'Z') { + folded += 0x20; + } + if (folded != nsHtml5Tokenizer::YSTEM[index]) { + bogusDoctype(); + reconsume = true; + state = P::transition(mViewSource.get(), + 
nsHtml5Tokenizer::BOGUS_DOCTYPE, reconsume, + pos); + NS_HTML5_CONTINUE(stateloop); + } + index++; + NS_HTML5_CONTINUE(stateloop); + } else { + reconsume = true; + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::AFTER_DOCTYPE_SYSTEM_KEYWORD, reconsume, pos); + NS_HTML5_BREAK(doctypeystemloop); + } + } + doctypeystemloop_end:; + [[fallthrough]]; + } + case AFTER_DOCTYPE_SYSTEM_KEYWORD: { + for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + } + switch (c) { + case '\r': { + P::silentCarriageReturn(this); + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::BEFORE_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, + pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + case ' ': + case '\t': + case '\f': { + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::BEFORE_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, + pos); + NS_HTML5_BREAK(afterdoctypesystemkeywordloop); + } + case '\"': { + if (P::reportErrors) { + errNoSpaceBetweenDoctypeSystemKeywordAndQuote(); + } + clearStrBufBeforeUse(); + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\'': { + if (P::reportErrors) { + errNoSpaceBetweenDoctypeSystemKeywordAndQuote(); + } + clearStrBufBeforeUse(); + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '>': { + if (P::reportErrors) { + errExpectedPublicId(); + } + forceQuirks = true; + emitDoctypeToken(pos); + state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, + reconsume, pos); + if (shouldSuspend) { + NS_HTML5_BREAK(stateloop); + } + NS_HTML5_CONTINUE(stateloop); + } + default: { + bogusDoctype(); + state = P::transition(mViewSource.get(), + 
nsHtml5Tokenizer::BOGUS_DOCTYPE, reconsume, + pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + afterdoctypesystemkeywordloop_end:; + [[fallthrough]]; + } + case BEFORE_DOCTYPE_SYSTEM_IDENTIFIER: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '\r': { + P::silentCarriageReturn(this); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + case ' ': + case '\t': + case '\f': { + continue; + } + case '\"': { + clearStrBufBeforeUse(); + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\'': { + clearStrBufBeforeUse(); + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, + reconsume, pos); + NS_HTML5_BREAK(beforedoctypesystemidentifierloop); + } + case '>': { + if (P::reportErrors) { + errExpectedSystemId(); + } + forceQuirks = true; + emitDoctypeToken(pos); + state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, + reconsume, pos); + if (shouldSuspend) { + NS_HTML5_BREAK(stateloop); + } + NS_HTML5_CONTINUE(stateloop); + } + default: { + bogusDoctype(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BOGUS_DOCTYPE, reconsume, + pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + beforedoctypesystemidentifierloop_end:; + [[fallthrough]]; + } + case DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '\'': { + systemIdentifier = strBufToString(); + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::AFTER_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, + pos); + NS_HTML5_CONTINUE(stateloop); + } + case '>': { + if (P::reportErrors) { + errGtInSystemId(); + } + forceQuirks = true; + systemIdentifier = strBufToString(); + 
emitDoctypeToken(pos); + state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, + reconsume, pos); + if (shouldSuspend) { + NS_HTML5_BREAK(stateloop); + } + NS_HTML5_CONTINUE(stateloop); + } + case '\r': { + appendStrBufCarriageReturn<P>(); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + appendStrBufLineFeed<P>(); + continue; + } + case '\0': { + c = 0xfffd; + [[fallthrough]]; + } + default: { + appendStrBuf(c); + continue; + } + } + } + } + case DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '\'': { + publicIdentifier = strBufToString(); + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::AFTER_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, + pos); + NS_HTML5_CONTINUE(stateloop); + } + case '>': { + if (P::reportErrors) { + errGtInPublicId(); + } + forceQuirks = true; + publicIdentifier = strBufToString(); + emitDoctypeToken(pos); + state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, + reconsume, pos); + if (shouldSuspend) { + NS_HTML5_BREAK(stateloop); + } + NS_HTML5_CONTINUE(stateloop); + } + case '\r': { + appendStrBufCarriageReturn<P>(); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + appendStrBufLineFeed<P>(); + continue; + } + case '\0': { + c = 0xfffd; + [[fallthrough]]; + } + default: { + appendStrBuf(c); + continue; + } + } + } + } + case PROCESSING_INSTRUCTION: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '\?': { + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::PROCESSING_INSTRUCTION_QUESTION_MARK, + reconsume, pos); + NS_HTML5_BREAK(processinginstructionloop); + } + default: { + continue; + } + } + } + processinginstructionloop_end:; + [[fallthrough]]; + } + case PROCESSING_INSTRUCTION_QUESTION_MARK: { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + 
switch (c) { + case '>': { + state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, + reconsume, pos); + suspendIfRequestedAfterCurrentNonTextToken(); + if (shouldSuspend) { + NS_HTML5_BREAK(stateloop); + } + NS_HTML5_CONTINUE(stateloop); + } + default: { + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::PROCESSING_INSTRUCTION, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + } + } +stateloop_end:; + flushChars(buf, pos); + stateSave = state; + returnStateSave = returnState; + return pos; } -void nsHtml5Tokenizer::attributeNameComplete() { - attributeName = - nsHtml5AttributeName::nameByBuffer(strBuf, strBufLen, interner); - if (!attributeName) { - nonInternedAttributeName->setNameForNonInterned( - nsHtml5Portability::newLocalNameFromBuffer(strBuf, strBufLen, - interner)); - attributeName = nonInternedAttributeName; - } +void nsHtml5Tokenizer::initDoctypeFields() { clearStrBufAfterUse(); - if (!attributes) { - attributes = new nsHtml5HtmlAttributes(0); + doctypeName = nullptr; + if (systemIdentifier) { + systemIdentifier.Release(); + systemIdentifier = nullptr; } - if (attributes->contains(attributeName)) { - errDuplicateAttribute(); - attributeName = nullptr; + if (publicIdentifier) { + publicIdentifier.Release(); + publicIdentifier = nullptr; } + forceQuirks = false; } -void nsHtml5Tokenizer::addAttributeWithoutValue() { - if (attributeName) { - attributes->addAttribute( - attributeName, nsHtml5Portability::newEmptyString(), attributeLine); - attributeName = nullptr; - } else { - clearStrBufAfterUse(); - } +template <class P> +void nsHtml5Tokenizer::adjustDoubleHyphenAndAppendToStrBufCarriageReturn() { + P::silentCarriageReturn(this); + adjustDoubleHyphenAndAppendToStrBufAndErr('\n', false); } -void nsHtml5Tokenizer::addAttributeWithValue() { - if (attributeName) { - nsHtml5String val = strBufToString(); - if (mViewSource) { - mViewSource->MaybeLinkifyAttributeValue(attributeName, val); - } - 
attributes->addAttribute(attributeName, val, attributeLine); - attributeName = nullptr; - } else { - clearStrBufAfterUse(); - } +template <class P> +void nsHtml5Tokenizer::adjustDoubleHyphenAndAppendToStrBufLineFeed() { + P::silentLineFeed(this); + adjustDoubleHyphenAndAppendToStrBufAndErr('\n', false); } -void nsHtml5Tokenizer::start() { - initializeWithoutStarting(); - tokenHandler->startTokenization(this); - if (mViewSource) { - line = 1; - col = -1; - nextCharOnNewLine = false; - } else if (tokenHandler->WantsLineAndColumn()) { - line = 0; - col = 1; - nextCharOnNewLine = true; - } else { - line = -1; - col = -1; - nextCharOnNewLine = false; - } +template <class P> +void nsHtml5Tokenizer::appendStrBufLineFeed() { + P::silentLineFeed(this); + appendStrBuf('\n'); } -bool nsHtml5Tokenizer::tokenizeBuffer(nsHtml5UTF16Buffer* buffer) { - int32_t state = stateSave; - int32_t returnState = returnStateSave; - char16_t c = '\0'; - shouldSuspend = false; - lastCR = false; - int32_t start = buffer->getStart(); - int32_t end = buffer->getEnd(); - int32_t pos = start - 1; - switch (state) { - case DATA: - case RCDATA: - case SCRIPT_DATA: - case PLAINTEXT: - case RAWTEXT: - case CDATA_SECTION: - case SCRIPT_DATA_ESCAPED: - case SCRIPT_DATA_ESCAPE_START: - case SCRIPT_DATA_ESCAPE_START_DASH: - case SCRIPT_DATA_ESCAPED_DASH: - case SCRIPT_DATA_ESCAPED_DASH_DASH: - case SCRIPT_DATA_DOUBLE_ESCAPE_START: - case SCRIPT_DATA_DOUBLE_ESCAPED: - case SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN: - case SCRIPT_DATA_DOUBLE_ESCAPED_DASH: - case SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH: - case SCRIPT_DATA_DOUBLE_ESCAPE_END: { - cstart = start; - break; - } - default: { - cstart = INT32_MAX; - break; - } - } - if (mViewSource) { - mViewSource->SetBuffer(buffer); - if (mozilla::htmlaccel::htmlaccelEnabled()) { - pos = StateLoopViewSourceSIMD(state, c, pos, buffer->getBuffer(), false, - returnState, buffer->getEnd()); - } else { - pos = StateLoopViewSourceALU(state, c, pos, buffer->getBuffer(), 
false, - returnState, buffer->getEnd()); - } - mViewSource->DropBuffer((pos == buffer->getEnd()) ? pos : pos + 1); - } else if (tokenHandler->WantsLineAndColumn()) { - if (mozilla::htmlaccel::htmlaccelEnabled()) { - pos = StateLoopLineColSIMD(state, c, pos, buffer->getBuffer(), false, - returnState, buffer->getEnd()); - } else { - pos = StateLoopLineColALU(state, c, pos, buffer->getBuffer(), false, - returnState, buffer->getEnd()); - } - } else if (mozilla::htmlaccel::htmlaccelEnabled()) { - pos = StateLoopFastestSIMD(state, c, pos, buffer->getBuffer(), false, - returnState, buffer->getEnd()); - } else { - pos = StateLoopFastestALU(state, c, pos, buffer->getBuffer(), false, - returnState, buffer->getEnd()); - } - if (pos == end) { - buffer->setStart(pos); - } else { - buffer->setStart(pos + 1); - } - return lastCR; +template <class P> +void nsHtml5Tokenizer::appendStrBufCarriageReturn() { + P::silentCarriageReturn(this); + appendStrBuf('\n'); } -void nsHtml5Tokenizer::initDoctypeFields() { - clearStrBufAfterUse(); - doctypeName = nullptr; - if (systemIdentifier) { - systemIdentifier.Release(); - systemIdentifier = nullptr; - } - if (publicIdentifier) { - publicIdentifier.Release(); - publicIdentifier = nullptr; - } - forceQuirks = false; +template <class P> +void nsHtml5Tokenizer::emitCarriageReturn(char16_t* buf, int32_t pos) { + P::silentCarriageReturn(this); + flushChars(buf, pos); + tokenHandler->characters(nsHtml5Tokenizer::LF, 0, 1); + cstart = INT32_MAX; } void nsHtml5Tokenizer::emitReplacementCharacter(char16_t* buf, int32_t pos) { @@ -503,6 +4528,10 @@ void nsHtml5Tokenizer::emitPlaintextReplacementCharacter(char16_t* buf, cstart = pos + 1; } +void nsHtml5Tokenizer::setAdditionalAndRememberAmpersandLocation(char16_t add) { + additional = add; +} + void nsHtml5Tokenizer::bogusDoctype() { errBogusDoctype(); forceQuirks = true; @@ -868,6 +4897,13 @@ void nsHtml5Tokenizer::emitDoctypeToken(int32_t pos) { suspendIfRequestedAfterCurrentNonTextToken(); } +void 
nsHtml5Tokenizer::suspendIfRequestedAfterCurrentNonTextToken() { + if (suspendAfterCurrentNonTextToken) { + suspendAfterCurrentNonTextToken = false; + shouldSuspend = true; + } +} + void nsHtml5Tokenizer::suspendAfterCurrentTokenIfNotInText() { switch (stateSave) { case DATA: @@ -979,6 +5015,25 @@ bool nsHtml5Tokenizer::internalEncodingDeclaration( return false; } +void nsHtml5Tokenizer::emitOrAppendTwo(const char16_t* val, + int32_t returnState) { + if ((returnState & DATA_AND_RCDATA_MASK)) { + appendStrBuf(val[0]); + appendStrBuf(val[1]); + } else { + tokenHandler->characters(val, 0, 2); + } +} + +void nsHtml5Tokenizer::emitOrAppendOne(const char16_t* val, + int32_t returnState) { + if ((returnState & DATA_AND_RCDATA_MASK)) { + appendStrBuf(val[0]); + } else { + tokenHandler->characters(val, 0, 1); + } +} + void nsHtml5Tokenizer::end() { if (!keepBuffer) { strBuf = nullptr; @@ -1002,6 +5057,10 @@ void nsHtml5Tokenizer::end() { } } +void nsHtml5Tokenizer::requestSuspension() { shouldSuspend = true; } + +bool nsHtml5Tokenizer::isInDataState() { return (stateSave == DATA); } + void nsHtml5Tokenizer::resetToDataState() { clearStrBufAfterUse(); charRefBufLen = 0; diff --git a/parser/html/nsHtml5Tokenizer.h b/parser/html/nsHtml5Tokenizer.h @@ -43,10 +43,8 @@ #include "nsHtml5NamedCharacters.h" #include "nsHtml5NamedCharactersAccel.h" #include "nsHtml5String.h" -#include "nsHtml5TreeBuilder.h" #include "nsIContent.h" #include "nsTraceRefcnt.h" -#include "mozilla/htmlaccel/htmlaccelEnabled.h" class nsHtml5StreamParser; @@ -339,12 +337,15 @@ class nsHtml5Tokenizer { void setLineNumber(int32_t line); inline int32_t getLineNumber() { return line; } - inline nsHtml5HtmlAttributes* emptyAttributes() { - return nsHtml5HtmlAttributes::EMPTY_ATTRIBUTES; - } + nsHtml5HtmlAttributes* emptyAttributes(); private: - void appendCharRefBuf(char16_t c); + inline void appendCharRefBuf(char16_t c) { + MOZ_RELEASE_ASSERT(charRefBufLen < charRefBuf.length, + "Attempted to overrun 
charRefBuf!"); + charRefBuf[charRefBufLen++] = c; + } + void emitOrAppendCharRefBuf(int32_t returnState); inline void clearStrBufAfterUse() { strBufLen = 0; } @@ -359,32 +360,23 @@ class nsHtml5Tokenizer { strBufLen = 0; } - void appendStrBuf(char16_t c); + inline void appendStrBuf(char16_t c) { + MOZ_ASSERT(strBufLen < strBuf.length, + "Previous buffer length insufficient."); + if (MOZ_UNLIKELY(strBufLen == strBuf.length)) { + if (MOZ_UNLIKELY(!EnsureBufferSpace(1))) { + MOZ_CRASH("Unable to recover from buffer reallocation failure"); + } + } + strBuf[strBufLen++] = c; + } protected: - inline nsHtml5String strBufToString() { - nsHtml5String str = nsHtml5Portability::newStringFromBuffer( - strBuf, 0, strBufLen, tokenHandler, - !newAttributesEachTime && - attributeName == nsHtml5AttributeName::ATTR_CLASS); - clearStrBufAfterUse(); - return str; - } + nsHtml5String strBufToString(); private: - inline void strBufToDoctypeName() { - doctypeName = - nsHtml5Portability::newLocalNameFromBuffer(strBuf, strBufLen, interner); - clearStrBufAfterUse(); - } - - inline void emitStrBuf() { - if (strBufLen > 0) { - tokenHandler->characters(strBuf, 0, strBufLen); - clearStrBufAfterUse(); - } - } - + void strBufToDoctypeName(); + void emitStrBuf(); inline void appendSecondHyphenToBogusComment() { appendStrBuf('-'); } inline void adjustDoubleHyphenAndAppendToStrBufAndErr( @@ -416,4165 +408,23 @@ class nsHtml5Tokenizer { private: template <class P> - inline int32_t stateLoop(int32_t state, char16_t c, int32_t pos, - char16_t* buf, bool reconsume, int32_t returnState, - int32_t endPos) { - bool reportedConsecutiveHyphens = false; - stateloop: - for (;;) { - switch (state) { - case DATA: { - for (;;) { - if (reconsume) { - reconsume = false; - switch (c) { - case '&': { - flushChars(buf, pos); - MOZ_ASSERT(!charRefBufLen, - "charRefBufLen not reset after previous use!"); - appendCharRefBuf(c); - setAdditionalAndRememberAmpersandLocation('\0'); - returnState = state; - state = 
P::transition( - mViewSource.get(), - nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE, reconsume, - pos); - NS_HTML5_CONTINUE(stateloop); - } - case '<': { - flushChars(buf, pos); - state = - P::transition(mViewSource.get(), - nsHtml5Tokenizer::TAG_OPEN, reconsume, pos); - NS_HTML5_BREAK(dataloop); - } - case '\0': { - maybeEmitReplacementCharacter(buf, pos); - break; - } - case '\r': { - emitCarriageReturn<P>(buf, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - default: { - break; - } - } - } - datamiddle: - for (;;) { - ++pos; - pos += P::accelerateAdvancementData(this, buf, pos, endPos); - for (;;) { - if (pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '&': { - flushChars(buf, pos); - MOZ_ASSERT(!charRefBufLen, - "charRefBufLen not reset after previous use!"); - appendCharRefBuf(c); - setAdditionalAndRememberAmpersandLocation('\0'); - returnState = state; - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '<': { - flushChars(buf, pos); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::TAG_OPEN, reconsume, - pos); - NS_HTML5_BREAK(dataloop); - } - case '\0': { - maybeEmitReplacementCharacter(buf, pos); - NS_HTML5_CONTINUE(datamiddle); - } - case '\r': { - emitCarriageReturn<P>(buf, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - NS_HTML5_CONTINUE(datamiddle); - } - default: { - ++pos; - continue; - } - } - } - } - } - dataloop_end:; - [[fallthrough]]; - } - case TAG_OPEN: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - if (c >= 'A' && c <= 'Z') { - endTag = false; - clearStrBufBeforeUse(); - appendStrBuf((char16_t)(c + 0x20)); - containsHyphen = false; - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::TAG_NAME, reconsume, 
pos); - NS_HTML5_BREAK(tagopenloop); - } else if (c >= 'a' && c <= 'z') { - endTag = false; - clearStrBufBeforeUse(); - appendStrBuf(c); - containsHyphen = false; - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::TAG_NAME, reconsume, pos); - NS_HTML5_BREAK(tagopenloop); - } - switch (c) { - case '!': { - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::MARKUP_DECLARATION_OPEN, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '/': { - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::CLOSE_TAG_OPEN, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\?': { - if (viewingXmlSource) { - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::PROCESSING_INSTRUCTION, reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - if (P::reportErrors) { - errProcessingInstruction(); - } - clearStrBufBeforeUse(); - appendStrBuf(c); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BOGUS_COMMENT, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '>': { - if (P::reportErrors) { - errLtGt(); - } - tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 2); - cstart = pos + 1; - state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - default: { - if (P::reportErrors) { - errBadCharAfterLt(c); - } - tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 1); - cstart = pos; - reconsume = true; - state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - tagopenloop_end:; - [[fallthrough]]; - } - case TAG_NAME: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '\r': { - P::silentCarriageReturn(this); - strBufToElementNameString(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME, - reconsume, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - 
P::silentLineFeed(this); - [[fallthrough]]; - } - case ' ': - case '\t': - case '\f': { - strBufToElementNameString(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME, - reconsume, pos); - NS_HTML5_BREAK(tagnameloop); - } - case '/': { - strBufToElementNameString(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::SELF_CLOSING_START_TAG, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '>': { - strBufToElementNameString(); - state = P::transition(mViewSource.get(), - emitCurrentTagToken(false, pos), - reconsume, pos); - if (shouldSuspend) { - NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - case '\0': { - c = 0xfffd; - [[fallthrough]]; - } - default: { - if (c >= 'A' && c <= 'Z') { - c += 0x20; - } else if (c == '-') { - containsHyphen = true; - } - appendStrBuf(c); - continue; - } - } - } - tagnameloop_end:; - [[fallthrough]]; - } - case BEFORE_ATTRIBUTE_NAME: { - for (;;) { - if (reconsume) { - reconsume = false; - } else { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - } - switch (c) { - case '\r': { - P::silentCarriageReturn(this); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - case ' ': - case '\t': - case '\f': { - continue; - } - case '/': { - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::SELF_CLOSING_START_TAG, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '>': { - state = P::transition(mViewSource.get(), - emitCurrentTagToken(false, pos), - reconsume, pos); - if (shouldSuspend) { - NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - case '\0': { - c = 0xfffd; - [[fallthrough]]; - } - case '\"': - case '\'': - case '<': - case '=': { - if (P::reportErrors) { - errBadCharBeforeAttributeNameOrNull(c); - } - [[fallthrough]]; - } - default: { - if (c >= 'A' && c <= 'Z') { - c += 0x20; - } - attributeLine = line; - 
clearStrBufBeforeUse(); - appendStrBuf(c); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::ATTRIBUTE_NAME, - reconsume, pos); - NS_HTML5_BREAK(beforeattributenameloop); - } - } - } - beforeattributenameloop_end:; - [[fallthrough]]; - } - case ATTRIBUTE_NAME: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '\r': { - P::silentCarriageReturn(this); - attributeNameComplete(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::AFTER_ATTRIBUTE_NAME, - reconsume, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - case ' ': - case '\t': - case '\f': { - attributeNameComplete(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::AFTER_ATTRIBUTE_NAME, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '/': { - attributeNameComplete(); - addAttributeWithoutValue(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::SELF_CLOSING_START_TAG, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '=': { - attributeNameComplete(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BEFORE_ATTRIBUTE_VALUE, - reconsume, pos); - NS_HTML5_BREAK(attributenameloop); - } - case '>': { - attributeNameComplete(); - addAttributeWithoutValue(); - state = P::transition(mViewSource.get(), - emitCurrentTagToken(false, pos), - reconsume, pos); - if (shouldSuspend) { - NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - case '\0': { - c = 0xfffd; - [[fallthrough]]; - } - case '\"': - case '\'': - case '<': { - if (P::reportErrors) { - errQuoteOrLtInAttributeNameOrNull(c); - } - [[fallthrough]]; - } - default: { - if (c >= 'A' && c <= 'Z') { - c += 0x20; - } - appendStrBuf(c); - continue; - } - } - } - attributenameloop_end:; - [[fallthrough]]; - } - case BEFORE_ATTRIBUTE_VALUE: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = 
P::checkChar(this, buf, pos); - switch (c) { - case '\r': { - P::silentCarriageReturn(this); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - case ' ': - case '\t': - case '\f': { - continue; - } - case '\"': { - attributeLine = line; - clearStrBufBeforeUse(); - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::ATTRIBUTE_VALUE_DOUBLE_QUOTED, reconsume, - pos); - NS_HTML5_BREAK(beforeattributevalueloop); - } - case '&': { - attributeLine = line; - clearStrBufBeforeUse(); - reconsume = true; - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::ATTRIBUTE_VALUE_UNQUOTED, reconsume, pos); - - NS_HTML5_CONTINUE(stateloop); - } - case '\'': { - attributeLine = line; - clearStrBufBeforeUse(); - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::ATTRIBUTE_VALUE_SINGLE_QUOTED, reconsume, - pos); - NS_HTML5_CONTINUE(stateloop); - } - case '>': { - if (P::reportErrors) { - errAttributeValueMissing(); - } - addAttributeWithoutValue(); - state = P::transition(mViewSource.get(), - emitCurrentTagToken(false, pos), - reconsume, pos); - if (shouldSuspend) { - NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - case '\0': { - c = 0xfffd; - [[fallthrough]]; - } - case '<': - case '=': - case '`': { - if (P::reportErrors) { - errLtOrEqualsOrGraveInUnquotedAttributeOrNull(c); - } - [[fallthrough]]; - } - default: { - attributeLine = line; - clearStrBufBeforeUse(); - appendStrBuf(c); - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::ATTRIBUTE_VALUE_UNQUOTED, reconsume, pos); - - NS_HTML5_CONTINUE(stateloop); - } - } - } - beforeattributevalueloop_end:; - [[fallthrough]]; - } - case ATTRIBUTE_VALUE_DOUBLE_QUOTED: { - for (;;) { - if (reconsume) { - reconsume = false; - } else { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - } - switch (c) { - case '\"': { - addAttributeWithValue(); - state = P::transition( - 
mViewSource.get(), - nsHtml5Tokenizer::AFTER_ATTRIBUTE_VALUE_QUOTED, reconsume, - pos); - NS_HTML5_BREAK(attributevaluedoublequotedloop); - } - case '&': { - MOZ_ASSERT(!charRefBufLen, - "charRefBufLen not reset after previous use!"); - appendCharRefBuf(c); - setAdditionalAndRememberAmpersandLocation('\"'); - returnState = state; - state = - P::transition(mViewSource.get(), - nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\r': { - appendStrBufCarriageReturn<P>(); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - appendStrBufLineFeed<P>(); - continue; - } - case '\0': { - c = 0xfffd; - [[fallthrough]]; - } - default: { - appendStrBuf(c); - continue; - } - } - } - attributevaluedoublequotedloop_end:; - [[fallthrough]]; - } - case AFTER_ATTRIBUTE_VALUE_QUOTED: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '\r': { - P::silentCarriageReturn(this); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME, - reconsume, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - case ' ': - case '\t': - case '\f': { - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '/': { - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::SELF_CLOSING_START_TAG, - reconsume, pos); - NS_HTML5_BREAK(afterattributevaluequotedloop); - } - case '>': { - state = P::transition(mViewSource.get(), - emitCurrentTagToken(false, pos), - reconsume, pos); - if (shouldSuspend) { - NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - default: { - if (P::reportErrors) { - errNoSpaceBetweenAttributes(); - } - reconsume = true; - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } 
- } - } - afterattributevaluequotedloop_end:; - [[fallthrough]]; - } - case SELF_CLOSING_START_TAG: { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '>': { - state = - P::transition(mViewSource.get(), - emitCurrentTagToken(true, pos), reconsume, pos); - if (shouldSuspend) { - NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - default: { - if (P::reportErrors) { - errSlashNotFollowedByGt(); - } - reconsume = true; - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - case ATTRIBUTE_VALUE_UNQUOTED: { - for (;;) { - if (reconsume) { - reconsume = false; - } else { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - } - switch (c) { - case '\r': { - P::silentCarriageReturn(this); - addAttributeWithValue(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME, - reconsume, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - case ' ': - case '\t': - case '\f': { - addAttributeWithValue(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '&': { - MOZ_ASSERT(!charRefBufLen, - "charRefBufLen not reset after previous use!"); - appendCharRefBuf(c); - setAdditionalAndRememberAmpersandLocation('>'); - returnState = state; - state = - P::transition(mViewSource.get(), - nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '>': { - addAttributeWithValue(); - state = P::transition(mViewSource.get(), - emitCurrentTagToken(false, pos), - reconsume, pos); - if (shouldSuspend) { - NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - case '\0': { - c = 0xfffd; - [[fallthrough]]; - } - case '<': - case '\"': - 
case '\'': - case '=': - case '`': { - if (P::reportErrors) { - errUnquotedAttributeValOrNull(c); - } - [[fallthrough]]; - } - default: { - appendStrBuf(c); - continue; - } - } - } - } - case AFTER_ATTRIBUTE_NAME: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '\r': { - P::silentCarriageReturn(this); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - case ' ': - case '\t': - case '\f': { - continue; - } - case '/': { - addAttributeWithoutValue(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::SELF_CLOSING_START_TAG, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '=': { - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BEFORE_ATTRIBUTE_VALUE, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '>': { - addAttributeWithoutValue(); - state = P::transition(mViewSource.get(), - emitCurrentTagToken(false, pos), - reconsume, pos); - if (shouldSuspend) { - NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - case '\0': { - c = 0xfffd; - [[fallthrough]]; - } - case '\"': - case '\'': - case '<': { - if (P::reportErrors) { - errQuoteOrLtInAttributeNameOrNull(c); - } - [[fallthrough]]; - } - default: { - addAttributeWithoutValue(); - if (c >= 'A' && c <= 'Z') { - c += 0x20; - } - clearStrBufBeforeUse(); - appendStrBuf(c); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::ATTRIBUTE_NAME, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - } - case MARKUP_DECLARATION_OPEN: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '-': { - clearStrBufBeforeUse(); - appendStrBuf(c); - state = - P::transition(mViewSource.get(), - nsHtml5Tokenizer::MARKUP_DECLARATION_HYPHEN, - reconsume, pos); - NS_HTML5_BREAK(markupdeclarationopenloop); - } - case 'd': - case 'D': { - 
clearStrBufBeforeUse(); - appendStrBuf(c); - index = 0; - state = - P::transition(mViewSource.get(), - nsHtml5Tokenizer::MARKUP_DECLARATION_OCTYPE, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '[': { - if (tokenHandler->cdataSectionAllowed()) { - clearStrBufBeforeUse(); - appendStrBuf(c); - index = 0; - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::CDATA_START, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - [[fallthrough]]; - } - default: { - if (P::reportErrors) { - errBogusComment(); - } - clearStrBufBeforeUse(); - reconsume = true; - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BOGUS_COMMENT, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - markupdeclarationopenloop_end:; - [[fallthrough]]; - } - case MARKUP_DECLARATION_HYPHEN: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '-': { - clearStrBufAfterOneHyphen(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::COMMENT_START, - reconsume, pos); - NS_HTML5_BREAK(markupdeclarationhyphenloop); - } - default: { - if (P::reportErrors) { - errBogusComment(); - } - reconsume = true; - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BOGUS_COMMENT, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - markupdeclarationhyphenloop_end:; - [[fallthrough]]; - } - case COMMENT_START: { - reportedConsecutiveHyphens = false; - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '-': { - appendStrBuf(c); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::COMMENT_START_DASH, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '>': { - if (P::reportErrors) { - errPrematureEndOfComment(); - } - emitComment(0, pos); - state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, - reconsume, pos); - if (shouldSuspend) { - 
NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - case '<': { - appendStrBuf(c); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::COMMENT_LESSTHAN, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\r': { - appendStrBufCarriageReturn<P>(); - state = - P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT, - reconsume, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - appendStrBufLineFeed<P>(); - state = - P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT, - reconsume, pos); - NS_HTML5_BREAK(commentstartloop); - } - case '\0': { - c = 0xfffd; - [[fallthrough]]; - } - default: { - appendStrBuf(c); - state = - P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT, - reconsume, pos); - NS_HTML5_BREAK(commentstartloop); - } - } - } - commentstartloop_end:; - [[fallthrough]]; - } - case COMMENT: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '-': { - appendStrBuf(c); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::COMMENT_END_DASH, - reconsume, pos); - NS_HTML5_BREAK(commentloop); - } - case '<': { - appendStrBuf(c); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::COMMENT_LESSTHAN, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\r': { - appendStrBufCarriageReturn<P>(); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - appendStrBufLineFeed<P>(); - continue; - } - case '\0': { - c = 0xfffd; - [[fallthrough]]; - } - default: { - appendStrBuf(c); - continue; - } - } - } - commentloop_end:; - [[fallthrough]]; - } - case COMMENT_END_DASH: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '-': { - appendStrBuf(c); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::COMMENT_END, reconsume, - pos); - NS_HTML5_BREAK(commentenddashloop); - } - case '<': { - appendStrBuf(c); - state = 
P::transition(mViewSource.get(), - nsHtml5Tokenizer::COMMENT_LESSTHAN, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\r': { - appendStrBufCarriageReturn<P>(); - state = - P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT, - reconsume, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - appendStrBufLineFeed<P>(); - state = - P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\0': { - c = 0xfffd; - [[fallthrough]]; - } - default: { - appendStrBuf(c); - state = - P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - commentenddashloop_end:; - [[fallthrough]]; - } - case COMMENT_END: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '>': { - emitComment(2, pos); - state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, - reconsume, pos); - if (shouldSuspend) { - NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - case '-': { - adjustDoubleHyphenAndAppendToStrBufAndErr( - c, reportedConsecutiveHyphens); - reportedConsecutiveHyphens = true; - continue; - } - case '<': { - appendStrBuf(c); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::COMMENT_LESSTHAN, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\r': { - adjustDoubleHyphenAndAppendToStrBufCarriageReturn<P>(); - state = - P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT, - reconsume, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - adjustDoubleHyphenAndAppendToStrBufLineFeed<P>(); - state = - P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '!': { - appendStrBuf(c); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::COMMENT_END_BANG, - reconsume, pos); - NS_HTML5_BREAK(commentendloop); - } - case '\0': { - c = 
0xfffd; - [[fallthrough]]; - } - default: { - adjustDoubleHyphenAndAppendToStrBufAndErr( - c, reportedConsecutiveHyphens); - reportedConsecutiveHyphens = true; - state = - P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - commentendloop_end:; - [[fallthrough]]; - } - case COMMENT_END_BANG: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '>': { - emitComment(3, pos); - state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, - reconsume, pos); - if (shouldSuspend) { - NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - case '-': { - appendStrBuf(c); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::COMMENT_END_DASH, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\r': { - appendStrBufCarriageReturn<P>(); - state = - P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT, - reconsume, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - appendStrBufLineFeed<P>(); - state = - P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\0': { - c = 0xfffd; - [[fallthrough]]; - } - default: { - appendStrBuf(c); - state = - P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - } - case COMMENT_LESSTHAN: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '!': { - appendStrBuf(c); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::COMMENT_LESSTHAN_BANG, - reconsume, pos); - NS_HTML5_BREAK(commentlessthanloop); - } - case '<': { - appendStrBuf(c); - continue; - } - case '-': { - appendStrBuf(c); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::COMMENT_END_DASH, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case 
'\r': { - appendStrBufCarriageReturn<P>(); - state = - P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT, - reconsume, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - appendStrBufLineFeed<P>(); - state = - P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\0': { - c = 0xfffd; - [[fallthrough]]; - } - default: { - appendStrBuf(c); - state = - P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - commentlessthanloop_end:; - [[fallthrough]]; - } - case COMMENT_LESSTHAN_BANG: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '-': { - appendStrBuf(c); - state = - P::transition(mViewSource.get(), - nsHtml5Tokenizer::COMMENT_LESSTHAN_BANG_DASH, - reconsume, pos); - NS_HTML5_BREAK(commentlessthanbangloop); - } - case '<': { - appendStrBuf(c); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::COMMENT_LESSTHAN, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\r': { - appendStrBufCarriageReturn<P>(); - state = - P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT, - reconsume, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - appendStrBufLineFeed<P>(); - state = - P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\0': { - c = 0xfffd; - [[fallthrough]]; - } - default: { - appendStrBuf(c); - state = - P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - commentlessthanbangloop_end:; - [[fallthrough]]; - } - case COMMENT_LESSTHAN_BANG_DASH: { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '-': { - appendStrBuf(c); - state = P::transition( - mViewSource.get(), - 
nsHtml5Tokenizer::COMMENT_LESSTHAN_BANG_DASH_DASH, reconsume, - pos); - break; - } - case '<': { - appendStrBuf(c); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::COMMENT_LESSTHAN, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\r': { - appendStrBufCarriageReturn<P>(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::COMMENT, reconsume, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - appendStrBufLineFeed<P>(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::COMMENT, reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\0': { - c = 0xfffd; - [[fallthrough]]; - } - default: { - appendStrBuf(c); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::COMMENT, reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - } - [[fallthrough]]; - } - case COMMENT_LESSTHAN_BANG_DASH_DASH: { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '>': { - appendStrBuf(c); - emitComment(3, pos); - state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, - reconsume, pos); - if (shouldSuspend) { - NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - case '-': { - if (P::reportErrors) { - errNestedComment(); - } - adjustDoubleHyphenAndAppendToStrBufAndErr( - c, reportedConsecutiveHyphens); - reportedConsecutiveHyphens = true; - state = - P::transition(mViewSource.get(), - nsHtml5Tokenizer::COMMENT_END, reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\r': { - c = '\n'; - P::silentCarriageReturn(this); - if (P::reportErrors) { - errNestedComment(); - } - adjustDoubleHyphenAndAppendToStrBufAndErr( - c, reportedConsecutiveHyphens); - reportedConsecutiveHyphens = true; - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::COMMENT, reconsume, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - if (P::reportErrors) { - errNestedComment(); - } - 
adjustDoubleHyphenAndAppendToStrBufAndErr( - c, reportedConsecutiveHyphens); - reportedConsecutiveHyphens = true; - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::COMMENT, reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '!': { - if (P::reportErrors) { - errNestedComment(); - } - adjustDoubleHyphenAndAppendToStrBufAndErr( - c, reportedConsecutiveHyphens); - reportedConsecutiveHyphens = true; - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::COMMENT_END_BANG, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\0': { - c = 0xfffd; - [[fallthrough]]; - } - default: { - if (P::reportErrors) { - errNestedComment(); - } - adjustDoubleHyphenAndAppendToStrBufAndErr( - c, reportedConsecutiveHyphens); - reportedConsecutiveHyphens = true; - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::COMMENT, reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - case COMMENT_START_DASH: { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '-': { - appendStrBuf(c); - state = - P::transition(mViewSource.get(), - nsHtml5Tokenizer::COMMENT_END, reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '>': { - if (P::reportErrors) { - errPrematureEndOfComment(); - } - emitComment(1, pos); - state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, - reconsume, pos); - if (shouldSuspend) { - NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - case '<': { - appendStrBuf(c); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::COMMENT_LESSTHAN, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\r': { - appendStrBufCarriageReturn<P>(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::COMMENT, reconsume, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - appendStrBufLineFeed<P>(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::COMMENT, reconsume, pos); - 
NS_HTML5_CONTINUE(stateloop); - } - case '\0': { - c = 0xfffd; - [[fallthrough]]; - } - default: { - appendStrBuf(c); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::COMMENT, reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - case CDATA_START: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - if (index < 6) { - if (c == nsHtml5Tokenizer::CDATA_LSQB[index]) { - appendStrBuf(c); - } else { - if (P::reportErrors) { - errBogusComment(); - } - reconsume = true; - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BOGUS_COMMENT, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - index++; - continue; - } else { - clearStrBufAfterUse(); - cstart = pos; - reconsume = true; - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::CDATA_SECTION, reconsume, - pos); - break; - } - } - [[fallthrough]]; - } - case CDATA_SECTION: { - for (;;) { - if (reconsume) { - reconsume = false; - } else { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - } - switch (c) { - case ']': { - flushChars(buf, pos); - state = - P::transition(mViewSource.get(), - nsHtml5Tokenizer::CDATA_RSQB, reconsume, pos); - NS_HTML5_BREAK(cdatasectionloop); - } - case '\0': { - maybeEmitReplacementCharacter(buf, pos); - continue; - } - case '\r': { - emitCarriageReturn<P>(buf, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - default: { - continue; - } - } - } - cdatasectionloop_end:; - [[fallthrough]]; - } - case CDATA_RSQB: { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case ']': { - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::CDATA_RSQB_RSQB, - reconsume, pos); - break; - } - default: { - tokenHandler->characters(nsHtml5Tokenizer::RSQB_RSQB, 0, 1); - cstart = pos; - reconsume = true; - state = 
P::transition(mViewSource.get(), - nsHtml5Tokenizer::CDATA_SECTION, reconsume, - pos); - NS_HTML5_CONTINUE(stateloop); - } - } - [[fallthrough]]; - } - case CDATA_RSQB_RSQB: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case ']': { - tokenHandler->characters(nsHtml5Tokenizer::RSQB_RSQB, 0, 1); - continue; - } - case '>': { - cstart = pos + 1; - state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, - reconsume, pos); - suspendIfRequestedAfterCurrentNonTextToken(); - if (shouldSuspend) { - NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - default: { - tokenHandler->characters(nsHtml5Tokenizer::RSQB_RSQB, 0, 2); - cstart = pos; - reconsume = true; - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::CDATA_SECTION, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - } - case ATTRIBUTE_VALUE_SINGLE_QUOTED: { - for (;;) { - if (reconsume) { - reconsume = false; - } else { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - } - switch (c) { - case '\'': { - addAttributeWithValue(); - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::AFTER_ATTRIBUTE_VALUE_QUOTED, reconsume, - pos); - NS_HTML5_CONTINUE(stateloop); - } - case '&': { - MOZ_ASSERT(!charRefBufLen, - "charRefBufLen not reset after previous use!"); - appendCharRefBuf(c); - setAdditionalAndRememberAmpersandLocation('\''); - returnState = state; - state = - P::transition(mViewSource.get(), - nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE, - reconsume, pos); - NS_HTML5_BREAK(attributevaluesinglequotedloop); - } - case '\r': { - appendStrBufCarriageReturn<P>(); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - appendStrBufLineFeed<P>(); - continue; - } - case '\0': { - c = 0xfffd; - [[fallthrough]]; - } - default: { - appendStrBuf(c); - continue; - } - } - } - attributevaluesinglequotedloop_end:; - [[fallthrough]]; - } - case 
CONSUME_CHARACTER_REFERENCE: { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case ' ': - case '\t': - case '\n': - case '\r': - case '\f': - case '<': - case '&': - case '\0': - case ';': { - emitOrAppendCharRefBuf(returnState); - if (!(returnState & DATA_AND_RCDATA_MASK)) { - cstart = pos; - } - reconsume = true; - state = - P::transition(mViewSource.get(), returnState, reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '#': { - appendCharRefBuf('#'); - state = - P::transition(mViewSource.get(), - nsHtml5Tokenizer::CONSUME_NCR, reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - default: { - if (c == additional) { - emitOrAppendCharRefBuf(returnState); - reconsume = true; - state = P::transition(mViewSource.get(), returnState, reconsume, - pos); - NS_HTML5_CONTINUE(stateloop); - } - if (c >= 'a' && c <= 'z') { - firstCharKey = c - 'a' + 26; - } else if (c >= 'A' && c <= 'Z') { - firstCharKey = c - 'A'; - } else { - if (c == ';') { - if (P::reportErrors) { - errNoNamedCharacterMatch(); - } - } - emitOrAppendCharRefBuf(returnState); - if (!(returnState & DATA_AND_RCDATA_MASK)) { - cstart = pos; - } - reconsume = true; - state = P::transition(mViewSource.get(), returnState, reconsume, - pos); - NS_HTML5_CONTINUE(stateloop); - } - appendCharRefBuf(c); - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::CHARACTER_REFERENCE_HILO_LOOKUP, reconsume, - pos); - break; - } - } - [[fallthrough]]; - } - case CHARACTER_REFERENCE_HILO_LOOKUP: { - { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - int32_t hilo = 0; - if (c <= 'z') { - const int32_t* row = nsHtml5NamedCharactersAccel::HILO_ACCEL[c]; - if (row) { - hilo = row[firstCharKey]; - } - } - if (!hilo) { - if (c == ';') { - if (P::reportErrors) { - errNoNamedCharacterMatch(); - } - } - emitOrAppendCharRefBuf(returnState); - if (!(returnState & DATA_AND_RCDATA_MASK)) { - cstart = pos; - } 
- reconsume = true; - state = - P::transition(mViewSource.get(), returnState, reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - appendCharRefBuf(c); - lo = hilo & 0xFFFF; - hi = hilo >> 16; - entCol = -1; - candidate = -1; - charRefBufMark = 0; - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::CHARACTER_REFERENCE_TAIL, - reconsume, pos); - } - [[fallthrough]]; - } - case CHARACTER_REFERENCE_TAIL: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - entCol++; - for (;;) { - if (hi < lo) { - NS_HTML5_BREAK(outer); - } - if (entCol == nsHtml5NamedCharacters::NAMES[lo].length()) { - candidate = lo; - charRefBufMark = charRefBufLen; - lo++; - } else if (entCol > nsHtml5NamedCharacters::NAMES[lo].length()) { - NS_HTML5_BREAK(outer); - } else if (c > nsHtml5NamedCharacters::NAMES[lo].charAt(entCol)) { - lo++; - } else { - NS_HTML5_BREAK(loloop); - } - } - loloop_end:; - for (;;) { - if (hi < lo) { - NS_HTML5_BREAK(outer); - } - if (entCol == nsHtml5NamedCharacters::NAMES[hi].length()) { - NS_HTML5_BREAK(hiloop); - } - if (entCol > nsHtml5NamedCharacters::NAMES[hi].length()) { - NS_HTML5_BREAK(outer); - } else if (c < nsHtml5NamedCharacters::NAMES[hi].charAt(entCol)) { - hi--; - } else { - NS_HTML5_BREAK(hiloop); - } - } - hiloop_end:; - if (c == ';') { - if (entCol + 1 == nsHtml5NamedCharacters::NAMES[lo].length()) { - candidate = lo; - charRefBufMark = charRefBufLen; - } - NS_HTML5_BREAK(outer); - } - if (hi < lo) { - NS_HTML5_BREAK(outer); - } - appendCharRefBuf(c); - continue; - } - outer_end:; - if (candidate == -1) { - if (c == ';') { - if (P::reportErrors) { - errNoNamedCharacterMatch(); - } - } - emitOrAppendCharRefBuf(returnState); - if (!(returnState & DATA_AND_RCDATA_MASK)) { - cstart = pos; - } - reconsume = true; - state = - P::transition(mViewSource.get(), returnState, reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } else { - const nsHtml5CharacterName& candidateName = - 
nsHtml5NamedCharacters::NAMES[candidate]; - if (!candidateName.length() || - candidateName.charAt(candidateName.length() - 1) != ';') { - if ((returnState & DATA_AND_RCDATA_MASK)) { - char16_t ch; - if (charRefBufMark == charRefBufLen) { - ch = c; - } else { - ch = charRefBuf[charRefBufMark]; - } - if (ch == '=' || (ch >= '0' && ch <= '9') || - (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z')) { - if (c == ';') { - if (P::reportErrors) { - errNoNamedCharacterMatch(); - } - } - appendCharRefBufToStrBuf(); - reconsume = true; - state = P::transition(mViewSource.get(), returnState, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - } - if ((returnState & DATA_AND_RCDATA_MASK)) { - if (P::reportErrors) { - errUnescapedAmpersandInterpretedAsCharacterReference(); - } - } else { - if (P::reportErrors) { - errNotSemicolonTerminated(); - } - } - } - P::completedNamedCharacterReference(mViewSource.get()); - const char16_t* val = nsHtml5NamedCharacters::VALUES[candidate]; - if (!val[1]) { - emitOrAppendOne(val, returnState); - } else { - emitOrAppendTwo(val, returnState); - } - if (charRefBufMark < charRefBufLen) { - if ((returnState & DATA_AND_RCDATA_MASK)) { - appendStrBuf(charRefBuf, charRefBufMark, - charRefBufLen - charRefBufMark); - } else { - tokenHandler->characters(charRefBuf, charRefBufMark, - charRefBufLen - charRefBufMark); - } - } - bool earlyBreak = (c == ';' && charRefBufMark == charRefBufLen); - charRefBufLen = 0; - if (!(returnState & DATA_AND_RCDATA_MASK)) { - cstart = earlyBreak ? 
pos + 1 : pos; - } - reconsume = !earlyBreak; - state = - P::transition(mViewSource.get(), returnState, reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - } - case CONSUME_NCR: { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - value = 0; - seenDigits = false; - switch (c) { - case 'x': - case 'X': { - appendCharRefBuf(c); - state = - P::transition(mViewSource.get(), - nsHtml5Tokenizer::HEX_NCR_LOOP, reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - default: { - reconsume = true; - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::DECIMAL_NRC_LOOP, - reconsume, pos); - break; - } - } - [[fallthrough]]; - } - case DECIMAL_NRC_LOOP: { - for (;;) { - if (reconsume) { - reconsume = false; - } else { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - } - MOZ_ASSERT(value >= 0, "value must not become negative."); - if (c >= '0' && c <= '9') { - seenDigits = true; - if (value <= 0x10FFFF) { - value *= 10; - value += c - '0'; - } - continue; - } else if (c == ';') { - if (seenDigits) { - if (!(returnState & DATA_AND_RCDATA_MASK)) { - cstart = pos + 1; - } - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::HANDLE_NCR_VALUE, - reconsume, pos); - NS_HTML5_BREAK(decimalloop); - } else { - if (P::reportErrors) { - errNoDigitsInNCR(); - } - appendCharRefBuf(';'); - emitOrAppendCharRefBuf(returnState); - if (!(returnState & DATA_AND_RCDATA_MASK)) { - cstart = pos + 1; - } - state = P::transition(mViewSource.get(), returnState, reconsume, - pos); - NS_HTML5_CONTINUE(stateloop); - } - } else { - if (!seenDigits) { - if (P::reportErrors) { - errNoDigitsInNCR(); - } - emitOrAppendCharRefBuf(returnState); - if (!(returnState & DATA_AND_RCDATA_MASK)) { - cstart = pos; - } - reconsume = true; - state = P::transition(mViewSource.get(), returnState, reconsume, - pos); - NS_HTML5_CONTINUE(stateloop); - } else { - if (P::reportErrors) { - errCharRefLacksSemicolon(); - 
} - if (!(returnState & DATA_AND_RCDATA_MASK)) { - cstart = pos; - } - reconsume = true; - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::HANDLE_NCR_VALUE, - reconsume, pos); - NS_HTML5_BREAK(decimalloop); - } - } - } - decimalloop_end:; - [[fallthrough]]; - } - case HANDLE_NCR_VALUE: { - charRefBufLen = 0; - handleNcrValue(returnState); - state = P::transition(mViewSource.get(), returnState, reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case HEX_NCR_LOOP: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - MOZ_ASSERT(value >= 0, "value must not become negative."); - if (c >= '0' && c <= '9') { - seenDigits = true; - if (value <= 0x10FFFF) { - value *= 16; - value += c - '0'; - } - continue; - } else if (c >= 'A' && c <= 'F') { - seenDigits = true; - if (value <= 0x10FFFF) { - value *= 16; - value += c - 'A' + 10; - } - continue; - } else if (c >= 'a' && c <= 'f') { - seenDigits = true; - if (value <= 0x10FFFF) { - value *= 16; - value += c - 'a' + 10; - } - continue; - } else if (c == ';') { - if (seenDigits) { - if (!(returnState & DATA_AND_RCDATA_MASK)) { - cstart = pos + 1; - } - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::HANDLE_NCR_VALUE, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } else { - if (P::reportErrors) { - errNoDigitsInNCR(); - } - appendCharRefBuf(';'); - emitOrAppendCharRefBuf(returnState); - if (!(returnState & DATA_AND_RCDATA_MASK)) { - cstart = pos + 1; - } - state = P::transition(mViewSource.get(), returnState, reconsume, - pos); - NS_HTML5_CONTINUE(stateloop); - } - } else { - if (!seenDigits) { - if (P::reportErrors) { - errNoDigitsInNCR(); - } - emitOrAppendCharRefBuf(returnState); - if (!(returnState & DATA_AND_RCDATA_MASK)) { - cstart = pos; - } - reconsume = true; - state = P::transition(mViewSource.get(), returnState, reconsume, - pos); - NS_HTML5_CONTINUE(stateloop); - } else { - if (P::reportErrors) { - 
errCharRefLacksSemicolon(); - } - if (!(returnState & DATA_AND_RCDATA_MASK)) { - cstart = pos; - } - reconsume = true; - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::HANDLE_NCR_VALUE, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - } - case PLAINTEXT: { - for (;;) { - if (reconsume) { - reconsume = false; - } else { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - } - switch (c) { - case '\0': { - emitPlaintextReplacementCharacter(buf, pos); - continue; - } - case '\r': { - emitCarriageReturn<P>(buf, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - default: { - continue; - } - } - } - } - case CLOSE_TAG_OPEN: { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '>': { - if (P::reportErrors) { - errLtSlashGt(); - } - cstart = pos + 1; - state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\r': { - P::silentCarriageReturn(this); - if (P::reportErrors) { - errGarbageAfterLtSlash(); - } - clearStrBufBeforeUse(); - appendStrBuf('\n'); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BOGUS_COMMENT, reconsume, - pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - if (P::reportErrors) { - errGarbageAfterLtSlash(); - } - clearStrBufBeforeUse(); - appendStrBuf(c); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BOGUS_COMMENT, reconsume, - pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\0': { - c = 0xfffd; - [[fallthrough]]; - } - default: { - if (c >= 'A' && c <= 'Z') { - c += 0x20; - } - if (c >= 'a' && c <= 'z') { - endTag = true; - clearStrBufBeforeUse(); - appendStrBuf(c); - containsHyphen = false; - state = - P::transition(mViewSource.get(), nsHtml5Tokenizer::TAG_NAME, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } else { - if 
(P::reportErrors) { - errGarbageAfterLtSlash(); - } - clearStrBufBeforeUse(); - appendStrBuf(c); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BOGUS_COMMENT, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - } - case RCDATA: { - for (;;) { - if (reconsume) { - reconsume = false; - switch (c) { - case '&': { - flushChars(buf, pos); - MOZ_ASSERT(!charRefBufLen, - "charRefBufLen not reset after previous use!"); - appendCharRefBuf(c); - setAdditionalAndRememberAmpersandLocation('\0'); - returnState = state; - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE, reconsume, - pos); - NS_HTML5_CONTINUE(stateloop); - } - case '<': { - flushChars(buf, pos); - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::RAWTEXT_RCDATA_LESS_THAN_SIGN, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\0': { - maybeEmitReplacementCharacter(buf, pos); - break; - } - case '\r': { - emitCarriageReturn<P>(buf, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - default: { - break; - } - } - } - rcdatamiddle: - for (;;) { - ++pos; - pos += P::accelerateAdvancementData(this, buf, pos, endPos); - for (;;) { - if (pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '&': { - flushChars(buf, pos); - MOZ_ASSERT(!charRefBufLen, - "charRefBufLen not reset after previous use!"); - appendCharRefBuf(c); - setAdditionalAndRememberAmpersandLocation('\0'); - returnState = state; - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '<': { - flushChars(buf, pos); - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::RAWTEXT_RCDATA_LESS_THAN_SIGN, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\0': { - maybeEmitReplacementCharacter(buf, pos); - 
NS_HTML5_CONTINUE(rcdatamiddle); - } - case '\r': { - emitCarriageReturn<P>(buf, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - NS_HTML5_CONTINUE(rcdatamiddle); - } - default: { - ++pos; - continue; - } - } - } - } - } - } - case RAWTEXT: { - for (;;) { - if (reconsume) { - reconsume = false; - } else { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - } - switch (c) { - case '<': { - flushChars(buf, pos); - returnState = state; - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::RAWTEXT_RCDATA_LESS_THAN_SIGN, reconsume, - pos); - NS_HTML5_BREAK(rawtextloop); - } - case '\0': { - emitReplacementCharacter(buf, pos); - continue; - } - case '\r': { - emitCarriageReturn<P>(buf, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - default: { - continue; - } - } - } - rawtextloop_end:; - [[fallthrough]]; - } - case RAWTEXT_RCDATA_LESS_THAN_SIGN: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '/': { - index = 0; - clearStrBufBeforeUse(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::NON_DATA_END_TAG_NAME, - reconsume, pos); - NS_HTML5_BREAK(rawtextrcdatalessthansignloop); - } - default: { - tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 1); - cstart = pos; - reconsume = true; - state = P::transition(mViewSource.get(), returnState, reconsume, - pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - rawtextrcdatalessthansignloop_end:; - [[fallthrough]]; - } - case NON_DATA_END_TAG_NAME: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - if (!endTagExpectationAsArray) { - tokenHandler->characters(nsHtml5Tokenizer::LT_SOLIDUS, 0, 2); - cstart = pos; - reconsume = true; - state = - P::transition(mViewSource.get(), returnState, reconsume, pos); - 
NS_HTML5_CONTINUE(stateloop); - } else if (index < endTagExpectationAsArray.length) { - char16_t e = endTagExpectationAsArray[index]; - char16_t folded = c; - if (c >= 'A' && c <= 'Z') { - folded += 0x20; - } - if (folded != e) { - tokenHandler->characters(nsHtml5Tokenizer::LT_SOLIDUS, 0, 2); - emitStrBuf(); - cstart = pos; - reconsume = true; - state = P::transition(mViewSource.get(), returnState, reconsume, - pos); - NS_HTML5_CONTINUE(stateloop); - } - appendStrBuf(c); - index++; - continue; - } else { - endTag = true; - tagName = endTagExpectation; - switch (c) { - case '\r': { - P::silentCarriageReturn(this); - clearStrBufAfterUse(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME, - reconsume, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - case ' ': - case '\t': - case '\f': { - clearStrBufAfterUse(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '/': { - clearStrBufAfterUse(); - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::SELF_CLOSING_START_TAG, reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '>': { - clearStrBufAfterUse(); - state = P::transition(mViewSource.get(), - emitCurrentTagToken(false, pos), - reconsume, pos); - if (shouldSuspend) { - NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - default: { - tokenHandler->characters(nsHtml5Tokenizer::LT_SOLIDUS, 0, 2); - emitStrBuf(); - cstart = pos; - reconsume = true; - state = P::transition(mViewSource.get(), returnState, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - } - } - case BOGUS_COMMENT: { - for (;;) { - if (reconsume) { - reconsume = false; - } else { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - } - switch (c) { - case '>': { - emitComment(0, pos); - state = 
P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, - reconsume, pos); - if (shouldSuspend) { - NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - case '-': { - appendStrBuf(c); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BOGUS_COMMENT_HYPHEN, - reconsume, pos); - NS_HTML5_BREAK(boguscommentloop); - } - case '\r': { - appendStrBufCarriageReturn<P>(); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - appendStrBufLineFeed<P>(); - continue; - } - case '\0': { - c = 0xfffd; - [[fallthrough]]; - } - default: { - appendStrBuf(c); - continue; - } - } - } - boguscommentloop_end:; - [[fallthrough]]; - } - case BOGUS_COMMENT_HYPHEN: { - boguscommenthyphenloop: - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '>': { - emitComment(0, pos); - state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, - reconsume, pos); - if (shouldSuspend) { - NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - case '-': { - appendSecondHyphenToBogusComment(); - NS_HTML5_CONTINUE(boguscommenthyphenloop); - } - case '\r': { - appendStrBufCarriageReturn<P>(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BOGUS_COMMENT, - reconsume, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - appendStrBufLineFeed<P>(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BOGUS_COMMENT, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\0': { - c = 0xfffd; - [[fallthrough]]; - } - default: { - appendStrBuf(c); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BOGUS_COMMENT, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - } - case SCRIPT_DATA: { - for (;;) { - if (reconsume) { - reconsume = false; - } else { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - } - switch (c) { - case '<': { - flushChars(buf, pos); - returnState = state; - state = - 
P::transition(mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_LESS_THAN_SIGN, - reconsume, pos); - NS_HTML5_BREAK(scriptdataloop); - } - case '\0': { - emitReplacementCharacter(buf, pos); - continue; - } - case '\r': { - emitCarriageReturn<P>(buf, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - default: { - continue; - } - } - } - scriptdataloop_end:; - [[fallthrough]]; - } - case SCRIPT_DATA_LESS_THAN_SIGN: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '/': { - index = 0; - clearStrBufBeforeUse(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::NON_DATA_END_TAG_NAME, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '!': { - tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 1); - cstart = pos; - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_ESCAPE_START, reconsume, pos); - NS_HTML5_BREAK(scriptdatalessthansignloop); - } - default: { - tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 1); - cstart = pos; - reconsume = true; - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA, reconsume, - pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - scriptdatalessthansignloop_end:; - [[fallthrough]]; - } - case SCRIPT_DATA_ESCAPE_START: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '-': { - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_ESCAPE_START_DASH, reconsume, - pos); - NS_HTML5_BREAK(scriptdataescapestartloop); - } - default: { - reconsume = true; - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA, reconsume, - pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - scriptdataescapestartloop_end:; - [[fallthrough]]; - } - case SCRIPT_DATA_ESCAPE_START_DASH: { - for (;;) { - if (++pos == endPos) { - 
NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '-': { - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_DASH_DASH, reconsume, - pos); - NS_HTML5_BREAK(scriptdataescapestartdashloop); - } - default: { - reconsume = true; - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA, reconsume, - pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - scriptdataescapestartdashloop_end:; - [[fallthrough]]; - } - case SCRIPT_DATA_ESCAPED_DASH_DASH: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '-': { - continue; - } - case '<': { - flushChars(buf, pos); - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '>': { - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA, reconsume, - pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\0': { - emitReplacementCharacter(buf, pos); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED, - reconsume, pos); - NS_HTML5_BREAK(scriptdataescapeddashdashloop); - } - case '\r': { - emitCarriageReturn<P>(buf, pos); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED, - reconsume, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - default: { - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED, - reconsume, pos); - NS_HTML5_BREAK(scriptdataescapeddashdashloop); - } - } - } - scriptdataescapeddashdashloop_end:; - [[fallthrough]]; - } - case SCRIPT_DATA_ESCAPED: { - for (;;) { - if (reconsume) { - reconsume = false; - } else { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - } - switch (c) { - case '-': { - state = P::transition( - mViewSource.get(), - 
nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_DASH, reconsume, pos); - NS_HTML5_BREAK(scriptdataescapedloop); - } - case '<': { - flushChars(buf, pos); - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\0': { - emitReplacementCharacter(buf, pos); - continue; - } - case '\r': { - emitCarriageReturn<P>(buf, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - default: { - continue; - } - } - } - scriptdataescapedloop_end:; - [[fallthrough]]; - } - case SCRIPT_DATA_ESCAPED_DASH: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '-': { - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_DASH_DASH, reconsume, - pos); - NS_HTML5_CONTINUE(stateloop); - } - case '<': { - flushChars(buf, pos); - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN, - reconsume, pos); - NS_HTML5_BREAK(scriptdataescapeddashloop); - } - case '\0': { - emitReplacementCharacter(buf, pos); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\r': { - emitCarriageReturn<P>(buf, pos); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED, - reconsume, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - default: { - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - scriptdataescapeddashloop_end:; - [[fallthrough]]; - } - case SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '/': { - index = 0; - 
clearStrBufBeforeUse(); - returnState = nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED; - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::NON_DATA_END_TAG_NAME, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case 'S': - case 's': { - tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 1); - cstart = pos; - index = 1; - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPE_START, - reconsume, pos); - NS_HTML5_BREAK(scriptdataescapedlessthanloop); - } - default: { - tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 1); - cstart = pos; - reconsume = true; - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - scriptdataescapedlessthanloop_end:; - [[fallthrough]]; - } - case SCRIPT_DATA_DOUBLE_ESCAPE_START: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - MOZ_ASSERT(index > 0); - if (index < 6) { - char16_t folded = c; - if (c >= 'A' && c <= 'Z') { - folded += 0x20; - } - if (folded != nsHtml5Tokenizer::SCRIPT_ARR[index]) { - reconsume = true; - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - index++; - continue; - } - switch (c) { - case '\r': { - emitCarriageReturn<P>(buf, pos); - state = - P::transition(mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, - reconsume, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - case ' ': - case '\t': - case '\f': - case '/': - case '>': { - state = - P::transition(mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, - reconsume, pos); - NS_HTML5_BREAK(scriptdatadoubleescapestartloop); - } - default: { - reconsume = true; - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED, - reconsume, pos); - 
NS_HTML5_CONTINUE(stateloop); - } - } - } - scriptdatadoubleescapestartloop_end:; - [[fallthrough]]; - } - case SCRIPT_DATA_DOUBLE_ESCAPED: { - for (;;) { - if (reconsume) { - reconsume = false; - } else { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - } - switch (c) { - case '-': { - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED_DASH, - reconsume, pos); - NS_HTML5_BREAK(scriptdatadoubleescapedloop); - } - case '<': { - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\0': { - emitReplacementCharacter(buf, pos); - continue; - } - case '\r': { - emitCarriageReturn<P>(buf, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - default: { - continue; - } - } - } - scriptdatadoubleescapedloop_end:; - [[fallthrough]]; - } - case SCRIPT_DATA_DOUBLE_ESCAPED_DASH: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '-': { - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH, - reconsume, pos); - NS_HTML5_BREAK(scriptdatadoubleescapeddashloop); - } - case '<': { - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\0': { - emitReplacementCharacter(buf, pos); - state = - P::transition(mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\r': { - emitCarriageReturn<P>(buf, pos); - state = - P::transition(mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, - reconsume, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - default: 
{ - state = - P::transition(mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - scriptdatadoubleescapeddashloop_end:; - [[fallthrough]]; - } - case SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '-': { - continue; - } - case '<': { - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN, - reconsume, pos); - NS_HTML5_BREAK(scriptdatadoubleescapeddashdashloop); - } - case '>': { - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA, reconsume, - pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\0': { - emitReplacementCharacter(buf, pos); - state = - P::transition(mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\r': { - emitCarriageReturn<P>(buf, pos); - state = - P::transition(mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, - reconsume, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - default: { - state = - P::transition(mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - scriptdatadoubleescapeddashdashloop_end:; - [[fallthrough]]; - } - case SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '/': { - index = 0; - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPE_END, reconsume, - pos); - NS_HTML5_BREAK(scriptdatadoubleescapedlessthanloop); - } - default: { - reconsume = true; - state = - P::transition(mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, - reconsume, pos); - 
NS_HTML5_CONTINUE(stateloop); - } - } - } - scriptdatadoubleescapedlessthanloop_end:; - [[fallthrough]]; - } - case SCRIPT_DATA_DOUBLE_ESCAPE_END: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - if (index < 6) { - char16_t folded = c; - if (c >= 'A' && c <= 'Z') { - folded += 0x20; - } - if (folded != nsHtml5Tokenizer::SCRIPT_ARR[index]) { - reconsume = true; - state = - P::transition(mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - index++; - continue; - } - switch (c) { - case '\r': { - emitCarriageReturn<P>(buf, pos); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED, - reconsume, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - case ' ': - case '\t': - case '\f': - case '/': - case '>': { - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - default: { - reconsume = true; - state = - P::transition(mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - } - case MARKUP_DECLARATION_OCTYPE: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - if (index < 6) { - char16_t folded = c; - if (c >= 'A' && c <= 'Z') { - folded += 0x20; - } - if (folded == nsHtml5Tokenizer::OCTYPE[index]) { - appendStrBuf(c); - } else { - if (P::reportErrors) { - errBogusComment(); - } - reconsume = true; - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BOGUS_COMMENT, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - index++; - continue; - } else { - reconsume = true; - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::DOCTYPE, reconsume, pos); - NS_HTML5_BREAK(markupdeclarationdoctypeloop); - } - } - 
markupdeclarationdoctypeloop_end:; - [[fallthrough]]; - } - case DOCTYPE: { - for (;;) { - if (reconsume) { - reconsume = false; - } else { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - } - initDoctypeFields(); - switch (c) { - case '\r': { - P::silentCarriageReturn(this); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BEFORE_DOCTYPE_NAME, - reconsume, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - case ' ': - case '\t': - case '\f': { - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BEFORE_DOCTYPE_NAME, - reconsume, pos); - NS_HTML5_BREAK(doctypeloop); - } - default: { - if (P::reportErrors) { - errMissingSpaceBeforeDoctypeName(); - } - reconsume = true; - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BEFORE_DOCTYPE_NAME, - reconsume, pos); - NS_HTML5_BREAK(doctypeloop); - } - } - } - doctypeloop_end:; - [[fallthrough]]; - } - case BEFORE_DOCTYPE_NAME: { - for (;;) { - if (reconsume) { - reconsume = false; - } else { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - } - switch (c) { - case '\r': { - P::silentCarriageReturn(this); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - case ' ': - case '\t': - case '\f': { - continue; - } - case '>': { - if (P::reportErrors) { - errNamelessDoctype(); - } - forceQuirks = true; - emitDoctypeToken(pos); - state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, - reconsume, pos); - if (shouldSuspend) { - NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - case '\0': { - c = 0xfffd; - [[fallthrough]]; - } - default: { - if (c >= 'A' && c <= 'Z') { - c += 0x20; - } - clearStrBufBeforeUse(); - appendStrBuf(c); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::DOCTYPE_NAME, reconsume, - pos); - NS_HTML5_BREAK(beforedoctypenameloop); - } - } - } 
- beforedoctypenameloop_end:; - [[fallthrough]]; - } - case DOCTYPE_NAME: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '\r': { - P::silentCarriageReturn(this); - strBufToDoctypeName(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::AFTER_DOCTYPE_NAME, - reconsume, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - case ' ': - case '\t': - case '\f': { - strBufToDoctypeName(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::AFTER_DOCTYPE_NAME, - reconsume, pos); - NS_HTML5_BREAK(doctypenameloop); - } - case '>': { - strBufToDoctypeName(); - emitDoctypeToken(pos); - state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, - reconsume, pos); - if (shouldSuspend) { - NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - case '\0': { - c = 0xfffd; - [[fallthrough]]; - } - default: { - if (c >= 'A' && c <= 'Z') { - c += 0x0020; - } - appendStrBuf(c); - continue; - } - } - } - doctypenameloop_end:; - [[fallthrough]]; - } - case AFTER_DOCTYPE_NAME: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '\r': { - P::silentCarriageReturn(this); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - case ' ': - case '\t': - case '\f': { - continue; - } - case '>': { - emitDoctypeToken(pos); - state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, - reconsume, pos); - if (shouldSuspend) { - NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - case 'p': - case 'P': { - index = 0; - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::DOCTYPE_UBLIC, - reconsume, pos); - NS_HTML5_BREAK(afterdoctypenameloop); - } - case 's': - case 'S': { - index = 0; - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::DOCTYPE_YSTEM, - 
reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - default: { - bogusDoctype(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BOGUS_DOCTYPE, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - afterdoctypenameloop_end:; - [[fallthrough]]; - } - case DOCTYPE_UBLIC: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - if (index < 5) { - char16_t folded = c; - if (c >= 'A' && c <= 'Z') { - folded += 0x20; - } - if (folded != nsHtml5Tokenizer::UBLIC[index]) { - bogusDoctype(); - reconsume = true; - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BOGUS_DOCTYPE, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - index++; - continue; - } else { - reconsume = true; - state = - P::transition(mViewSource.get(), - nsHtml5Tokenizer::AFTER_DOCTYPE_PUBLIC_KEYWORD, - reconsume, pos); - NS_HTML5_BREAK(doctypeublicloop); - } - } - doctypeublicloop_end:; - [[fallthrough]]; - } - case AFTER_DOCTYPE_PUBLIC_KEYWORD: { - for (;;) { - if (reconsume) { - reconsume = false; - } else { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - } - switch (c) { - case '\r': { - P::silentCarriageReturn(this); - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::BEFORE_DOCTYPE_PUBLIC_IDENTIFIER, - reconsume, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - case ' ': - case '\t': - case '\f': { - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::BEFORE_DOCTYPE_PUBLIC_IDENTIFIER, - reconsume, pos); - NS_HTML5_BREAK(afterdoctypepublickeywordloop); - } - case '\"': { - if (P::reportErrors) { - errNoSpaceBetweenDoctypePublicKeywordAndQuote(); - } - clearStrBufBeforeUse(); - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\'': { - if (P::reportErrors) { 
- errNoSpaceBetweenDoctypePublicKeywordAndQuote(); - } - clearStrBufBeforeUse(); - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '>': { - if (P::reportErrors) { - errExpectedPublicId(); - } - forceQuirks = true; - emitDoctypeToken(pos); - state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, - reconsume, pos); - if (shouldSuspend) { - NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - default: { - bogusDoctype(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BOGUS_DOCTYPE, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - afterdoctypepublickeywordloop_end:; - [[fallthrough]]; - } - case BEFORE_DOCTYPE_PUBLIC_IDENTIFIER: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '\r': { - P::silentCarriageReturn(this); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - case ' ': - case '\t': - case '\f': { - continue; - } - case '\"': { - clearStrBufBeforeUse(); - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED, - reconsume, pos); - NS_HTML5_BREAK(beforedoctypepublicidentifierloop); - } - case '\'': { - clearStrBufBeforeUse(); - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '>': { - if (P::reportErrors) { - errExpectedPublicId(); - } - forceQuirks = true; - emitDoctypeToken(pos); - state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, - reconsume, pos); - if (shouldSuspend) { - NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - default: { - bogusDoctype(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BOGUS_DOCTYPE, - reconsume, pos); - 
NS_HTML5_CONTINUE(stateloop); - } - } - } - beforedoctypepublicidentifierloop_end:; - [[fallthrough]]; - } - case DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '\"': { - publicIdentifier = strBufToString(); - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::AFTER_DOCTYPE_PUBLIC_IDENTIFIER, - reconsume, pos); - NS_HTML5_BREAK(doctypepublicidentifierdoublequotedloop); - } - case '>': { - if (P::reportErrors) { - errGtInPublicId(); - } - forceQuirks = true; - publicIdentifier = strBufToString(); - emitDoctypeToken(pos); - state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, - reconsume, pos); - if (shouldSuspend) { - NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - case '\r': { - appendStrBufCarriageReturn<P>(); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - appendStrBufLineFeed<P>(); - continue; - } - case '\0': { - c = 0xfffd; - [[fallthrough]]; - } - default: { - appendStrBuf(c); - continue; - } - } - } - doctypepublicidentifierdoublequotedloop_end:; - [[fallthrough]]; - } - case AFTER_DOCTYPE_PUBLIC_IDENTIFIER: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '\r': { - P::silentCarriageReturn(this); - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer:: - BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS, - reconsume, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - case ' ': - case '\t': - case '\f': { - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer:: - BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS, - reconsume, pos); - NS_HTML5_BREAK(afterdoctypepublicidentifierloop); - } - case '>': { - emitDoctypeToken(pos); - state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, - reconsume, pos); - if (shouldSuspend) { - 
NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - case '\"': { - if (P::reportErrors) { - errNoSpaceBetweenPublicAndSystemIds(); - } - clearStrBufBeforeUse(); - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\'': { - if (P::reportErrors) { - errNoSpaceBetweenPublicAndSystemIds(); - } - clearStrBufBeforeUse(); - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - default: { - bogusDoctype(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BOGUS_DOCTYPE, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - afterdoctypepublicidentifierloop_end:; - [[fallthrough]]; - } - case BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '\r': { - P::silentCarriageReturn(this); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - case ' ': - case '\t': - case '\f': { - continue; - } - case '>': { - emitDoctypeToken(pos); - state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, - reconsume, pos); - if (shouldSuspend) { - NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - case '\"': { - clearStrBufBeforeUse(); - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, - reconsume, pos); - NS_HTML5_BREAK(betweendoctypepublicandsystemidentifiersloop); - } - case '\'': { - clearStrBufBeforeUse(); - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - default: { - bogusDoctype(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BOGUS_DOCTYPE, - 
reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - betweendoctypepublicandsystemidentifiersloop_end:; - [[fallthrough]]; - } - case DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '\"': { - systemIdentifier = strBufToString(); - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::AFTER_DOCTYPE_SYSTEM_IDENTIFIER, - reconsume, pos); - NS_HTML5_BREAK(doctypesystemidentifierdoublequotedloop); - } - case '>': { - if (P::reportErrors) { - errGtInSystemId(); - } - forceQuirks = true; - systemIdentifier = strBufToString(); - emitDoctypeToken(pos); - state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, - reconsume, pos); - if (shouldSuspend) { - NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - case '\r': { - appendStrBufCarriageReturn<P>(); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - appendStrBufLineFeed<P>(); - continue; - } - case '\0': { - c = 0xfffd; - [[fallthrough]]; - } - default: { - appendStrBuf(c); - continue; - } - } - } - doctypesystemidentifierdoublequotedloop_end:; - [[fallthrough]]; - } - case AFTER_DOCTYPE_SYSTEM_IDENTIFIER: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '\r': { - P::silentCarriageReturn(this); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - case ' ': - case '\t': - case '\f': { - continue; - } - case '>': { - emitDoctypeToken(pos); - state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, - reconsume, pos); - if (shouldSuspend) { - NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - default: { - bogusDoctypeWithoutQuirks(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BOGUS_DOCTYPE, - reconsume, pos); - NS_HTML5_BREAK(afterdoctypesystemidentifierloop); - } - } - } - 
afterdoctypesystemidentifierloop_end:; - [[fallthrough]]; - } - case BOGUS_DOCTYPE: { - for (;;) { - if (reconsume) { - reconsume = false; - } else { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - } - switch (c) { - case '>': { - emitDoctypeToken(pos); - state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, - reconsume, pos); - if (shouldSuspend) { - NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - case '\r': { - P::silentCarriageReturn(this); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - default: { - continue; - } - } - } - } - case DOCTYPE_YSTEM: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - if (index < 5) { - char16_t folded = c; - if (c >= 'A' && c <= 'Z') { - folded += 0x20; - } - if (folded != nsHtml5Tokenizer::YSTEM[index]) { - bogusDoctype(); - reconsume = true; - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BOGUS_DOCTYPE, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - index++; - NS_HTML5_CONTINUE(stateloop); - } else { - reconsume = true; - state = - P::transition(mViewSource.get(), - nsHtml5Tokenizer::AFTER_DOCTYPE_SYSTEM_KEYWORD, - reconsume, pos); - NS_HTML5_BREAK(doctypeystemloop); - } - } - doctypeystemloop_end:; - [[fallthrough]]; - } - case AFTER_DOCTYPE_SYSTEM_KEYWORD: { - for (;;) { - if (reconsume) { - reconsume = false; - } else { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - } - switch (c) { - case '\r': { - P::silentCarriageReturn(this); - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::BEFORE_DOCTYPE_SYSTEM_IDENTIFIER, - reconsume, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - case ' ': - case '\t': - case '\f': { - state = P::transition( - mViewSource.get(), - 
nsHtml5Tokenizer::BEFORE_DOCTYPE_SYSTEM_IDENTIFIER, - reconsume, pos); - NS_HTML5_BREAK(afterdoctypesystemkeywordloop); - } - case '\"': { - if (P::reportErrors) { - errNoSpaceBetweenDoctypeSystemKeywordAndQuote(); - } - clearStrBufBeforeUse(); - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\'': { - if (P::reportErrors) { - errNoSpaceBetweenDoctypeSystemKeywordAndQuote(); - } - clearStrBufBeforeUse(); - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '>': { - if (P::reportErrors) { - errExpectedPublicId(); - } - forceQuirks = true; - emitDoctypeToken(pos); - state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, - reconsume, pos); - if (shouldSuspend) { - NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - default: { - bogusDoctype(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BOGUS_DOCTYPE, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - afterdoctypesystemkeywordloop_end:; - [[fallthrough]]; - } - case BEFORE_DOCTYPE_SYSTEM_IDENTIFIER: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '\r': { - P::silentCarriageReturn(this); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - case ' ': - case '\t': - case '\f': { - continue; - } - case '\"': { - clearStrBufBeforeUse(); - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\'': { - clearStrBufBeforeUse(); - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, - reconsume, pos); - 
NS_HTML5_BREAK(beforedoctypesystemidentifierloop); - } - case '>': { - if (P::reportErrors) { - errExpectedSystemId(); - } - forceQuirks = true; - emitDoctypeToken(pos); - state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, - reconsume, pos); - if (shouldSuspend) { - NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - default: { - bogusDoctype(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BOGUS_DOCTYPE, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - beforedoctypesystemidentifierloop_end:; - [[fallthrough]]; - } - case DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '\'': { - systemIdentifier = strBufToString(); - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::AFTER_DOCTYPE_SYSTEM_IDENTIFIER, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '>': { - if (P::reportErrors) { - errGtInSystemId(); - } - forceQuirks = true; - systemIdentifier = strBufToString(); - emitDoctypeToken(pos); - state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, - reconsume, pos); - if (shouldSuspend) { - NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - case '\r': { - appendStrBufCarriageReturn<P>(); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - appendStrBufLineFeed<P>(); - continue; - } - case '\0': { - c = 0xfffd; - [[fallthrough]]; - } - default: { - appendStrBuf(c); - continue; - } - } - } - } - case DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '\'': { - publicIdentifier = strBufToString(); - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::AFTER_DOCTYPE_PUBLIC_IDENTIFIER, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '>': { - if (P::reportErrors) { - errGtInPublicId(); - } - 
forceQuirks = true; - publicIdentifier = strBufToString(); - emitDoctypeToken(pos); - state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, - reconsume, pos); - if (shouldSuspend) { - NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - case '\r': { - appendStrBufCarriageReturn<P>(); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - appendStrBufLineFeed<P>(); - continue; - } - case '\0': { - c = 0xfffd; - [[fallthrough]]; - } - default: { - appendStrBuf(c); - continue; - } - } - } - } - case PROCESSING_INSTRUCTION: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '\?': { - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::PROCESSING_INSTRUCTION_QUESTION_MARK, - reconsume, pos); - NS_HTML5_BREAK(processinginstructionloop); - } - default: { - continue; - } - } - } - processinginstructionloop_end:; - [[fallthrough]]; - } - case PROCESSING_INSTRUCTION_QUESTION_MARK: { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '>': { - state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, - reconsume, pos); - suspendIfRequestedAfterCurrentNonTextToken(); - if (shouldSuspend) { - NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - default: { - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::PROCESSING_INSTRUCTION, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - } - } - stateloop_end:; - flushChars(buf, pos); - stateSave = state; - returnStateSave = returnState; - return pos; - } - + int32_t stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* buf, + bool reconsume, int32_t returnState, int32_t endPos); void initDoctypeFields(); template <class P> - inline void adjustDoubleHyphenAndAppendToStrBufCarriageReturn() { - P::silentCarriageReturn(this); - adjustDoubleHyphenAndAppendToStrBufAndErr('\n', false); - } - + void 
adjustDoubleHyphenAndAppendToStrBufCarriageReturn(); template <class P> - inline void adjustDoubleHyphenAndAppendToStrBufLineFeed() { - P::silentLineFeed(this); - adjustDoubleHyphenAndAppendToStrBufAndErr('\n', false); - } - + void adjustDoubleHyphenAndAppendToStrBufLineFeed(); template <class P> - inline void appendStrBufLineFeed() { - P::silentLineFeed(this); - appendStrBuf('\n'); - } - + void appendStrBufLineFeed(); template <class P> - inline void appendStrBufCarriageReturn() { - P::silentCarriageReturn(this); - appendStrBuf('\n'); - } - + void appendStrBufCarriageReturn(); template <class P> - inline void emitCarriageReturn(char16_t* buf, int32_t pos) { - P::silentCarriageReturn(this); - flushChars(buf, pos); - tokenHandler->characters(nsHtml5Tokenizer::LF, 0, 1); - cstart = INT32_MAX; - } - + void emitCarriageReturn(char16_t* buf, int32_t pos); void emitReplacementCharacter(char16_t* buf, int32_t pos); void maybeEmitReplacementCharacter(char16_t* buf, int32_t pos); void emitPlaintextReplacementCharacter(char16_t* buf, int32_t pos); - inline void setAdditionalAndRememberAmpersandLocation(char16_t add) { - additional = add; - } - + void setAdditionalAndRememberAmpersandLocation(char16_t add); void bogusDoctype(); void bogusDoctypeWithoutQuirks(); void handleNcrValue(int32_t returnState); @@ -4584,13 +434,7 @@ class nsHtml5Tokenizer { private: void emitDoctypeToken(int32_t pos); - inline void suspendIfRequestedAfterCurrentNonTextToken() { - if (suspendAfterCurrentNonTextToken) { - suspendAfterCurrentNonTextToken = false; - shouldSuspend = true; - } - } - + void suspendIfRequestedAfterCurrentNonTextToken(); void suspendAfterCurrentTokenIfNotInText(); bool suspensionAfterCurrentNonTextTokenPending(); @@ -4598,29 +442,13 @@ class nsHtml5Tokenizer { bool internalEncodingDeclaration(nsHtml5String internalCharset); private: - inline void emitOrAppendTwo(const char16_t* val, int32_t returnState) { - if ((returnState & DATA_AND_RCDATA_MASK)) { - appendStrBuf(val[0]); - 
appendStrBuf(val[1]); - } else { - tokenHandler->characters(val, 0, 2); - } - } - - inline void emitOrAppendOne(const char16_t* val, int32_t returnState) { - if ((returnState & DATA_AND_RCDATA_MASK)) { - appendStrBuf(val[0]); - } else { - tokenHandler->characters(val, 0, 1); - } - } + void emitOrAppendTwo(const char16_t* val, int32_t returnState); + void emitOrAppendOne(const char16_t* val, int32_t returnState); public: void end(); - inline void requestSuspension() { shouldSuspend = true; } - - inline bool isInDataState() { return (stateSave == DATA); } - + void requestSuspension(); + bool isInDataState(); void resetToDataState(); void loadState(nsHtml5Tokenizer* other); void initializeWithoutStarting(); diff --git a/parser/html/nsHtml5TokenizerALU.cpp b/parser/html/nsHtml5TokenizerALU.cpp @@ -1,33 +0,0 @@ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ - -#include "nsHtml5Tokenizer.h" -#include "nsHtml5TokenizerLoopPoliciesALU.h" - -int32_t nsHtml5Tokenizer::StateLoopFastestALU(int32_t state, char16_t c, - int32_t pos, char16_t* buf, - bool reconsume, - int32_t returnState, - int32_t endPos) { - return stateLoop<nsHtml5FastestPolicyALU>(state, c, pos, buf, reconsume, - returnState, endPos); -} - -int32_t nsHtml5Tokenizer::StateLoopLineColALU(int32_t state, char16_t c, - int32_t pos, char16_t* buf, - bool reconsume, - int32_t returnState, - int32_t endPos) { - return stateLoop<nsHtml5LineColPolicyALU>(state, c, pos, buf, reconsume, - returnState, endPos); -} - -int32_t nsHtml5Tokenizer::StateLoopViewSourceALU(int32_t state, char16_t c, - int32_t pos, char16_t* buf, - bool reconsume, - int32_t returnState, - int32_t endPos) { - return stateLoop<nsHtml5ViewSourcePolicyALU>(state, c, pos, buf, reconsume, - returnState, endPos); -} diff --git a/parser/html/nsHtml5TokenizerALUStubs.cpp b/parser/html/nsHtml5TokenizerALUStubs.cpp @@ -1,32 +0,0 @@ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ - -#include "nsHtml5Tokenizer.h" - -int32_t nsHtml5Tokenizer::StateLoopFastestALU(int32_t state, char16_t c, - int32_t pos, char16_t* buf, - bool reconsume, - int32_t returnState, - int32_t endPos) { - MOZ_RELEASE_ASSERT(false, "Inconsistent build config"); - return 0; -} - -int32_t nsHtml5Tokenizer::StateLoopLineColALU(int32_t state, char16_t c, - int32_t pos, char16_t* buf, - bool reconsume, - int32_t returnState, - int32_t endPos) { - MOZ_RELEASE_ASSERT(false, "Inconsistent build config"); - return 0; -} - -int32_t nsHtml5Tokenizer::StateLoopViewSourceALU(int32_t state, char16_t c, - int32_t pos, char16_t* buf, - bool reconsume, - int32_t returnState, - int32_t endPos) { - MOZ_RELEASE_ASSERT(false, "Inconsistent build config"); - return 0; -} diff --git a/parser/html/nsHtml5TokenizerHSupplement.h b/parser/html/nsHtml5TokenizerHSupplement.h @@ -2,48 +2,14 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -friend struct nsHtml5ViewSourcePolicySIMD; -friend struct nsHtml5ViewSourcePolicyALU; -friend struct nsHtml5LineColPolicySIMD; -friend struct nsHtml5LineColPolicyALU; -friend struct nsHtml5FastestPolicySIMD; -friend struct nsHtml5FastestPolicyALU; +friend struct nsHtml5ViewSourcePolicy; +friend struct nsHtml5LineColPolicy; +friend struct nsHtml5FastestPolicy; private: int32_t col; bool nextCharOnNewLine; -// These functions are wrappers for template parametrized stateLoop and -// stateLoopCompilerWorkaround so that the instantiations can go into -// separate compilation units both to allow different compiler flags -// and to make LLVM perform LICM on SIMD constants in functions whose size -// isn't too large for LLVM to perform LICM before LLVM looks for inlining -// opportunities. 
- -int32_t StateLoopFastestSIMD(int32_t state, char16_t c, int32_t pos, - char16_t* buf, bool reconsume, int32_t returnState, - int32_t endPos); - -int32_t StateLoopFastestALU(int32_t state, char16_t c, int32_t pos, - char16_t* buf, bool reconsume, int32_t returnState, - int32_t endPos); - -int32_t StateLoopLineColSIMD(int32_t state, char16_t c, int32_t pos, - char16_t* buf, bool reconsume, int32_t returnState, - int32_t endPos); - -int32_t StateLoopLineColALU(int32_t state, char16_t c, int32_t pos, - char16_t* buf, bool reconsume, int32_t returnState, - int32_t endPos); - -int32_t StateLoopViewSourceSIMD(int32_t state, char16_t c, int32_t pos, - char16_t* buf, bool reconsume, - int32_t returnState, int32_t endPos); - -int32_t StateLoopViewSourceALU(int32_t state, char16_t c, int32_t pos, - char16_t* buf, bool reconsume, - int32_t returnState, int32_t endPos); - public: inline int32_t getColumnNumber() { return col; } diff --git a/parser/html/nsHtml5TokenizerLoopPolicies.h b/parser/html/nsHtml5TokenizerLoopPolicies.h @@ -0,0 +1,123 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef nsHtml5TokenizerLoopPolicies_h +#define nsHtml5TokenizerLoopPolicies_h + +/** + * This policy does not report tokenizer transitions anywhere and does not + * track line and column numbers. To be used for innerHTML. 
+ */ +struct nsHtml5FastestPolicy { + static const bool reportErrors = false; + static int32_t transition(nsHtml5Highlighter* aHighlighter, int32_t aState, + bool aReconsume, int32_t aPos) { + return aState; + } + static void completedNamedCharacterReference( + nsHtml5Highlighter* aHighlighter) {} + + static char16_t checkChar(nsHtml5Tokenizer* aTokenizer, char16_t* buf, + int32_t pos) { + return buf[pos]; + } + + static void silentCarriageReturn(nsHtml5Tokenizer* aTokenizer) { + aTokenizer->lastCR = true; + } + + static void silentLineFeed(nsHtml5Tokenizer* aTokenizer) {} +}; + +/** + * This policy does not report tokenizer transitions anywhere. To be used + * when _not_ viewing source and when not parsing innerHTML (or other + * script execution-preventing fragment). + */ +struct nsHtml5LineColPolicy { + static const bool reportErrors = false; + static int32_t transition(nsHtml5Highlighter* aHighlighter, int32_t aState, + bool aReconsume, int32_t aPos) { + return aState; + } + static void completedNamedCharacterReference( + nsHtml5Highlighter* aHighlighter) {} + + static char16_t checkChar(nsHtml5Tokenizer* aTokenizer, char16_t* buf, + int32_t pos) { + // The name of this method comes from the validator. + // We aren't checking a char here. We read the next + // UTF-16 code unit and, before returning it, adjust + // the line and column numbers. + char16_t c = buf[pos]; + if (MOZ_UNLIKELY(aTokenizer->nextCharOnNewLine)) { + // By changing the line and column here instead + // of doing so eagerly when seeing the line break + // causes the line break itself to be considered + // column-wise at the end of a line. + aTokenizer->line++; + aTokenizer->col = 1; + aTokenizer->nextCharOnNewLine = false; + } else if (MOZ_LIKELY(!NS_IS_LOW_SURROGATE(c))) { + // SpiderMonkey wants to count scalar values + // instead of UTF-16 code units. We omit low + // surrogates from the count so that only the + // high surrogate increments the count for + // two-code-unit scalar values. 
+ // + // It's somewhat questionable from the performance + // perspective to make the human-perceivable column + // count correct for non-BMP characters in the case + // where there is a single scalar value per extended + // grapheme cluster when even on the BMP there are + // various cases where the scalar count doesn't make + // much sense as a human-perceived "column count" due + // to extended grapheme clusters consisting of more + // than one scalar value. + aTokenizer->col++; + } + return c; + } + + static void silentCarriageReturn(nsHtml5Tokenizer* aTokenizer) { + aTokenizer->nextCharOnNewLine = true; + aTokenizer->lastCR = true; + } + + static void silentLineFeed(nsHtml5Tokenizer* aTokenizer) { + aTokenizer->nextCharOnNewLine = true; + } +}; + +/** + * This policy reports the tokenizer transitions to a highlighter. To be used + * when viewing source. + */ +struct nsHtml5ViewSourcePolicy { + static const bool reportErrors = true; + static int32_t transition(nsHtml5Highlighter* aHighlighter, int32_t aState, + bool aReconsume, int32_t aPos) { + return aHighlighter->Transition(aState, aReconsume, aPos); + } + static void completedNamedCharacterReference( + nsHtml5Highlighter* aHighlighter) { + aHighlighter->CompletedNamedCharacterReference(); + } + + static char16_t checkChar(nsHtml5Tokenizer* aTokenizer, char16_t* buf, + int32_t pos) { + return buf[pos]; + } + + static void silentCarriageReturn(nsHtml5Tokenizer* aTokenizer) { + aTokenizer->line++; + aTokenizer->lastCR = true; + } + + static void silentLineFeed(nsHtml5Tokenizer* aTokenizer) { + aTokenizer->line++; + } +}; + +#endif // nsHtml5TokenizerLoopPolicies_h diff --git a/parser/html/nsHtml5TokenizerLoopPoliciesALU.h b/parser/html/nsHtml5TokenizerLoopPoliciesALU.h @@ -1,150 +0,0 @@ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ - -#ifndef nsHtml5TokenizerLoopPoliciesALU_h -#define nsHtml5TokenizerLoopPoliciesALU_h - -/** - * This policy does not report tokenizer transitions anywhere and does not - * track line and column numbers. To be used for innerHTML. Non-SIMD version. - */ -struct nsHtml5FastestPolicyALU { - static const bool reportErrors = false; - MOZ_ALWAYS_INLINE_EVEN_DEBUG static int32_t transition( - nsHtml5Highlighter* aHighlighter, int32_t aState, bool aReconsume, - int32_t aPos) { - return aState; - } - MOZ_ALWAYS_INLINE_EVEN_DEBUG static void completedNamedCharacterReference( - nsHtml5Highlighter* aHighlighter) {} - - MOZ_ALWAYS_INLINE_EVEN_DEBUG static int32_t accelerateAdvancementData( - nsHtml5Tokenizer* aTokenizer, char16_t* buf, int32_t pos, - int32_t endPos) { - return 0; - } - - MOZ_ALWAYS_INLINE_EVEN_DEBUG static char16_t checkChar( - nsHtml5Tokenizer* aTokenizer, char16_t* buf, int32_t pos) { - return buf[pos]; - } - - MOZ_ALWAYS_INLINE_EVEN_DEBUG static void silentCarriageReturn( - nsHtml5Tokenizer* aTokenizer) { - aTokenizer->lastCR = true; - } - - MOZ_ALWAYS_INLINE_EVEN_DEBUG static void silentLineFeed( - nsHtml5Tokenizer* aTokenizer) {} -}; - -/** - * This policy does not report tokenizer transitions anywhere. To be used - * when _not_ viewing source and when not parsing innerHTML (or other - * script execution-preventing fragment). 
- */ -struct nsHtml5LineColPolicyALU { - static const bool reportErrors = false; - MOZ_ALWAYS_INLINE_EVEN_DEBUG static int32_t transition( - nsHtml5Highlighter* aHighlighter, int32_t aState, bool aReconsume, - int32_t aPos) { - return aState; - } - MOZ_ALWAYS_INLINE_EVEN_DEBUG static void completedNamedCharacterReference( - nsHtml5Highlighter* aHighlighter) {} - - MOZ_ALWAYS_INLINE_EVEN_DEBUG static int32_t accelerateAdvancementData( - nsHtml5Tokenizer* aTokenizer, char16_t* buf, int32_t pos, - int32_t endPos) { - return 0; - } - - MOZ_ALWAYS_INLINE_EVEN_DEBUG static char16_t checkChar( - nsHtml5Tokenizer* aTokenizer, char16_t* buf, int32_t pos) { - // The name of this method comes from the validator. - // We aren't checking a char here. We read the next - // UTF-16 code unit and, before returning it, adjust - // the line and column numbers. - char16_t c = buf[pos]; - if (MOZ_UNLIKELY(aTokenizer->nextCharOnNewLine)) { - // By changing the line and column here instead - // of doing so eagerly when seeing the line break - // causes the line break itself to be considered - // column-wise at the end of a line. - aTokenizer->line++; - aTokenizer->col = 1; - aTokenizer->nextCharOnNewLine = false; - } else if (MOZ_LIKELY(!NS_IS_LOW_SURROGATE(c))) { - // SpiderMonkey wants to count scalar values - // instead of UTF-16 code units. We omit low - // surrogates from the count so that only the - // high surrogate increments the count for - // two-code-unit scalar values. - // - // It's somewhat questionable from the performance - // perspective to make the human-perceivable column - // count correct for non-BMP characters in the case - // where there is a single scalar value per extended - // grapheme cluster when even on the BMP there are - // various cases where the scalar count doesn't make - // much sense as a human-perceived "column count" due - // to extended grapheme clusters consisting of more - // than one scalar value. 
- aTokenizer->col++; - } - return c; - } - - MOZ_ALWAYS_INLINE_EVEN_DEBUG static void silentCarriageReturn( - nsHtml5Tokenizer* aTokenizer) { - aTokenizer->nextCharOnNewLine = true; - aTokenizer->lastCR = true; - } - - MOZ_ALWAYS_INLINE_EVEN_DEBUG static void silentLineFeed( - nsHtml5Tokenizer* aTokenizer) { - aTokenizer->nextCharOnNewLine = true; - } -}; - -/** - * This policy reports the tokenizer transitions to a highlighter. To be used - * when viewing source. - */ -struct nsHtml5ViewSourcePolicyALU { - static const bool reportErrors = true; - MOZ_ALWAYS_INLINE_EVEN_DEBUG static int32_t transition( - nsHtml5Highlighter* aHighlighter, int32_t aState, bool aReconsume, - int32_t aPos) { - return aHighlighter->Transition(aState, aReconsume, aPos); - } - MOZ_ALWAYS_INLINE_EVEN_DEBUG static void completedNamedCharacterReference( - nsHtml5Highlighter* aHighlighter) { - aHighlighter->CompletedNamedCharacterReference(); - } - - MOZ_ALWAYS_INLINE_EVEN_DEBUG static int32_t accelerateAdvancementData( - nsHtml5Tokenizer* aTokenizer, char16_t* buf, int32_t pos, - int32_t endPos) { - return 0; - } - - MOZ_ALWAYS_INLINE_EVEN_DEBUG static char16_t checkChar( - nsHtml5Tokenizer* aTokenizer, char16_t* buf, int32_t pos) { - return buf[pos]; - } - - MOZ_ALWAYS_INLINE_EVEN_DEBUG static void silentCarriageReturn( - nsHtml5Tokenizer* aTokenizer) { - aTokenizer->line++; - aTokenizer->lastCR = true; - } - - MOZ_ALWAYS_INLINE_EVEN_DEBUG static void silentLineFeed( - nsHtml5Tokenizer* aTokenizer) { - aTokenizer->line++; - } -}; - -#endif // nsHtml5TokenizerLoopPoliciesALU_h diff --git a/parser/html/nsHtml5TokenizerLoopPoliciesSIMD.h b/parser/html/nsHtml5TokenizerLoopPoliciesSIMD.h @@ -1,211 +0,0 @@ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ - -#ifndef nsHtml5TokenizerLoopPoliciesSIMD_h -#define nsHtml5TokenizerLoopPoliciesSIMD_h - -#include "mozilla/Attributes.h" -#include "mozilla/htmlaccel/htmlaccelNotInline.h" - -/** - * This policy does not report tokenizer transitions anywhere and does not - * track line and column numbers. To be used for innerHTML. - * - * This the SIMD version for aarch64 and SSSE3-enabled x86/x86_64. - */ -struct nsHtml5FastestPolicySIMD { - static const bool reportErrors = false; - MOZ_ALWAYS_INLINE_EVEN_DEBUG static int32_t transition( - nsHtml5Highlighter* aHighlighter, int32_t aState, bool aReconsume, - int32_t aPos) { - return aState; - } - MOZ_ALWAYS_INLINE_EVEN_DEBUG static void completedNamedCharacterReference( - nsHtml5Highlighter* aHighlighter) {} - - MOZ_ALWAYS_INLINE_EVEN_DEBUG static int32_t accelerateAdvancementData( - nsHtml5Tokenizer* aTokenizer, char16_t* buf, int32_t pos, - int32_t endPos) { - // We need to check bounds for the `buf[pos]` access below to be OK. - // Instead of just checking that `pos` isn't equal to `endPos`, let's - // check that have at least one SIMD stride of data in the same branch, - // since if we don't have at least one SIMD stride of data, we don't - // need to proceed. - if (endPos - pos < 16) { - return 0; - } - if (buf[pos] == '<') { - // Quickly handle the case where there is one tag immediately - // after another and the very first thing in the data state is a - // less-than sign. - return 0; - } - return mozilla::htmlaccel::AccelerateDataFastest(buf + pos, buf + endPos); - } - - MOZ_ALWAYS_INLINE_EVEN_DEBUG static char16_t checkChar( - nsHtml5Tokenizer* aTokenizer, char16_t* buf, int32_t pos) { - return buf[pos]; - } - - MOZ_ALWAYS_INLINE_EVEN_DEBUG static void silentCarriageReturn( - nsHtml5Tokenizer* aTokenizer) { - aTokenizer->lastCR = true; - } - - MOZ_ALWAYS_INLINE_EVEN_DEBUG static void silentLineFeed( - nsHtml5Tokenizer* aTokenizer) {} -}; - -/** - * This policy does not report tokenizer transitions anywhere. 
To be used - * when _not_ viewing source and when not parsing innerHTML (or other - * script execution-preventing fragment). - */ -struct nsHtml5LineColPolicySIMD { - static const bool reportErrors = false; - MOZ_ALWAYS_INLINE_EVEN_DEBUG static int32_t transition( - nsHtml5Highlighter* aHighlighter, int32_t aState, bool aReconsume, - int32_t aPos) { - return aState; - } - MOZ_ALWAYS_INLINE_EVEN_DEBUG static void completedNamedCharacterReference( - nsHtml5Highlighter* aHighlighter) {} - - MOZ_ALWAYS_INLINE_EVEN_DEBUG static int32_t accelerateAdvancementData( - nsHtml5Tokenizer* aTokenizer, char16_t* buf, int32_t pos, - int32_t endPos) { - // We need to check bounds for the `buf[pos]` access below to be OK. - // Instead of just checking that `pos` isn't equal to `endPos`, let's - // check that have at least one SIMD stride of data in the same branch, - // since if we don't have at least one SIMD stride of data, we don't - // need to proceed. - if (endPos - pos < 16) { - return 0; - } - char16_t c = buf[pos]; - if (c == '<' || c == '\n') { - // Quickly handle the case where there is one tag immediately - // after another and the very first thing in the data state is a - // less-than sign and the case where a tag is immediately followed - // by a line feed. - return 0; - } - if (MOZ_UNLIKELY(aTokenizer->nextCharOnNewLine)) { - // By changing the line and column here instead - // of doing so eagerly when seeing the line break - // causes the line break itself to be considered - // column-wise at the end of a line. - aTokenizer->line++; - aTokenizer->col = 1; - aTokenizer->nextCharOnNewLine = false; - } - return mozilla::htmlaccel::AccelerateDataLineCol(buf + pos, buf + endPos); - } - - MOZ_ALWAYS_INLINE_EVEN_DEBUG static char16_t checkChar( - nsHtml5Tokenizer* aTokenizer, char16_t* buf, int32_t pos) { - // The name of this method comes from the validator. - // We aren't checking a char here. 
We read the next - // UTF-16 code unit and, before returning it, adjust - // the line and column numbers. - char16_t c = buf[pos]; - if (MOZ_UNLIKELY(aTokenizer->nextCharOnNewLine)) { - // By changing the line and column here instead - // of doing so eagerly when seeing the line break - // causes the line break itself to be considered - // column-wise at the end of a line. - aTokenizer->line++; - aTokenizer->col = 1; - aTokenizer->nextCharOnNewLine = false; - } else if (MOZ_LIKELY(!NS_IS_LOW_SURROGATE(c))) { - // SpiderMonkey wants to count scalar values - // instead of UTF-16 code units. We omit low - // surrogates from the count so that only the - // high surrogate increments the count for - // two-code-unit scalar values. - // - // It's somewhat questionable from the performance - // perspective to make the human-perceivable column - // count correct for non-BMP characters in the case - // where there is a single scalar value per extended - // grapheme cluster when even on the BMP there are - // various cases where the scalar count doesn't make - // much sense as a human-perceived "column count" due - // to extended grapheme clusters consisting of more - // than one scalar value. - aTokenizer->col++; - } - return c; - } - - MOZ_ALWAYS_INLINE_EVEN_DEBUG static void silentCarriageReturn( - nsHtml5Tokenizer* aTokenizer) { - aTokenizer->nextCharOnNewLine = true; - aTokenizer->lastCR = true; - } - - MOZ_ALWAYS_INLINE_EVEN_DEBUG static void silentLineFeed( - nsHtml5Tokenizer* aTokenizer) { - aTokenizer->nextCharOnNewLine = true; - } -}; - -/** - * This policy reports the tokenizer transitions to a highlighter. To be used - * when viewing source. 
- */ -struct nsHtml5ViewSourcePolicySIMD { - static const bool reportErrors = true; - MOZ_ALWAYS_INLINE_EVEN_DEBUG static int32_t transition( - nsHtml5Highlighter* aHighlighter, int32_t aState, bool aReconsume, - int32_t aPos) { - return aHighlighter->Transition(aState, aReconsume, aPos); - } - MOZ_ALWAYS_INLINE_EVEN_DEBUG static void completedNamedCharacterReference( - nsHtml5Highlighter* aHighlighter) { - aHighlighter->CompletedNamedCharacterReference(); - } - - MOZ_ALWAYS_INLINE_EVEN_DEBUG static int32_t accelerateAdvancementData( - nsHtml5Tokenizer* aTokenizer, char16_t* buf, int32_t pos, - int32_t endPos) { - // We need to check bounds for the `buf[pos]` access below to be OK. - // Instead of just checking that `pos` isn't equal to `endPos`, let's - // check that have at least one SIMD stride of data in the same branch, - // since if we don't have at least one SIMD stride of data, we don't - // need to proceed. - if (endPos - pos < 16) { - return 0; - } - char16_t c = buf[pos]; - if (c == '<' || c == '\n') { - // Quickly handle the case where there is one tag immediately - // after another and the very first thing in the data state is a - // less-than sign and the case where a tag is immediately followed - // by a line feed. 
- return 0; - } - return mozilla::htmlaccel::AccelerateDataViewSource(buf + pos, - buf + endPos); - } - - MOZ_ALWAYS_INLINE_EVEN_DEBUG static char16_t checkChar( - nsHtml5Tokenizer* aTokenizer, char16_t* buf, int32_t pos) { - return buf[pos]; - } - - MOZ_ALWAYS_INLINE_EVEN_DEBUG static void silentCarriageReturn( - nsHtml5Tokenizer* aTokenizer) { - aTokenizer->line++; - aTokenizer->lastCR = true; - } - - MOZ_ALWAYS_INLINE_EVEN_DEBUG static void silentLineFeed( - nsHtml5Tokenizer* aTokenizer) { - aTokenizer->line++; - } -}; - -#endif // nsHtml5TokenizerLoopPoliciesSIMD_h diff --git a/parser/html/nsHtml5TokenizerSIMD.cpp b/parser/html/nsHtml5TokenizerSIMD.cpp @@ -1,33 +0,0 @@ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ - -#include "nsHtml5Tokenizer.h" -#include "nsHtml5TokenizerLoopPoliciesSIMD.h" - -int32_t nsHtml5Tokenizer::StateLoopFastestSIMD(int32_t state, char16_t c, - int32_t pos, char16_t* buf, - bool reconsume, - int32_t returnState, - int32_t endPos) { - return stateLoop<nsHtml5FastestPolicySIMD>(state, c, pos, buf, reconsume, - returnState, endPos); -} - -int32_t nsHtml5Tokenizer::StateLoopLineColSIMD(int32_t state, char16_t c, - int32_t pos, char16_t* buf, - bool reconsume, - int32_t returnState, - int32_t endPos) { - return stateLoop<nsHtml5LineColPolicySIMD>(state, c, pos, buf, reconsume, - returnState, endPos); -} - -int32_t nsHtml5Tokenizer::StateLoopViewSourceSIMD(int32_t state, char16_t c, - int32_t pos, char16_t* buf, - bool reconsume, - int32_t returnState, - int32_t endPos) { - return stateLoop<nsHtml5ViewSourcePolicySIMD>(state, c, pos, buf, reconsume, - returnState, endPos); -} diff --git a/parser/html/nsHtml5TokenizerSIMDStubs.cpp b/parser/html/nsHtml5TokenizerSIMDStubs.cpp @@ -1,32 +0,0 @@ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. 
If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ - -#include "nsHtml5Tokenizer.h" - -int32_t nsHtml5Tokenizer::StateLoopFastestSIMD(int32_t state, char16_t c, - int32_t pos, char16_t* buf, - bool reconsume, - int32_t returnState, - int32_t endPos) { - MOZ_RELEASE_ASSERT(false, "Inconsistent build config"); - return 0; -} - -int32_t nsHtml5Tokenizer::StateLoopLineColSIMD(int32_t state, char16_t c, - int32_t pos, char16_t* buf, - bool reconsume, - int32_t returnState, - int32_t endPos) { - MOZ_RELEASE_ASSERT(false, "Inconsistent build config"); - return 0; -} - -int32_t nsHtml5Tokenizer::StateLoopViewSourceSIMD(int32_t state, char16_t c, - int32_t pos, char16_t* buf, - bool reconsume, - int32_t returnState, - int32_t endPos) { - MOZ_RELEASE_ASSERT(false, "Inconsistent build config"); - return 0; -} diff --git a/parser/htmlaccel/gtest/TestHtmlSimd.cpp b/parser/htmlaccel/gtest/TestHtmlSimd.cpp @@ -1,62 +0,0 @@ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ - -#include "gtest/gtest.h" -#include "mozilla/htmlaccel/htmlaccelNotInline.h" - -// Match in the first half -const char16_t HTML_SIMD_TEST_INPUT_LOW[16] = { - 'a', - 0xD834, // Surrogate pair - 0xDD65, '\n', '<', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', -}; - -// Match in the second half -const char16_t HTML_SIMD_TEST_INPUT_HIGH[16] = { - 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'a', - 0xD834, // Surrogate pair - 0xDD65, '\n', '<', 'f', 'g', 'h', -}; - -TEST(HtmlSimd, TestTextNodeAllowSurrogatesAndLf) -{ - int32_t index = mozilla::htmlaccel::AccelerateDataFastest( - HTML_SIMD_TEST_INPUT_LOW, HTML_SIMD_TEST_INPUT_LOW + 16); - ASSERT_EQ(index, 4); -} - -TEST(HtmlSimd, TestTextNodeAllowSurrogatesDisallowLf) -{ - int32_t index = mozilla::htmlaccel::AccelerateDataViewSource( - HTML_SIMD_TEST_INPUT_LOW, HTML_SIMD_TEST_INPUT_LOW + 16); - ASSERT_EQ(index, 3); -} - -TEST(HtmlSimd, TestTextNodeDisallowSurrogatesAndLf) -{ - int32_t index = mozilla::htmlaccel::AccelerateDataLineCol( - HTML_SIMD_TEST_INPUT_LOW, HTML_SIMD_TEST_INPUT_LOW + 16); - ASSERT_EQ(index, 1); -} - -TEST(HtmlSimd, TestTextNodeAllowSurrogatesAndLfHigh) -{ - int32_t index = mozilla::htmlaccel::AccelerateDataFastest( - HTML_SIMD_TEST_INPUT_HIGH, HTML_SIMD_TEST_INPUT_HIGH + 16); - ASSERT_EQ(index, 4 + 8); -} - -TEST(HtmlSimd, TestTextNodeAllowSurrogatesDisallowLfHigh) -{ - int32_t index = mozilla::htmlaccel::AccelerateDataViewSource( - HTML_SIMD_TEST_INPUT_HIGH, HTML_SIMD_TEST_INPUT_HIGH + 16); - ASSERT_EQ(index, 3 + 8); -} - -TEST(HtmlSimd, TestTextNodeDisallowSurrogatesAndLfHigh) -{ - int32_t index = mozilla::htmlaccel::AccelerateDataLineCol( - HTML_SIMD_TEST_INPUT_HIGH, HTML_SIMD_TEST_INPUT_HIGH + 16); - ASSERT_EQ(index, 1 + 8); -} diff --git a/parser/htmlaccel/gtest/moz.build b/parser/htmlaccel/gtest/moz.build @@ -1,15 +0,0 @@ -# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- -# vim: set filetype=python: -# This Source Code Form is subject to the terms of the Mozilla Public -# 
License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. - -if CONFIG["TARGET_CPU"] == "x86_64" or ( - CONFIG["TARGET_CPU"] == "aarch64" and CONFIG["TARGET_ENDIANNESS"] == "little" -): - SOURCES += { - "TestHtmlSimd.cpp", - } - SOURCES["TestHtmlSimd.cpp"].flags += CONFIG["HTML_ACCEL_FLAGS"] - -FINAL_LIBRARY = "xul-gtest" diff --git a/parser/htmlaccel/htmlaccel.h b/parser/htmlaccel/htmlaccel.h @@ -1,322 +0,0 @@ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ - -#ifndef mozilla_htmlaccel_htmlaccel_h -#define mozilla_htmlaccel_htmlaccel_h - -#include <string.h> -#include <stdint.h> - -// Avoid adding more Gecko-specific headers to keep it easy enough to -// copy and paste the contents of this file to Compiler Explorer. -#include "mozilla/Attributes.h" - -// This file provides SIMD code for skipping over characters that -// the caller doesn't need to act upon. For example, this code can -// skip over characters that the HTML tokenizer doesn't need to handle -// specially in a given state or this code could be used to skip over -// characters that don't need to be escaped in an HTML serializer. - -// ISA SUPPORT: Do not include this file unless the compilation unit is -// being compiled either for little-endian aarch64 or for x86/x86_64 with -// at least SSSE3 enabled. -// -// It's probably feasible to extend this to support little-endian POWER -// by defining -// MOZ_ALWAYS_INLINE_EVEN_DEBUG uint8x16_t TableLookup(uint8x16_t table, -// uint8x16_t nibbles) { -// return vec_perm(table, table, nibbles); -// } -// but since I don't have a little-endian POWER system to test with, -// this is left as an exercise to the reader. 
(The x86/x86_64 reduction -// code should be portable to POWER10 using vec_extractm and the aarch64 -// reduction code should be portable to older POWER using vec_max.) -// -// ARMv7 is deliberately not supported due to vqtbl1q_u8 being a newer -// addition to NEON. -#if !defined(__LITTLE_ENDIAN__) -# error "A little-endian target is required." -#endif -#if !(defined(__aarch64__) || defined(__SSSE3__)) -# error "Must be targeting aarch64 or SSSE3." -#endif - -// NOTE: This file uses GCC/clang built-ins that provide SIMD portability. -// Compared to pretending unawareness of what arm_neon.h and tmmintrin.h -// map to in GCC and clang, this has the benefit that the code is not stuck -// at an SSSE3 local maximum but adapts maximally to upgrades to SSE 4.2, -// AVX2, and BMI. (Yes, enabling BMI seems to affect more than just -// __builtin_ctz!) -// (We need to check for __clang__, because clang-cl does not define __GNUC__.) -#if !(defined(__GNUC__) || defined(__clang__)) -# error "A compiler that supports GCC-style portable SIMD is required." -#endif - -// # General -// -// There is an entry point per combination of what characters terminate -// the acceleration loop (i.e. characters that the HTML tokenizer would not -// simply skip over). The shared implementation code is inlined into these -// FFI entry point functions, so the parametrization made inside the FFI -// functions constant-propagates through the implementation internals. -// -// The code examines 16 UTF-16 code units at a time as two 128-bit SIMD -// vectors. First, the bytes are regrouped to so that one SIMD vector -// contains the high halves of the UTF-16 code units (zeros for ASCII/Basic -// Latin) and another one contains the low halves. -// -// In the case of the low half, we mask the vector to take the low 4 bits of -// each 8-bit value and do a lookup from a lookup table contained in a SIMD -// vector. 
The 4 bits index into 16 lanes of the other SIMD vector such that -// we get a vector where the positions corresponding to positions of the -// original code units contain the 8-bit value looked up from by the 4-bit -// index. -// -// The lookup operation is available unconditionally on aarch64. On -// x86/x86_64, it is part of the SSSE3 instruction set extension, which is -// why on x86/x86_64 we must not call into this code unless SSSE3 is -// available. (Each additional level of compiling this code with SSE4.2, -// AVX2, or AVX2 + BMI makes this code shorter, which presumably means more -// efficient, so instead of compiling this just with SSSE3, we compile this -// with AVX2+BMI on x86_64, considering that CPUs with such capabilities -// have been available for 12 years at the time of landing this code.) -// -// The lookup table contains the loop-terminating ASCII characters in the -// positions given by their low 4 bits. For example, the less-than sign is -// U+003C, so the value 0x3C is at index 0xC (decimal 12). Positions that -// don’t correspond to a character of interest have the value 1, except lane -// 1 has the placeholder value 2. This way, characters that we don’t want to -// match anything in the lookup table get a non-matching placeholder: U+0001 -// gets compared with 2 (semantically U+0002) and everything else not of -// interest gets compared with 1 (semantically U+0001) to produce a -// non-matching lane. -// -// This means that instead of comparing the vector of the low halves of the -// UTF-16 code units against multiple constant vectors each filled in all -// lanes with a given ASCII character of interest, the table lookup gives us -// one vector to compare against where each lane can have a different ASCII -// character of interest to compare with. -// -// This requires the ASCII characters of interest to have mutually distinct -// low 4 bits. 
This is true for U+0000, &, <, LF, CR, ", and ', but, -// unfortunately, CR, ] and - share the low 4 bits, so cases where we need -// to include a check for ] or - needs to do a separate check, since CR is -// always in the lookup table. (Checks for ", ', ], and - are not here at -// this time but will come in follow-up patches.) -// -// From these operations, we get a vector of 16 8-bit mask lanes where a -// lane is 0xFF if the low 8 bits of the UTF-16 code unit matched an ASCII -// character that terminates the loop and 0x00 otherwise. We lane-wise -// compare the high halves with zero and AND the resulting mask vector -// together with the mask vector that resulted from processing the low 8 -// bits to confirm which low 8 bits had 0 as the high 8 bits, i.e. the -// UTF-16 code unit really was Basic Latin. -// -// If we have a configuration that requires terminating the loop on -// surrogates, we check the vector containing the high halves of the UTF-16 -// code units for surrogates (by masking certain high bits to compare them -// with a constant) and OR the resulting mask vector together with the -// vector computed above. -// -// Now we have a vector of 16 8-bit mask lanes that corresponds to the input -// of 16 UTF-16 code units to indicate which code units in the run of 16 -// UTF-16 code units require terminating the loop (i.e. must not be skipped -// over). At this point, the handling diverges for x86/x86_64 and aarch64. -// -// ## x86/x86_64 -// -// We convert the SIMD mask into bits in an ALU register. The operation -// returns a 32-bit type, but only the low 16 bits can be non-zero. If the -// integer is non-zero, the loop terminates, since some lane in the mask was -// non-zero. In this case, we return the number of trailing zeros in the -// integer. (We already know must have a non-zero bit somewhere in the low -// 16 bits, so we can’t end up counting to the high half of the 32-bit type.) 
-// Due to the little-endian semantics, the first UTF-16 code unit in the -// input corresponds to the least-significant bit in the integer, so when the -// first UTF-16 code unit in the input is unskippable, the least-significant -// bit in the integer is 1, so there are 0 trailing zeros, i.e. 0 skippable -// UTF-16 code units. -// -// ## aarch64 -// -// We want to know if any lane is the mask is non-zero to decide whether to -// terminate the loop. If there is a non-zero lane, we want to know the -// position of the first (in the content order of the input UTF-16 text) -// non-zero lane. To accomplish these goals, we bitwise AND the mask vector -// with a vector of 16 constants. Since ANDing with a mask lane set to zero -// results in zero, we need all 16 constants to be non-zero. Yet, we need to -// be able to accommodate the possibility of first lane in content order -// being set, which means we need to compute 0 as the result. To be able to -// compute 0 but have the constants be non-zero, the constants are numbers -// that need be subtracted from 16. That is, the constant vector has lanes -// set to numbers from 16 to 1 (inclusive). We do the reduction of the -// resulting SIMD vector to an ALU integer by taking the value of the lane -// with the largest value. -// -// If no mask lane was set, the max operation results in 0, so if the -// integer is zero, the loop continues. Otherwise, we get the number of -// skippable UTF-16 code units by subtracting the integer from 16. That is, -// if the first UTF-16 unit is unstoppable, we get 16 as the max lane value -// and 16-16=0. -// -// # Alignment -// -// These functions use unaligned SIMD loads, because alignment -// doesn't matter on aarch64 CPUs or on x86_64 CPUs from the most -// recent decade or so. It's not worthwhile to add complexity for -// old CPUs. -// -// # Inlining -// -// The public functions here are expected to be called from a loop. 
To give -// LICM the opportunity to hoist the SIMD constants out of the loop, make -// sure that every function on the path from the loop to here is declared -// MOZ_ALWAYS_INLINE_EVEN_DEBUG and that all these and the loop itself are -// compiled with the same instruction set extension flags (if applicable). -// -// # Acknowledments -// -// https://lemire.me/blog/2024/06/08/scan-html-faster-with-simd-instructions-chrome-edition/ - -#if defined(__aarch64__) - -# include <arm_neon.h> - -#else // x86/x86_64 - -# include <tmmintrin.h> -// Using syntax that clang-tidy doesn't like to match GCC guidance. -typedef uint8_t uint8x16_t __attribute__((vector_size(16))); - -#endif - -namespace mozilla::htmlaccel { - -namespace detail { - -#if defined(__aarch64__) -// The idea is that when this is ANDed with the mask, we get 0 in the -// non-match positions and the leftmost match ends up with higest number. -// This way, taking the max value of the result is zero if all positions -// are non-match, and otherwise we get a value that when subtracted from -// 16 indicates the index of the leftmost match. -const uint8x16_t INVERTED_ADVANCES = {16, 15, 14, 13, 12, 11, 10, 9, - 8, 7, 6, 5, 4, 3, 2, 1}; - -MOZ_ALWAYS_INLINE_EVEN_DEBUG uint8x16_t TableLookup(uint8x16_t aTable, - uint8x16_t aNibbles) { - return vqtbl1q_u8(aTable, aNibbles); -} - -#else // x86/x86_64 - -MOZ_ALWAYS_INLINE_EVEN_DEBUG uint8x16_t TableLookup(uint8x16_t aTable, - uint8x16_t aNibbles) { - // GCC wants reinterpret_cast - return reinterpret_cast<uint8x16_t>(_mm_shuffle_epi8(aTable, aNibbles)); -} - -#endif - -// These formulations optimize nicely, so no point in trying something fancier -// to fill all lanes with the same byte. 
-const uint8x16_t ALL_ZEROS = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; -const uint8x16_t NIBBLE_MASK = {0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, - 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF}; -const uint8x16_t SURROGATE_MASK = {0xF8, 0xF8, 0xF8, 0xF8, 0xF8, 0xF8, - 0xF8, 0xF8, 0xF8, 0xF8, 0xF8, 0xF8, - 0xF8, 0xF8, 0xF8, 0xF8}; -const uint8x16_t SURROGATE_MATCH = {0xD8, 0xD8, 0xD8, 0xD8, 0xD8, 0xD8, - 0xD8, 0xD8, 0xD8, 0xD8, 0xD8, 0xD8, - 0xD8, 0xD8, 0xD8, 0xD8}; - -// The approach here supports disallowing up to 16 different -// characters that 1) are in the Latin1 range, i.e. U+00FF or -// below, and 2) do not have the lowest 4 bits in common with -// each other. -// -// The code point value of each disallowed character needs -// to be placed in the vector at the position indexed by the -// low 4 bits of the character (low four bits 0 is the leftmost -// position and low four bits 15 is the rightmost position). -// -// U+0001 neither occurs in typical HTML nor is one of the -// code points we care about, so use 1 as the non-matching -// value. We do care about U+0000, unfortunately. -// We use U+0002 at position 1 to make sure it doesn't -// match, either. That is, we put 1 in the positions we -// don't care about except we put 2 at position 1. - -/// Disallow U+0000, less-than, ampersand, and carriage return. -const uint8x16_t ZERO_LT_AMP_CR = {0, 2, 1, 1, 1, 1, '&', 1, - 1, 1, 1, 1, '<', '\r', 1, 1}; -/// Disallow U+0000, less-than, ampersand, carriage return, and line feed. -const uint8x16_t ZERO_LT_AMP_CR_LF = {0, 2, 1, 1, 1, 1, '&', 1, - 1, 1, '\n', 1, '<', '\r', 1, 1}; - -/// Compute a 16-lane mask for for 16 UTF-16 code units, where a lane -/// is 0x00 if OK to skip and 0xFF in not OK to skip. -MOZ_ALWAYS_INLINE_EVEN_DEBUG uint8x16_t -StrideToMask(const char16_t* aArr /* len = 16 */, uint8x16_t aTable, - bool aAllowSurrogates) { - uint8x16_t first; - uint8x16_t second; - // memcpy generates a single unaligned load instruction with both ISAs. 
- memcpy(&first, aArr, 16); - memcpy(&second, aArr + 8, 16); - // Each shuffle maps to a single instruction on aarch64. - // On x86/x86_64, how efficiently these shuffles maps to instructions - // depends on the level of instruction set extensions chosen, which - // is the main reason that we compile this file at a higher extension - // level than the minimum SSSE3 (and the main reason why this file - // uses GNU C portable SIMD instead of sticking to what's in the - // Intel-defined headers). - uint8x16_t low_halves = __builtin_shufflevector( - first, second, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); - uint8x16_t high_halves = __builtin_shufflevector( - first, second, 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31); - uint8x16_t high_half_matches = high_halves == ALL_ZEROS; - uint8x16_t low_half_matches = - low_halves == TableLookup(aTable, low_halves & NIBBLE_MASK); - uint8x16_t ret = low_half_matches & high_half_matches; - if (!aAllowSurrogates) { // Assumed to be constant-propagated - ret |= (high_halves & SURROGATE_MASK) == SURROGATE_MATCH; - } - return ret; -} - -MOZ_ALWAYS_INLINE_EVEN_DEBUG int32_t AccelerateTextNode(const char16_t* aInput, - const char16_t* aEnd, - uint8x16_t aTable, - bool aAllowSurrogates) { - const char16_t* current = aInput; - while (aEnd - current >= 16) { - uint8x16_t mask = StrideToMask(current, aTable, aAllowSurrogates); -#if defined(__aarch64__) - uint8_t max = vmaxvq_u8(mask & INVERTED_ADVANCES); - if (max != 0) { - return int32_t((current - aInput) + 16 - max); - } -#else // x86/x86_64 - int int_mask = _mm_movemask_epi8(mask); - if (int_mask != 0) { - // The least-significant bit in the integer corresponds to - // the first SIMD lane in text order. Hence, we need to count - // trailing zeros. We already checked that the bits are not - // all zeros, so __builtin_ctz isn't UB. 
- return int32_t((current - aInput) + __builtin_ctz(int_mask)); - } -#endif - current += 16; - } - return int32_t(current - aInput); -} - -} // namespace detail - -// Public entry points are in htmlaccelNotInline.h for now. - -} // namespace mozilla::htmlaccel - -#endif // mozilla_htmlaccel_htmlaccel_h diff --git a/parser/htmlaccel/htmlaccelEnabled.h b/parser/htmlaccel/htmlaccelEnabled.h @@ -1,30 +0,0 @@ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ - -#ifndef mozilla_htmlaccel_htmlaccelEnabled_h -#define mozilla_htmlaccel_htmlaccelEnabled_h - -#if defined(__x86_64__) -# include "mozilla/SSE.h" -#endif - -namespace mozilla::htmlaccel { - -/// This function is appropriate to call when the SIMD path is compiled -/// with `HTML_ACCEL_FLAGS`. -/// -/// Keep this in sync with `HTML_ACCEL_FLAGS` in `toolchain.configure`. -inline bool htmlaccelEnabled() { -#if defined(__aarch64__) && defined(__LITTLE_ENDIAN__) - return true; -#elif defined(__x86_64__) - return mozilla::supports_bmi() && mozilla::supports_avx(); -#else - return false; -#endif -} - -} // namespace mozilla::htmlaccel - -#endif // mozilla_htmlaccel_htmlaccelEnabled_h diff --git a/parser/htmlaccel/htmlaccelNotInline.cpp b/parser/htmlaccel/htmlaccelNotInline.cpp @@ -1,30 +0,0 @@ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ - -#include "mozilla/htmlaccel/htmlaccel.h" -#include "mozilla/htmlaccel/htmlaccelNotInline.h" - -namespace mozilla::htmlaccel { - -/// The innerHTML / DOMParser case for the data state in the HTML parser -MOZ_NEVER_INLINE int32_t AccelerateDataFastest(const char16_t* aPtr, - const char16_t* aEnd) { - return detail::AccelerateTextNode(aPtr, aEnd, detail::ZERO_LT_AMP_CR, true); -} - -/// View Source case for the data state in the HTML parser -MOZ_NEVER_INLINE int32_t AccelerateDataViewSource(const char16_t* aPtr, - const char16_t* aEnd) { - return detail::AccelerateTextNode(aPtr, aEnd, detail::ZERO_LT_AMP_CR_LF, - true); -} - -/// Normal network case for the data state in the HTML parser -MOZ_NEVER_INLINE int32_t AccelerateDataLineCol(const char16_t* aPtr, - const char16_t* aEnd) { - return detail::AccelerateTextNode(aPtr, aEnd, detail::ZERO_LT_AMP_CR_LF, - false); -} - -} // namespace mozilla::htmlaccel diff --git a/parser/htmlaccel/htmlaccelNotInline.h b/parser/htmlaccel/htmlaccelNotInline.h @@ -1,34 +0,0 @@ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ - -#ifndef mozilla_htmlaccel_htmlaccelNotInline_h -#define mozilla_htmlaccel_htmlaccelNotInline_h - -#include "mozilla/Attributes.h" - -namespace mozilla::htmlaccel { -// Logically these should be MOZ_ALWAYS_INLINE_EVEN_DEBUG if LLVM was working -// as expected. However, these are MOZ_NEVER_INLINE to work around -// https://github.com/llvm/llvm-project/issues/160886 . This way, we get -// a little bit of LICM for the SIMD constants that need to be loaded -// from the constant pool instead of getting materialized by splatting -// an immediate. Once the LLVM bug is fixed, these should be changed -// to MOZ_ALWAYS_INLINE_EVEN_DEBUG to allow the constants to move further -// up to the top of nsHtml5Tokenizer::stateLoop. 
- -/// The innerHTML / DOMParser case for the data state in the HTML parser -MOZ_NEVER_INLINE int32_t AccelerateDataFastest(const char16_t* aPtr, - const char16_t* aEnd); - -/// View Source case for the data state in the HTML parser -MOZ_NEVER_INLINE int32_t AccelerateDataViewSource(const char16_t* aPtr, - const char16_t* aEnd); - -/// Normal network case for the data state in the HTML parser -MOZ_NEVER_INLINE int32_t AccelerateDataLineCol(const char16_t* aPtr, - const char16_t* aEnd); - -} // namespace mozilla::htmlaccel - -#endif // mozilla_htmlaccel_htmlaccelNotInline_h diff --git a/parser/htmlaccel/moz.build b/parser/htmlaccel/moz.build @@ -1,29 +0,0 @@ -# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- -# vim: set filetype=python: -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. - -EXPORTS.mozilla.htmlaccel += [ - "htmlaccel.h", - "htmlaccelEnabled.h", - "htmlaccelNotInline.h", -] - -# Make sure the result is consistent with mozilla::htmlaccel::htmlaccelEnabled(). -# -# Due to https://github.com/llvm/llvm-project/issues/160886, the entry points -# need to be _not_ inline and, therefore, need a compilation unit. This should -# go away once the LLVM bug is fixed. - -if (CONFIG["TARGET_CPU"] == "x86_64") or ( - CONFIG["TARGET_CPU"] == "aarch64" and CONFIG["TARGET_ENDIANNESS"] == "little" -): - SOURCES += [ - "htmlaccelNotInline.cpp", - ] - SOURCES["htmlaccelNotInline.cpp"].flags += CONFIG["HTML_ACCEL_FLAGS"] - -TEST_DIRS += ["gtest"] - -FINAL_LIBRARY = "xul" diff --git a/parser/moz.build b/parser/moz.build @@ -7,7 +7,7 @@ with Files("**"): BUG_COMPONENT = ("Core", "DOM: HTML Parser") -DIRS += ["expat", "prototype", "xml", "htmlaccel", "htmlparser", "html"] +DIRS += ["expat", "prototype", "xml", "htmlparser", "html"] EXPORTS += [ "nsCharsetSource.h",