tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

commit 86a403a0d1ffba84084bac978873d36fd47a020b
parent 62e14be41df23e73c62a7aa72fd18e371a942b23
Author: Henri Sivonen <hsivonen@hsivonen.fi>
Date:   Tue, 28 Oct 2025 16:44:08 +0000

Bug 1499682 - SIMD-accelerate the data state in the HTML tokenizer. r=smaug,sergesanspaille

Other tokenizer states and the serializer are potential follow-ups.

The code movement from nsHtml5Tokenizer.cpp to nsHtml5Tokenizer.h is
for enabling the eventual non-unified build of nsHtml5TokenizerSIMD.cpp
once the LLVM bug has been fixed.

Differential Revision: https://phabricator.services.mozilla.com/D227317

Diffstat:
Mbuild/moz.configure/toolchain.configure | 29+++++++++++++++++++++++++++++
Mparser/html/javasrc/Tokenizer.java | 378++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------
Mparser/html/moz.build | 36++++++++++++++++++++++++++++++++++++
Mparser/html/nsHtml5Tokenizer.cpp | 4561+++++--------------------------------------------------------------------------
Mparser/html/nsHtml5Tokenizer.h | 4240++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
Aparser/html/nsHtml5TokenizerALU.cpp | 33+++++++++++++++++++++++++++++++++
Aparser/html/nsHtml5TokenizerALUStubs.cpp | 32++++++++++++++++++++++++++++++++
Mparser/html/nsHtml5TokenizerHSupplement.h | 40+++++++++++++++++++++++++++++++++++++---
Dparser/html/nsHtml5TokenizerLoopPolicies.h | 123-------------------------------------------------------------------------------
Aparser/html/nsHtml5TokenizerLoopPoliciesALU.h | 150+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aparser/html/nsHtml5TokenizerLoopPoliciesSIMD.h | 219+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aparser/html/nsHtml5TokenizerSIMD.cpp | 33+++++++++++++++++++++++++++++++++
Aparser/html/nsHtml5TokenizerSIMDStubs.cpp | 32++++++++++++++++++++++++++++++++
Aparser/htmlaccel/gtest/TestHtmlSimd.cpp | 62++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aparser/htmlaccel/gtest/moz.build | 16++++++++++++++++
Aparser/htmlaccel/htmlaccel.h | 322+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aparser/htmlaccel/htmlaccelEnabled.h | 30++++++++++++++++++++++++++++++
Aparser/htmlaccel/htmlaccelNotInline.cpp | 30++++++++++++++++++++++++++++++
Aparser/htmlaccel/htmlaccelNotInline.h | 34++++++++++++++++++++++++++++++++++
Aparser/htmlaccel/moz.build | 30++++++++++++++++++++++++++++++
Mparser/moz.build | 2+-
21 files changed, 5844 insertions(+), 4588 deletions(-)

diff --git a/build/moz.configure/toolchain.configure b/build/moz.configure/toolchain.configure @@ -3914,6 +3914,35 @@ set_config( ), ) + +@depends(target, c_compiler) +def htmlaccel_config(target, c_compiler): + # Keep this in sync with the mozilla::htmlaccel::htmlaccelEnabled function. + # + # The code compiles on SSSE3, but AVX+BMI generates better code + # and has been available for 12 years at the time of landing this, + # so let's give the best code to users with reasonably recent hardware. + # + # Not enabled on 32-bit x86, due to lack of insight into what hardware is + # representative at this point in time and due to lack of such hardware + # for testing to see what config would actually be an optimization. + # + # aarch64 does not need extra flags. + # + # clang-cl doesn't tolerate -flax-vector-conversions but GCC requires it. + # + # -mavx2 doesn't change codegen vs. -mavx. AVX2 and BMI always co-occur + # in Intel CPUs, but there are AMD CPUs that have AVX and BMI without + # AVX2. 
+ if target.cpu != "x86_64": + return [] + if c_compiler.type == "gcc": + return ["-mavx", "-mbmi", "-flax-vector-conversions"] + return ["-mavx", "-mbmi"] + + +set_config("HTML_ACCEL_FLAGS", htmlaccel_config) + # dtrace support ## option("--enable-dtrace", help="Build with dtrace support") diff --git a/parser/html/javasrc/Tokenizer.java b/parser/html/javasrc/Tokenizer.java @@ -932,7 +932,7 @@ public class Tokenizer implements Locator, Locator2 { // ]NOCPP] - HtmlAttributes emptyAttributes() { + @Inline HtmlAttributes emptyAttributes() { // [NOCPP[ if (newAttributesEachTime) { return new HtmlAttributes(mappingLangToXmlLang); @@ -944,7 +944,7 @@ public class Tokenizer implements Locator, Locator2 { // ]NOCPP] } - @Inline private void appendCharRefBuf(char c) { + private void appendCharRefBuf(char c) { // CPPONLY: assert charRefBufLen < charRefBuf.length: // CPPONLY: "RELEASE: Attempted to overrun charRefBuf!"; charRefBuf[charRefBufLen++] = c; @@ -982,7 +982,7 @@ public class Tokenizer implements Locator, Locator2 { * @param c * the UTF-16 code unit to append */ - @Inline private void appendStrBuf(char c) { + private void appendStrBuf(char c) { // CPPONLY: assert strBufLen < strBuf.length: "Previous buffer length insufficient."; // CPPONLY: if (strBufLen == strBuf.length) { // CPPONLY: if (!EnsureBufferSpace(1)) { @@ -1000,7 +1000,7 @@ public class Tokenizer implements Locator, Locator2 { * * @return the buffer as a string */ - protected String strBufToString() { + @Inline protected String strBufToString() { String str = Portability.newStringFromBuffer(strBuf, 0, strBufLen // CPPONLY: , tokenHandler, !newAttributesEachTime && attributeName == AttributeName.CLASS ); @@ -1014,7 +1014,7 @@ public class Tokenizer implements Locator, Locator2 { * * @return the buffer as local name */ - private void strBufToDoctypeName() { + @Inline private void strBufToDoctypeName() { doctypeName = Portability.newLocalNameFromBuffer(strBuf, strBufLen, interner); clearStrBufAfterUse(); } 
@@ -1025,7 +1025,7 @@ public class Tokenizer implements Locator, Locator2 { * @throws SAXException * if the token handler threw */ - private void emitStrBuf() throws SAXException { + @Inline private void emitStrBuf() throws SAXException { if (strBufLen > 0) { tokenHandler.characters(strBuf, 0, strBufLen); clearStrBufAfterUse(); @@ -1455,12 +1455,6 @@ public class Tokenizer implements Locator, Locator2 { */ int pos = start - 1; - /** - * The index of the first <code>char</code> in <code>buf</code> that is - * part of a coalesced run of character tokens or - * <code>Integer.MAX_VALUE</code> if there is not a current run being - * coalesced. - */ switch (state) { case DATA: case RCDATA: @@ -1486,19 +1480,24 @@ public class Tokenizer implements Locator, Locator2 { break; } - /** - * The number of <code>char</code>s in <code>buf</code> that have - * meaning. (The rest of the array is garbage and should not be - * examined.) - */ // CPPONLY: if (mViewSource) { // CPPONLY: mViewSource.SetBuffer(buffer); - // CPPONLY: pos = stateLoop(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd()); + // CPPONLY: if (htmlaccelEnabled()) { + // CPPONLY: pos = StateLoopViewSourceSIMD(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd()); + // CPPONLY: } else { + // CPPONLY: pos = StateLoopViewSourceALU(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd()); + // CPPONLY: } // CPPONLY: mViewSource.DropBuffer((pos == buffer.getEnd()) ? 
pos : pos + 1); // CPPONLY: } else if (tokenHandler.WantsLineAndColumn()) { - // CPPONLY: pos = stateLoop(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd()); + // CPPONLY: if (htmlaccelEnabled()) { + // CPPONLY: pos = StateLoopLineColSIMD(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd()); + // CPPONLY: } else { + // CPPONLY: pos = StateLoopLineColALU(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd()); + // CPPONLY: } + // CPPONLY: } else if (htmlaccelEnabled()) { + // CPPONLY: pos = StateLoopFastestSIMD(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd()); // CPPONLY: } else { - // CPPONLY: pos = stateLoop(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd()); + // CPPONLY: pos = StateLoopFastestALU(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd()); // CPPONLY: } // [NOCPP[ pos = stateLoop(state, c, pos, buffer.getBuffer(), false, returnState, @@ -1547,7 +1546,7 @@ public class Tokenizer implements Locator, Locator2 { } // ]NOCPP] - @SuppressWarnings("unused") private int stateLoop(int state, char c, + @SuppressWarnings("unused") @Inline private int stateLoop(int state, char c, int pos, @NoLength char[] buf, boolean reconsume, int returnState, int endPos) throws SAXException { boolean reportedConsecutiveHyphens = false; @@ -1623,54 +1622,127 @@ public class Tokenizer implements Locator, Locator2 { switch (state) { case DATA: dataloop: for (;;) { + // Ideally this reconsume block would be a separate state, DATA_RECONSUME above this one + // with fallthrough into this state. However, such a change would be disruptive to + // TransitionHandler and everything that works with returnState. if (reconsume) { reconsume = false; - } else { - if (++pos == endPos) { - break stateloop; + // This is a manual copy of the switch below with break/continue + // adjusted as relevant. Make sure to keep in sync with the switch below! 
+ switch (c) { + case '&': + /* + * U+0026 AMPERSAND (&) Switch to the character + * reference in data state. + */ + flushChars(buf, pos); + assert charRefBufLen == 0: "charRefBufLen not reset after previous use!"; + appendCharRefBuf(c); + setAdditionalAndRememberAmpersandLocation('\u0000'); + returnState = state; + state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos); + continue stateloop; + case '<': + /* + * U+003C LESS-THAN SIGN (<) Switch to the tag + * open state. + */ + flushChars(buf, pos); + + state = transition(state, Tokenizer.TAG_OPEN, reconsume, pos); + // `break` optimizes; `continue stateloop;` would be valid + break dataloop; + case '\u0000': + maybeEmitReplacementCharacter(buf, pos); + break; + case '\r': + emitCarriageReturn(buf, pos); + break stateloop; + case '\n': + silentLineFeed(); + // CPPONLY: MOZ_FALLTHROUGH; + default: + /* + * Anything else Emit the input character as a + * character token. + * + * Stay in the data state. + */ + break; } - c = checkChar(buf, pos); } - switch (c) { - case '&': - /* - * U+0026 AMPERSAND (&) Switch to the character - * reference in data state. - */ - flushChars(buf, pos); - assert charRefBufLen == 0: "charRefBufLen not reset after previous use!"; - appendCharRefBuf(c); - setAdditionalAndRememberAmpersandLocation('\u0000'); - returnState = state; - state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos); - continue stateloop; - case '<': - /* - * U+003C LESS-THAN SIGN (<) Switch to the tag - * open state. - */ - flushChars(buf, pos); - - state = transition(state, Tokenizer.TAG_OPEN, reconsume, pos); - // `break` optimizes; `continue stateloop;` would be valid - break dataloop; - case '\u0000': - maybeEmitReplacementCharacter(buf, pos); - continue; - case '\r': - emitCarriageReturn(buf, pos); - break stateloop; - case '\n': - silentLineFeed(); - // CPPONLY: MOZ_FALLTHROUGH; - default: - /* - * Anything else Emit the input character as a - * character token. 
- * - * Stay in the data state. - */ - continue; + datamiddle: for (;;) { + ++pos; + // Perhaps at some point, it will be appropriate to do SIMD in Java, but not today. + // The line below advances pos by some number of code units that this state is indifferent to. + // CPPONLY: pos += accelerateAdvancementData(buf, pos, endPos); + for (;;) { + if (pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + // Make sure to keep in sync with the switch above in the reconsume block! + switch (c) { + case '&': + /* + * U+0026 AMPERSAND (&) Switch to the character + * reference in data state. + */ + flushChars(buf, pos); + assert charRefBufLen == 0: "charRefBufLen not reset after previous use!"; + appendCharRefBuf(c); + setAdditionalAndRememberAmpersandLocation('\u0000'); + returnState = state; + state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos); + continue stateloop; + case '<': + /* + * U+003C LESS-THAN SIGN (<) Switch to the tag + * open state. + */ + flushChars(buf, pos); + + state = transition(state, Tokenizer.TAG_OPEN, reconsume, pos); + // `break` optimizes; `continue stateloop;` would be valid + break dataloop; + case '\u0000': + maybeEmitReplacementCharacter(buf, pos); + // Continue from above the accelerateAdvancementData call. + continue datamiddle; + case '\r': + emitCarriageReturn(buf, pos); + break stateloop; + case '\n': + silentLineFeed(); + // Continue from above the accelerateAdvancementData call. + continue datamiddle; + default: + /* + * Anything else Emit the input character as a + * character token. + * + * Stay in the data state. + */ + // Don't go back to accelerateAdvancementData to avoid + // bouncing back and forth in a way that doesn't make good + // use of SIMD when we have less than a SIMD stride to go + // or when we come here due to a non-BMP characters. 
+ // The SIMD code doesn't have ALU handling for the remainder + // that is shorter than a SIMD stride, because this case + // in this switch has to exist anyway (for SIMD-unavailable + // and for non-BMP cases) and this innermost loop can serve + // that purpose, too. In the non-BMP case we stay on the + // ALU path until we end up in one of the other cases in this + // switch (e.g. end of line) in order to avoid bouncing back + // and forth when we have text in a non-BMP script instead + // of an isolated emoji. + // + // We need to increment pos when staying in this innermost + // loop! + ++pos; + continue; + } + } } } // CPPONLY: MOZ_FALLTHROUGH; @@ -4002,52 +4074,122 @@ public class Tokenizer implements Locator, Locator2 { // no fallthrough, reordering opportunity case RCDATA: rcdataloop: for (;;) { + // Ideally this reconsume block would be a separate state, RCDATA_RECONSUME above this one + // with fallthrough into this state. However, such a change would be disruptive to + // TransitionHandler and everything that works with returnState. if (reconsume) { reconsume = false; - } else { - if (++pos == endPos) { - break stateloop; + // This is a manual copy of the switch below with break/continue + // adjusted as relevant. Make sure to keep in sync with the switch below! + switch (c) { + case '&': + /* + * U+0026 AMPERSAND (&) Switch to the character + * reference in RCDATA state. + */ + flushChars(buf, pos); + assert charRefBufLen == 0: "charRefBufLen not reset after previous use!"; + appendCharRefBuf(c); + setAdditionalAndRememberAmpersandLocation('\u0000'); + returnState = state; + state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos); + continue stateloop; + case '<': + /* + * U+003C LESS-THAN SIGN (<) Switch to the + * RCDATA less-than sign state. 
+ */ + flushChars(buf, pos); + returnState = state; + state = transition(state, Tokenizer.RAWTEXT_RCDATA_LESS_THAN_SIGN, reconsume, pos); + continue stateloop; + case '\u0000': + maybeEmitReplacementCharacter(buf, pos); + break; + case '\r': + emitCarriageReturn(buf, pos); + break stateloop; + case '\n': + silentLineFeed(); + // CPPONLY: MOZ_FALLTHROUGH; + default: + /* + * Emit the current input character as a + * character token. Stay in the RCDATA state. + */ + break; } - c = checkChar(buf, pos); } - switch (c) { - case '&': - /* - * U+0026 AMPERSAND (&) Switch to the character - * reference in RCDATA state. - */ - flushChars(buf, pos); - assert charRefBufLen == 0: "charRefBufLen not reset after previous use!"; - appendCharRefBuf(c); - setAdditionalAndRememberAmpersandLocation('\u0000'); - returnState = state; - state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos); - continue stateloop; - case '<': - /* - * U+003C LESS-THAN SIGN (<) Switch to the - * RCDATA less-than sign state. - */ - flushChars(buf, pos); - - returnState = state; - state = transition(state, Tokenizer.RAWTEXT_RCDATA_LESS_THAN_SIGN, reconsume, pos); - continue stateloop; - case '\u0000': - emitReplacementCharacter(buf, pos); - continue; - case '\r': - emitCarriageReturn(buf, pos); - break stateloop; - case '\n': - silentLineFeed(); - // CPPONLY: MOZ_FALLTHROUGH; - default: - /* - * Emit the current input character as a - * character token. Stay in the RCDATA state. - */ - continue; + rcdatamiddle: for (;;) { + ++pos; + // Perhaps at some point, it will be appropriate to do SIMD in Java, but not today. + // The line below advances pos by some number of code units that this state is indifferent to. + // RCDATA and DATA have the same set of characters that they are indifferent to, hence accelerateData. 
+ // CPPONLY: pos += accelerateAdvancementData(buf, pos, endPos); + for (;;) { + if (pos == endPos) { + break stateloop; + } + c = checkChar(buf, pos); + // Make sure to keep in sync with the switch above in the reconsume block! + switch (c) { + case '&': + /* + * U+0026 AMPERSAND (&) Switch to the character + * reference in RCDATA state. + */ + flushChars(buf, pos); + assert charRefBufLen == 0: "charRefBufLen not reset after previous use!"; + appendCharRefBuf(c); + setAdditionalAndRememberAmpersandLocation('\u0000'); + returnState = state; + state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos); + continue stateloop; + case '<': + /* + * U+003C LESS-THAN SIGN (<) Switch to the + * RCDATA less-than sign state. + */ + flushChars(buf, pos); + returnState = state; + state = transition(state, Tokenizer.RAWTEXT_RCDATA_LESS_THAN_SIGN, reconsume, pos); + continue stateloop; + case '\u0000': + maybeEmitReplacementCharacter(buf, pos); + // Continue from above the accelerateAdvancementData call. + continue rcdatamiddle; + case '\r': + emitCarriageReturn(buf, pos); + break stateloop; + case '\n': + silentLineFeed(); + // Continue from above the accelerateAdvancementData call. + continue rcdatamiddle; + default: + /* + * Emit the current input character as a + * character token. Stay in the RCDATA state. + */ + // Don't go back to accelerateAdvancementData to avoid + // bouncing back and forth in a way that doesn't make good + // use of SIMD when we have less than a SIMD stride to go + // or when we come here due to a non-BMP characters. + // The SIMD code doesn't have ALU handling for the remainder + // that is shorter than a SIMD stride, because this case + // in this switch has to exist anyway (for SIMD-unavailable + // and for non-BMP cases) and this innermost loop can serve + // that purpose, too. In the non-BMP case we stay on the + // ALU path until we end up in one of the other cases in this + // switch (e.g. 
end of line) in order to avoid bouncing back + // and forth when we have text in a non-BMP script instead + // of an isolated emoji. + // + // We need to increment pos when staying in this innermost + // loop! + ++pos; + continue; + } + } } } // no fallthrough, reordering opportunity @@ -6348,24 +6490,24 @@ public class Tokenizer implements Locator, Locator2 { forceQuirks = false; } - private void adjustDoubleHyphenAndAppendToStrBufCarriageReturn() + @Inline private void adjustDoubleHyphenAndAppendToStrBufCarriageReturn() throws SAXException { silentCarriageReturn(); adjustDoubleHyphenAndAppendToStrBufAndErr('\n', false); } - private void adjustDoubleHyphenAndAppendToStrBufLineFeed() + @Inline private void adjustDoubleHyphenAndAppendToStrBufLineFeed() throws SAXException { silentLineFeed(); adjustDoubleHyphenAndAppendToStrBufAndErr('\n', false); } - private void appendStrBufLineFeed() { + @Inline private void appendStrBufLineFeed() { silentLineFeed(); appendStrBuf('\n'); } - private void appendStrBufCarriageReturn() { + @Inline private void appendStrBufCarriageReturn() { silentCarriageReturn(); appendStrBuf('\n'); } @@ -6383,7 +6525,7 @@ public class Tokenizer implements Locator, Locator2 { // ]NOCPP] - private void emitCarriageReturn(@NoLength char[] buf, int pos) + @Inline private void emitCarriageReturn(@NoLength char[] buf, int pos) throws SAXException { silentCarriageReturn(); flushChars(buf, pos); @@ -6412,7 +6554,7 @@ public class Tokenizer implements Locator, Locator2 { cstart = pos + 1; } - private void setAdditionalAndRememberAmpersandLocation(char add) { + @Inline private void setAdditionalAndRememberAmpersandLocation(char add) { additional = add; // [NOCPP[ ampersandLocation = new LocatorImpl(this); @@ -7077,7 +7219,7 @@ public class Tokenizer implements Locator, Locator2 { * happened in a non-text context, this method turns that deferred suspension * request into an immediately-pending suspension request. 
*/ - private void suspendIfRequestedAfterCurrentNonTextToken() { + @Inline private void suspendIfRequestedAfterCurrentNonTextToken() { if (suspendAfterCurrentNonTextToken) { suspendAfterCurrentNonTextToken = false; shouldSuspend = true; @@ -7221,7 +7363,7 @@ public class Tokenizer implements Locator, Locator2 { * @param val * @throws SAXException */ - private void emitOrAppendTwo(@Const @NoLength char[] val, int returnState) + @Inline private void emitOrAppendTwo(@Const @NoLength char[] val, int returnState) throws SAXException { if ((returnState & DATA_AND_RCDATA_MASK) != 0) { appendStrBuf(val[0]); @@ -7231,7 +7373,7 @@ public class Tokenizer implements Locator, Locator2 { } } - private void emitOrAppendOne(@Const @NoLength char[] val, int returnState) + @Inline private void emitOrAppendOne(@Const @NoLength char[] val, int returnState) throws SAXException { if ((returnState & DATA_AND_RCDATA_MASK) != 0) { appendStrBuf(val[0]); @@ -7268,7 +7410,7 @@ public class Tokenizer implements Locator, Locator2 { } } - public void requestSuspension() { + @Inline public void requestSuspension() { shouldSuspend = true; } @@ -7311,7 +7453,7 @@ public class Tokenizer implements Locator, Locator2 { // ]NOCPP] - public boolean isInDataState() { + @Inline public boolean isInDataState() { return (stateSave == DATA); } diff --git a/parser/html/moz.build b/parser/html/moz.build @@ -85,6 +85,42 @@ UNIFIED_SOURCES += [ "nsParserUtils.cpp", ] +# Each target needs to compile: +# (nsHtml5TokenizerALU.cpp XOR nsHtml5TokenizerALUStubs.cpp) +# AND +# (nsHtml5TokenizerSIMD.cpp XOR nsHtml5TokenizerSIMDStubs.cpp) +# AND +# (nsHtml5TokenizerALU.cpp OR nsHtml5TokenizerSIMD.cpp) +# +# Make sure the result is consistent with mozilla::htmlaccel::htmlaccelEnabled(). +# +# Due to https://github.com/llvm/llvm-project/issues/160886, none of the +# code here actually ends up with SIMD instructions, and SIMD stays in +# htmlaccelNotInline.cpp instead. 
Once the LLVM bug is fixed, the functions +# in htmlaccelNotInline.cpp should becomed always inlined and +# nsHtml5TokenizerSIMD.cpp should be built with HTML_ACCEL_FLAGS. + +if (CONFIG["TARGET_CPU"] == "x86_64") and ( + CONFIG["CC_TYPE"] != "gcc" or int(CONFIG["CC_VERSION"].split(".")[0]) >= 12 +): + UNIFIED_SOURCES += [ + "nsHtml5TokenizerALU.cpp", + "nsHtml5TokenizerSIMD.cpp", + ] +elif ( + CONFIG["TARGET_CPU"] == "aarch64" and CONFIG["TARGET_ENDIANNESS"] == "little" +) and (CONFIG["CC_TYPE"] != "gcc" or int(CONFIG["CC_VERSION"].split(".")[0]) >= 12): + # aarch64 doesn't need special flags for SIMD. + UNIFIED_SOURCES += [ + "nsHtml5TokenizerALUStubs.cpp", + "nsHtml5TokenizerSIMD.cpp", + ] +else: + UNIFIED_SOURCES += [ + "nsHtml5TokenizerALU.cpp", + "nsHtml5TokenizerSIMDStubs.cpp", + ] + FINAL_LIBRARY = "xul" LOCAL_INCLUDES += [ diff --git a/parser/html/nsHtml5Tokenizer.cpp b/parser/html/nsHtml5Tokenizer.cpp @@ -40,8 +40,6 @@ #include "nsHtml5Tokenizer.h" -#include "nsHtml5TokenizerLoopPolicies.h" - char16_t nsHtml5Tokenizer::LT_GT[] = {'<', '>'}; char16_t nsHtml5Tokenizer::LT_SOLIDUS[] = {'<', '/'}; char16_t nsHtml5Tokenizer::RSQB_RSQB[] = {']', ']'}; @@ -215,4297 +213,274 @@ void nsHtml5Tokenizer::endTagExpectationToArray() { return; } default: { - MOZ_ASSERT(false, "Bad end tag expectation."); - return; - } - } -} - -void nsHtml5Tokenizer::setLineNumber(int32_t line) { - this->attributeLine = line; - this->line = line; -} - -nsHtml5HtmlAttributes* nsHtml5Tokenizer::emptyAttributes() { - return nsHtml5HtmlAttributes::EMPTY_ATTRIBUTES; -} - -void nsHtml5Tokenizer::emitOrAppendCharRefBuf(int32_t returnState) { - if ((returnState & DATA_AND_RCDATA_MASK)) { - appendCharRefBufToStrBuf(); - } else { - if (charRefBufLen > 0) { - tokenHandler->characters(charRefBuf, 0, charRefBufLen); - charRefBufLen = 0; - } - } -} - -nsHtml5String nsHtml5Tokenizer::strBufToString() { - nsHtml5String str = nsHtml5Portability::newStringFromBuffer( - strBuf, 0, strBufLen, tokenHandler, 
- !newAttributesEachTime && - attributeName == nsHtml5AttributeName::ATTR_CLASS); - clearStrBufAfterUse(); - return str; -} - -void nsHtml5Tokenizer::strBufToDoctypeName() { - doctypeName = - nsHtml5Portability::newLocalNameFromBuffer(strBuf, strBufLen, interner); - clearStrBufAfterUse(); -} - -void nsHtml5Tokenizer::emitStrBuf() { - if (strBufLen > 0) { - tokenHandler->characters(strBuf, 0, strBufLen); - clearStrBufAfterUse(); - } -} - -void nsHtml5Tokenizer::appendStrBuf(char16_t* buffer, int32_t offset, - int32_t length) { - int32_t newLen = nsHtml5Portability::checkedAdd(strBufLen, length); - MOZ_ASSERT(newLen <= strBuf.length, "Previous buffer length insufficient."); - if (MOZ_UNLIKELY(strBuf.length < newLen)) { - if (MOZ_UNLIKELY(!EnsureBufferSpace(length))) { - MOZ_CRASH("Unable to recover from buffer reallocation failure"); - } - } - nsHtml5ArrayCopy::arraycopy(buffer, offset, strBuf, strBufLen, length); - strBufLen = newLen; -} - -void nsHtml5Tokenizer::emitComment(int32_t provisionalHyphens, int32_t pos) { - RememberGt(pos); - tokenHandler->comment(strBuf, 0, strBufLen - provisionalHyphens); - clearStrBufAfterUse(); - cstart = pos + 1; - suspendIfRequestedAfterCurrentNonTextToken(); -} - -void nsHtml5Tokenizer::flushChars(char16_t* buf, int32_t pos) { - if (pos > cstart) { - tokenHandler->characters(buf, cstart, pos - cstart); - } - cstart = INT32_MAX; -} - -void nsHtml5Tokenizer::strBufToElementNameString() { - if (containsHyphen) { - nsAtom* annotationName = nsHtml5ElementName::ELT_ANNOTATION_XML->getName(); - if (nsHtml5Portability::localEqualsBuffer(annotationName, strBuf, - strBufLen)) { - tagName = nsHtml5ElementName::ELT_ANNOTATION_XML; - } else { - nonInternedTagName->setNameForNonInterned( - nsHtml5Portability::newLocalNameFromBuffer(strBuf, strBufLen, - interner), - true); - tagName = nonInternedTagName; - } - } else { - tagName = nsHtml5ElementName::elementNameByBuffer(strBuf, strBufLen); - if (!tagName) { - 
nonInternedTagName->setNameForNonInterned( - nsHtml5Portability::newLocalNameFromBuffer(strBuf, strBufLen, - interner), - false); - tagName = nonInternedTagName; - } - } - containsHyphen = false; - clearStrBufAfterUse(); -} - -int32_t nsHtml5Tokenizer::emitCurrentTagToken(bool selfClosing, int32_t pos) { - RememberGt(pos); - cstart = pos + 1; - maybeErrSlashInEndTag(selfClosing); - stateSave = nsHtml5Tokenizer::DATA; - nsHtml5HtmlAttributes* attrs = - (!attributes ? nsHtml5HtmlAttributes::EMPTY_ATTRIBUTES : attributes); - if (endTag) { - maybeErrAttributesOnEndTag(attrs); - if (!viewingXmlSource) { - tokenHandler->endTag(tagName); - } - if (newAttributesEachTime) { - delete attributes; - attributes = nullptr; - } - } else { - if (viewingXmlSource) { - MOZ_ASSERT(newAttributesEachTime); - delete attributes; - attributes = nullptr; - } else { - tokenHandler->startTag(tagName, attrs, selfClosing); - } - } - tagName = nullptr; - if (newAttributesEachTime) { - attributes = nullptr; - } else { - attributes->clear(0); - } - suspendIfRequestedAfterCurrentNonTextToken(); - return stateSave; -} - -void nsHtml5Tokenizer::attributeNameComplete() { - attributeName = - nsHtml5AttributeName::nameByBuffer(strBuf, strBufLen, interner); - if (!attributeName) { - nonInternedAttributeName->setNameForNonInterned( - nsHtml5Portability::newLocalNameFromBuffer(strBuf, strBufLen, - interner)); - attributeName = nonInternedAttributeName; - } - clearStrBufAfterUse(); - if (!attributes) { - attributes = new nsHtml5HtmlAttributes(0); - } - if (attributes->contains(attributeName)) { - errDuplicateAttribute(); - attributeName = nullptr; - } -} - -void nsHtml5Tokenizer::addAttributeWithoutValue() { - if (attributeName) { - attributes->addAttribute( - attributeName, nsHtml5Portability::newEmptyString(), attributeLine); - attributeName = nullptr; - } else { - clearStrBufAfterUse(); - } -} - -void nsHtml5Tokenizer::addAttributeWithValue() { - if (attributeName) { - nsHtml5String val = 
strBufToString(); - if (mViewSource) { - mViewSource->MaybeLinkifyAttributeValue(attributeName, val); - } - attributes->addAttribute(attributeName, val, attributeLine); - attributeName = nullptr; - } else { - clearStrBufAfterUse(); - } -} - -void nsHtml5Tokenizer::start() { - initializeWithoutStarting(); - tokenHandler->startTokenization(this); - if (mViewSource) { - line = 1; - col = -1; - nextCharOnNewLine = false; - } else if (tokenHandler->WantsLineAndColumn()) { - line = 0; - col = 1; - nextCharOnNewLine = true; - } else { - line = -1; - col = -1; - nextCharOnNewLine = false; - } -} - -bool nsHtml5Tokenizer::tokenizeBuffer(nsHtml5UTF16Buffer* buffer) { - int32_t state = stateSave; - int32_t returnState = returnStateSave; - char16_t c = '\0'; - shouldSuspend = false; - lastCR = false; - int32_t start = buffer->getStart(); - int32_t end = buffer->getEnd(); - int32_t pos = start - 1; - switch (state) { - case DATA: - case RCDATA: - case SCRIPT_DATA: - case PLAINTEXT: - case RAWTEXT: - case CDATA_SECTION: - case SCRIPT_DATA_ESCAPED: - case SCRIPT_DATA_ESCAPE_START: - case SCRIPT_DATA_ESCAPE_START_DASH: - case SCRIPT_DATA_ESCAPED_DASH: - case SCRIPT_DATA_ESCAPED_DASH_DASH: - case SCRIPT_DATA_DOUBLE_ESCAPE_START: - case SCRIPT_DATA_DOUBLE_ESCAPED: - case SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN: - case SCRIPT_DATA_DOUBLE_ESCAPED_DASH: - case SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH: - case SCRIPT_DATA_DOUBLE_ESCAPE_END: { - cstart = start; - break; - } - default: { - cstart = INT32_MAX; - break; - } - } - if (mViewSource) { - mViewSource->SetBuffer(buffer); - pos = stateLoop<nsHtml5ViewSourcePolicy>(state, c, pos, buffer->getBuffer(), - false, returnState, - buffer->getEnd()); - mViewSource->DropBuffer((pos == buffer->getEnd()) ? 
pos : pos + 1); - } else if (tokenHandler->WantsLineAndColumn()) { - pos = stateLoop<nsHtml5LineColPolicy>(state, c, pos, buffer->getBuffer(), - false, returnState, buffer->getEnd()); - } else { - pos = stateLoop<nsHtml5FastestPolicy>(state, c, pos, buffer->getBuffer(), - false, returnState, buffer->getEnd()); - } - if (pos == end) { - buffer->setStart(pos); - } else { - buffer->setStart(pos + 1); - } - return lastCR; -} - -template <class P> -int32_t nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos, - char16_t* buf, bool reconsume, - int32_t returnState, int32_t endPos) { - bool reportedConsecutiveHyphens = false; -stateloop: - for (;;) { - switch (state) { - case DATA: { - for (;;) { - if (reconsume) { - reconsume = false; - } else { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - } - switch (c) { - case '&': { - flushChars(buf, pos); - MOZ_ASSERT(!charRefBufLen, - "charRefBufLen not reset after previous use!"); - appendCharRefBuf(c); - setAdditionalAndRememberAmpersandLocation('\0'); - returnState = state; - state = - P::transition(mViewSource.get(), - nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '<': { - flushChars(buf, pos); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::TAG_OPEN, reconsume, pos); - NS_HTML5_BREAK(dataloop); - } - case '\0': { - maybeEmitReplacementCharacter(buf, pos); - continue; - } - case '\r': { - emitCarriageReturn<P>(buf, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - default: { - continue; - } - } - } - dataloop_end:; - [[fallthrough]]; - } - case TAG_OPEN: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - if (c >= 'A' && c <= 'Z') { - endTag = false; - clearStrBufBeforeUse(); - appendStrBuf((char16_t)(c + 0x20)); - containsHyphen = false; - state = 
P::transition(mViewSource.get(), nsHtml5Tokenizer::TAG_NAME, - reconsume, pos); - NS_HTML5_BREAK(tagopenloop); - } else if (c >= 'a' && c <= 'z') { - endTag = false; - clearStrBufBeforeUse(); - appendStrBuf(c); - containsHyphen = false; - state = P::transition(mViewSource.get(), nsHtml5Tokenizer::TAG_NAME, - reconsume, pos); - NS_HTML5_BREAK(tagopenloop); - } - switch (c) { - case '!': { - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::MARKUP_DECLARATION_OPEN, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '/': { - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::CLOSE_TAG_OPEN, reconsume, - pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\?': { - if (viewingXmlSource) { - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::PROCESSING_INSTRUCTION, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - if (P::reportErrors) { - errProcessingInstruction(); - } - clearStrBufBeforeUse(); - appendStrBuf(c); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BOGUS_COMMENT, reconsume, - pos); - NS_HTML5_CONTINUE(stateloop); - } - case '>': { - if (P::reportErrors) { - errLtGt(); - } - tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 2); - cstart = pos + 1; - state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - default: { - if (P::reportErrors) { - errBadCharAfterLt(c); - } - tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 1); - cstart = pos; - reconsume = true; - state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - tagopenloop_end:; - [[fallthrough]]; - } - case TAG_NAME: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '\r': { - P::silentCarriageReturn(this); - strBufToElementNameString(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME, 
- reconsume, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - case ' ': - case '\t': - case '\f': { - strBufToElementNameString(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME, - reconsume, pos); - NS_HTML5_BREAK(tagnameloop); - } - case '/': { - strBufToElementNameString(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::SELF_CLOSING_START_TAG, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '>': { - strBufToElementNameString(); - state = P::transition(mViewSource.get(), - emitCurrentTagToken(false, pos), reconsume, - pos); - if (shouldSuspend) { - NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - case '\0': { - c = 0xfffd; - [[fallthrough]]; - } - default: { - if (c >= 'A' && c <= 'Z') { - c += 0x20; - } else if (c == '-') { - containsHyphen = true; - } - appendStrBuf(c); - continue; - } - } - } - tagnameloop_end:; - [[fallthrough]]; - } - case BEFORE_ATTRIBUTE_NAME: { - for (;;) { - if (reconsume) { - reconsume = false; - } else { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - } - switch (c) { - case '\r': { - P::silentCarriageReturn(this); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - case ' ': - case '\t': - case '\f': { - continue; - } - case '/': { - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::SELF_CLOSING_START_TAG, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '>': { - state = P::transition(mViewSource.get(), - emitCurrentTagToken(false, pos), reconsume, - pos); - if (shouldSuspend) { - NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - case '\0': { - c = 0xfffd; - [[fallthrough]]; - } - case '\"': - case '\'': - case '<': - case '=': { - if (P::reportErrors) { - errBadCharBeforeAttributeNameOrNull(c); - } - [[fallthrough]]; - } - default: { - if (c >= 'A' && c 
<= 'Z') { - c += 0x20; - } - attributeLine = line; - clearStrBufBeforeUse(); - appendStrBuf(c); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::ATTRIBUTE_NAME, reconsume, - pos); - NS_HTML5_BREAK(beforeattributenameloop); - } - } - } - beforeattributenameloop_end:; - [[fallthrough]]; - } - case ATTRIBUTE_NAME: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '\r': { - P::silentCarriageReturn(this); - attributeNameComplete(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::AFTER_ATTRIBUTE_NAME, - reconsume, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - case ' ': - case '\t': - case '\f': { - attributeNameComplete(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::AFTER_ATTRIBUTE_NAME, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '/': { - attributeNameComplete(); - addAttributeWithoutValue(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::SELF_CLOSING_START_TAG, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '=': { - attributeNameComplete(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BEFORE_ATTRIBUTE_VALUE, - reconsume, pos); - NS_HTML5_BREAK(attributenameloop); - } - case '>': { - attributeNameComplete(); - addAttributeWithoutValue(); - state = P::transition(mViewSource.get(), - emitCurrentTagToken(false, pos), reconsume, - pos); - if (shouldSuspend) { - NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - case '\0': { - c = 0xfffd; - [[fallthrough]]; - } - case '\"': - case '\'': - case '<': { - if (P::reportErrors) { - errQuoteOrLtInAttributeNameOrNull(c); - } - [[fallthrough]]; - } - default: { - if (c >= 'A' && c <= 'Z') { - c += 0x20; - } - appendStrBuf(c); - continue; - } - } - } - attributenameloop_end:; - [[fallthrough]]; - } - case BEFORE_ATTRIBUTE_VALUE: { - for (;;) { - if (++pos == 
endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '\r': { - P::silentCarriageReturn(this); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - case ' ': - case '\t': - case '\f': { - continue; - } - case '\"': { - attributeLine = line; - clearStrBufBeforeUse(); - state = - P::transition(mViewSource.get(), - nsHtml5Tokenizer::ATTRIBUTE_VALUE_DOUBLE_QUOTED, - reconsume, pos); - NS_HTML5_BREAK(beforeattributevalueloop); - } - case '&': { - attributeLine = line; - clearStrBufBeforeUse(); - reconsume = true; - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::ATTRIBUTE_VALUE_UNQUOTED, - reconsume, pos); - - NS_HTML5_CONTINUE(stateloop); - } - case '\'': { - attributeLine = line; - clearStrBufBeforeUse(); - state = - P::transition(mViewSource.get(), - nsHtml5Tokenizer::ATTRIBUTE_VALUE_SINGLE_QUOTED, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '>': { - if (P::reportErrors) { - errAttributeValueMissing(); - } - addAttributeWithoutValue(); - state = P::transition(mViewSource.get(), - emitCurrentTagToken(false, pos), reconsume, - pos); - if (shouldSuspend) { - NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - case '\0': { - c = 0xfffd; - [[fallthrough]]; - } - case '<': - case '=': - case '`': { - if (P::reportErrors) { - errLtOrEqualsOrGraveInUnquotedAttributeOrNull(c); - } - [[fallthrough]]; - } - default: { - attributeLine = line; - clearStrBufBeforeUse(); - appendStrBuf(c); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::ATTRIBUTE_VALUE_UNQUOTED, - reconsume, pos); - - NS_HTML5_CONTINUE(stateloop); - } - } - } - beforeattributevalueloop_end:; - [[fallthrough]]; - } - case ATTRIBUTE_VALUE_DOUBLE_QUOTED: { - for (;;) { - if (reconsume) { - reconsume = false; - } else { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - } - switch (c) { - case '\"': { - 
addAttributeWithValue(); - state = - P::transition(mViewSource.get(), - nsHtml5Tokenizer::AFTER_ATTRIBUTE_VALUE_QUOTED, - reconsume, pos); - NS_HTML5_BREAK(attributevaluedoublequotedloop); - } - case '&': { - MOZ_ASSERT(!charRefBufLen, - "charRefBufLen not reset after previous use!"); - appendCharRefBuf(c); - setAdditionalAndRememberAmpersandLocation('\"'); - returnState = state; - state = - P::transition(mViewSource.get(), - nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\r': { - appendStrBufCarriageReturn<P>(); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - appendStrBufLineFeed<P>(); - continue; - } - case '\0': { - c = 0xfffd; - [[fallthrough]]; - } - default: { - appendStrBuf(c); - continue; - } - } - } - attributevaluedoublequotedloop_end:; - [[fallthrough]]; - } - case AFTER_ATTRIBUTE_VALUE_QUOTED: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '\r': { - P::silentCarriageReturn(this); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME, - reconsume, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - case ' ': - case '\t': - case '\f': { - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '/': { - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::SELF_CLOSING_START_TAG, - reconsume, pos); - NS_HTML5_BREAK(afterattributevaluequotedloop); - } - case '>': { - state = P::transition(mViewSource.get(), - emitCurrentTagToken(false, pos), reconsume, - pos); - if (shouldSuspend) { - NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - default: { - if (P::reportErrors) { - errNoSpaceBetweenAttributes(); - } - reconsume = true; - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME, - 
reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - afterattributevaluequotedloop_end:; - [[fallthrough]]; - } - case SELF_CLOSING_START_TAG: { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '>': { - state = - P::transition(mViewSource.get(), emitCurrentTagToken(true, pos), - reconsume, pos); - if (shouldSuspend) { - NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - default: { - if (P::reportErrors) { - errSlashNotFollowedByGt(); - } - reconsume = true; - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - case ATTRIBUTE_VALUE_UNQUOTED: { - for (;;) { - if (reconsume) { - reconsume = false; - } else { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - } - switch (c) { - case '\r': { - P::silentCarriageReturn(this); - addAttributeWithValue(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME, - reconsume, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - case ' ': - case '\t': - case '\f': { - addAttributeWithValue(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '&': { - MOZ_ASSERT(!charRefBufLen, - "charRefBufLen not reset after previous use!"); - appendCharRefBuf(c); - setAdditionalAndRememberAmpersandLocation('>'); - returnState = state; - state = - P::transition(mViewSource.get(), - nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '>': { - addAttributeWithValue(); - state = P::transition(mViewSource.get(), - emitCurrentTagToken(false, pos), reconsume, - pos); - if (shouldSuspend) { - NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - case '\0': { - c = 
0xfffd; - [[fallthrough]]; - } - case '<': - case '\"': - case '\'': - case '=': - case '`': { - if (P::reportErrors) { - errUnquotedAttributeValOrNull(c); - } - [[fallthrough]]; - } - default: { - appendStrBuf(c); - continue; - } - } - } - } - case AFTER_ATTRIBUTE_NAME: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '\r': { - P::silentCarriageReturn(this); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - case ' ': - case '\t': - case '\f': { - continue; - } - case '/': { - addAttributeWithoutValue(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::SELF_CLOSING_START_TAG, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '=': { - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BEFORE_ATTRIBUTE_VALUE, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '>': { - addAttributeWithoutValue(); - state = P::transition(mViewSource.get(), - emitCurrentTagToken(false, pos), reconsume, - pos); - if (shouldSuspend) { - NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - case '\0': { - c = 0xfffd; - [[fallthrough]]; - } - case '\"': - case '\'': - case '<': { - if (P::reportErrors) { - errQuoteOrLtInAttributeNameOrNull(c); - } - [[fallthrough]]; - } - default: { - addAttributeWithoutValue(); - if (c >= 'A' && c <= 'Z') { - c += 0x20; - } - clearStrBufBeforeUse(); - appendStrBuf(c); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::ATTRIBUTE_NAME, reconsume, - pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - } - case MARKUP_DECLARATION_OPEN: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '-': { - clearStrBufBeforeUse(); - appendStrBuf(c); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::MARKUP_DECLARATION_HYPHEN, - reconsume, pos); - 
NS_HTML5_BREAK(markupdeclarationopenloop); - } - case 'd': - case 'D': { - clearStrBufBeforeUse(); - appendStrBuf(c); - index = 0; - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::MARKUP_DECLARATION_OCTYPE, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '[': { - if (tokenHandler->cdataSectionAllowed()) { - clearStrBufBeforeUse(); - appendStrBuf(c); - index = 0; - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::CDATA_START, reconsume, - pos); - NS_HTML5_CONTINUE(stateloop); - } - [[fallthrough]]; - } - default: { - if (P::reportErrors) { - errBogusComment(); - } - clearStrBufBeforeUse(); - reconsume = true; - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BOGUS_COMMENT, reconsume, - pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - markupdeclarationopenloop_end:; - [[fallthrough]]; - } - case MARKUP_DECLARATION_HYPHEN: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '-': { - clearStrBufAfterOneHyphen(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::COMMENT_START, reconsume, - pos); - NS_HTML5_BREAK(markupdeclarationhyphenloop); - } - default: { - if (P::reportErrors) { - errBogusComment(); - } - reconsume = true; - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BOGUS_COMMENT, reconsume, - pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - markupdeclarationhyphenloop_end:; - [[fallthrough]]; - } - case COMMENT_START: { - reportedConsecutiveHyphens = false; - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '-': { - appendStrBuf(c); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::COMMENT_START_DASH, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '>': { - if (P::reportErrors) { - errPrematureEndOfComment(); - } - emitComment(0, pos); - state = P::transition(mViewSource.get(), 
nsHtml5Tokenizer::DATA, - reconsume, pos); - if (shouldSuspend) { - NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - case '<': { - appendStrBuf(c); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::COMMENT_LESSTHAN, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\r': { - appendStrBufCarriageReturn<P>(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::COMMENT, reconsume, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - appendStrBufLineFeed<P>(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::COMMENT, reconsume, pos); - NS_HTML5_BREAK(commentstartloop); - } - case '\0': { - c = 0xfffd; - [[fallthrough]]; - } - default: { - appendStrBuf(c); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::COMMENT, reconsume, pos); - NS_HTML5_BREAK(commentstartloop); - } - } - } - commentstartloop_end:; - [[fallthrough]]; - } - case COMMENT: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '-': { - appendStrBuf(c); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::COMMENT_END_DASH, - reconsume, pos); - NS_HTML5_BREAK(commentloop); - } - case '<': { - appendStrBuf(c); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::COMMENT_LESSTHAN, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\r': { - appendStrBufCarriageReturn<P>(); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - appendStrBufLineFeed<P>(); - continue; - } - case '\0': { - c = 0xfffd; - [[fallthrough]]; - } - default: { - appendStrBuf(c); - continue; - } - } - } - commentloop_end:; - [[fallthrough]]; - } - case COMMENT_END_DASH: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '-': { - appendStrBuf(c); - state = - P::transition(mViewSource.get(), - nsHtml5Tokenizer::COMMENT_END, reconsume, pos); - 
NS_HTML5_BREAK(commentenddashloop); - } - case '<': { - appendStrBuf(c); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::COMMENT_LESSTHAN, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\r': { - appendStrBufCarriageReturn<P>(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::COMMENT, reconsume, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - appendStrBufLineFeed<P>(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::COMMENT, reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\0': { - c = 0xfffd; - [[fallthrough]]; - } - default: { - appendStrBuf(c); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::COMMENT, reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - commentenddashloop_end:; - [[fallthrough]]; - } - case COMMENT_END: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '>': { - emitComment(2, pos); - state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, - reconsume, pos); - if (shouldSuspend) { - NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - case '-': { - adjustDoubleHyphenAndAppendToStrBufAndErr( - c, reportedConsecutiveHyphens); - reportedConsecutiveHyphens = true; - continue; - } - case '<': { - appendStrBuf(c); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::COMMENT_LESSTHAN, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\r': { - adjustDoubleHyphenAndAppendToStrBufCarriageReturn<P>(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::COMMENT, reconsume, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - adjustDoubleHyphenAndAppendToStrBufLineFeed<P>(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::COMMENT, reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '!': { - appendStrBuf(c); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::COMMENT_END_BANG, - 
reconsume, pos); - NS_HTML5_BREAK(commentendloop); - } - case '\0': { - c = 0xfffd; - [[fallthrough]]; - } - default: { - adjustDoubleHyphenAndAppendToStrBufAndErr( - c, reportedConsecutiveHyphens); - reportedConsecutiveHyphens = true; - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::COMMENT, reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - commentendloop_end:; - [[fallthrough]]; - } - case COMMENT_END_BANG: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '>': { - emitComment(3, pos); - state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, - reconsume, pos); - if (shouldSuspend) { - NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - case '-': { - appendStrBuf(c); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::COMMENT_END_DASH, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\r': { - appendStrBufCarriageReturn<P>(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::COMMENT, reconsume, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - appendStrBufLineFeed<P>(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::COMMENT, reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\0': { - c = 0xfffd; - [[fallthrough]]; - } - default: { - appendStrBuf(c); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::COMMENT, reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - } - case COMMENT_LESSTHAN: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '!': { - appendStrBuf(c); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::COMMENT_LESSTHAN_BANG, - reconsume, pos); - NS_HTML5_BREAK(commentlessthanloop); - } - case '<': { - appendStrBuf(c); - continue; - } - case '-': { - appendStrBuf(c); - state = P::transition(mViewSource.get(), - 
nsHtml5Tokenizer::COMMENT_END_DASH, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\r': { - appendStrBufCarriageReturn<P>(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::COMMENT, reconsume, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - appendStrBufLineFeed<P>(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::COMMENT, reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\0': { - c = 0xfffd; - [[fallthrough]]; - } - default: { - appendStrBuf(c); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::COMMENT, reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - commentlessthanloop_end:; - [[fallthrough]]; - } - case COMMENT_LESSTHAN_BANG: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '-': { - appendStrBuf(c); - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::COMMENT_LESSTHAN_BANG_DASH, reconsume, pos); - NS_HTML5_BREAK(commentlessthanbangloop); - } - case '<': { - appendStrBuf(c); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::COMMENT_LESSTHAN, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\r': { - appendStrBufCarriageReturn<P>(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::COMMENT, reconsume, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - appendStrBufLineFeed<P>(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::COMMENT, reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\0': { - c = 0xfffd; - [[fallthrough]]; - } - default: { - appendStrBuf(c); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::COMMENT, reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - commentlessthanbangloop_end:; - [[fallthrough]]; - } - case COMMENT_LESSTHAN_BANG_DASH: { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '-': { - 
appendStrBuf(c); - state = - P::transition(mViewSource.get(), - nsHtml5Tokenizer::COMMENT_LESSTHAN_BANG_DASH_DASH, - reconsume, pos); - break; - } - case '<': { - appendStrBuf(c); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::COMMENT_LESSTHAN, reconsume, - pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\r': { - appendStrBufCarriageReturn<P>(); - state = P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT, - reconsume, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - appendStrBufLineFeed<P>(); - state = P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\0': { - c = 0xfffd; - [[fallthrough]]; - } - default: { - appendStrBuf(c); - state = P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - } - [[fallthrough]]; - } - case COMMENT_LESSTHAN_BANG_DASH_DASH: { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '>': { - appendStrBuf(c); - emitComment(3, pos); - state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, - reconsume, pos); - if (shouldSuspend) { - NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - case '-': { - if (P::reportErrors) { - errNestedComment(); - } - adjustDoubleHyphenAndAppendToStrBufAndErr( - c, reportedConsecutiveHyphens); - reportedConsecutiveHyphens = true; - state = - P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT_END, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\r': { - c = '\n'; - P::silentCarriageReturn(this); - if (P::reportErrors) { - errNestedComment(); - } - adjustDoubleHyphenAndAppendToStrBufAndErr( - c, reportedConsecutiveHyphens); - reportedConsecutiveHyphens = true; - state = P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT, - reconsume, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - if 
(P::reportErrors) { - errNestedComment(); - } - adjustDoubleHyphenAndAppendToStrBufAndErr( - c, reportedConsecutiveHyphens); - reportedConsecutiveHyphens = true; - state = P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '!': { - if (P::reportErrors) { - errNestedComment(); - } - adjustDoubleHyphenAndAppendToStrBufAndErr( - c, reportedConsecutiveHyphens); - reportedConsecutiveHyphens = true; - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::COMMENT_END_BANG, reconsume, - pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\0': { - c = 0xfffd; - [[fallthrough]]; - } - default: { - if (P::reportErrors) { - errNestedComment(); - } - adjustDoubleHyphenAndAppendToStrBufAndErr( - c, reportedConsecutiveHyphens); - reportedConsecutiveHyphens = true; - state = P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - case COMMENT_START_DASH: { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '-': { - appendStrBuf(c); - state = - P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT_END, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '>': { - if (P::reportErrors) { - errPrematureEndOfComment(); - } - emitComment(1, pos); - state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, - reconsume, pos); - if (shouldSuspend) { - NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - case '<': { - appendStrBuf(c); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::COMMENT_LESSTHAN, reconsume, - pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\r': { - appendStrBufCarriageReturn<P>(); - state = P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT, - reconsume, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - appendStrBufLineFeed<P>(); - state = P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT, 
- reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\0': { - c = 0xfffd; - [[fallthrough]]; - } - default: { - appendStrBuf(c); - state = P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - case CDATA_START: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - if (index < 6) { - if (c == nsHtml5Tokenizer::CDATA_LSQB[index]) { - appendStrBuf(c); - } else { - if (P::reportErrors) { - errBogusComment(); - } - reconsume = true; - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BOGUS_COMMENT, reconsume, - pos); - NS_HTML5_CONTINUE(stateloop); - } - index++; - continue; - } else { - clearStrBufAfterUse(); - cstart = pos; - reconsume = true; - state = - P::transition(mViewSource.get(), - nsHtml5Tokenizer::CDATA_SECTION, reconsume, pos); - break; - } - } - [[fallthrough]]; - } - case CDATA_SECTION: { - for (;;) { - if (reconsume) { - reconsume = false; - } else { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - } - switch (c) { - case ']': { - flushChars(buf, pos); - state = - P::transition(mViewSource.get(), nsHtml5Tokenizer::CDATA_RSQB, - reconsume, pos); - NS_HTML5_BREAK(cdatasectionloop); - } - case '\0': { - maybeEmitReplacementCharacter(buf, pos); - continue; - } - case '\r': { - emitCarriageReturn<P>(buf, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - default: { - continue; - } - } - } - cdatasectionloop_end:; - [[fallthrough]]; - } - case CDATA_RSQB: { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case ']': { - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::CDATA_RSQB_RSQB, reconsume, - pos); - break; - } - default: { - tokenHandler->characters(nsHtml5Tokenizer::RSQB_RSQB, 0, 1); - cstart = pos; - reconsume = true; - state = - 
P::transition(mViewSource.get(), - nsHtml5Tokenizer::CDATA_SECTION, reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - } - [[fallthrough]]; - } - case CDATA_RSQB_RSQB: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case ']': { - tokenHandler->characters(nsHtml5Tokenizer::RSQB_RSQB, 0, 1); - continue; - } - case '>': { - cstart = pos + 1; - state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, - reconsume, pos); - suspendIfRequestedAfterCurrentNonTextToken(); - if (shouldSuspend) { - NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - default: { - tokenHandler->characters(nsHtml5Tokenizer::RSQB_RSQB, 0, 2); - cstart = pos; - reconsume = true; - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::CDATA_SECTION, reconsume, - pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - } - case ATTRIBUTE_VALUE_SINGLE_QUOTED: { - for (;;) { - if (reconsume) { - reconsume = false; - } else { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - } - switch (c) { - case '\'': { - addAttributeWithValue(); - state = - P::transition(mViewSource.get(), - nsHtml5Tokenizer::AFTER_ATTRIBUTE_VALUE_QUOTED, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '&': { - MOZ_ASSERT(!charRefBufLen, - "charRefBufLen not reset after previous use!"); - appendCharRefBuf(c); - setAdditionalAndRememberAmpersandLocation('\''); - returnState = state; - state = - P::transition(mViewSource.get(), - nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE, - reconsume, pos); - NS_HTML5_BREAK(attributevaluesinglequotedloop); - } - case '\r': { - appendStrBufCarriageReturn<P>(); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - appendStrBufLineFeed<P>(); - continue; - } - case '\0': { - c = 0xfffd; - [[fallthrough]]; - } - default: { - appendStrBuf(c); - continue; - } - } - } - attributevaluesinglequotedloop_end:; - [[fallthrough]]; - } - case 
CONSUME_CHARACTER_REFERENCE: { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case ' ': - case '\t': - case '\n': - case '\r': - case '\f': - case '<': - case '&': - case '\0': - case ';': { - emitOrAppendCharRefBuf(returnState); - if (!(returnState & DATA_AND_RCDATA_MASK)) { - cstart = pos; - } - reconsume = true; - state = - P::transition(mViewSource.get(), returnState, reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '#': { - appendCharRefBuf('#'); - state = - P::transition(mViewSource.get(), nsHtml5Tokenizer::CONSUME_NCR, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - default: { - if (c == additional) { - emitOrAppendCharRefBuf(returnState); - reconsume = true; - state = - P::transition(mViewSource.get(), returnState, reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - if (c >= 'a' && c <= 'z') { - firstCharKey = c - 'a' + 26; - } else if (c >= 'A' && c <= 'Z') { - firstCharKey = c - 'A'; - } else { - if (c == ';') { - if (P::reportErrors) { - errNoNamedCharacterMatch(); - } - } - emitOrAppendCharRefBuf(returnState); - if (!(returnState & DATA_AND_RCDATA_MASK)) { - cstart = pos; - } - reconsume = true; - state = - P::transition(mViewSource.get(), returnState, reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - appendCharRefBuf(c); - state = - P::transition(mViewSource.get(), - nsHtml5Tokenizer::CHARACTER_REFERENCE_HILO_LOOKUP, - reconsume, pos); - break; - } - } - [[fallthrough]]; - } - case CHARACTER_REFERENCE_HILO_LOOKUP: { - { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - int32_t hilo = 0; - if (c <= 'z') { - const int32_t* row = nsHtml5NamedCharactersAccel::HILO_ACCEL[c]; - if (row) { - hilo = row[firstCharKey]; - } - } - if (!hilo) { - if (c == ';') { - if (P::reportErrors) { - errNoNamedCharacterMatch(); - } - } - emitOrAppendCharRefBuf(returnState); - if (!(returnState & DATA_AND_RCDATA_MASK)) { - cstart = pos; - } 
- reconsume = true; - state = - P::transition(mViewSource.get(), returnState, reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - appendCharRefBuf(c); - lo = hilo & 0xFFFF; - hi = hilo >> 16; - entCol = -1; - candidate = -1; - charRefBufMark = 0; - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::CHARACTER_REFERENCE_TAIL, - reconsume, pos); - } - [[fallthrough]]; - } - case CHARACTER_REFERENCE_TAIL: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - entCol++; - for (;;) { - if (hi < lo) { - NS_HTML5_BREAK(outer); - } - if (entCol == nsHtml5NamedCharacters::NAMES[lo].length()) { - candidate = lo; - charRefBufMark = charRefBufLen; - lo++; - } else if (entCol > nsHtml5NamedCharacters::NAMES[lo].length()) { - NS_HTML5_BREAK(outer); - } else if (c > nsHtml5NamedCharacters::NAMES[lo].charAt(entCol)) { - lo++; - } else { - NS_HTML5_BREAK(loloop); - } - } - loloop_end:; - for (;;) { - if (hi < lo) { - NS_HTML5_BREAK(outer); - } - if (entCol == nsHtml5NamedCharacters::NAMES[hi].length()) { - NS_HTML5_BREAK(hiloop); - } - if (entCol > nsHtml5NamedCharacters::NAMES[hi].length()) { - NS_HTML5_BREAK(outer); - } else if (c < nsHtml5NamedCharacters::NAMES[hi].charAt(entCol)) { - hi--; - } else { - NS_HTML5_BREAK(hiloop); - } - } - hiloop_end:; - if (c == ';') { - if (entCol + 1 == nsHtml5NamedCharacters::NAMES[lo].length()) { - candidate = lo; - charRefBufMark = charRefBufLen; - } - NS_HTML5_BREAK(outer); - } - if (hi < lo) { - NS_HTML5_BREAK(outer); - } - appendCharRefBuf(c); - continue; - } - outer_end:; - if (candidate == -1) { - if (c == ';') { - if (P::reportErrors) { - errNoNamedCharacterMatch(); - } - } - emitOrAppendCharRefBuf(returnState); - if (!(returnState & DATA_AND_RCDATA_MASK)) { - cstart = pos; - } - reconsume = true; - state = P::transition(mViewSource.get(), returnState, reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } else { - const nsHtml5CharacterName& candidateName = - 
nsHtml5NamedCharacters::NAMES[candidate]; - if (!candidateName.length() || - candidateName.charAt(candidateName.length() - 1) != ';') { - if ((returnState & DATA_AND_RCDATA_MASK)) { - char16_t ch; - if (charRefBufMark == charRefBufLen) { - ch = c; - } else { - ch = charRefBuf[charRefBufMark]; - } - if (ch == '=' || (ch >= '0' && ch <= '9') || - (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z')) { - if (c == ';') { - if (P::reportErrors) { - errNoNamedCharacterMatch(); - } - } - appendCharRefBufToStrBuf(); - reconsume = true; - state = P::transition(mViewSource.get(), returnState, reconsume, - pos); - NS_HTML5_CONTINUE(stateloop); - } - } - if ((returnState & DATA_AND_RCDATA_MASK)) { - if (P::reportErrors) { - errUnescapedAmpersandInterpretedAsCharacterReference(); - } - } else { - if (P::reportErrors) { - errNotSemicolonTerminated(); - } - } - } - P::completedNamedCharacterReference(mViewSource.get()); - const char16_t* val = nsHtml5NamedCharacters::VALUES[candidate]; - if (!val[1]) { - emitOrAppendOne(val, returnState); - } else { - emitOrAppendTwo(val, returnState); - } - if (charRefBufMark < charRefBufLen) { - if ((returnState & DATA_AND_RCDATA_MASK)) { - appendStrBuf(charRefBuf, charRefBufMark, - charRefBufLen - charRefBufMark); - } else { - tokenHandler->characters(charRefBuf, charRefBufMark, - charRefBufLen - charRefBufMark); - } - } - bool earlyBreak = (c == ';' && charRefBufMark == charRefBufLen); - charRefBufLen = 0; - if (!(returnState & DATA_AND_RCDATA_MASK)) { - cstart = earlyBreak ? 
pos + 1 : pos; - } - reconsume = !earlyBreak; - state = P::transition(mViewSource.get(), returnState, reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - } - case CONSUME_NCR: { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - value = 0; - seenDigits = false; - switch (c) { - case 'x': - case 'X': { - appendCharRefBuf(c); - state = - P::transition(mViewSource.get(), nsHtml5Tokenizer::HEX_NCR_LOOP, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - default: { - reconsume = true; - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::DECIMAL_NRC_LOOP, reconsume, - pos); - break; - } - } - [[fallthrough]]; - } - case DECIMAL_NRC_LOOP: { - for (;;) { - if (reconsume) { - reconsume = false; - } else { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - } - MOZ_ASSERT(value >= 0, "value must not become negative."); - if (c >= '0' && c <= '9') { - seenDigits = true; - if (value <= 0x10FFFF) { - value *= 10; - value += c - '0'; - } - continue; - } else if (c == ';') { - if (seenDigits) { - if (!(returnState & DATA_AND_RCDATA_MASK)) { - cstart = pos + 1; - } - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::HANDLE_NCR_VALUE, - reconsume, pos); - NS_HTML5_BREAK(decimalloop); - } else { - if (P::reportErrors) { - errNoDigitsInNCR(); - } - appendCharRefBuf(';'); - emitOrAppendCharRefBuf(returnState); - if (!(returnState & DATA_AND_RCDATA_MASK)) { - cstart = pos + 1; - } - state = - P::transition(mViewSource.get(), returnState, reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - } else { - if (!seenDigits) { - if (P::reportErrors) { - errNoDigitsInNCR(); - } - emitOrAppendCharRefBuf(returnState); - if (!(returnState & DATA_AND_RCDATA_MASK)) { - cstart = pos; - } - reconsume = true; - state = - P::transition(mViewSource.get(), returnState, reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } else { - if (P::reportErrors) { - errCharRefLacksSemicolon(); - } 
- if (!(returnState & DATA_AND_RCDATA_MASK)) { - cstart = pos; - } - reconsume = true; - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::HANDLE_NCR_VALUE, - reconsume, pos); - NS_HTML5_BREAK(decimalloop); - } - } - } - decimalloop_end:; - [[fallthrough]]; - } - case HANDLE_NCR_VALUE: { - charRefBufLen = 0; - handleNcrValue(returnState); - state = P::transition(mViewSource.get(), returnState, reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case HEX_NCR_LOOP: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - MOZ_ASSERT(value >= 0, "value must not become negative."); - if (c >= '0' && c <= '9') { - seenDigits = true; - if (value <= 0x10FFFF) { - value *= 16; - value += c - '0'; - } - continue; - } else if (c >= 'A' && c <= 'F') { - seenDigits = true; - if (value <= 0x10FFFF) { - value *= 16; - value += c - 'A' + 10; - } - continue; - } else if (c >= 'a' && c <= 'f') { - seenDigits = true; - if (value <= 0x10FFFF) { - value *= 16; - value += c - 'a' + 10; - } - continue; - } else if (c == ';') { - if (seenDigits) { - if (!(returnState & DATA_AND_RCDATA_MASK)) { - cstart = pos + 1; - } - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::HANDLE_NCR_VALUE, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } else { - if (P::reportErrors) { - errNoDigitsInNCR(); - } - appendCharRefBuf(';'); - emitOrAppendCharRefBuf(returnState); - if (!(returnState & DATA_AND_RCDATA_MASK)) { - cstart = pos + 1; - } - state = - P::transition(mViewSource.get(), returnState, reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - } else { - if (!seenDigits) { - if (P::reportErrors) { - errNoDigitsInNCR(); - } - emitOrAppendCharRefBuf(returnState); - if (!(returnState & DATA_AND_RCDATA_MASK)) { - cstart = pos; - } - reconsume = true; - state = - P::transition(mViewSource.get(), returnState, reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } else { - if (P::reportErrors) { - 
errCharRefLacksSemicolon(); - } - if (!(returnState & DATA_AND_RCDATA_MASK)) { - cstart = pos; - } - reconsume = true; - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::HANDLE_NCR_VALUE, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - } - case PLAINTEXT: { - for (;;) { - if (reconsume) { - reconsume = false; - } else { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - } - switch (c) { - case '\0': { - emitPlaintextReplacementCharacter(buf, pos); - continue; - } - case '\r': { - emitCarriageReturn<P>(buf, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - default: { - continue; - } - } - } - } - case CLOSE_TAG_OPEN: { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '>': { - if (P::reportErrors) { - errLtSlashGt(); - } - cstart = pos + 1; - state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\r': { - P::silentCarriageReturn(this); - if (P::reportErrors) { - errGarbageAfterLtSlash(); - } - clearStrBufBeforeUse(); - appendStrBuf('\n'); - state = - P::transition(mViewSource.get(), - nsHtml5Tokenizer::BOGUS_COMMENT, reconsume, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - if (P::reportErrors) { - errGarbageAfterLtSlash(); - } - clearStrBufBeforeUse(); - appendStrBuf(c); - state = - P::transition(mViewSource.get(), - nsHtml5Tokenizer::BOGUS_COMMENT, reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\0': { - c = 0xfffd; - [[fallthrough]]; - } - default: { - if (c >= 'A' && c <= 'Z') { - c += 0x20; - } - if (c >= 'a' && c <= 'z') { - endTag = true; - clearStrBufBeforeUse(); - appendStrBuf(c); - containsHyphen = false; - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::TAG_NAME, reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } else { - if 
(P::reportErrors) { - errGarbageAfterLtSlash(); - } - clearStrBufBeforeUse(); - appendStrBuf(c); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BOGUS_COMMENT, reconsume, - pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - } - case RCDATA: { - for (;;) { - if (reconsume) { - reconsume = false; - } else { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - } - switch (c) { - case '&': { - flushChars(buf, pos); - MOZ_ASSERT(!charRefBufLen, - "charRefBufLen not reset after previous use!"); - appendCharRefBuf(c); - setAdditionalAndRememberAmpersandLocation('\0'); - returnState = state; - state = - P::transition(mViewSource.get(), - nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '<': { - flushChars(buf, pos); - returnState = state; - state = - P::transition(mViewSource.get(), - nsHtml5Tokenizer::RAWTEXT_RCDATA_LESS_THAN_SIGN, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\0': { - emitReplacementCharacter(buf, pos); - continue; - } - case '\r': { - emitCarriageReturn<P>(buf, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - default: { - continue; - } - } - } - } - case RAWTEXT: { - for (;;) { - if (reconsume) { - reconsume = false; - } else { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - } - switch (c) { - case '<': { - flushChars(buf, pos); - returnState = state; - state = - P::transition(mViewSource.get(), - nsHtml5Tokenizer::RAWTEXT_RCDATA_LESS_THAN_SIGN, - reconsume, pos); - NS_HTML5_BREAK(rawtextloop); - } - case '\0': { - emitReplacementCharacter(buf, pos); - continue; - } - case '\r': { - emitCarriageReturn<P>(buf, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - default: { - continue; - } - } - } - rawtextloop_end:; - [[fallthrough]]; - } - case 
RAWTEXT_RCDATA_LESS_THAN_SIGN: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '/': { - index = 0; - clearStrBufBeforeUse(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::NON_DATA_END_TAG_NAME, - reconsume, pos); - NS_HTML5_BREAK(rawtextrcdatalessthansignloop); - } - default: { - tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 1); - cstart = pos; - reconsume = true; - state = - P::transition(mViewSource.get(), returnState, reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - rawtextrcdatalessthansignloop_end:; - [[fallthrough]]; - } - case NON_DATA_END_TAG_NAME: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - if (!endTagExpectationAsArray) { - tokenHandler->characters(nsHtml5Tokenizer::LT_SOLIDUS, 0, 2); - cstart = pos; - reconsume = true; - state = - P::transition(mViewSource.get(), returnState, reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } else if (index < endTagExpectationAsArray.length) { - char16_t e = endTagExpectationAsArray[index]; - char16_t folded = c; - if (c >= 'A' && c <= 'Z') { - folded += 0x20; - } - if (folded != e) { - tokenHandler->characters(nsHtml5Tokenizer::LT_SOLIDUS, 0, 2); - emitStrBuf(); - cstart = pos; - reconsume = true; - state = - P::transition(mViewSource.get(), returnState, reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - appendStrBuf(c); - index++; - continue; - } else { - endTag = true; - tagName = endTagExpectation; - switch (c) { - case '\r': { - P::silentCarriageReturn(this); - clearStrBufAfterUse(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME, - reconsume, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - case ' ': - case '\t': - case '\f': { - clearStrBufAfterUse(); - state = P::transition(mViewSource.get(), - 
nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '/': { - clearStrBufAfterUse(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::SELF_CLOSING_START_TAG, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '>': { - clearStrBufAfterUse(); - state = P::transition(mViewSource.get(), - emitCurrentTagToken(false, pos), - reconsume, pos); - if (shouldSuspend) { - NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - default: { - tokenHandler->characters(nsHtml5Tokenizer::LT_SOLIDUS, 0, 2); - emitStrBuf(); - cstart = pos; - reconsume = true; - state = P::transition(mViewSource.get(), returnState, reconsume, - pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - } - } - case BOGUS_COMMENT: { - for (;;) { - if (reconsume) { - reconsume = false; - } else { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - } - switch (c) { - case '>': { - emitComment(0, pos); - state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, - reconsume, pos); - if (shouldSuspend) { - NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - case '-': { - appendStrBuf(c); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BOGUS_COMMENT_HYPHEN, - reconsume, pos); - NS_HTML5_BREAK(boguscommentloop); - } - case '\r': { - appendStrBufCarriageReturn<P>(); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - appendStrBufLineFeed<P>(); - continue; - } - case '\0': { - c = 0xfffd; - [[fallthrough]]; - } - default: { - appendStrBuf(c); - continue; - } - } - } - boguscommentloop_end:; - [[fallthrough]]; - } - case BOGUS_COMMENT_HYPHEN: { - boguscommenthyphenloop: - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '>': { - emitComment(0, pos); - state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, - reconsume, pos); - if (shouldSuspend) { - 
NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - case '-': { - appendSecondHyphenToBogusComment(); - NS_HTML5_CONTINUE(boguscommenthyphenloop); - } - case '\r': { - appendStrBufCarriageReturn<P>(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BOGUS_COMMENT, reconsume, - pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - appendStrBufLineFeed<P>(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BOGUS_COMMENT, reconsume, - pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\0': { - c = 0xfffd; - [[fallthrough]]; - } - default: { - appendStrBuf(c); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BOGUS_COMMENT, reconsume, - pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - } - case SCRIPT_DATA: { - for (;;) { - if (reconsume) { - reconsume = false; - } else { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - } - switch (c) { - case '<': { - flushChars(buf, pos); - returnState = state; - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_LESS_THAN_SIGN, reconsume, pos); - NS_HTML5_BREAK(scriptdataloop); - } - case '\0': { - emitReplacementCharacter(buf, pos); - continue; - } - case '\r': { - emitCarriageReturn<P>(buf, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - default: { - continue; - } - } - } - scriptdataloop_end:; - [[fallthrough]]; - } - case SCRIPT_DATA_LESS_THAN_SIGN: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '/': { - index = 0; - clearStrBufBeforeUse(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::NON_DATA_END_TAG_NAME, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '!': { - tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 1); - cstart = pos; - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_ESCAPE_START, - 
reconsume, pos); - NS_HTML5_BREAK(scriptdatalessthansignloop); - } - default: { - tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 1); - cstart = pos; - reconsume = true; - state = - P::transition(mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA, reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - scriptdatalessthansignloop_end:; - [[fallthrough]]; - } - case SCRIPT_DATA_ESCAPE_START: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '-': { - state = - P::transition(mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_ESCAPE_START_DASH, - reconsume, pos); - NS_HTML5_BREAK(scriptdataescapestartloop); - } - default: { - reconsume = true; - state = - P::transition(mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA, reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - scriptdataescapestartloop_end:; - [[fallthrough]]; - } - case SCRIPT_DATA_ESCAPE_START_DASH: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '-': { - state = - P::transition(mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_DASH_DASH, - reconsume, pos); - NS_HTML5_BREAK(scriptdataescapestartdashloop); - } - default: { - reconsume = true; - state = - P::transition(mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA, reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - scriptdataescapestartdashloop_end:; - [[fallthrough]]; - } - case SCRIPT_DATA_ESCAPED_DASH_DASH: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '-': { - continue; - } - case '<': { - flushChars(buf, pos); - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '>': { - state = - P::transition(mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA, 
reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\0': { - emitReplacementCharacter(buf, pos); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED, - reconsume, pos); - NS_HTML5_BREAK(scriptdataescapeddashdashloop); - } - case '\r': { - emitCarriageReturn<P>(buf, pos); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED, - reconsume, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - default: { - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED, - reconsume, pos); - NS_HTML5_BREAK(scriptdataescapeddashdashloop); - } - } - } - scriptdataescapeddashdashloop_end:; - [[fallthrough]]; - } - case SCRIPT_DATA_ESCAPED: { - for (;;) { - if (reconsume) { - reconsume = false; - } else { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - } - switch (c) { - case '-': { - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_DASH, - reconsume, pos); - NS_HTML5_BREAK(scriptdataescapedloop); - } - case '<': { - flushChars(buf, pos); - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\0': { - emitReplacementCharacter(buf, pos); - continue; - } - case '\r': { - emitCarriageReturn<P>(buf, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - default: { - continue; - } - } - } - scriptdataescapedloop_end:; - [[fallthrough]]; - } - case SCRIPT_DATA_ESCAPED_DASH: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '-': { - state = - P::transition(mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_DASH_DASH, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '<': { - flushChars(buf, pos); - state = 
P::transition( - mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN, - reconsume, pos); - NS_HTML5_BREAK(scriptdataescapeddashloop); - } - case '\0': { - emitReplacementCharacter(buf, pos); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\r': { - emitCarriageReturn<P>(buf, pos); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED, - reconsume, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - default: { - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - scriptdataescapeddashloop_end:; - [[fallthrough]]; - } - case SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '/': { - index = 0; - clearStrBufBeforeUse(); - returnState = nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED; - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::NON_DATA_END_TAG_NAME, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case 'S': - case 's': { - tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 1); - cstart = pos; - index = 1; - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPE_START, reconsume, - pos); - NS_HTML5_BREAK(scriptdataescapedlessthanloop); - } - default: { - tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 1); - cstart = pos; - reconsume = true; - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - scriptdataescapedlessthanloop_end:; - [[fallthrough]]; - } - case SCRIPT_DATA_DOUBLE_ESCAPE_START: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - 
MOZ_ASSERT(index > 0); - if (index < 6) { - char16_t folded = c; - if (c >= 'A' && c <= 'Z') { - folded += 0x20; - } - if (folded != nsHtml5Tokenizer::SCRIPT_ARR[index]) { - reconsume = true; - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - index++; - continue; - } - switch (c) { - case '\r': { - emitCarriageReturn<P>(buf, pos); - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - case ' ': - case '\t': - case '\f': - case '/': - case '>': { - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos); - NS_HTML5_BREAK(scriptdatadoubleescapestartloop); - } - default: { - reconsume = true; - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - scriptdatadoubleescapestartloop_end:; - [[fallthrough]]; - } - case SCRIPT_DATA_DOUBLE_ESCAPED: { - for (;;) { - if (reconsume) { - reconsume = false; - } else { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - } - switch (c) { - case '-': { - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED_DASH, reconsume, - pos); - NS_HTML5_BREAK(scriptdatadoubleescapedloop); - } - case '<': { - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\0': { - emitReplacementCharacter(buf, pos); - continue; - } - case '\r': { - emitCarriageReturn<P>(buf, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - default: { - continue; - } - } - } - scriptdatadoubleescapedloop_end:; - [[fallthrough]]; - } - case 
SCRIPT_DATA_DOUBLE_ESCAPED_DASH: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '-': { - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH, - reconsume, pos); - NS_HTML5_BREAK(scriptdatadoubleescapeddashloop); - } - case '<': { - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\0': { - emitReplacementCharacter(buf, pos); - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\r': { - emitCarriageReturn<P>(buf, pos); - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - default: { - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - scriptdatadoubleescapeddashloop_end:; - [[fallthrough]]; - } - case SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '-': { - continue; - } - case '<': { - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN, - reconsume, pos); - NS_HTML5_BREAK(scriptdatadoubleescapeddashdashloop); - } - case '>': { - state = - P::transition(mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA, reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\0': { - emitReplacementCharacter(buf, pos); - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\r': { - 
emitCarriageReturn<P>(buf, pos); - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - default: { - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - scriptdatadoubleescapeddashdashloop_end:; - [[fallthrough]]; - } - case SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '/': { - index = 0; - state = - P::transition(mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPE_END, - reconsume, pos); - NS_HTML5_BREAK(scriptdatadoubleescapedlessthanloop); - } - default: { - reconsume = true; - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - scriptdatadoubleescapedlessthanloop_end:; - [[fallthrough]]; - } - case SCRIPT_DATA_DOUBLE_ESCAPE_END: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - if (index < 6) { - char16_t folded = c; - if (c >= 'A' && c <= 'Z') { - folded += 0x20; - } - if (folded != nsHtml5Tokenizer::SCRIPT_ARR[index]) { - reconsume = true; - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - index++; - continue; - } - switch (c) { - case '\r': { - emitCarriageReturn<P>(buf, pos); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED, - reconsume, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - case ' ': - case '\t': - case '\f': - case '/': - case '>': { - state = P::transition(mViewSource.get(), - 
nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - default: { - reconsume = true; - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - } - case MARKUP_DECLARATION_OCTYPE: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - if (index < 6) { - char16_t folded = c; - if (c >= 'A' && c <= 'Z') { - folded += 0x20; - } - if (folded == nsHtml5Tokenizer::OCTYPE[index]) { - appendStrBuf(c); - } else { - if (P::reportErrors) { - errBogusComment(); - } - reconsume = true; - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BOGUS_COMMENT, reconsume, - pos); - NS_HTML5_CONTINUE(stateloop); - } - index++; - continue; - } else { - reconsume = true; - state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DOCTYPE, - reconsume, pos); - NS_HTML5_BREAK(markupdeclarationdoctypeloop); - } - } - markupdeclarationdoctypeloop_end:; - [[fallthrough]]; - } - case DOCTYPE: { - for (;;) { - if (reconsume) { - reconsume = false; - } else { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - } - initDoctypeFields(); - switch (c) { - case '\r': { - P::silentCarriageReturn(this); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BEFORE_DOCTYPE_NAME, - reconsume, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - case ' ': - case '\t': - case '\f': { - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BEFORE_DOCTYPE_NAME, - reconsume, pos); - NS_HTML5_BREAK(doctypeloop); - } - default: { - if (P::reportErrors) { - errMissingSpaceBeforeDoctypeName(); - } - reconsume = true; - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BEFORE_DOCTYPE_NAME, - reconsume, pos); - NS_HTML5_BREAK(doctypeloop); - } - } - } - doctypeloop_end:; - [[fallthrough]]; - } 
- case BEFORE_DOCTYPE_NAME: { - for (;;) { - if (reconsume) { - reconsume = false; - } else { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - } - switch (c) { - case '\r': { - P::silentCarriageReturn(this); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - case ' ': - case '\t': - case '\f': { - continue; - } - case '>': { - if (P::reportErrors) { - errNamelessDoctype(); - } - forceQuirks = true; - emitDoctypeToken(pos); - state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, - reconsume, pos); - if (shouldSuspend) { - NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - case '\0': { - c = 0xfffd; - [[fallthrough]]; - } - default: { - if (c >= 'A' && c <= 'Z') { - c += 0x20; - } - clearStrBufBeforeUse(); - appendStrBuf(c); - state = - P::transition(mViewSource.get(), - nsHtml5Tokenizer::DOCTYPE_NAME, reconsume, pos); - NS_HTML5_BREAK(beforedoctypenameloop); - } - } - } - beforedoctypenameloop_end:; - [[fallthrough]]; - } - case DOCTYPE_NAME: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '\r': { - P::silentCarriageReturn(this); - strBufToDoctypeName(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::AFTER_DOCTYPE_NAME, - reconsume, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - case ' ': - case '\t': - case '\f': { - strBufToDoctypeName(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::AFTER_DOCTYPE_NAME, - reconsume, pos); - NS_HTML5_BREAK(doctypenameloop); - } - case '>': { - strBufToDoctypeName(); - emitDoctypeToken(pos); - state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, - reconsume, pos); - if (shouldSuspend) { - NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - case '\0': { - c = 0xfffd; - [[fallthrough]]; - } - default: { - if (c 
>= 'A' && c <= 'Z') { - c += 0x0020; - } - appendStrBuf(c); - continue; - } - } - } - doctypenameloop_end:; - [[fallthrough]]; - } - case AFTER_DOCTYPE_NAME: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '\r': { - P::silentCarriageReturn(this); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - case ' ': - case '\t': - case '\f': { - continue; - } - case '>': { - emitDoctypeToken(pos); - state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, - reconsume, pos); - if (shouldSuspend) { - NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - case 'p': - case 'P': { - index = 0; - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::DOCTYPE_UBLIC, reconsume, - pos); - NS_HTML5_BREAK(afterdoctypenameloop); - } - case 's': - case 'S': { - index = 0; - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::DOCTYPE_YSTEM, reconsume, - pos); - NS_HTML5_CONTINUE(stateloop); - } - default: { - bogusDoctype(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BOGUS_DOCTYPE, reconsume, - pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - afterdoctypenameloop_end:; - [[fallthrough]]; - } - case DOCTYPE_UBLIC: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - if (index < 5) { - char16_t folded = c; - if (c >= 'A' && c <= 'Z') { - folded += 0x20; - } - if (folded != nsHtml5Tokenizer::UBLIC[index]) { - bogusDoctype(); - reconsume = true; - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BOGUS_DOCTYPE, reconsume, - pos); - NS_HTML5_CONTINUE(stateloop); - } - index++; - continue; - } else { - reconsume = true; - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::AFTER_DOCTYPE_PUBLIC_KEYWORD, reconsume, pos); - NS_HTML5_BREAK(doctypeublicloop); - } - } - doctypeublicloop_end:; - [[fallthrough]]; - } - case 
AFTER_DOCTYPE_PUBLIC_KEYWORD: { - for (;;) { - if (reconsume) { - reconsume = false; - } else { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - } - switch (c) { - case '\r': { - P::silentCarriageReturn(this); - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::BEFORE_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, - pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - case ' ': - case '\t': - case '\f': { - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::BEFORE_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, - pos); - NS_HTML5_BREAK(afterdoctypepublickeywordloop); - } - case '\"': { - if (P::reportErrors) { - errNoSpaceBetweenDoctypePublicKeywordAndQuote(); - } - clearStrBufBeforeUse(); - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\'': { - if (P::reportErrors) { - errNoSpaceBetweenDoctypePublicKeywordAndQuote(); - } - clearStrBufBeforeUse(); - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '>': { - if (P::reportErrors) { - errExpectedPublicId(); - } - forceQuirks = true; - emitDoctypeToken(pos); - state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, - reconsume, pos); - if (shouldSuspend) { - NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - default: { - bogusDoctype(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BOGUS_DOCTYPE, reconsume, - pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - afterdoctypepublickeywordloop_end:; - [[fallthrough]]; - } - case BEFORE_DOCTYPE_PUBLIC_IDENTIFIER: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '\r': { - P::silentCarriageReturn(this); - 
NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - case ' ': - case '\t': - case '\f': { - continue; - } - case '\"': { - clearStrBufBeforeUse(); - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED, - reconsume, pos); - NS_HTML5_BREAK(beforedoctypepublicidentifierloop); - } - case '\'': { - clearStrBufBeforeUse(); - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '>': { - if (P::reportErrors) { - errExpectedPublicId(); - } - forceQuirks = true; - emitDoctypeToken(pos); - state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, - reconsume, pos); - if (shouldSuspend) { - NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - default: { - bogusDoctype(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BOGUS_DOCTYPE, reconsume, - pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - beforedoctypepublicidentifierloop_end:; - [[fallthrough]]; - } - case DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '\"': { - publicIdentifier = strBufToString(); - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::AFTER_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, - pos); - NS_HTML5_BREAK(doctypepublicidentifierdoublequotedloop); - } - case '>': { - if (P::reportErrors) { - errGtInPublicId(); - } - forceQuirks = true; - publicIdentifier = strBufToString(); - emitDoctypeToken(pos); - state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, - reconsume, pos); - if (shouldSuspend) { - NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - case '\r': { - appendStrBufCarriageReturn<P>(); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - appendStrBufLineFeed<P>(); - continue; - } - case '\0': 
{ - c = 0xfffd; - [[fallthrough]]; - } - default: { - appendStrBuf(c); - continue; - } - } - } - doctypepublicidentifierdoublequotedloop_end:; - [[fallthrough]]; - } - case AFTER_DOCTYPE_PUBLIC_IDENTIFIER: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '\r': { - P::silentCarriageReturn(this); - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer:: - BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS, - reconsume, pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - case ' ': - case '\t': - case '\f': { - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer:: - BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS, - reconsume, pos); - NS_HTML5_BREAK(afterdoctypepublicidentifierloop); - } - case '>': { - emitDoctypeToken(pos); - state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, - reconsume, pos); - if (shouldSuspend) { - NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - case '\"': { - if (P::reportErrors) { - errNoSpaceBetweenPublicAndSystemIds(); - } - clearStrBufBeforeUse(); - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\'': { - if (P::reportErrors) { - errNoSpaceBetweenPublicAndSystemIds(); - } - clearStrBufBeforeUse(); - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - default: { - bogusDoctype(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BOGUS_DOCTYPE, reconsume, - pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - afterdoctypepublicidentifierloop_end:; - [[fallthrough]]; - } - case BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, 
pos); - switch (c) { - case '\r': { - P::silentCarriageReturn(this); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - case ' ': - case '\t': - case '\f': { - continue; - } - case '>': { - emitDoctypeToken(pos); - state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, - reconsume, pos); - if (shouldSuspend) { - NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - case '\"': { - clearStrBufBeforeUse(); - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, - reconsume, pos); - NS_HTML5_BREAK(betweendoctypepublicandsystemidentifiersloop); - } - case '\'': { - clearStrBufBeforeUse(); - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - default: { - bogusDoctype(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BOGUS_DOCTYPE, reconsume, - pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - betweendoctypepublicandsystemidentifiersloop_end:; - [[fallthrough]]; - } - case DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '\"': { - systemIdentifier = strBufToString(); - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::AFTER_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, - pos); - NS_HTML5_BREAK(doctypesystemidentifierdoublequotedloop); - } - case '>': { - if (P::reportErrors) { - errGtInSystemId(); - } - forceQuirks = true; - systemIdentifier = strBufToString(); - emitDoctypeToken(pos); - state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, - reconsume, pos); - if (shouldSuspend) { - NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - case '\r': { - appendStrBufCarriageReturn<P>(); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - appendStrBufLineFeed<P>(); - continue; 
- } - case '\0': { - c = 0xfffd; - [[fallthrough]]; - } - default: { - appendStrBuf(c); - continue; - } - } - } - doctypesystemidentifierdoublequotedloop_end:; - [[fallthrough]]; - } - case AFTER_DOCTYPE_SYSTEM_IDENTIFIER: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '\r': { - P::silentCarriageReturn(this); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - case ' ': - case '\t': - case '\f': { - continue; - } - case '>': { - emitDoctypeToken(pos); - state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, - reconsume, pos); - if (shouldSuspend) { - NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - default: { - bogusDoctypeWithoutQuirks(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BOGUS_DOCTYPE, reconsume, - pos); - NS_HTML5_BREAK(afterdoctypesystemidentifierloop); - } - } - } - afterdoctypesystemidentifierloop_end:; - [[fallthrough]]; - } - case BOGUS_DOCTYPE: { - for (;;) { - if (reconsume) { - reconsume = false; - } else { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - } - switch (c) { - case '>': { - emitDoctypeToken(pos); - state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, - reconsume, pos); - if (shouldSuspend) { - NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - case '\r': { - P::silentCarriageReturn(this); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - default: { - continue; - } - } - } - } - case DOCTYPE_YSTEM: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - if (index < 5) { - char16_t folded = c; - if (c >= 'A' && c <= 'Z') { - folded += 0x20; - } - if (folded != nsHtml5Tokenizer::YSTEM[index]) { - bogusDoctype(); - reconsume = true; - state = P::transition(mViewSource.get(), - 
nsHtml5Tokenizer::BOGUS_DOCTYPE, reconsume, - pos); - NS_HTML5_CONTINUE(stateloop); - } - index++; - NS_HTML5_CONTINUE(stateloop); - } else { - reconsume = true; - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::AFTER_DOCTYPE_SYSTEM_KEYWORD, reconsume, pos); - NS_HTML5_BREAK(doctypeystemloop); - } - } - doctypeystemloop_end:; - [[fallthrough]]; - } - case AFTER_DOCTYPE_SYSTEM_KEYWORD: { - for (;;) { - if (reconsume) { - reconsume = false; - } else { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - } - switch (c) { - case '\r': { - P::silentCarriageReturn(this); - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::BEFORE_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, - pos); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - case ' ': - case '\t': - case '\f': { - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::BEFORE_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, - pos); - NS_HTML5_BREAK(afterdoctypesystemkeywordloop); - } - case '\"': { - if (P::reportErrors) { - errNoSpaceBetweenDoctypeSystemKeywordAndQuote(); - } - clearStrBufBeforeUse(); - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\'': { - if (P::reportErrors) { - errNoSpaceBetweenDoctypeSystemKeywordAndQuote(); - } - clearStrBufBeforeUse(); - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '>': { - if (P::reportErrors) { - errExpectedPublicId(); - } - forceQuirks = true; - emitDoctypeToken(pos); - state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, - reconsume, pos); - if (shouldSuspend) { - NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - default: { - bogusDoctype(); - state = P::transition(mViewSource.get(), - 
nsHtml5Tokenizer::BOGUS_DOCTYPE, reconsume, - pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - afterdoctypesystemkeywordloop_end:; - [[fallthrough]]; - } - case BEFORE_DOCTYPE_SYSTEM_IDENTIFIER: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '\r': { - P::silentCarriageReturn(this); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - P::silentLineFeed(this); - [[fallthrough]]; - } - case ' ': - case '\t': - case '\f': { - continue; - } - case '\"': { - clearStrBufBeforeUse(); - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - case '\'': { - clearStrBufBeforeUse(); - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, - reconsume, pos); - NS_HTML5_BREAK(beforedoctypesystemidentifierloop); - } - case '>': { - if (P::reportErrors) { - errExpectedSystemId(); - } - forceQuirks = true; - emitDoctypeToken(pos); - state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, - reconsume, pos); - if (shouldSuspend) { - NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - default: { - bogusDoctype(); - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::BOGUS_DOCTYPE, reconsume, - pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } - beforedoctypesystemidentifierloop_end:; - [[fallthrough]]; - } - case DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '\'': { - systemIdentifier = strBufToString(); - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::AFTER_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, - pos); - NS_HTML5_CONTINUE(stateloop); - } - case '>': { - if (P::reportErrors) { - errGtInSystemId(); - } - forceQuirks = true; - systemIdentifier = strBufToString(); - 
emitDoctypeToken(pos); - state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, - reconsume, pos); - if (shouldSuspend) { - NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - case '\r': { - appendStrBufCarriageReturn<P>(); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - appendStrBufLineFeed<P>(); - continue; - } - case '\0': { - c = 0xfffd; - [[fallthrough]]; - } - default: { - appendStrBuf(c); - continue; - } - } - } - } - case DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '\'': { - publicIdentifier = strBufToString(); - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::AFTER_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, - pos); - NS_HTML5_CONTINUE(stateloop); - } - case '>': { - if (P::reportErrors) { - errGtInPublicId(); - } - forceQuirks = true; - publicIdentifier = strBufToString(); - emitDoctypeToken(pos); - state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, - reconsume, pos); - if (shouldSuspend) { - NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - case '\r': { - appendStrBufCarriageReturn<P>(); - NS_HTML5_BREAK(stateloop); - } - case '\n': { - appendStrBufLineFeed<P>(); - continue; - } - case '\0': { - c = 0xfffd; - [[fallthrough]]; - } - default: { - appendStrBuf(c); - continue; - } - } - } - } - case PROCESSING_INSTRUCTION: { - for (;;) { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - switch (c) { - case '\?': { - state = P::transition( - mViewSource.get(), - nsHtml5Tokenizer::PROCESSING_INSTRUCTION_QUESTION_MARK, - reconsume, pos); - NS_HTML5_BREAK(processinginstructionloop); - } - default: { - continue; - } - } - } - processinginstructionloop_end:; - [[fallthrough]]; - } - case PROCESSING_INSTRUCTION_QUESTION_MARK: { - if (++pos == endPos) { - NS_HTML5_BREAK(stateloop); - } - c = P::checkChar(this, buf, pos); - 
switch (c) { - case '>': { - state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, - reconsume, pos); - suspendIfRequestedAfterCurrentNonTextToken(); - if (shouldSuspend) { - NS_HTML5_BREAK(stateloop); - } - NS_HTML5_CONTINUE(stateloop); - } - default: { - state = P::transition(mViewSource.get(), - nsHtml5Tokenizer::PROCESSING_INSTRUCTION, - reconsume, pos); - NS_HTML5_CONTINUE(stateloop); - } - } - } + MOZ_ASSERT(false, "Bad end tag expectation."); + return; } } -stateloop_end:; - flushChars(buf, pos); - stateSave = state; - returnStateSave = returnState; - return pos; } -void nsHtml5Tokenizer::initDoctypeFields() { +void nsHtml5Tokenizer::setLineNumber(int32_t line) { + this->attributeLine = line; + this->line = line; +} + +void nsHtml5Tokenizer::appendCharRefBuf(char16_t c) { + MOZ_RELEASE_ASSERT(charRefBufLen < charRefBuf.length, + "Attempted to overrun charRefBuf!"); + charRefBuf[charRefBufLen++] = c; +} + +void nsHtml5Tokenizer::emitOrAppendCharRefBuf(int32_t returnState) { + if ((returnState & DATA_AND_RCDATA_MASK)) { + appendCharRefBufToStrBuf(); + } else { + if (charRefBufLen > 0) { + tokenHandler->characters(charRefBuf, 0, charRefBufLen); + charRefBufLen = 0; + } + } +} + +void nsHtml5Tokenizer::appendStrBuf(char16_t c) { + MOZ_ASSERT(strBufLen < strBuf.length, "Previous buffer length insufficient."); + if (MOZ_UNLIKELY(strBufLen == strBuf.length)) { + if (MOZ_UNLIKELY(!EnsureBufferSpace(1))) { + MOZ_CRASH("Unable to recover from buffer reallocation failure"); + } + } + strBuf[strBufLen++] = c; +} + +void nsHtml5Tokenizer::appendStrBuf(char16_t* buffer, int32_t offset, + int32_t length) { + int32_t newLen = nsHtml5Portability::checkedAdd(strBufLen, length); + MOZ_ASSERT(newLen <= strBuf.length, "Previous buffer length insufficient."); + if (MOZ_UNLIKELY(strBuf.length < newLen)) { + if (MOZ_UNLIKELY(!EnsureBufferSpace(length))) { + MOZ_CRASH("Unable to recover from buffer reallocation failure"); + } + } + nsHtml5ArrayCopy::arraycopy(buffer, 
offset, strBuf, strBufLen, length); + strBufLen = newLen; +} + +void nsHtml5Tokenizer::emitComment(int32_t provisionalHyphens, int32_t pos) { + RememberGt(pos); + tokenHandler->comment(strBuf, 0, strBufLen - provisionalHyphens); clearStrBufAfterUse(); - doctypeName = nullptr; - if (systemIdentifier) { - systemIdentifier.Release(); - systemIdentifier = nullptr; + cstart = pos + 1; + suspendIfRequestedAfterCurrentNonTextToken(); +} + +void nsHtml5Tokenizer::flushChars(char16_t* buf, int32_t pos) { + if (pos > cstart) { + tokenHandler->characters(buf, cstart, pos - cstart); } - if (publicIdentifier) { - publicIdentifier.Release(); - publicIdentifier = nullptr; + cstart = INT32_MAX; +} + +void nsHtml5Tokenizer::strBufToElementNameString() { + if (containsHyphen) { + nsAtom* annotationName = nsHtml5ElementName::ELT_ANNOTATION_XML->getName(); + if (nsHtml5Portability::localEqualsBuffer(annotationName, strBuf, + strBufLen)) { + tagName = nsHtml5ElementName::ELT_ANNOTATION_XML; + } else { + nonInternedTagName->setNameForNonInterned( + nsHtml5Portability::newLocalNameFromBuffer(strBuf, strBufLen, + interner), + true); + tagName = nonInternedTagName; + } + } else { + tagName = nsHtml5ElementName::elementNameByBuffer(strBuf, strBufLen); + if (!tagName) { + nonInternedTagName->setNameForNonInterned( + nsHtml5Portability::newLocalNameFromBuffer(strBuf, strBufLen, + interner), + false); + tagName = nonInternedTagName; + } + } + containsHyphen = false; + clearStrBufAfterUse(); +} + +int32_t nsHtml5Tokenizer::emitCurrentTagToken(bool selfClosing, int32_t pos) { + RememberGt(pos); + cstart = pos + 1; + maybeErrSlashInEndTag(selfClosing); + stateSave = nsHtml5Tokenizer::DATA; + nsHtml5HtmlAttributes* attrs = + (!attributes ? 
nsHtml5HtmlAttributes::EMPTY_ATTRIBUTES : attributes); + if (endTag) { + maybeErrAttributesOnEndTag(attrs); + if (!viewingXmlSource) { + tokenHandler->endTag(tagName); + } + if (newAttributesEachTime) { + delete attributes; + attributes = nullptr; + } + } else { + if (viewingXmlSource) { + MOZ_ASSERT(newAttributesEachTime); + delete attributes; + attributes = nullptr; + } else { + tokenHandler->startTag(tagName, attrs, selfClosing); + } + } + tagName = nullptr; + if (newAttributesEachTime) { + attributes = nullptr; + } else { + attributes->clear(0); + } + suspendIfRequestedAfterCurrentNonTextToken(); + return stateSave; +} + +void nsHtml5Tokenizer::attributeNameComplete() { + attributeName = + nsHtml5AttributeName::nameByBuffer(strBuf, strBufLen, interner); + if (!attributeName) { + nonInternedAttributeName->setNameForNonInterned( + nsHtml5Portability::newLocalNameFromBuffer(strBuf, strBufLen, + interner)); + attributeName = nonInternedAttributeName; + } + clearStrBufAfterUse(); + if (!attributes) { + attributes = new nsHtml5HtmlAttributes(0); + } + if (attributes->contains(attributeName)) { + errDuplicateAttribute(); + attributeName = nullptr; } - forceQuirks = false; } -template <class P> -void nsHtml5Tokenizer::adjustDoubleHyphenAndAppendToStrBufCarriageReturn() { - P::silentCarriageReturn(this); - adjustDoubleHyphenAndAppendToStrBufAndErr('\n', false); +void nsHtml5Tokenizer::addAttributeWithoutValue() { + if (attributeName) { + attributes->addAttribute( + attributeName, nsHtml5Portability::newEmptyString(), attributeLine); + attributeName = nullptr; + } else { + clearStrBufAfterUse(); + } } -template <class P> -void nsHtml5Tokenizer::adjustDoubleHyphenAndAppendToStrBufLineFeed() { - P::silentLineFeed(this); - adjustDoubleHyphenAndAppendToStrBufAndErr('\n', false); +void nsHtml5Tokenizer::addAttributeWithValue() { + if (attributeName) { + nsHtml5String val = strBufToString(); + if (mViewSource) { + mViewSource->MaybeLinkifyAttributeValue(attributeName, val); + 
} + attributes->addAttribute(attributeName, val, attributeLine); + attributeName = nullptr; + } else { + clearStrBufAfterUse(); + } } -template <class P> -void nsHtml5Tokenizer::appendStrBufLineFeed() { - P::silentLineFeed(this); - appendStrBuf('\n'); +void nsHtml5Tokenizer::start() { + initializeWithoutStarting(); + tokenHandler->startTokenization(this); + if (mViewSource) { + line = 1; + col = -1; + nextCharOnNewLine = false; + } else if (tokenHandler->WantsLineAndColumn()) { + line = 0; + col = 1; + nextCharOnNewLine = true; + } else { + line = -1; + col = -1; + nextCharOnNewLine = false; + } } -template <class P> -void nsHtml5Tokenizer::appendStrBufCarriageReturn() { - P::silentCarriageReturn(this); - appendStrBuf('\n'); +bool nsHtml5Tokenizer::tokenizeBuffer(nsHtml5UTF16Buffer* buffer) { + int32_t state = stateSave; + int32_t returnState = returnStateSave; + char16_t c = '\0'; + shouldSuspend = false; + lastCR = false; + int32_t start = buffer->getStart(); + int32_t end = buffer->getEnd(); + int32_t pos = start - 1; + switch (state) { + case DATA: + case RCDATA: + case SCRIPT_DATA: + case PLAINTEXT: + case RAWTEXT: + case CDATA_SECTION: + case SCRIPT_DATA_ESCAPED: + case SCRIPT_DATA_ESCAPE_START: + case SCRIPT_DATA_ESCAPE_START_DASH: + case SCRIPT_DATA_ESCAPED_DASH: + case SCRIPT_DATA_ESCAPED_DASH_DASH: + case SCRIPT_DATA_DOUBLE_ESCAPE_START: + case SCRIPT_DATA_DOUBLE_ESCAPED: + case SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN: + case SCRIPT_DATA_DOUBLE_ESCAPED_DASH: + case SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH: + case SCRIPT_DATA_DOUBLE_ESCAPE_END: { + cstart = start; + break; + } + default: { + cstart = INT32_MAX; + break; + } + } + if (mViewSource) { + mViewSource->SetBuffer(buffer); + if (mozilla::htmlaccel::htmlaccelEnabled()) { + pos = StateLoopViewSourceSIMD(state, c, pos, buffer->getBuffer(), false, + returnState, buffer->getEnd()); + } else { + pos = StateLoopViewSourceALU(state, c, pos, buffer->getBuffer(), false, + returnState, buffer->getEnd()); + } + 
mViewSource->DropBuffer((pos == buffer->getEnd()) ? pos : pos + 1); + } else if (tokenHandler->WantsLineAndColumn()) { + if (mozilla::htmlaccel::htmlaccelEnabled()) { + pos = StateLoopLineColSIMD(state, c, pos, buffer->getBuffer(), false, + returnState, buffer->getEnd()); + } else { + pos = StateLoopLineColALU(state, c, pos, buffer->getBuffer(), false, + returnState, buffer->getEnd()); + } + } else if (mozilla::htmlaccel::htmlaccelEnabled()) { + pos = StateLoopFastestSIMD(state, c, pos, buffer->getBuffer(), false, + returnState, buffer->getEnd()); + } else { + pos = StateLoopFastestALU(state, c, pos, buffer->getBuffer(), false, + returnState, buffer->getEnd()); + } + if (pos == end) { + buffer->setStart(pos); + } else { + buffer->setStart(pos + 1); + } + return lastCR; } -template <class P> -void nsHtml5Tokenizer::emitCarriageReturn(char16_t* buf, int32_t pos) { - P::silentCarriageReturn(this); - flushChars(buf, pos); - tokenHandler->characters(nsHtml5Tokenizer::LF, 0, 1); - cstart = INT32_MAX; +void nsHtml5Tokenizer::initDoctypeFields() { + clearStrBufAfterUse(); + doctypeName = nullptr; + if (systemIdentifier) { + systemIdentifier.Release(); + systemIdentifier = nullptr; + } + if (publicIdentifier) { + publicIdentifier.Release(); + publicIdentifier = nullptr; + } + forceQuirks = false; } void nsHtml5Tokenizer::emitReplacementCharacter(char16_t* buf, int32_t pos) { @@ -4528,10 +503,6 @@ void nsHtml5Tokenizer::emitPlaintextReplacementCharacter(char16_t* buf, cstart = pos + 1; } -void nsHtml5Tokenizer::setAdditionalAndRememberAmpersandLocation(char16_t add) { - additional = add; -} - void nsHtml5Tokenizer::bogusDoctype() { errBogusDoctype(); forceQuirks = true; @@ -4897,13 +868,6 @@ void nsHtml5Tokenizer::emitDoctypeToken(int32_t pos) { suspendIfRequestedAfterCurrentNonTextToken(); } -void nsHtml5Tokenizer::suspendIfRequestedAfterCurrentNonTextToken() { - if (suspendAfterCurrentNonTextToken) { - suspendAfterCurrentNonTextToken = false; - shouldSuspend = true; - } -} 
- void nsHtml5Tokenizer::suspendAfterCurrentTokenIfNotInText() { switch (stateSave) { case DATA: @@ -5015,25 +979,6 @@ bool nsHtml5Tokenizer::internalEncodingDeclaration( return false; } -void nsHtml5Tokenizer::emitOrAppendTwo(const char16_t* val, - int32_t returnState) { - if ((returnState & DATA_AND_RCDATA_MASK)) { - appendStrBuf(val[0]); - appendStrBuf(val[1]); - } else { - tokenHandler->characters(val, 0, 2); - } -} - -void nsHtml5Tokenizer::emitOrAppendOne(const char16_t* val, - int32_t returnState) { - if ((returnState & DATA_AND_RCDATA_MASK)) { - appendStrBuf(val[0]); - } else { - tokenHandler->characters(val, 0, 1); - } -} - void nsHtml5Tokenizer::end() { if (!keepBuffer) { strBuf = nullptr; @@ -5057,10 +1002,6 @@ void nsHtml5Tokenizer::end() { } } -void nsHtml5Tokenizer::requestSuspension() { shouldSuspend = true; } - -bool nsHtml5Tokenizer::isInDataState() { return (stateSave == DATA); } - void nsHtml5Tokenizer::resetToDataState() { clearStrBufAfterUse(); charRefBufLen = 0; diff --git a/parser/html/nsHtml5Tokenizer.h b/parser/html/nsHtml5Tokenizer.h @@ -43,8 +43,10 @@ #include "nsHtml5NamedCharacters.h" #include "nsHtml5NamedCharactersAccel.h" #include "nsHtml5String.h" +#include "nsHtml5TreeBuilder.h" #include "nsIContent.h" #include "nsTraceRefcnt.h" +#include "mozilla/htmlaccel/htmlaccelEnabled.h" class nsHtml5StreamParser; @@ -337,15 +339,12 @@ class nsHtml5Tokenizer { void setLineNumber(int32_t line); inline int32_t getLineNumber() { return line; } - nsHtml5HtmlAttributes* emptyAttributes(); - - private: - inline void appendCharRefBuf(char16_t c) { - MOZ_RELEASE_ASSERT(charRefBufLen < charRefBuf.length, - "Attempted to overrun charRefBuf!"); - charRefBuf[charRefBufLen++] = c; + inline nsHtml5HtmlAttributes* emptyAttributes() { + return nsHtml5HtmlAttributes::EMPTY_ATTRIBUTES; } + private: + void appendCharRefBuf(char16_t c); void emitOrAppendCharRefBuf(int32_t returnState); inline void clearStrBufAfterUse() { strBufLen = 0; } @@ -360,23 +359,32 @@ 
class nsHtml5Tokenizer { strBufLen = 0; } - inline void appendStrBuf(char16_t c) { - MOZ_ASSERT(strBufLen < strBuf.length, - "Previous buffer length insufficient."); - if (MOZ_UNLIKELY(strBufLen == strBuf.length)) { - if (MOZ_UNLIKELY(!EnsureBufferSpace(1))) { - MOZ_CRASH("Unable to recover from buffer reallocation failure"); - } - } - strBuf[strBufLen++] = c; - } + void appendStrBuf(char16_t c); protected: - nsHtml5String strBufToString(); + inline nsHtml5String strBufToString() { + nsHtml5String str = nsHtml5Portability::newStringFromBuffer( + strBuf, 0, strBufLen, tokenHandler, + !newAttributesEachTime && + attributeName == nsHtml5AttributeName::ATTR_CLASS); + clearStrBufAfterUse(); + return str; + } private: - void strBufToDoctypeName(); - void emitStrBuf(); + inline void strBufToDoctypeName() { + doctypeName = + nsHtml5Portability::newLocalNameFromBuffer(strBuf, strBufLen, interner); + clearStrBufAfterUse(); + } + + inline void emitStrBuf() { + if (strBufLen > 0) { + tokenHandler->characters(strBuf, 0, strBufLen); + clearStrBufAfterUse(); + } + } + inline void appendSecondHyphenToBogusComment() { appendStrBuf('-'); } inline void adjustDoubleHyphenAndAppendToStrBufAndErr( @@ -408,23 +416,4167 @@ class nsHtml5Tokenizer { private: template <class P> - int32_t stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* buf, - bool reconsume, int32_t returnState, int32_t endPos); + inline int32_t stateLoop(int32_t state, char16_t c, int32_t pos, + char16_t* buf, bool reconsume, int32_t returnState, + int32_t endPos) { + bool reportedConsecutiveHyphens = false; + stateloop: + for (;;) { + switch (state) { + case DATA: { + for (;;) { + if (reconsume) { + reconsume = false; + switch (c) { + case '&': { + flushChars(buf, pos); + MOZ_ASSERT(!charRefBufLen, + "charRefBufLen not reset after previous use!"); + appendCharRefBuf(c); + setAdditionalAndRememberAmpersandLocation('\0'); + returnState = state; + state = P::transition( + mViewSource.get(), + 
nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE, reconsume, + pos); + NS_HTML5_CONTINUE(stateloop); + } + case '<': { + flushChars(buf, pos); + state = + P::transition(mViewSource.get(), + nsHtml5Tokenizer::TAG_OPEN, reconsume, pos); + NS_HTML5_BREAK(dataloop); + } + case '\0': { + maybeEmitReplacementCharacter(buf, pos); + break; + } + case '\r': { + emitCarriageReturn<P>(buf, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + default: { + break; + } + } + } + datamiddle: + for (;;) { + ++pos; + pos += P::accelerateAdvancementData(this, buf, pos, endPos); + for (;;) { + if (pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '&': { + flushChars(buf, pos); + MOZ_ASSERT(!charRefBufLen, + "charRefBufLen not reset after previous use!"); + appendCharRefBuf(c); + setAdditionalAndRememberAmpersandLocation('\0'); + returnState = state; + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '<': { + flushChars(buf, pos); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::TAG_OPEN, reconsume, + pos); + NS_HTML5_BREAK(dataloop); + } + case '\0': { + maybeEmitReplacementCharacter(buf, pos); + NS_HTML5_CONTINUE(datamiddle); + } + case '\r': { + emitCarriageReturn<P>(buf, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + NS_HTML5_CONTINUE(datamiddle); + } + default: { + ++pos; + continue; + } + } + } + } + } + dataloop_end:; + [[fallthrough]]; + } + case TAG_OPEN: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + if (c >= 'A' && c <= 'Z') { + endTag = false; + clearStrBufBeforeUse(); + appendStrBuf((char16_t)(c + 0x20)); + containsHyphen = false; + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::TAG_NAME, reconsume, pos); + NS_HTML5_BREAK(tagopenloop); + 
} else if (c >= 'a' && c <= 'z') { + endTag = false; + clearStrBufBeforeUse(); + appendStrBuf(c); + containsHyphen = false; + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::TAG_NAME, reconsume, pos); + NS_HTML5_BREAK(tagopenloop); + } + switch (c) { + case '!': { + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::MARKUP_DECLARATION_OPEN, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '/': { + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::CLOSE_TAG_OPEN, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\?': { + if (viewingXmlSource) { + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::PROCESSING_INSTRUCTION, reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + if (P::reportErrors) { + errProcessingInstruction(); + } + clearStrBufBeforeUse(); + appendStrBuf(c); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BOGUS_COMMENT, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '>': { + if (P::reportErrors) { + errLtGt(); + } + tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 2); + cstart = pos + 1; + state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + default: { + if (P::reportErrors) { + errBadCharAfterLt(c); + } + tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 1); + cstart = pos; + reconsume = true; + state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + tagopenloop_end:; + [[fallthrough]]; + } + case TAG_NAME: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '\r': { + P::silentCarriageReturn(this); + strBufToElementNameString(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME, + reconsume, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; 
+ } + case ' ': + case '\t': + case '\f': { + strBufToElementNameString(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME, + reconsume, pos); + NS_HTML5_BREAK(tagnameloop); + } + case '/': { + strBufToElementNameString(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::SELF_CLOSING_START_TAG, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '>': { + strBufToElementNameString(); + state = P::transition(mViewSource.get(), + emitCurrentTagToken(false, pos), + reconsume, pos); + if (shouldSuspend) { + NS_HTML5_BREAK(stateloop); + } + NS_HTML5_CONTINUE(stateloop); + } + case '\0': { + c = 0xfffd; + [[fallthrough]]; + } + default: { + if (c >= 'A' && c <= 'Z') { + c += 0x20; + } else if (c == '-') { + containsHyphen = true; + } + appendStrBuf(c); + continue; + } + } + } + tagnameloop_end:; + [[fallthrough]]; + } + case BEFORE_ATTRIBUTE_NAME: { + for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + } + switch (c) { + case '\r': { + P::silentCarriageReturn(this); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + case ' ': + case '\t': + case '\f': { + continue; + } + case '/': { + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::SELF_CLOSING_START_TAG, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '>': { + state = P::transition(mViewSource.get(), + emitCurrentTagToken(false, pos), + reconsume, pos); + if (shouldSuspend) { + NS_HTML5_BREAK(stateloop); + } + NS_HTML5_CONTINUE(stateloop); + } + case '\0': { + c = 0xfffd; + [[fallthrough]]; + } + case '\"': + case '\'': + case '<': + case '=': { + if (P::reportErrors) { + errBadCharBeforeAttributeNameOrNull(c); + } + [[fallthrough]]; + } + default: { + if (c >= 'A' && c <= 'Z') { + c += 0x20; + } + attributeLine = line; + clearStrBufBeforeUse(); + appendStrBuf(c); + state = 
P::transition(mViewSource.get(), + nsHtml5Tokenizer::ATTRIBUTE_NAME, + reconsume, pos); + NS_HTML5_BREAK(beforeattributenameloop); + } + } + } + beforeattributenameloop_end:; + [[fallthrough]]; + } + case ATTRIBUTE_NAME: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '\r': { + P::silentCarriageReturn(this); + attributeNameComplete(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::AFTER_ATTRIBUTE_NAME, + reconsume, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + case ' ': + case '\t': + case '\f': { + attributeNameComplete(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::AFTER_ATTRIBUTE_NAME, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '/': { + attributeNameComplete(); + addAttributeWithoutValue(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::SELF_CLOSING_START_TAG, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '=': { + attributeNameComplete(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BEFORE_ATTRIBUTE_VALUE, + reconsume, pos); + NS_HTML5_BREAK(attributenameloop); + } + case '>': { + attributeNameComplete(); + addAttributeWithoutValue(); + state = P::transition(mViewSource.get(), + emitCurrentTagToken(false, pos), + reconsume, pos); + if (shouldSuspend) { + NS_HTML5_BREAK(stateloop); + } + NS_HTML5_CONTINUE(stateloop); + } + case '\0': { + c = 0xfffd; + [[fallthrough]]; + } + case '\"': + case '\'': + case '<': { + if (P::reportErrors) { + errQuoteOrLtInAttributeNameOrNull(c); + } + [[fallthrough]]; + } + default: { + if (c >= 'A' && c <= 'Z') { + c += 0x20; + } + appendStrBuf(c); + continue; + } + } + } + attributenameloop_end:; + [[fallthrough]]; + } + case BEFORE_ATTRIBUTE_VALUE: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '\r': { + 
P::silentCarriageReturn(this); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + case ' ': + case '\t': + case '\f': { + continue; + } + case '\"': { + attributeLine = line; + clearStrBufBeforeUse(); + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::ATTRIBUTE_VALUE_DOUBLE_QUOTED, reconsume, + pos); + NS_HTML5_BREAK(beforeattributevalueloop); + } + case '&': { + attributeLine = line; + clearStrBufBeforeUse(); + reconsume = true; + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::ATTRIBUTE_VALUE_UNQUOTED, reconsume, pos); + + NS_HTML5_CONTINUE(stateloop); + } + case '\'': { + attributeLine = line; + clearStrBufBeforeUse(); + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::ATTRIBUTE_VALUE_SINGLE_QUOTED, reconsume, + pos); + NS_HTML5_CONTINUE(stateloop); + } + case '>': { + if (P::reportErrors) { + errAttributeValueMissing(); + } + addAttributeWithoutValue(); + state = P::transition(mViewSource.get(), + emitCurrentTagToken(false, pos), + reconsume, pos); + if (shouldSuspend) { + NS_HTML5_BREAK(stateloop); + } + NS_HTML5_CONTINUE(stateloop); + } + case '\0': { + c = 0xfffd; + [[fallthrough]]; + } + case '<': + case '=': + case '`': { + if (P::reportErrors) { + errLtOrEqualsOrGraveInUnquotedAttributeOrNull(c); + } + [[fallthrough]]; + } + default: { + attributeLine = line; + clearStrBufBeforeUse(); + appendStrBuf(c); + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::ATTRIBUTE_VALUE_UNQUOTED, reconsume, pos); + + NS_HTML5_CONTINUE(stateloop); + } + } + } + beforeattributevalueloop_end:; + [[fallthrough]]; + } + case ATTRIBUTE_VALUE_DOUBLE_QUOTED: { + for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + } + switch (c) { + case '\"': { + addAttributeWithValue(); + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::AFTER_ATTRIBUTE_VALUE_QUOTED, 
reconsume, + pos); + NS_HTML5_BREAK(attributevaluedoublequotedloop); + } + case '&': { + MOZ_ASSERT(!charRefBufLen, + "charRefBufLen not reset after previous use!"); + appendCharRefBuf(c); + setAdditionalAndRememberAmpersandLocation('\"'); + returnState = state; + state = + P::transition(mViewSource.get(), + nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\r': { + appendStrBufCarriageReturn<P>(); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + appendStrBufLineFeed<P>(); + continue; + } + case '\0': { + c = 0xfffd; + [[fallthrough]]; + } + default: { + appendStrBuf(c); + continue; + } + } + } + attributevaluedoublequotedloop_end:; + [[fallthrough]]; + } + case AFTER_ATTRIBUTE_VALUE_QUOTED: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '\r': { + P::silentCarriageReturn(this); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME, + reconsume, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + case ' ': + case '\t': + case '\f': { + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '/': { + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::SELF_CLOSING_START_TAG, + reconsume, pos); + NS_HTML5_BREAK(afterattributevaluequotedloop); + } + case '>': { + state = P::transition(mViewSource.get(), + emitCurrentTagToken(false, pos), + reconsume, pos); + if (shouldSuspend) { + NS_HTML5_BREAK(stateloop); + } + NS_HTML5_CONTINUE(stateloop); + } + default: { + if (P::reportErrors) { + errNoSpaceBetweenAttributes(); + } + reconsume = true; + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + afterattributevaluequotedloop_end:; + [[fallthrough]]; + } 
+ case SELF_CLOSING_START_TAG: { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '>': { + state = + P::transition(mViewSource.get(), + emitCurrentTagToken(true, pos), reconsume, pos); + if (shouldSuspend) { + NS_HTML5_BREAK(stateloop); + } + NS_HTML5_CONTINUE(stateloop); + } + default: { + if (P::reportErrors) { + errSlashNotFollowedByGt(); + } + reconsume = true; + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + case ATTRIBUTE_VALUE_UNQUOTED: { + for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + } + switch (c) { + case '\r': { + P::silentCarriageReturn(this); + addAttributeWithValue(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME, + reconsume, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + case ' ': + case '\t': + case '\f': { + addAttributeWithValue(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '&': { + MOZ_ASSERT(!charRefBufLen, + "charRefBufLen not reset after previous use!"); + appendCharRefBuf(c); + setAdditionalAndRememberAmpersandLocation('>'); + returnState = state; + state = + P::transition(mViewSource.get(), + nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '>': { + addAttributeWithValue(); + state = P::transition(mViewSource.get(), + emitCurrentTagToken(false, pos), + reconsume, pos); + if (shouldSuspend) { + NS_HTML5_BREAK(stateloop); + } + NS_HTML5_CONTINUE(stateloop); + } + case '\0': { + c = 0xfffd; + [[fallthrough]]; + } + case '<': + case '\"': + case '\'': + case '=': + case '`': { + if (P::reportErrors) { + 
errUnquotedAttributeValOrNull(c); + } + [[fallthrough]]; + } + default: { + appendStrBuf(c); + continue; + } + } + } + } + case AFTER_ATTRIBUTE_NAME: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '\r': { + P::silentCarriageReturn(this); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + case ' ': + case '\t': + case '\f': { + continue; + } + case '/': { + addAttributeWithoutValue(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::SELF_CLOSING_START_TAG, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '=': { + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BEFORE_ATTRIBUTE_VALUE, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '>': { + addAttributeWithoutValue(); + state = P::transition(mViewSource.get(), + emitCurrentTagToken(false, pos), + reconsume, pos); + if (shouldSuspend) { + NS_HTML5_BREAK(stateloop); + } + NS_HTML5_CONTINUE(stateloop); + } + case '\0': { + c = 0xfffd; + [[fallthrough]]; + } + case '\"': + case '\'': + case '<': { + if (P::reportErrors) { + errQuoteOrLtInAttributeNameOrNull(c); + } + [[fallthrough]]; + } + default: { + addAttributeWithoutValue(); + if (c >= 'A' && c <= 'Z') { + c += 0x20; + } + clearStrBufBeforeUse(); + appendStrBuf(c); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::ATTRIBUTE_NAME, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + } + case MARKUP_DECLARATION_OPEN: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '-': { + clearStrBufBeforeUse(); + appendStrBuf(c); + state = + P::transition(mViewSource.get(), + nsHtml5Tokenizer::MARKUP_DECLARATION_HYPHEN, + reconsume, pos); + NS_HTML5_BREAK(markupdeclarationopenloop); + } + case 'd': + case 'D': { + clearStrBufBeforeUse(); + appendStrBuf(c); + index = 0; + state = + 
P::transition(mViewSource.get(), + nsHtml5Tokenizer::MARKUP_DECLARATION_OCTYPE, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '[': { + if (tokenHandler->cdataSectionAllowed()) { + clearStrBufBeforeUse(); + appendStrBuf(c); + index = 0; + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::CDATA_START, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + [[fallthrough]]; + } + default: { + if (P::reportErrors) { + errBogusComment(); + } + clearStrBufBeforeUse(); + reconsume = true; + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BOGUS_COMMENT, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + markupdeclarationopenloop_end:; + [[fallthrough]]; + } + case MARKUP_DECLARATION_HYPHEN: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '-': { + clearStrBufAfterOneHyphen(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::COMMENT_START, + reconsume, pos); + NS_HTML5_BREAK(markupdeclarationhyphenloop); + } + default: { + if (P::reportErrors) { + errBogusComment(); + } + reconsume = true; + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BOGUS_COMMENT, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + markupdeclarationhyphenloop_end:; + [[fallthrough]]; + } + case COMMENT_START: { + reportedConsecutiveHyphens = false; + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '-': { + appendStrBuf(c); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::COMMENT_START_DASH, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '>': { + if (P::reportErrors) { + errPrematureEndOfComment(); + } + emitComment(0, pos); + state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, + reconsume, pos); + if (shouldSuspend) { + NS_HTML5_BREAK(stateloop); + } + NS_HTML5_CONTINUE(stateloop); + } + case '<': { + 
appendStrBuf(c); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::COMMENT_LESSTHAN, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\r': { + appendStrBufCarriageReturn<P>(); + state = + P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT, + reconsume, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + appendStrBufLineFeed<P>(); + state = + P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT, + reconsume, pos); + NS_HTML5_BREAK(commentstartloop); + } + case '\0': { + c = 0xfffd; + [[fallthrough]]; + } + default: { + appendStrBuf(c); + state = + P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT, + reconsume, pos); + NS_HTML5_BREAK(commentstartloop); + } + } + } + commentstartloop_end:; + [[fallthrough]]; + } + case COMMENT: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '-': { + appendStrBuf(c); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::COMMENT_END_DASH, + reconsume, pos); + NS_HTML5_BREAK(commentloop); + } + case '<': { + appendStrBuf(c); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::COMMENT_LESSTHAN, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\r': { + appendStrBufCarriageReturn<P>(); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + appendStrBufLineFeed<P>(); + continue; + } + case '\0': { + c = 0xfffd; + [[fallthrough]]; + } + default: { + appendStrBuf(c); + continue; + } + } + } + commentloop_end:; + [[fallthrough]]; + } + case COMMENT_END_DASH: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '-': { + appendStrBuf(c); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::COMMENT_END, reconsume, + pos); + NS_HTML5_BREAK(commentenddashloop); + } + case '<': { + appendStrBuf(c); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::COMMENT_LESSTHAN, + reconsume, pos); + 
NS_HTML5_CONTINUE(stateloop); + } + case '\r': { + appendStrBufCarriageReturn<P>(); + state = + P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT, + reconsume, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + appendStrBufLineFeed<P>(); + state = + P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\0': { + c = 0xfffd; + [[fallthrough]]; + } + default: { + appendStrBuf(c); + state = + P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + commentenddashloop_end:; + [[fallthrough]]; + } + case COMMENT_END: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '>': { + emitComment(2, pos); + state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, + reconsume, pos); + if (shouldSuspend) { + NS_HTML5_BREAK(stateloop); + } + NS_HTML5_CONTINUE(stateloop); + } + case '-': { + adjustDoubleHyphenAndAppendToStrBufAndErr( + c, reportedConsecutiveHyphens); + reportedConsecutiveHyphens = true; + continue; + } + case '<': { + appendStrBuf(c); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::COMMENT_LESSTHAN, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\r': { + adjustDoubleHyphenAndAppendToStrBufCarriageReturn<P>(); + state = + P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT, + reconsume, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + adjustDoubleHyphenAndAppendToStrBufLineFeed<P>(); + state = + P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '!': { + appendStrBuf(c); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::COMMENT_END_BANG, + reconsume, pos); + NS_HTML5_BREAK(commentendloop); + } + case '\0': { + c = 0xfffd; + [[fallthrough]]; + } + default: { + adjustDoubleHyphenAndAppendToStrBufAndErr( + c, 
reportedConsecutiveHyphens); + reportedConsecutiveHyphens = true; + state = + P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + commentendloop_end:; + [[fallthrough]]; + } + case COMMENT_END_BANG: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '>': { + emitComment(3, pos); + state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, + reconsume, pos); + if (shouldSuspend) { + NS_HTML5_BREAK(stateloop); + } + NS_HTML5_CONTINUE(stateloop); + } + case '-': { + appendStrBuf(c); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::COMMENT_END_DASH, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\r': { + appendStrBufCarriageReturn<P>(); + state = + P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT, + reconsume, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + appendStrBufLineFeed<P>(); + state = + P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\0': { + c = 0xfffd; + [[fallthrough]]; + } + default: { + appendStrBuf(c); + state = + P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + } + case COMMENT_LESSTHAN: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '!': { + appendStrBuf(c); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::COMMENT_LESSTHAN_BANG, + reconsume, pos); + NS_HTML5_BREAK(commentlessthanloop); + } + case '<': { + appendStrBuf(c); + continue; + } + case '-': { + appendStrBuf(c); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::COMMENT_END_DASH, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\r': { + appendStrBufCarriageReturn<P>(); + state = + P::transition(mViewSource.get(), 
nsHtml5Tokenizer::COMMENT, + reconsume, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + appendStrBufLineFeed<P>(); + state = + P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\0': { + c = 0xfffd; + [[fallthrough]]; + } + default: { + appendStrBuf(c); + state = + P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + commentlessthanloop_end:; + [[fallthrough]]; + } + case COMMENT_LESSTHAN_BANG: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '-': { + appendStrBuf(c); + state = + P::transition(mViewSource.get(), + nsHtml5Tokenizer::COMMENT_LESSTHAN_BANG_DASH, + reconsume, pos); + NS_HTML5_BREAK(commentlessthanbangloop); + } + case '<': { + appendStrBuf(c); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::COMMENT_LESSTHAN, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\r': { + appendStrBufCarriageReturn<P>(); + state = + P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT, + reconsume, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + appendStrBufLineFeed<P>(); + state = + P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\0': { + c = 0xfffd; + [[fallthrough]]; + } + default: { + appendStrBuf(c); + state = + P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + commentlessthanbangloop_end:; + [[fallthrough]]; + } + case COMMENT_LESSTHAN_BANG_DASH: { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '-': { + appendStrBuf(c); + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::COMMENT_LESSTHAN_BANG_DASH_DASH, reconsume, + pos); + break; + } + case '<': { + appendStrBuf(c); + 
state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::COMMENT_LESSTHAN, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\r': { + appendStrBufCarriageReturn<P>(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::COMMENT, reconsume, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + appendStrBufLineFeed<P>(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::COMMENT, reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\0': { + c = 0xfffd; + [[fallthrough]]; + } + default: { + appendStrBuf(c); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::COMMENT, reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + } + [[fallthrough]]; + } + case COMMENT_LESSTHAN_BANG_DASH_DASH: { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '>': { + appendStrBuf(c); + emitComment(3, pos); + state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, + reconsume, pos); + if (shouldSuspend) { + NS_HTML5_BREAK(stateloop); + } + NS_HTML5_CONTINUE(stateloop); + } + case '-': { + if (P::reportErrors) { + errNestedComment(); + } + adjustDoubleHyphenAndAppendToStrBufAndErr( + c, reportedConsecutiveHyphens); + reportedConsecutiveHyphens = true; + state = + P::transition(mViewSource.get(), + nsHtml5Tokenizer::COMMENT_END, reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\r': { + c = '\n'; + P::silentCarriageReturn(this); + if (P::reportErrors) { + errNestedComment(); + } + adjustDoubleHyphenAndAppendToStrBufAndErr( + c, reportedConsecutiveHyphens); + reportedConsecutiveHyphens = true; + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::COMMENT, reconsume, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + if (P::reportErrors) { + errNestedComment(); + } + adjustDoubleHyphenAndAppendToStrBufAndErr( + c, reportedConsecutiveHyphens); + reportedConsecutiveHyphens = true; + state = 
P::transition(mViewSource.get(), + nsHtml5Tokenizer::COMMENT, reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '!': { + if (P::reportErrors) { + errNestedComment(); + } + adjustDoubleHyphenAndAppendToStrBufAndErr( + c, reportedConsecutiveHyphens); + reportedConsecutiveHyphens = true; + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::COMMENT_END_BANG, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\0': { + c = 0xfffd; + [[fallthrough]]; + } + default: { + if (P::reportErrors) { + errNestedComment(); + } + adjustDoubleHyphenAndAppendToStrBufAndErr( + c, reportedConsecutiveHyphens); + reportedConsecutiveHyphens = true; + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::COMMENT, reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + case COMMENT_START_DASH: { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '-': { + appendStrBuf(c); + state = + P::transition(mViewSource.get(), + nsHtml5Tokenizer::COMMENT_END, reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '>': { + if (P::reportErrors) { + errPrematureEndOfComment(); + } + emitComment(1, pos); + state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, + reconsume, pos); + if (shouldSuspend) { + NS_HTML5_BREAK(stateloop); + } + NS_HTML5_CONTINUE(stateloop); + } + case '<': { + appendStrBuf(c); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::COMMENT_LESSTHAN, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\r': { + appendStrBufCarriageReturn<P>(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::COMMENT, reconsume, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + appendStrBufLineFeed<P>(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::COMMENT, reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\0': { + c = 0xfffd; + [[fallthrough]]; + } + default: { + appendStrBuf(c); + state = 
P::transition(mViewSource.get(), + nsHtml5Tokenizer::COMMENT, reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + case CDATA_START: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + if (index < 6) { + if (c == nsHtml5Tokenizer::CDATA_LSQB[index]) { + appendStrBuf(c); + } else { + if (P::reportErrors) { + errBogusComment(); + } + reconsume = true; + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BOGUS_COMMENT, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + index++; + continue; + } else { + clearStrBufAfterUse(); + cstart = pos; + reconsume = true; + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::CDATA_SECTION, reconsume, + pos); + break; + } + } + [[fallthrough]]; + } + case CDATA_SECTION: { + for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + } + switch (c) { + case ']': { + flushChars(buf, pos); + state = + P::transition(mViewSource.get(), + nsHtml5Tokenizer::CDATA_RSQB, reconsume, pos); + NS_HTML5_BREAK(cdatasectionloop); + } + case '\0': { + maybeEmitReplacementCharacter(buf, pos); + continue; + } + case '\r': { + emitCarriageReturn<P>(buf, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + default: { + continue; + } + } + } + cdatasectionloop_end:; + [[fallthrough]]; + } + case CDATA_RSQB: { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case ']': { + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::CDATA_RSQB_RSQB, + reconsume, pos); + break; + } + default: { + tokenHandler->characters(nsHtml5Tokenizer::RSQB_RSQB, 0, 1); + cstart = pos; + reconsume = true; + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::CDATA_SECTION, reconsume, + pos); + NS_HTML5_CONTINUE(stateloop); + } + } + [[fallthrough]]; + } + 
case CDATA_RSQB_RSQB: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case ']': { + tokenHandler->characters(nsHtml5Tokenizer::RSQB_RSQB, 0, 1); + continue; + } + case '>': { + cstart = pos + 1; + state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, + reconsume, pos); + suspendIfRequestedAfterCurrentNonTextToken(); + if (shouldSuspend) { + NS_HTML5_BREAK(stateloop); + } + NS_HTML5_CONTINUE(stateloop); + } + default: { + tokenHandler->characters(nsHtml5Tokenizer::RSQB_RSQB, 0, 2); + cstart = pos; + reconsume = true; + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::CDATA_SECTION, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + } + case ATTRIBUTE_VALUE_SINGLE_QUOTED: { + for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + } + switch (c) { + case '\'': { + addAttributeWithValue(); + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::AFTER_ATTRIBUTE_VALUE_QUOTED, reconsume, + pos); + NS_HTML5_CONTINUE(stateloop); + } + case '&': { + MOZ_ASSERT(!charRefBufLen, + "charRefBufLen not reset after previous use!"); + appendCharRefBuf(c); + setAdditionalAndRememberAmpersandLocation('\''); + returnState = state; + state = + P::transition(mViewSource.get(), + nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE, + reconsume, pos); + NS_HTML5_BREAK(attributevaluesinglequotedloop); + } + case '\r': { + appendStrBufCarriageReturn<P>(); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + appendStrBufLineFeed<P>(); + continue; + } + case '\0': { + c = 0xfffd; + [[fallthrough]]; + } + default: { + appendStrBuf(c); + continue; + } + } + } + attributevaluesinglequotedloop_end:; + [[fallthrough]]; + } + case CONSUME_CHARACTER_REFERENCE: { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case ' ': + 
case '\t': + case '\n': + case '\r': + case '\f': + case '<': + case '&': + case '\0': + case ';': { + emitOrAppendCharRefBuf(returnState); + if (!(returnState & DATA_AND_RCDATA_MASK)) { + cstart = pos; + } + reconsume = true; + state = + P::transition(mViewSource.get(), returnState, reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '#': { + appendCharRefBuf('#'); + state = + P::transition(mViewSource.get(), + nsHtml5Tokenizer::CONSUME_NCR, reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + default: { + if (c == additional) { + emitOrAppendCharRefBuf(returnState); + reconsume = true; + state = P::transition(mViewSource.get(), returnState, reconsume, + pos); + NS_HTML5_CONTINUE(stateloop); + } + if (c >= 'a' && c <= 'z') { + firstCharKey = c - 'a' + 26; + } else if (c >= 'A' && c <= 'Z') { + firstCharKey = c - 'A'; + } else { + if (c == ';') { + if (P::reportErrors) { + errNoNamedCharacterMatch(); + } + } + emitOrAppendCharRefBuf(returnState); + if (!(returnState & DATA_AND_RCDATA_MASK)) { + cstart = pos; + } + reconsume = true; + state = P::transition(mViewSource.get(), returnState, reconsume, + pos); + NS_HTML5_CONTINUE(stateloop); + } + appendCharRefBuf(c); + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::CHARACTER_REFERENCE_HILO_LOOKUP, reconsume, + pos); + break; + } + } + [[fallthrough]]; + } + case CHARACTER_REFERENCE_HILO_LOOKUP: { + { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + int32_t hilo = 0; + if (c <= 'z') { + const int32_t* row = nsHtml5NamedCharactersAccel::HILO_ACCEL[c]; + if (row) { + hilo = row[firstCharKey]; + } + } + if (!hilo) { + if (c == ';') { + if (P::reportErrors) { + errNoNamedCharacterMatch(); + } + } + emitOrAppendCharRefBuf(returnState); + if (!(returnState & DATA_AND_RCDATA_MASK)) { + cstart = pos; + } + reconsume = true; + state = + P::transition(mViewSource.get(), returnState, reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + appendCharRefBuf(c); 
+ lo = hilo & 0xFFFF; + hi = hilo >> 16; + entCol = -1; + candidate = -1; + charRefBufMark = 0; + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::CHARACTER_REFERENCE_TAIL, + reconsume, pos); + } + [[fallthrough]]; + } + case CHARACTER_REFERENCE_TAIL: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + entCol++; + for (;;) { + if (hi < lo) { + NS_HTML5_BREAK(outer); + } + if (entCol == nsHtml5NamedCharacters::NAMES[lo].length()) { + candidate = lo; + charRefBufMark = charRefBufLen; + lo++; + } else if (entCol > nsHtml5NamedCharacters::NAMES[lo].length()) { + NS_HTML5_BREAK(outer); + } else if (c > nsHtml5NamedCharacters::NAMES[lo].charAt(entCol)) { + lo++; + } else { + NS_HTML5_BREAK(loloop); + } + } + loloop_end:; + for (;;) { + if (hi < lo) { + NS_HTML5_BREAK(outer); + } + if (entCol == nsHtml5NamedCharacters::NAMES[hi].length()) { + NS_HTML5_BREAK(hiloop); + } + if (entCol > nsHtml5NamedCharacters::NAMES[hi].length()) { + NS_HTML5_BREAK(outer); + } else if (c < nsHtml5NamedCharacters::NAMES[hi].charAt(entCol)) { + hi--; + } else { + NS_HTML5_BREAK(hiloop); + } + } + hiloop_end:; + if (c == ';') { + if (entCol + 1 == nsHtml5NamedCharacters::NAMES[lo].length()) { + candidate = lo; + charRefBufMark = charRefBufLen; + } + NS_HTML5_BREAK(outer); + } + if (hi < lo) { + NS_HTML5_BREAK(outer); + } + appendCharRefBuf(c); + continue; + } + outer_end:; + if (candidate == -1) { + if (c == ';') { + if (P::reportErrors) { + errNoNamedCharacterMatch(); + } + } + emitOrAppendCharRefBuf(returnState); + if (!(returnState & DATA_AND_RCDATA_MASK)) { + cstart = pos; + } + reconsume = true; + state = + P::transition(mViewSource.get(), returnState, reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } else { + const nsHtml5CharacterName& candidateName = + nsHtml5NamedCharacters::NAMES[candidate]; + if (!candidateName.length() || + candidateName.charAt(candidateName.length() - 1) != ';') { + if ((returnState & 
DATA_AND_RCDATA_MASK)) { + char16_t ch; + if (charRefBufMark == charRefBufLen) { + ch = c; + } else { + ch = charRefBuf[charRefBufMark]; + } + if (ch == '=' || (ch >= '0' && ch <= '9') || + (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z')) { + if (c == ';') { + if (P::reportErrors) { + errNoNamedCharacterMatch(); + } + } + appendCharRefBufToStrBuf(); + reconsume = true; + state = P::transition(mViewSource.get(), returnState, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + } + if ((returnState & DATA_AND_RCDATA_MASK)) { + if (P::reportErrors) { + errUnescapedAmpersandInterpretedAsCharacterReference(); + } + } else { + if (P::reportErrors) { + errNotSemicolonTerminated(); + } + } + } + P::completedNamedCharacterReference(mViewSource.get()); + const char16_t* val = nsHtml5NamedCharacters::VALUES[candidate]; + if (!val[1]) { + emitOrAppendOne(val, returnState); + } else { + emitOrAppendTwo(val, returnState); + } + if (charRefBufMark < charRefBufLen) { + if ((returnState & DATA_AND_RCDATA_MASK)) { + appendStrBuf(charRefBuf, charRefBufMark, + charRefBufLen - charRefBufMark); + } else { + tokenHandler->characters(charRefBuf, charRefBufMark, + charRefBufLen - charRefBufMark); + } + } + bool earlyBreak = (c == ';' && charRefBufMark == charRefBufLen); + charRefBufLen = 0; + if (!(returnState & DATA_AND_RCDATA_MASK)) { + cstart = earlyBreak ? 
pos + 1 : pos; + } + reconsume = !earlyBreak; + state = + P::transition(mViewSource.get(), returnState, reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + } + case CONSUME_NCR: { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + value = 0; + seenDigits = false; + switch (c) { + case 'x': + case 'X': { + appendCharRefBuf(c); + state = + P::transition(mViewSource.get(), + nsHtml5Tokenizer::HEX_NCR_LOOP, reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + default: { + reconsume = true; + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::DECIMAL_NRC_LOOP, + reconsume, pos); + break; + } + } + [[fallthrough]]; + } + case DECIMAL_NRC_LOOP: { + for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + } + MOZ_ASSERT(value >= 0, "value must not become negative."); + if (c >= '0' && c <= '9') { + seenDigits = true; + if (value <= 0x10FFFF) { + value *= 10; + value += c - '0'; + } + continue; + } else if (c == ';') { + if (seenDigits) { + if (!(returnState & DATA_AND_RCDATA_MASK)) { + cstart = pos + 1; + } + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::HANDLE_NCR_VALUE, + reconsume, pos); + NS_HTML5_BREAK(decimalloop); + } else { + if (P::reportErrors) { + errNoDigitsInNCR(); + } + appendCharRefBuf(';'); + emitOrAppendCharRefBuf(returnState); + if (!(returnState & DATA_AND_RCDATA_MASK)) { + cstart = pos + 1; + } + state = P::transition(mViewSource.get(), returnState, reconsume, + pos); + NS_HTML5_CONTINUE(stateloop); + } + } else { + if (!seenDigits) { + if (P::reportErrors) { + errNoDigitsInNCR(); + } + emitOrAppendCharRefBuf(returnState); + if (!(returnState & DATA_AND_RCDATA_MASK)) { + cstart = pos; + } + reconsume = true; + state = P::transition(mViewSource.get(), returnState, reconsume, + pos); + NS_HTML5_CONTINUE(stateloop); + } else { + if (P::reportErrors) { + errCharRefLacksSemicolon(); + 
} + if (!(returnState & DATA_AND_RCDATA_MASK)) { + cstart = pos; + } + reconsume = true; + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::HANDLE_NCR_VALUE, + reconsume, pos); + NS_HTML5_BREAK(decimalloop); + } + } + } + decimalloop_end:; + [[fallthrough]]; + } + case HANDLE_NCR_VALUE: { + charRefBufLen = 0; + handleNcrValue(returnState); + state = P::transition(mViewSource.get(), returnState, reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case HEX_NCR_LOOP: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + MOZ_ASSERT(value >= 0, "value must not become negative."); + if (c >= '0' && c <= '9') { + seenDigits = true; + if (value <= 0x10FFFF) { + value *= 16; + value += c - '0'; + } + continue; + } else if (c >= 'A' && c <= 'F') { + seenDigits = true; + if (value <= 0x10FFFF) { + value *= 16; + value += c - 'A' + 10; + } + continue; + } else if (c >= 'a' && c <= 'f') { + seenDigits = true; + if (value <= 0x10FFFF) { + value *= 16; + value += c - 'a' + 10; + } + continue; + } else if (c == ';') { + if (seenDigits) { + if (!(returnState & DATA_AND_RCDATA_MASK)) { + cstart = pos + 1; + } + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::HANDLE_NCR_VALUE, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } else { + if (P::reportErrors) { + errNoDigitsInNCR(); + } + appendCharRefBuf(';'); + emitOrAppendCharRefBuf(returnState); + if (!(returnState & DATA_AND_RCDATA_MASK)) { + cstart = pos + 1; + } + state = P::transition(mViewSource.get(), returnState, reconsume, + pos); + NS_HTML5_CONTINUE(stateloop); + } + } else { + if (!seenDigits) { + if (P::reportErrors) { + errNoDigitsInNCR(); + } + emitOrAppendCharRefBuf(returnState); + if (!(returnState & DATA_AND_RCDATA_MASK)) { + cstart = pos; + } + reconsume = true; + state = P::transition(mViewSource.get(), returnState, reconsume, + pos); + NS_HTML5_CONTINUE(stateloop); + } else { + if (P::reportErrors) { + 
errCharRefLacksSemicolon(); + } + if (!(returnState & DATA_AND_RCDATA_MASK)) { + cstart = pos; + } + reconsume = true; + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::HANDLE_NCR_VALUE, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + } + case PLAINTEXT: { + for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + } + switch (c) { + case '\0': { + emitPlaintextReplacementCharacter(buf, pos); + continue; + } + case '\r': { + emitCarriageReturn<P>(buf, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + default: { + continue; + } + } + } + } + case CLOSE_TAG_OPEN: { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '>': { + if (P::reportErrors) { + errLtSlashGt(); + } + cstart = pos + 1; + state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\r': { + P::silentCarriageReturn(this); + if (P::reportErrors) { + errGarbageAfterLtSlash(); + } + clearStrBufBeforeUse(); + appendStrBuf('\n'); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BOGUS_COMMENT, reconsume, + pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + if (P::reportErrors) { + errGarbageAfterLtSlash(); + } + clearStrBufBeforeUse(); + appendStrBuf(c); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BOGUS_COMMENT, reconsume, + pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\0': { + c = 0xfffd; + [[fallthrough]]; + } + default: { + if (c >= 'A' && c <= 'Z') { + c += 0x20; + } + if (c >= 'a' && c <= 'z') { + endTag = true; + clearStrBufBeforeUse(); + appendStrBuf(c); + containsHyphen = false; + state = + P::transition(mViewSource.get(), nsHtml5Tokenizer::TAG_NAME, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } else { + if 
(P::reportErrors) { + errGarbageAfterLtSlash(); + } + clearStrBufBeforeUse(); + appendStrBuf(c); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BOGUS_COMMENT, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + } + case RCDATA: { + for (;;) { + if (reconsume) { + reconsume = false; + switch (c) { + case '&': { + flushChars(buf, pos); + MOZ_ASSERT(!charRefBufLen, + "charRefBufLen not reset after previous use!"); + appendCharRefBuf(c); + setAdditionalAndRememberAmpersandLocation('\0'); + returnState = state; + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE, reconsume, + pos); + NS_HTML5_CONTINUE(stateloop); + } + case '<': { + flushChars(buf, pos); + returnState = state; + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::RAWTEXT_RCDATA_LESS_THAN_SIGN, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\0': { + maybeEmitReplacementCharacter(buf, pos); + break; + } + case '\r': { + emitCarriageReturn<P>(buf, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + default: { + break; + } + } + } + rcdatamiddle: + for (;;) { + ++pos; + pos += P::accelerateAdvancementData(this, buf, pos, endPos); + for (;;) { + if (pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '&': { + flushChars(buf, pos); + MOZ_ASSERT(!charRefBufLen, + "charRefBufLen not reset after previous use!"); + appendCharRefBuf(c); + setAdditionalAndRememberAmpersandLocation('\0'); + returnState = state; + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '<': { + flushChars(buf, pos); + returnState = state; + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::RAWTEXT_RCDATA_LESS_THAN_SIGN, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\0': { + 
maybeEmitReplacementCharacter(buf, pos); + NS_HTML5_CONTINUE(rcdatamiddle); + } + case '\r': { + emitCarriageReturn<P>(buf, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + NS_HTML5_CONTINUE(rcdatamiddle); + } + default: { + ++pos; + continue; + } + } + } + } + } + } + case RAWTEXT: { + for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + } + switch (c) { + case '<': { + flushChars(buf, pos); + returnState = state; + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::RAWTEXT_RCDATA_LESS_THAN_SIGN, reconsume, + pos); + NS_HTML5_BREAK(rawtextloop); + } + case '\0': { + emitReplacementCharacter(buf, pos); + continue; + } + case '\r': { + emitCarriageReturn<P>(buf, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + default: { + continue; + } + } + } + rawtextloop_end:; + [[fallthrough]]; + } + case RAWTEXT_RCDATA_LESS_THAN_SIGN: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '/': { + index = 0; + clearStrBufBeforeUse(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::NON_DATA_END_TAG_NAME, + reconsume, pos); + NS_HTML5_BREAK(rawtextrcdatalessthansignloop); + } + default: { + tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 1); + cstart = pos; + reconsume = true; + state = P::transition(mViewSource.get(), returnState, reconsume, + pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + rawtextrcdatalessthansignloop_end:; + [[fallthrough]]; + } + case NON_DATA_END_TAG_NAME: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + if (!endTagExpectationAsArray) { + tokenHandler->characters(nsHtml5Tokenizer::LT_SOLIDUS, 0, 2); + cstart = pos; + reconsume = true; + state = + P::transition(mViewSource.get(), returnState, 
reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } else if (index < endTagExpectationAsArray.length) { + char16_t e = endTagExpectationAsArray[index]; + char16_t folded = c; + if (c >= 'A' && c <= 'Z') { + folded += 0x20; + } + if (folded != e) { + tokenHandler->characters(nsHtml5Tokenizer::LT_SOLIDUS, 0, 2); + emitStrBuf(); + cstart = pos; + reconsume = true; + state = P::transition(mViewSource.get(), returnState, reconsume, + pos); + NS_HTML5_CONTINUE(stateloop); + } + appendStrBuf(c); + index++; + continue; + } else { + endTag = true; + tagName = endTagExpectation; + switch (c) { + case '\r': { + P::silentCarriageReturn(this); + clearStrBufAfterUse(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME, + reconsume, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + case ' ': + case '\t': + case '\f': { + clearStrBufAfterUse(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '/': { + clearStrBufAfterUse(); + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::SELF_CLOSING_START_TAG, reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '>': { + clearStrBufAfterUse(); + state = P::transition(mViewSource.get(), + emitCurrentTagToken(false, pos), + reconsume, pos); + if (shouldSuspend) { + NS_HTML5_BREAK(stateloop); + } + NS_HTML5_CONTINUE(stateloop); + } + default: { + tokenHandler->characters(nsHtml5Tokenizer::LT_SOLIDUS, 0, 2); + emitStrBuf(); + cstart = pos; + reconsume = true; + state = P::transition(mViewSource.get(), returnState, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + } + } + case BOGUS_COMMENT: { + for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + } + switch (c) { + case '>': { + emitComment(0, pos); + state = 
P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, + reconsume, pos); + if (shouldSuspend) { + NS_HTML5_BREAK(stateloop); + } + NS_HTML5_CONTINUE(stateloop); + } + case '-': { + appendStrBuf(c); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BOGUS_COMMENT_HYPHEN, + reconsume, pos); + NS_HTML5_BREAK(boguscommentloop); + } + case '\r': { + appendStrBufCarriageReturn<P>(); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + appendStrBufLineFeed<P>(); + continue; + } + case '\0': { + c = 0xfffd; + [[fallthrough]]; + } + default: { + appendStrBuf(c); + continue; + } + } + } + boguscommentloop_end:; + [[fallthrough]]; + } + case BOGUS_COMMENT_HYPHEN: { + boguscommenthyphenloop: + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '>': { + emitComment(0, pos); + state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, + reconsume, pos); + if (shouldSuspend) { + NS_HTML5_BREAK(stateloop); + } + NS_HTML5_CONTINUE(stateloop); + } + case '-': { + appendSecondHyphenToBogusComment(); + NS_HTML5_CONTINUE(boguscommenthyphenloop); + } + case '\r': { + appendStrBufCarriageReturn<P>(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BOGUS_COMMENT, + reconsume, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + appendStrBufLineFeed<P>(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BOGUS_COMMENT, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\0': { + c = 0xfffd; + [[fallthrough]]; + } + default: { + appendStrBuf(c); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BOGUS_COMMENT, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + } + case SCRIPT_DATA: { + for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + } + switch (c) { + case '<': { + flushChars(buf, pos); + returnState = state; + state = + 
P::transition(mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_LESS_THAN_SIGN, + reconsume, pos); + NS_HTML5_BREAK(scriptdataloop); + } + case '\0': { + emitReplacementCharacter(buf, pos); + continue; + } + case '\r': { + emitCarriageReturn<P>(buf, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + default: { + continue; + } + } + } + scriptdataloop_end:; + [[fallthrough]]; + } + case SCRIPT_DATA_LESS_THAN_SIGN: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '/': { + index = 0; + clearStrBufBeforeUse(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::NON_DATA_END_TAG_NAME, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '!': { + tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 1); + cstart = pos; + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_ESCAPE_START, reconsume, pos); + NS_HTML5_BREAK(scriptdatalessthansignloop); + } + default: { + tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 1); + cstart = pos; + reconsume = true; + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA, reconsume, + pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + scriptdatalessthansignloop_end:; + [[fallthrough]]; + } + case SCRIPT_DATA_ESCAPE_START: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '-': { + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_ESCAPE_START_DASH, reconsume, + pos); + NS_HTML5_BREAK(scriptdataescapestartloop); + } + default: { + reconsume = true; + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA, reconsume, + pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + scriptdataescapestartloop_end:; + [[fallthrough]]; + } + case SCRIPT_DATA_ESCAPE_START_DASH: { + for (;;) { + if (++pos == endPos) { + 
NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '-': { + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_DASH_DASH, reconsume, + pos); + NS_HTML5_BREAK(scriptdataescapestartdashloop); + } + default: { + reconsume = true; + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA, reconsume, + pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + scriptdataescapestartdashloop_end:; + [[fallthrough]]; + } + case SCRIPT_DATA_ESCAPED_DASH_DASH: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '-': { + continue; + } + case '<': { + flushChars(buf, pos); + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '>': { + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA, reconsume, + pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\0': { + emitReplacementCharacter(buf, pos); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED, + reconsume, pos); + NS_HTML5_BREAK(scriptdataescapeddashdashloop); + } + case '\r': { + emitCarriageReturn<P>(buf, pos); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED, + reconsume, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + default: { + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED, + reconsume, pos); + NS_HTML5_BREAK(scriptdataescapeddashdashloop); + } + } + } + scriptdataescapeddashdashloop_end:; + [[fallthrough]]; + } + case SCRIPT_DATA_ESCAPED: { + for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + } + switch (c) { + case '-': { + state = P::transition( + mViewSource.get(), + 
nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_DASH, reconsume, pos); + NS_HTML5_BREAK(scriptdataescapedloop); + } + case '<': { + flushChars(buf, pos); + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\0': { + emitReplacementCharacter(buf, pos); + continue; + } + case '\r': { + emitCarriageReturn<P>(buf, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + default: { + continue; + } + } + } + scriptdataescapedloop_end:; + [[fallthrough]]; + } + case SCRIPT_DATA_ESCAPED_DASH: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '-': { + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_DASH_DASH, reconsume, + pos); + NS_HTML5_CONTINUE(stateloop); + } + case '<': { + flushChars(buf, pos); + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN, + reconsume, pos); + NS_HTML5_BREAK(scriptdataescapeddashloop); + } + case '\0': { + emitReplacementCharacter(buf, pos); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\r': { + emitCarriageReturn<P>(buf, pos); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED, + reconsume, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + default: { + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + scriptdataescapeddashloop_end:; + [[fallthrough]]; + } + case SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '/': { + index = 0; + 
clearStrBufBeforeUse(); + returnState = nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED; + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::NON_DATA_END_TAG_NAME, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case 'S': + case 's': { + tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 1); + cstart = pos; + index = 1; + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPE_START, + reconsume, pos); + NS_HTML5_BREAK(scriptdataescapedlessthanloop); + } + default: { + tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 1); + cstart = pos; + reconsume = true; + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + scriptdataescapedlessthanloop_end:; + [[fallthrough]]; + } + case SCRIPT_DATA_DOUBLE_ESCAPE_START: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + MOZ_ASSERT(index > 0); + if (index < 6) { + char16_t folded = c; + if (c >= 'A' && c <= 'Z') { + folded += 0x20; + } + if (folded != nsHtml5Tokenizer::SCRIPT_ARR[index]) { + reconsume = true; + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + index++; + continue; + } + switch (c) { + case '\r': { + emitCarriageReturn<P>(buf, pos); + state = + P::transition(mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, + reconsume, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + case ' ': + case '\t': + case '\f': + case '/': + case '>': { + state = + P::transition(mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, + reconsume, pos); + NS_HTML5_BREAK(scriptdatadoubleescapestartloop); + } + default: { + reconsume = true; + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED, + reconsume, pos); + 
NS_HTML5_CONTINUE(stateloop); + } + } + } + scriptdatadoubleescapestartloop_end:; + [[fallthrough]]; + } + case SCRIPT_DATA_DOUBLE_ESCAPED: { + for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + } + switch (c) { + case '-': { + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED_DASH, + reconsume, pos); + NS_HTML5_BREAK(scriptdatadoubleescapedloop); + } + case '<': { + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\0': { + emitReplacementCharacter(buf, pos); + continue; + } + case '\r': { + emitCarriageReturn<P>(buf, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + default: { + continue; + } + } + } + scriptdatadoubleescapedloop_end:; + [[fallthrough]]; + } + case SCRIPT_DATA_DOUBLE_ESCAPED_DASH: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '-': { + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH, + reconsume, pos); + NS_HTML5_BREAK(scriptdatadoubleescapeddashloop); + } + case '<': { + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\0': { + emitReplacementCharacter(buf, pos); + state = + P::transition(mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\r': { + emitCarriageReturn<P>(buf, pos); + state = + P::transition(mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, + reconsume, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + default: 
{ + state = + P::transition(mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + scriptdatadoubleescapeddashloop_end:; + [[fallthrough]]; + } + case SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '-': { + continue; + } + case '<': { + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN, + reconsume, pos); + NS_HTML5_BREAK(scriptdatadoubleescapeddashdashloop); + } + case '>': { + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA, reconsume, + pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\0': { + emitReplacementCharacter(buf, pos); + state = + P::transition(mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\r': { + emitCarriageReturn<P>(buf, pos); + state = + P::transition(mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, + reconsume, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + default: { + state = + P::transition(mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + scriptdatadoubleescapeddashdashloop_end:; + [[fallthrough]]; + } + case SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '/': { + index = 0; + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPE_END, reconsume, + pos); + NS_HTML5_BREAK(scriptdatadoubleescapedlessthanloop); + } + default: { + reconsume = true; + state = + P::transition(mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, + reconsume, pos); + 
NS_HTML5_CONTINUE(stateloop); + } + } + } + scriptdatadoubleescapedlessthanloop_end:; + [[fallthrough]]; + } + case SCRIPT_DATA_DOUBLE_ESCAPE_END: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + if (index < 6) { + char16_t folded = c; + if (c >= 'A' && c <= 'Z') { + folded += 0x20; + } + if (folded != nsHtml5Tokenizer::SCRIPT_ARR[index]) { + reconsume = true; + state = + P::transition(mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + index++; + continue; + } + switch (c) { + case '\r': { + emitCarriageReturn<P>(buf, pos); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED, + reconsume, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + case ' ': + case '\t': + case '\f': + case '/': + case '>': { + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + default: { + reconsume = true; + state = + P::transition(mViewSource.get(), + nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + } + case MARKUP_DECLARATION_OCTYPE: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + if (index < 6) { + char16_t folded = c; + if (c >= 'A' && c <= 'Z') { + folded += 0x20; + } + if (folded == nsHtml5Tokenizer::OCTYPE[index]) { + appendStrBuf(c); + } else { + if (P::reportErrors) { + errBogusComment(); + } + reconsume = true; + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BOGUS_COMMENT, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + index++; + continue; + } else { + reconsume = true; + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::DOCTYPE, reconsume, pos); + NS_HTML5_BREAK(markupdeclarationdoctypeloop); + } + } + 
markupdeclarationdoctypeloop_end:; + [[fallthrough]]; + } + case DOCTYPE: { + for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + } + initDoctypeFields(); + switch (c) { + case '\r': { + P::silentCarriageReturn(this); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BEFORE_DOCTYPE_NAME, + reconsume, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + case ' ': + case '\t': + case '\f': { + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BEFORE_DOCTYPE_NAME, + reconsume, pos); + NS_HTML5_BREAK(doctypeloop); + } + default: { + if (P::reportErrors) { + errMissingSpaceBeforeDoctypeName(); + } + reconsume = true; + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BEFORE_DOCTYPE_NAME, + reconsume, pos); + NS_HTML5_BREAK(doctypeloop); + } + } + } + doctypeloop_end:; + [[fallthrough]]; + } + case BEFORE_DOCTYPE_NAME: { + for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + } + switch (c) { + case '\r': { + P::silentCarriageReturn(this); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + case ' ': + case '\t': + case '\f': { + continue; + } + case '>': { + if (P::reportErrors) { + errNamelessDoctype(); + } + forceQuirks = true; + emitDoctypeToken(pos); + state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, + reconsume, pos); + if (shouldSuspend) { + NS_HTML5_BREAK(stateloop); + } + NS_HTML5_CONTINUE(stateloop); + } + case '\0': { + c = 0xfffd; + [[fallthrough]]; + } + default: { + if (c >= 'A' && c <= 'Z') { + c += 0x20; + } + clearStrBufBeforeUse(); + appendStrBuf(c); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::DOCTYPE_NAME, reconsume, + pos); + NS_HTML5_BREAK(beforedoctypenameloop); + } + } + } 
+ beforedoctypenameloop_end:; + [[fallthrough]]; + } + case DOCTYPE_NAME: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '\r': { + P::silentCarriageReturn(this); + strBufToDoctypeName(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::AFTER_DOCTYPE_NAME, + reconsume, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + case ' ': + case '\t': + case '\f': { + strBufToDoctypeName(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::AFTER_DOCTYPE_NAME, + reconsume, pos); + NS_HTML5_BREAK(doctypenameloop); + } + case '>': { + strBufToDoctypeName(); + emitDoctypeToken(pos); + state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, + reconsume, pos); + if (shouldSuspend) { + NS_HTML5_BREAK(stateloop); + } + NS_HTML5_CONTINUE(stateloop); + } + case '\0': { + c = 0xfffd; + [[fallthrough]]; + } + default: { + if (c >= 'A' && c <= 'Z') { + c += 0x0020; + } + appendStrBuf(c); + continue; + } + } + } + doctypenameloop_end:; + [[fallthrough]]; + } + case AFTER_DOCTYPE_NAME: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '\r': { + P::silentCarriageReturn(this); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + case ' ': + case '\t': + case '\f': { + continue; + } + case '>': { + emitDoctypeToken(pos); + state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, + reconsume, pos); + if (shouldSuspend) { + NS_HTML5_BREAK(stateloop); + } + NS_HTML5_CONTINUE(stateloop); + } + case 'p': + case 'P': { + index = 0; + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::DOCTYPE_UBLIC, + reconsume, pos); + NS_HTML5_BREAK(afterdoctypenameloop); + } + case 's': + case 'S': { + index = 0; + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::DOCTYPE_YSTEM, + 
reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + default: { + bogusDoctype(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BOGUS_DOCTYPE, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + afterdoctypenameloop_end:; + [[fallthrough]]; + } + case DOCTYPE_UBLIC: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + if (index < 5) { + char16_t folded = c; + if (c >= 'A' && c <= 'Z') { + folded += 0x20; + } + if (folded != nsHtml5Tokenizer::UBLIC[index]) { + bogusDoctype(); + reconsume = true; + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BOGUS_DOCTYPE, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + index++; + continue; + } else { + reconsume = true; + state = + P::transition(mViewSource.get(), + nsHtml5Tokenizer::AFTER_DOCTYPE_PUBLIC_KEYWORD, + reconsume, pos); + NS_HTML5_BREAK(doctypeublicloop); + } + } + doctypeublicloop_end:; + [[fallthrough]]; + } + case AFTER_DOCTYPE_PUBLIC_KEYWORD: { + for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + } + switch (c) { + case '\r': { + P::silentCarriageReturn(this); + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::BEFORE_DOCTYPE_PUBLIC_IDENTIFIER, + reconsume, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + case ' ': + case '\t': + case '\f': { + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::BEFORE_DOCTYPE_PUBLIC_IDENTIFIER, + reconsume, pos); + NS_HTML5_BREAK(afterdoctypepublickeywordloop); + } + case '\"': { + if (P::reportErrors) { + errNoSpaceBetweenDoctypePublicKeywordAndQuote(); + } + clearStrBufBeforeUse(); + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\'': { + if (P::reportErrors) { 
+ errNoSpaceBetweenDoctypePublicKeywordAndQuote(); + } + clearStrBufBeforeUse(); + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '>': { + if (P::reportErrors) { + errExpectedPublicId(); + } + forceQuirks = true; + emitDoctypeToken(pos); + state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, + reconsume, pos); + if (shouldSuspend) { + NS_HTML5_BREAK(stateloop); + } + NS_HTML5_CONTINUE(stateloop); + } + default: { + bogusDoctype(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BOGUS_DOCTYPE, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + afterdoctypepublickeywordloop_end:; + [[fallthrough]]; + } + case BEFORE_DOCTYPE_PUBLIC_IDENTIFIER: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '\r': { + P::silentCarriageReturn(this); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + case ' ': + case '\t': + case '\f': { + continue; + } + case '\"': { + clearStrBufBeforeUse(); + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED, + reconsume, pos); + NS_HTML5_BREAK(beforedoctypepublicidentifierloop); + } + case '\'': { + clearStrBufBeforeUse(); + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '>': { + if (P::reportErrors) { + errExpectedPublicId(); + } + forceQuirks = true; + emitDoctypeToken(pos); + state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, + reconsume, pos); + if (shouldSuspend) { + NS_HTML5_BREAK(stateloop); + } + NS_HTML5_CONTINUE(stateloop); + } + default: { + bogusDoctype(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BOGUS_DOCTYPE, + reconsume, pos); + 
NS_HTML5_CONTINUE(stateloop); + } + } + } + beforedoctypepublicidentifierloop_end:; + [[fallthrough]]; + } + case DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '\"': { + publicIdentifier = strBufToString(); + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::AFTER_DOCTYPE_PUBLIC_IDENTIFIER, + reconsume, pos); + NS_HTML5_BREAK(doctypepublicidentifierdoublequotedloop); + } + case '>': { + if (P::reportErrors) { + errGtInPublicId(); + } + forceQuirks = true; + publicIdentifier = strBufToString(); + emitDoctypeToken(pos); + state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, + reconsume, pos); + if (shouldSuspend) { + NS_HTML5_BREAK(stateloop); + } + NS_HTML5_CONTINUE(stateloop); + } + case '\r': { + appendStrBufCarriageReturn<P>(); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + appendStrBufLineFeed<P>(); + continue; + } + case '\0': { + c = 0xfffd; + [[fallthrough]]; + } + default: { + appendStrBuf(c); + continue; + } + } + } + doctypepublicidentifierdoublequotedloop_end:; + [[fallthrough]]; + } + case AFTER_DOCTYPE_PUBLIC_IDENTIFIER: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '\r': { + P::silentCarriageReturn(this); + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer:: + BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS, + reconsume, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + case ' ': + case '\t': + case '\f': { + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer:: + BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS, + reconsume, pos); + NS_HTML5_BREAK(afterdoctypepublicidentifierloop); + } + case '>': { + emitDoctypeToken(pos); + state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, + reconsume, pos); + if (shouldSuspend) { + 
NS_HTML5_BREAK(stateloop); + } + NS_HTML5_CONTINUE(stateloop); + } + case '\"': { + if (P::reportErrors) { + errNoSpaceBetweenPublicAndSystemIds(); + } + clearStrBufBeforeUse(); + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\'': { + if (P::reportErrors) { + errNoSpaceBetweenPublicAndSystemIds(); + } + clearStrBufBeforeUse(); + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + default: { + bogusDoctype(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BOGUS_DOCTYPE, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + afterdoctypepublicidentifierloop_end:; + [[fallthrough]]; + } + case BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '\r': { + P::silentCarriageReturn(this); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + case ' ': + case '\t': + case '\f': { + continue; + } + case '>': { + emitDoctypeToken(pos); + state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, + reconsume, pos); + if (shouldSuspend) { + NS_HTML5_BREAK(stateloop); + } + NS_HTML5_CONTINUE(stateloop); + } + case '\"': { + clearStrBufBeforeUse(); + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, + reconsume, pos); + NS_HTML5_BREAK(betweendoctypepublicandsystemidentifiersloop); + } + case '\'': { + clearStrBufBeforeUse(); + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + default: { + bogusDoctype(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BOGUS_DOCTYPE, + 
reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + betweendoctypepublicandsystemidentifiersloop_end:; + [[fallthrough]]; + } + case DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '\"': { + systemIdentifier = strBufToString(); + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::AFTER_DOCTYPE_SYSTEM_IDENTIFIER, + reconsume, pos); + NS_HTML5_BREAK(doctypesystemidentifierdoublequotedloop); + } + case '>': { + if (P::reportErrors) { + errGtInSystemId(); + } + forceQuirks = true; + systemIdentifier = strBufToString(); + emitDoctypeToken(pos); + state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, + reconsume, pos); + if (shouldSuspend) { + NS_HTML5_BREAK(stateloop); + } + NS_HTML5_CONTINUE(stateloop); + } + case '\r': { + appendStrBufCarriageReturn<P>(); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + appendStrBufLineFeed<P>(); + continue; + } + case '\0': { + c = 0xfffd; + [[fallthrough]]; + } + default: { + appendStrBuf(c); + continue; + } + } + } + doctypesystemidentifierdoublequotedloop_end:; + [[fallthrough]]; + } + case AFTER_DOCTYPE_SYSTEM_IDENTIFIER: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '\r': { + P::silentCarriageReturn(this); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + case ' ': + case '\t': + case '\f': { + continue; + } + case '>': { + emitDoctypeToken(pos); + state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, + reconsume, pos); + if (shouldSuspend) { + NS_HTML5_BREAK(stateloop); + } + NS_HTML5_CONTINUE(stateloop); + } + default: { + bogusDoctypeWithoutQuirks(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BOGUS_DOCTYPE, + reconsume, pos); + NS_HTML5_BREAK(afterdoctypesystemidentifierloop); + } + } + } + 
afterdoctypesystemidentifierloop_end:; + [[fallthrough]]; + } + case BOGUS_DOCTYPE: { + for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + } + switch (c) { + case '>': { + emitDoctypeToken(pos); + state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, + reconsume, pos); + if (shouldSuspend) { + NS_HTML5_BREAK(stateloop); + } + NS_HTML5_CONTINUE(stateloop); + } + case '\r': { + P::silentCarriageReturn(this); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + default: { + continue; + } + } + } + } + case DOCTYPE_YSTEM: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + if (index < 5) { + char16_t folded = c; + if (c >= 'A' && c <= 'Z') { + folded += 0x20; + } + if (folded != nsHtml5Tokenizer::YSTEM[index]) { + bogusDoctype(); + reconsume = true; + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BOGUS_DOCTYPE, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + index++; + NS_HTML5_CONTINUE(stateloop); + } else { + reconsume = true; + state = + P::transition(mViewSource.get(), + nsHtml5Tokenizer::AFTER_DOCTYPE_SYSTEM_KEYWORD, + reconsume, pos); + NS_HTML5_BREAK(doctypeystemloop); + } + } + doctypeystemloop_end:; + [[fallthrough]]; + } + case AFTER_DOCTYPE_SYSTEM_KEYWORD: { + for (;;) { + if (reconsume) { + reconsume = false; + } else { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + } + switch (c) { + case '\r': { + P::silentCarriageReturn(this); + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::BEFORE_DOCTYPE_SYSTEM_IDENTIFIER, + reconsume, pos); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + case ' ': + case '\t': + case '\f': { + state = P::transition( + mViewSource.get(), + 
nsHtml5Tokenizer::BEFORE_DOCTYPE_SYSTEM_IDENTIFIER, + reconsume, pos); + NS_HTML5_BREAK(afterdoctypesystemkeywordloop); + } + case '\"': { + if (P::reportErrors) { + errNoSpaceBetweenDoctypeSystemKeywordAndQuote(); + } + clearStrBufBeforeUse(); + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\'': { + if (P::reportErrors) { + errNoSpaceBetweenDoctypeSystemKeywordAndQuote(); + } + clearStrBufBeforeUse(); + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '>': { + if (P::reportErrors) { + errExpectedPublicId(); + } + forceQuirks = true; + emitDoctypeToken(pos); + state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, + reconsume, pos); + if (shouldSuspend) { + NS_HTML5_BREAK(stateloop); + } + NS_HTML5_CONTINUE(stateloop); + } + default: { + bogusDoctype(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BOGUS_DOCTYPE, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + afterdoctypesystemkeywordloop_end:; + [[fallthrough]]; + } + case BEFORE_DOCTYPE_SYSTEM_IDENTIFIER: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '\r': { + P::silentCarriageReturn(this); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + P::silentLineFeed(this); + [[fallthrough]]; + } + case ' ': + case '\t': + case '\f': { + continue; + } + case '\"': { + clearStrBufBeforeUse(); + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '\'': { + clearStrBufBeforeUse(); + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, + reconsume, pos); + 
NS_HTML5_BREAK(beforedoctypesystemidentifierloop); + } + case '>': { + if (P::reportErrors) { + errExpectedSystemId(); + } + forceQuirks = true; + emitDoctypeToken(pos); + state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, + reconsume, pos); + if (shouldSuspend) { + NS_HTML5_BREAK(stateloop); + } + NS_HTML5_CONTINUE(stateloop); + } + default: { + bogusDoctype(); + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::BOGUS_DOCTYPE, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + beforedoctypesystemidentifierloop_end:; + [[fallthrough]]; + } + case DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '\'': { + systemIdentifier = strBufToString(); + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::AFTER_DOCTYPE_SYSTEM_IDENTIFIER, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '>': { + if (P::reportErrors) { + errGtInSystemId(); + } + forceQuirks = true; + systemIdentifier = strBufToString(); + emitDoctypeToken(pos); + state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, + reconsume, pos); + if (shouldSuspend) { + NS_HTML5_BREAK(stateloop); + } + NS_HTML5_CONTINUE(stateloop); + } + case '\r': { + appendStrBufCarriageReturn<P>(); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + appendStrBufLineFeed<P>(); + continue; + } + case '\0': { + c = 0xfffd; + [[fallthrough]]; + } + default: { + appendStrBuf(c); + continue; + } + } + } + } + case DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '\'': { + publicIdentifier = strBufToString(); + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::AFTER_DOCTYPE_PUBLIC_IDENTIFIER, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + case '>': { + if (P::reportErrors) { + errGtInPublicId(); + } + 
forceQuirks = true; + publicIdentifier = strBufToString(); + emitDoctypeToken(pos); + state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, + reconsume, pos); + if (shouldSuspend) { + NS_HTML5_BREAK(stateloop); + } + NS_HTML5_CONTINUE(stateloop); + } + case '\r': { + appendStrBufCarriageReturn<P>(); + NS_HTML5_BREAK(stateloop); + } + case '\n': { + appendStrBufLineFeed<P>(); + continue; + } + case '\0': { + c = 0xfffd; + [[fallthrough]]; + } + default: { + appendStrBuf(c); + continue; + } + } + } + } + case PROCESSING_INSTRUCTION: { + for (;;) { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '\?': { + state = P::transition( + mViewSource.get(), + nsHtml5Tokenizer::PROCESSING_INSTRUCTION_QUESTION_MARK, + reconsume, pos); + NS_HTML5_BREAK(processinginstructionloop); + } + default: { + continue; + } + } + } + processinginstructionloop_end:; + [[fallthrough]]; + } + case PROCESSING_INSTRUCTION_QUESTION_MARK: { + if (++pos == endPos) { + NS_HTML5_BREAK(stateloop); + } + c = P::checkChar(this, buf, pos); + switch (c) { + case '>': { + state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA, + reconsume, pos); + suspendIfRequestedAfterCurrentNonTextToken(); + if (shouldSuspend) { + NS_HTML5_BREAK(stateloop); + } + NS_HTML5_CONTINUE(stateloop); + } + default: { + state = P::transition(mViewSource.get(), + nsHtml5Tokenizer::PROCESSING_INSTRUCTION, + reconsume, pos); + NS_HTML5_CONTINUE(stateloop); + } + } + } + } + } + stateloop_end:; + flushChars(buf, pos); + stateSave = state; + returnStateSave = returnState; + return pos; + } + void initDoctypeFields(); template <class P> - void adjustDoubleHyphenAndAppendToStrBufCarriageReturn(); + inline void adjustDoubleHyphenAndAppendToStrBufCarriageReturn() { + P::silentCarriageReturn(this); + adjustDoubleHyphenAndAppendToStrBufAndErr('\n', false); + } + template <class P> - void adjustDoubleHyphenAndAppendToStrBufLineFeed(); + inline void 
adjustDoubleHyphenAndAppendToStrBufLineFeed() { + P::silentLineFeed(this); + adjustDoubleHyphenAndAppendToStrBufAndErr('\n', false); + } + template <class P> - void appendStrBufLineFeed(); + inline void appendStrBufLineFeed() { /* LF inside buffered text: notify policy P of the line feed, then append LF via appendStrBuf. */ + P::silentLineFeed(this); + appendStrBuf('\n'); + } + template <class P> - void appendStrBufCarriageReturn(); + inline void appendStrBufCarriageReturn() { /* CR inside buffered text: notify policy P of the carriage return, then append LF (not CR) via appendStrBuf. */ + P::silentCarriageReturn(this); + appendStrBuf('\n'); + } + template <class P> - void emitCarriageReturn(char16_t* buf, int32_t pos); + inline void emitCarriageReturn(char16_t* buf, int32_t pos) { /* CR in character data: notify policy P, call flushChars(buf, pos), hand one LF to tokenHandler, then set cstart to INT32_MAX. */ + P::silentCarriageReturn(this); + flushChars(buf, pos); + tokenHandler->characters(nsHtml5Tokenizer::LF, 0, 1); + cstart = INT32_MAX; + } + void emitReplacementCharacter(char16_t* buf, int32_t pos); void maybeEmitReplacementCharacter(char16_t* buf, int32_t pos); void emitPlaintextReplacementCharacter(char16_t* buf, int32_t pos); - void setAdditionalAndRememberAmpersandLocation(char16_t add); + inline void setAdditionalAndRememberAmpersandLocation(char16_t add) { /* Only stores add in additional; despite the name, no location is recorded in this body. */ + additional = add; + } + void bogusDoctype(); void bogusDoctypeWithoutQuirks(); void handleNcrValue(int32_t returnState); @@ -434,7 +4586,13 @@ class nsHtml5Tokenizer { private: void emitDoctypeToken(int32_t pos); - void suspendIfRequestedAfterCurrentNonTextToken(); + inline void suspendIfRequestedAfterCurrentNonTextToken() { /* Turns a pending suspendAfterCurrentNonTextToken request into shouldSuspend and clears the request. */ + if (suspendAfterCurrentNonTextToken) { + suspendAfterCurrentNonTextToken = false; + shouldSuspend = true; + } + } + void suspendAfterCurrentTokenIfNotInText(); bool suspensionAfterCurrentNonTextTokenPending(); @@ -442,13 +4600,29 @@ class nsHtml5Tokenizer { bool internalEncodingDeclaration(nsHtml5String internalCharset); private: - void emitOrAppendTwo(const char16_t* val, int32_t returnState); - void emitOrAppendOne(const char16_t* val, int32_t returnState); + inline void emitOrAppendTwo(const char16_t* val, int32_t returnState) { /* When returnState has any DATA_AND_RCDATA_MASK bit set, val[0] and val[1] go to appendStrBuf; otherwise both are passed to tokenHandler->characters. */ + if ((returnState & DATA_AND_RCDATA_MASK)) { + appendStrBuf(val[0]); +
appendStrBuf(val[1]); + } else { + tokenHandler->characters(val, 0, 2); + } + } + + inline void emitOrAppendOne(const char16_t* val, int32_t returnState) { /* One-code-unit counterpart of emitOrAppendTwo: val[0] goes to appendStrBuf when returnState has any DATA_AND_RCDATA_MASK bit set, otherwise to tokenHandler->characters. */ + if ((returnState & DATA_AND_RCDATA_MASK)) { + appendStrBuf(val[0]); + } else { + tokenHandler->characters(val, 0, 1); + } + } public: void end(); - void requestSuspension(); - bool isInDataState(); + inline void requestSuspension() { shouldSuspend = true; } /* Only sets shouldSuspend; stateLoop tests that flag after emitting a token. */ + + inline bool isInDataState() { return (stateSave == DATA); } /* Tests the saved state (stateSave), which stateLoop writes on exit. */ + void resetToDataState(); void loadState(nsHtml5Tokenizer* other); void initializeWithoutStarting(); diff --git a/parser/html/nsHtml5TokenizerALU.cpp b/parser/html/nsHtml5TokenizerALU.cpp @@ -0,0 +1,33 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsHtml5Tokenizer.h" +#include "nsHtml5TokenizerLoopPoliciesALU.h" + +int32_t nsHtml5Tokenizer::StateLoopFastestALU(int32_t state, char16_t c, + int32_t pos, char16_t* buf, + bool reconsume, + int32_t returnState, + int32_t endPos) { /* Non-template entry point: instantiates stateLoop with nsHtml5FastestPolicyALU and forwards every argument unchanged. */ + return stateLoop<nsHtml5FastestPolicyALU>(state, c, pos, buf, reconsume, + returnState, endPos); +} + +int32_t nsHtml5Tokenizer::StateLoopLineColALU(int32_t state, char16_t c, + int32_t pos, char16_t* buf, + bool reconsume, + int32_t returnState, + int32_t endPos) { /* Same forwarding wrapper, instantiated with nsHtml5LineColPolicyALU. */ + return stateLoop<nsHtml5LineColPolicyALU>(state, c, pos, buf, reconsume, + returnState, endPos); +} + +int32_t nsHtml5Tokenizer::StateLoopViewSourceALU(int32_t state, char16_t c, + int32_t pos, char16_t* buf, + bool reconsume, + int32_t returnState, + int32_t endPos) { /* Same forwarding wrapper, instantiated with nsHtml5ViewSourcePolicyALU. */ + return stateLoop<nsHtml5ViewSourcePolicyALU>(state, c, pos, buf, reconsume, + returnState, endPos); +} diff --git a/parser/html/nsHtml5TokenizerALUStubs.cpp b/parser/html/nsHtml5TokenizerALUStubs.cpp @@ -0,0 +1,32 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0.
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsHtml5Tokenizer.h" + +int32_t nsHtml5Tokenizer::StateLoopFastestALU(int32_t state, char16_t c, + int32_t pos, char16_t* buf, + bool reconsume, + int32_t returnState, + int32_t endPos) { /* Stub: never tokenizes; release-asserts unconditionally. NOTE(review): presumably built in place of nsHtml5TokenizerALU.cpp when the ALU variant is not compiled - confirm in moz.build. */ + MOZ_RELEASE_ASSERT(false, "Inconsistent build config"); + return 0; /* Placed after an assertion on false; kept so the function has a return statement. */ +} + +int32_t nsHtml5Tokenizer::StateLoopLineColALU(int32_t state, char16_t c, + int32_t pos, char16_t* buf, + bool reconsume, + int32_t returnState, + int32_t endPos) { /* Stub with the same behavior as the StateLoopFastestALU stub. */ + MOZ_RELEASE_ASSERT(false, "Inconsistent build config"); + return 0; +} + +int32_t nsHtml5Tokenizer::StateLoopViewSourceALU(int32_t state, char16_t c, + int32_t pos, char16_t* buf, + bool reconsume, + int32_t returnState, + int32_t endPos) { /* Stub with the same behavior as the StateLoopFastestALU stub. */ + MOZ_RELEASE_ASSERT(false, "Inconsistent build config"); + return 0; +} diff --git a/parser/html/nsHtml5TokenizerHSupplement.h b/parser/html/nsHtml5TokenizerHSupplement.h @@ -2,14 +2,48 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -friend struct nsHtml5ViewSourcePolicy; -friend struct nsHtml5LineColPolicy; -friend struct nsHtml5FastestPolicy; +friend struct nsHtml5ViewSourcePolicySIMD; +friend struct nsHtml5ViewSourcePolicyALU; +friend struct nsHtml5LineColPolicySIMD; +friend struct nsHtml5LineColPolicyALU; +friend struct nsHtml5FastestPolicySIMD; +friend struct nsHtml5FastestPolicyALU; private: int32_t col; bool nextCharOnNewLine; +// These functions are wrappers for template parametrized stateLoop and +// stateLoopCompilerWorkaround so that the instantiations can go into +// separate compilation units both to allow different compiler flags +// and to make LLVM perform LICM on SIMD constants in functions whose size +// isn't too large for LLVM to perform LICM before LLVM looks for inlining +//
+ +int32_t StateLoopFastestSIMD(int32_t state, char16_t c, int32_t pos, + char16_t* buf, bool reconsume, int32_t returnState, + int32_t endPos); + +int32_t StateLoopFastestALU(int32_t state, char16_t c, int32_t pos, + char16_t* buf, bool reconsume, int32_t returnState, + int32_t endPos); + +int32_t StateLoopLineColSIMD(int32_t state, char16_t c, int32_t pos, + char16_t* buf, bool reconsume, int32_t returnState, + int32_t endPos); + +int32_t StateLoopLineColALU(int32_t state, char16_t c, int32_t pos, + char16_t* buf, bool reconsume, int32_t returnState, + int32_t endPos); + +int32_t StateLoopViewSourceSIMD(int32_t state, char16_t c, int32_t pos, + char16_t* buf, bool reconsume, + int32_t returnState, int32_t endPos); + +int32_t StateLoopViewSourceALU(int32_t state, char16_t c, int32_t pos, + char16_t* buf, bool reconsume, + int32_t returnState, int32_t endPos); + public: inline int32_t getColumnNumber() { return col; } diff --git a/parser/html/nsHtml5TokenizerLoopPolicies.h b/parser/html/nsHtml5TokenizerLoopPolicies.h @@ -1,123 +0,0 @@ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ - -#ifndef nsHtml5TokenizerLoopPolicies_h -#define nsHtml5TokenizerLoopPolicies_h - -/** - * This policy does not report tokenizer transitions anywhere and does not - * track line and column numbers. To be used for innerHTML. 
- */ -struct nsHtml5FastestPolicy { - static const bool reportErrors = false; - static int32_t transition(nsHtml5Highlighter* aHighlighter, int32_t aState, - bool aReconsume, int32_t aPos) { - return aState; - } - static void completedNamedCharacterReference( - nsHtml5Highlighter* aHighlighter) {} - - static char16_t checkChar(nsHtml5Tokenizer* aTokenizer, char16_t* buf, - int32_t pos) { - return buf[pos]; - } - - static void silentCarriageReturn(nsHtml5Tokenizer* aTokenizer) { - aTokenizer->lastCR = true; - } - - static void silentLineFeed(nsHtml5Tokenizer* aTokenizer) {} -}; - -/** - * This policy does not report tokenizer transitions anywhere. To be used - * when _not_ viewing source and when not parsing innerHTML (or other - * script execution-preventing fragment). - */ -struct nsHtml5LineColPolicy { - static const bool reportErrors = false; - static int32_t transition(nsHtml5Highlighter* aHighlighter, int32_t aState, - bool aReconsume, int32_t aPos) { - return aState; - } - static void completedNamedCharacterReference( - nsHtml5Highlighter* aHighlighter) {} - - static char16_t checkChar(nsHtml5Tokenizer* aTokenizer, char16_t* buf, - int32_t pos) { - // The name of this method comes from the validator. - // We aren't checking a char here. We read the next - // UTF-16 code unit and, before returning it, adjust - // the line and column numbers. - char16_t c = buf[pos]; - if (MOZ_UNLIKELY(aTokenizer->nextCharOnNewLine)) { - // By changing the line and column here instead - // of doing so eagerly when seeing the line break - // causes the line break itself to be considered - // column-wise at the end of a line. - aTokenizer->line++; - aTokenizer->col = 1; - aTokenizer->nextCharOnNewLine = false; - } else if (MOZ_LIKELY(!NS_IS_LOW_SURROGATE(c))) { - // SpiderMonkey wants to count scalar values - // instead of UTF-16 code units. We omit low - // surrogates from the count so that only the - // high surrogate increments the count for - // two-code-unit scalar values. 
- // - // It's somewhat questionable from the performance - // perspective to make the human-perceivable column - // count correct for non-BMP characters in the case - // where there is a single scalar value per extended - // grapheme cluster when even on the BMP there are - // various cases where the scalar count doesn't make - // much sense as a human-perceived "column count" due - // to extended grapheme clusters consisting of more - // than one scalar value. - aTokenizer->col++; - } - return c; - } - - static void silentCarriageReturn(nsHtml5Tokenizer* aTokenizer) { - aTokenizer->nextCharOnNewLine = true; - aTokenizer->lastCR = true; - } - - static void silentLineFeed(nsHtml5Tokenizer* aTokenizer) { - aTokenizer->nextCharOnNewLine = true; - } -}; - -/** - * This policy reports the tokenizer transitions to a highlighter. To be used - * when viewing source. - */ -struct nsHtml5ViewSourcePolicy { - static const bool reportErrors = true; - static int32_t transition(nsHtml5Highlighter* aHighlighter, int32_t aState, - bool aReconsume, int32_t aPos) { - return aHighlighter->Transition(aState, aReconsume, aPos); - } - static void completedNamedCharacterReference( - nsHtml5Highlighter* aHighlighter) { - aHighlighter->CompletedNamedCharacterReference(); - } - - static char16_t checkChar(nsHtml5Tokenizer* aTokenizer, char16_t* buf, - int32_t pos) { - return buf[pos]; - } - - static void silentCarriageReturn(nsHtml5Tokenizer* aTokenizer) { - aTokenizer->line++; - aTokenizer->lastCR = true; - } - - static void silentLineFeed(nsHtml5Tokenizer* aTokenizer) { - aTokenizer->line++; - } -}; - -#endif // nsHtml5TokenizerLoopPolicies_h diff --git a/parser/html/nsHtml5TokenizerLoopPoliciesALU.h b/parser/html/nsHtml5TokenizerLoopPoliciesALU.h @@ -0,0 +1,150 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef nsHtml5TokenizerLoopPoliciesALU_h +#define nsHtml5TokenizerLoopPoliciesALU_h + +/** + * This policy does not report tokenizer transitions anywhere and does not + * track line and column numbers. To be used for innerHTML. Non-SIMD version. + */ +struct nsHtml5FastestPolicyALU { + static const bool reportErrors = false; + MOZ_ALWAYS_INLINE_EVEN_DEBUG static int32_t transition( + nsHtml5Highlighter* aHighlighter, int32_t aState, bool aReconsume, + int32_t aPos) { + return aState; + } + MOZ_ALWAYS_INLINE_EVEN_DEBUG static void completedNamedCharacterReference( + nsHtml5Highlighter* aHighlighter) {} + + MOZ_ALWAYS_INLINE_EVEN_DEBUG static int32_t accelerateAdvancementData( + nsHtml5Tokenizer* aTokenizer, char16_t* buf, int32_t pos, + int32_t endPos) { + return 0; + } + + MOZ_ALWAYS_INLINE_EVEN_DEBUG static char16_t checkChar( + nsHtml5Tokenizer* aTokenizer, char16_t* buf, int32_t pos) { + return buf[pos]; + } + + MOZ_ALWAYS_INLINE_EVEN_DEBUG static void silentCarriageReturn( + nsHtml5Tokenizer* aTokenizer) { + aTokenizer->lastCR = true; + } + + MOZ_ALWAYS_INLINE_EVEN_DEBUG static void silentLineFeed( + nsHtml5Tokenizer* aTokenizer) {} +}; + +/** + * This policy does not report tokenizer transitions anywhere. To be used + * when _not_ viewing source and when not parsing innerHTML (or other + * script execution-preventing fragment). 
+ */ +struct nsHtml5LineColPolicyALU { + static const bool reportErrors = false; + MOZ_ALWAYS_INLINE_EVEN_DEBUG static int32_t transition( + nsHtml5Highlighter* aHighlighter, int32_t aState, bool aReconsume, + int32_t aPos) { + return aState; + } + MOZ_ALWAYS_INLINE_EVEN_DEBUG static void completedNamedCharacterReference( + nsHtml5Highlighter* aHighlighter) {} + + MOZ_ALWAYS_INLINE_EVEN_DEBUG static int32_t accelerateAdvancementData( + nsHtml5Tokenizer* aTokenizer, char16_t* buf, int32_t pos, + int32_t endPos) { + return 0; + } + + MOZ_ALWAYS_INLINE_EVEN_DEBUG static char16_t checkChar( + nsHtml5Tokenizer* aTokenizer, char16_t* buf, int32_t pos) { + // The name of this method comes from the validator. + // We aren't checking a char here. We read the next + // UTF-16 code unit and, before returning it, adjust + // the line and column numbers. + char16_t c = buf[pos]; + if (MOZ_UNLIKELY(aTokenizer->nextCharOnNewLine)) { + // By changing the line and column here instead + // of doing so eagerly when seeing the line break + // causes the line break itself to be considered + // column-wise at the end of a line. + aTokenizer->line++; + aTokenizer->col = 1; + aTokenizer->nextCharOnNewLine = false; + } else if (MOZ_LIKELY(!NS_IS_LOW_SURROGATE(c))) { + // SpiderMonkey wants to count scalar values + // instead of UTF-16 code units. We omit low + // surrogates from the count so that only the + // high surrogate increments the count for + // two-code-unit scalar values. + // + // It's somewhat questionable from the performance + // perspective to make the human-perceivable column + // count correct for non-BMP characters in the case + // where there is a single scalar value per extended + // grapheme cluster when even on the BMP there are + // various cases where the scalar count doesn't make + // much sense as a human-perceived "column count" due + // to extended grapheme clusters consisting of more + // than one scalar value. 
+ aTokenizer->col++; + } + return c; + } + + MOZ_ALWAYS_INLINE_EVEN_DEBUG static void silentCarriageReturn( + nsHtml5Tokenizer* aTokenizer) { + aTokenizer->nextCharOnNewLine = true; + aTokenizer->lastCR = true; + } + + MOZ_ALWAYS_INLINE_EVEN_DEBUG static void silentLineFeed( + nsHtml5Tokenizer* aTokenizer) { + aTokenizer->nextCharOnNewLine = true; + } +}; + +/** + * This policy reports the tokenizer transitions to a highlighter. To be used + * when viewing source. + */ +struct nsHtml5ViewSourcePolicyALU { + static const bool reportErrors = true; + MOZ_ALWAYS_INLINE_EVEN_DEBUG static int32_t transition( + nsHtml5Highlighter* aHighlighter, int32_t aState, bool aReconsume, + int32_t aPos) { + return aHighlighter->Transition(aState, aReconsume, aPos); + } + MOZ_ALWAYS_INLINE_EVEN_DEBUG static void completedNamedCharacterReference( + nsHtml5Highlighter* aHighlighter) { + aHighlighter->CompletedNamedCharacterReference(); + } + + MOZ_ALWAYS_INLINE_EVEN_DEBUG static int32_t accelerateAdvancementData( + nsHtml5Tokenizer* aTokenizer, char16_t* buf, int32_t pos, + int32_t endPos) { + return 0; + } + + MOZ_ALWAYS_INLINE_EVEN_DEBUG static char16_t checkChar( + nsHtml5Tokenizer* aTokenizer, char16_t* buf, int32_t pos) { + return buf[pos]; + } + + MOZ_ALWAYS_INLINE_EVEN_DEBUG static void silentCarriageReturn( + nsHtml5Tokenizer* aTokenizer) { + aTokenizer->line++; + aTokenizer->lastCR = true; + } + + MOZ_ALWAYS_INLINE_EVEN_DEBUG static void silentLineFeed( + nsHtml5Tokenizer* aTokenizer) { + aTokenizer->line++; + } +}; + +#endif // nsHtml5TokenizerLoopPoliciesALU_h diff --git a/parser/html/nsHtml5TokenizerLoopPoliciesSIMD.h b/parser/html/nsHtml5TokenizerLoopPoliciesSIMD.h @@ -0,0 +1,219 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef nsHtml5TokenizerLoopPoliciesSIMD_h +#define nsHtml5TokenizerLoopPoliciesSIMD_h + +#include "mozilla/Attributes.h" +#include "mozilla/htmlaccel/htmlaccelNotInline.h" + +/** + * This policy does not report tokenizer transitions anywhere and does not + * track line and column numbers. To be used for innerHTML. + * + * This the SIMD version for aarch64 and SSSE3-enabled x86/x86_64. + */ +struct nsHtml5FastestPolicySIMD { + static const bool reportErrors = false; + MOZ_ALWAYS_INLINE_EVEN_DEBUG static int32_t transition( + nsHtml5Highlighter* aHighlighter, int32_t aState, bool aReconsume, + int32_t aPos) { + return aState; + } + MOZ_ALWAYS_INLINE_EVEN_DEBUG static void completedNamedCharacterReference( + nsHtml5Highlighter* aHighlighter) {} + + MOZ_ALWAYS_INLINE_EVEN_DEBUG static int32_t accelerateAdvancementData( + nsHtml5Tokenizer* aTokenizer, char16_t* buf, int32_t pos, + int32_t endPos) { + // We need to check bounds for the `buf[pos]` access below to be OK. + // Instead of just checking that `pos` isn't equal to `endPos`, let's + // check that have at least one SIMD stride of data in the same branch, + // since if we don't have at least one SIMD stride of data, we don't + // need to proceed. + if (endPos - pos < 16) { + return 0; + } + if (buf[pos] == '<') { + // Quickly handle the case where there is one tag immediately + // after another and the very first thing in the data state is a + // less-than sign. + return 0; + } + return mozilla::htmlaccel::AccelerateDataFastest(buf + pos, buf + endPos); + } + + MOZ_ALWAYS_INLINE_EVEN_DEBUG static char16_t checkChar( + nsHtml5Tokenizer* aTokenizer, char16_t* buf, int32_t pos) { + return buf[pos]; + } + + MOZ_ALWAYS_INLINE_EVEN_DEBUG static void silentCarriageReturn( + nsHtml5Tokenizer* aTokenizer) { + aTokenizer->lastCR = true; + } + + MOZ_ALWAYS_INLINE_EVEN_DEBUG static void silentLineFeed( + nsHtml5Tokenizer* aTokenizer) {} +}; + +/** + * This policy does not report tokenizer transitions anywhere. 
To be used + * when _not_ viewing source and when not parsing innerHTML (or other + * script execution-preventing fragment). + */ +struct nsHtml5LineColPolicySIMD { + static const bool reportErrors = false; + MOZ_ALWAYS_INLINE_EVEN_DEBUG static int32_t transition( + nsHtml5Highlighter* aHighlighter, int32_t aState, bool aReconsume, + int32_t aPos) { + return aState; + } + MOZ_ALWAYS_INLINE_EVEN_DEBUG static void completedNamedCharacterReference( + nsHtml5Highlighter* aHighlighter) {} + + MOZ_ALWAYS_INLINE_EVEN_DEBUG static int32_t accelerateAdvancementData( + nsHtml5Tokenizer* aTokenizer, char16_t* buf, int32_t pos, + int32_t endPos) { + // We need to check bounds for the `buf[pos]` access below to be OK. + // Instead of just checking that `pos` isn't equal to `endPos`, let's + // check that have at least one SIMD stride of data in the same branch, + // since if we don't have at least one SIMD stride of data, we don't + // need to proceed. + if (endPos - pos < 16) { + return 0; + } + char16_t c = buf[pos]; + if (c == '<' || c == '\n') { + // Quickly handle the case where there is one tag immediately + // after another and the very first thing in the data state is a + // less-than sign and the case where a tag is immediately followed + // by a line feed. + return 0; + } + int32_t advance = + mozilla::htmlaccel::AccelerateDataLineCol(buf + pos, buf + endPos); + if (!advance) { + // When the SIMD advance is zero, don't touch the line and col tracking. + return 0; + } + if (MOZ_UNLIKELY(aTokenizer->nextCharOnNewLine)) { + // By changing the line and column here instead + // of doing so eagerly when seeing the line break + // causes the line break itself to be considered + // column-wise at the end of a line. 
+ aTokenizer->line++; + aTokenizer->col = advance; + aTokenizer->nextCharOnNewLine = false; + } else { + aTokenizer->col += advance; + } + return advance; + } + + MOZ_ALWAYS_INLINE_EVEN_DEBUG static char16_t checkChar( + nsHtml5Tokenizer* aTokenizer, char16_t* buf, int32_t pos) { + // The name of this method comes from the validator. + // We aren't checking a char here. We read the next + // UTF-16 code unit and, before returning it, adjust + // the line and column numbers. + char16_t c = buf[pos]; + if (MOZ_UNLIKELY(aTokenizer->nextCharOnNewLine)) { + // By changing the line and column here instead + // of doing so eagerly when seeing the line break + // causes the line break itself to be considered + // column-wise at the end of a line. + aTokenizer->line++; + aTokenizer->col = 1; + aTokenizer->nextCharOnNewLine = false; + } else if (MOZ_LIKELY(!NS_IS_LOW_SURROGATE(c))) { + // SpiderMonkey wants to count scalar values + // instead of UTF-16 code units. We omit low + // surrogates from the count so that only the + // high surrogate increments the count for + // two-code-unit scalar values. + // + // It's somewhat questionable from the performance + // perspective to make the human-perceivable column + // count correct for non-BMP characters in the case + // where there is a single scalar value per extended + // grapheme cluster when even on the BMP there are + // various cases where the scalar count doesn't make + // much sense as a human-perceived "column count" due + // to extended grapheme clusters consisting of more + // than one scalar value. 
+ aTokenizer->col++; + } + return c; + } + + MOZ_ALWAYS_INLINE_EVEN_DEBUG static void silentCarriageReturn( + nsHtml5Tokenizer* aTokenizer) { + aTokenizer->nextCharOnNewLine = true; + aTokenizer->lastCR = true; + } + + MOZ_ALWAYS_INLINE_EVEN_DEBUG static void silentLineFeed( + nsHtml5Tokenizer* aTokenizer) { + aTokenizer->nextCharOnNewLine = true; + } +}; + +/** + * This policy reports the tokenizer transitions to a highlighter. To be used + * when viewing source. + */ +struct nsHtml5ViewSourcePolicySIMD { + static const bool reportErrors = true; + MOZ_ALWAYS_INLINE_EVEN_DEBUG static int32_t transition( + nsHtml5Highlighter* aHighlighter, int32_t aState, bool aReconsume, + int32_t aPos) { + return aHighlighter->Transition(aState, aReconsume, aPos); + } + MOZ_ALWAYS_INLINE_EVEN_DEBUG static void completedNamedCharacterReference( + nsHtml5Highlighter* aHighlighter) { + aHighlighter->CompletedNamedCharacterReference(); + } + + MOZ_ALWAYS_INLINE_EVEN_DEBUG static int32_t accelerateAdvancementData( + nsHtml5Tokenizer* aTokenizer, char16_t* buf, int32_t pos, + int32_t endPos) { + // We need to check bounds for the `buf[pos]` access below to be OK. + // Instead of just checking that `pos` isn't equal to `endPos`, let's + // check that have at least one SIMD stride of data in the same branch, + // since if we don't have at least one SIMD stride of data, we don't + // need to proceed. + if (endPos - pos < 16) { + return 0; + } + char16_t c = buf[pos]; + if (c == '<' || c == '\n') { + // Quickly handle the case where there is one tag immediately + // after another and the very first thing in the data state is a + // less-than sign and the case where a tag is immediately followed + // by a line feed. 
+ return 0; + } + return mozilla::htmlaccel::AccelerateDataViewSource(buf + pos, + buf + endPos); + } + + MOZ_ALWAYS_INLINE_EVEN_DEBUG static char16_t checkChar( + nsHtml5Tokenizer* aTokenizer, char16_t* buf, int32_t pos) { + return buf[pos]; + } + + MOZ_ALWAYS_INLINE_EVEN_DEBUG static void silentCarriageReturn( + nsHtml5Tokenizer* aTokenizer) { + aTokenizer->line++; + aTokenizer->lastCR = true; + } + + MOZ_ALWAYS_INLINE_EVEN_DEBUG static void silentLineFeed( + nsHtml5Tokenizer* aTokenizer) { + aTokenizer->line++; + } +}; + +#endif // nsHtml5TokenizerLoopPoliciesSIMD_h diff --git a/parser/html/nsHtml5TokenizerSIMD.cpp b/parser/html/nsHtml5TokenizerSIMD.cpp @@ -0,0 +1,33 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsHtml5Tokenizer.h" +#include "nsHtml5TokenizerLoopPoliciesSIMD.h" + +int32_t nsHtml5Tokenizer::StateLoopFastestSIMD(int32_t state, char16_t c, + int32_t pos, char16_t* buf, + bool reconsume, + int32_t returnState, + int32_t endPos) { + return stateLoop<nsHtml5FastestPolicySIMD>(state, c, pos, buf, reconsume, + returnState, endPos); +} + +int32_t nsHtml5Tokenizer::StateLoopLineColSIMD(int32_t state, char16_t c, + int32_t pos, char16_t* buf, + bool reconsume, + int32_t returnState, + int32_t endPos) { + return stateLoop<nsHtml5LineColPolicySIMD>(state, c, pos, buf, reconsume, + returnState, endPos); +} + +int32_t nsHtml5Tokenizer::StateLoopViewSourceSIMD(int32_t state, char16_t c, + int32_t pos, char16_t* buf, + bool reconsume, + int32_t returnState, + int32_t endPos) { + return stateLoop<nsHtml5ViewSourcePolicySIMD>(state, c, pos, buf, reconsume, + returnState, endPos); +} diff --git a/parser/html/nsHtml5TokenizerSIMDStubs.cpp b/parser/html/nsHtml5TokenizerSIMDStubs.cpp @@ -0,0 +1,32 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsHtml5Tokenizer.h" + +int32_t nsHtml5Tokenizer::StateLoopFastestSIMD(int32_t state, char16_t c, + int32_t pos, char16_t* buf, + bool reconsume, + int32_t returnState, + int32_t endPos) { + MOZ_RELEASE_ASSERT(false, "Inconsistent build config"); + return 0; +} + +int32_t nsHtml5Tokenizer::StateLoopLineColSIMD(int32_t state, char16_t c, + int32_t pos, char16_t* buf, + bool reconsume, + int32_t returnState, + int32_t endPos) { + MOZ_RELEASE_ASSERT(false, "Inconsistent build config"); + return 0; +} + +int32_t nsHtml5Tokenizer::StateLoopViewSourceSIMD(int32_t state, char16_t c, + int32_t pos, char16_t* buf, + bool reconsume, + int32_t returnState, + int32_t endPos) { + MOZ_RELEASE_ASSERT(false, "Inconsistent build config"); + return 0; +} diff --git a/parser/htmlaccel/gtest/TestHtmlSimd.cpp b/parser/htmlaccel/gtest/TestHtmlSimd.cpp @@ -0,0 +1,62 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "gtest/gtest.h" +#include "mozilla/htmlaccel/htmlaccelNotInline.h" + +// Match in the first half +const char16_t HTML_SIMD_TEST_INPUT_LOW[16] = { + 'a', + 0xD834, // Surrogate pair + 0xDD65, '\n', '<', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', +}; + +// Match in the second half +const char16_t HTML_SIMD_TEST_INPUT_HIGH[16] = { + 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'a', + 0xD834, // Surrogate pair + 0xDD65, '\n', '<', 'f', 'g', 'h', +}; + +TEST(HtmlSimd, TestTextNodeAllowSurrogatesAndLf) +{ + int32_t index = mozilla::htmlaccel::AccelerateDataFastest( + HTML_SIMD_TEST_INPUT_LOW, HTML_SIMD_TEST_INPUT_LOW + 16); + ASSERT_EQ(index, 4); +} + +TEST(HtmlSimd, TestTextNodeAllowSurrogatesDisallowLf) +{ + int32_t index = mozilla::htmlaccel::AccelerateDataViewSource( + HTML_SIMD_TEST_INPUT_LOW, HTML_SIMD_TEST_INPUT_LOW + 16); + ASSERT_EQ(index, 3); +} + +TEST(HtmlSimd, TestTextNodeDisallowSurrogatesAndLf) +{ + int32_t index = mozilla::htmlaccel::AccelerateDataLineCol( + HTML_SIMD_TEST_INPUT_LOW, HTML_SIMD_TEST_INPUT_LOW + 16); + ASSERT_EQ(index, 1); +} + +TEST(HtmlSimd, TestTextNodeAllowSurrogatesAndLfHigh) +{ + int32_t index = mozilla::htmlaccel::AccelerateDataFastest( + HTML_SIMD_TEST_INPUT_HIGH, HTML_SIMD_TEST_INPUT_HIGH + 16); + ASSERT_EQ(index, 4 + 8); +} + +TEST(HtmlSimd, TestTextNodeAllowSurrogatesDisallowLfHigh) +{ + int32_t index = mozilla::htmlaccel::AccelerateDataViewSource( + HTML_SIMD_TEST_INPUT_HIGH, HTML_SIMD_TEST_INPUT_HIGH + 16); + ASSERT_EQ(index, 3 + 8); +} + +TEST(HtmlSimd, TestTextNodeDisallowSurrogatesAndLfHigh) +{ + int32_t index = mozilla::htmlaccel::AccelerateDataLineCol( + HTML_SIMD_TEST_INPUT_HIGH, HTML_SIMD_TEST_INPUT_HIGH + 16); + ASSERT_EQ(index, 1 + 8); +} diff --git a/parser/htmlaccel/gtest/moz.build b/parser/htmlaccel/gtest/moz.build @@ -0,0 +1,16 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# 
License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +if ( + CONFIG["TARGET_CPU"] == "x86_64" + or (CONFIG["TARGET_CPU"] == "aarch64" and CONFIG["TARGET_ENDIANNESS"] == "little") +) and (CONFIG["CC_TYPE"] != "gcc" or int(CONFIG["CC_VERSION"].split(".")[0]) >= 12): + SOURCES += { + "TestHtmlSimd.cpp", + } + SOURCES["TestHtmlSimd.cpp"].flags += CONFIG["HTML_ACCEL_FLAGS"] + +FINAL_LIBRARY = "xul-gtest" diff --git a/parser/htmlaccel/htmlaccel.h b/parser/htmlaccel/htmlaccel.h @@ -0,0 +1,322 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozilla_htmlaccel_htmlaccel_h +#define mozilla_htmlaccel_htmlaccel_h + +#include <string.h> +#include <stdint.h> + +// Avoid adding more Gecko-specific headers to keep it easy enough to +// copy and paste the contents of this file to Compiler Explorer. +#include "mozilla/Attributes.h" + +// This file provides SIMD code for skipping over characters that +// the caller doesn't need to act upon. For example, this code can +// skip over characters that the HTML tokenizer doesn't need to handle +// specially in a given state or this code could be used to skip over +// characters that don't need to be escaped in an HTML serializer. + +// ISA SUPPORT: Do not include this file unless the compilation unit is +// being compiled either for little-endian aarch64 or for x86/x86_64 with +// at least SSSE3 enabled. +// +// It's probably feasible to extend this to support little-endian POWER +// by defining +// MOZ_ALWAYS_INLINE_EVEN_DEBUG uint8x16_t TableLookup(uint8x16_t table, +// uint8x16_t nibbles) { +// return vec_perm(table, table, nibbles); +// } +// but since I don't have a little-endian POWER system to test with, +// this is left as an exercise to the reader. 
(The x86/x86_64 reduction +// code should be portable to POWER10 using vec_extractm and the aarch64 +// reduction code should be portable to older POWER using vec_max.) +// +// ARMv7 is deliberately not supported due to vqtbl1q_u8 being a newer +// addition to NEON. +#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__ +# error "A little-endian target is required." +#endif +#if !(defined(__aarch64__) || defined(__SSSE3__)) +# error "Must be targeting aarch64 or SSSE3." +#endif + +// NOTE: This file uses GCC/clang built-ins that provide SIMD portability. +// Compared to pretending unawareness of what arm_neon.h and tmmintrin.h +// map to in GCC and clang, this has the benefit that the code is not stuck +// at an SSSE3 local maximum but adapts maximally to upgrades to SSE 4.2, +// AVX2, and BMI. (Yes, enabling BMI seems to affect more than just +// __builtin_ctz!) +// (We need to check for __clang__, because clang-cl does not define __GNUC__.) +#if !(defined(__GNUC__) || defined(__clang__)) +# error "A compiler that supports GCC-style portable SIMD is required." +#endif + +// # General +// +// There is an entry point per combination of what characters terminate +// the acceleration loop (i.e. characters that the HTML tokenizer would not +// simply skip over). The shared implementation code is inlined into these +// FFI entry point functions, so the parametrization made inside the FFI +// functions constant-propagates through the implementation internals. +// +// The code examines 16 UTF-16 code units at a time as two 128-bit SIMD +// vectors. First, the bytes are regrouped to so that one SIMD vector +// contains the high halves of the UTF-16 code units (zeros for ASCII/Basic +// Latin) and another one contains the low halves. +// +// In the case of the low half, we mask the vector to take the low 4 bits of +// each 8-bit value and do a lookup from a lookup table contained in a SIMD +// vector. 
The 4 bits index into 16 lanes of the other SIMD vector such that +// we get a vector where the positions corresponding to positions of the +// original code units contain the 8-bit value looked up from the table by +// the 4-bit index. +// +// The lookup operation is available unconditionally on aarch64. On +// x86/x86_64, it is part of the SSSE3 instruction set extension, which is +// why on x86/x86_64 we must not call into this code unless SSSE3 is +// available. (Each additional level of compiling this code with SSE4.2, +// AVX2, or AVX2 + BMI makes this code shorter, which presumably means more +// efficient, so instead of compiling this just with SSSE3, we compile this +// with AVX2+BMI on x86_64, considering that CPUs with such capabilities +// have been available for 12 years at the time of landing this code.) +// +// The lookup table contains the loop-terminating ASCII characters in the +// positions given by their low 4 bits. For example, the less-than sign is +// U+003C, so the value 0x3C is at index 0xC (decimal 12). Positions that +// don’t correspond to a character of interest have the value 1, except lane +// 1 has the placeholder value 2. This way, characters that we don’t want to +// match anything in the lookup table get a non-matching placeholder: U+0001 +// gets compared with 2 (semantically U+0002) and everything else not of +// interest gets compared with 1 (semantically U+0001) to produce a +// non-matching lane. +// +// This means that instead of comparing the vector of the low halves of the +// UTF-16 code units against multiple constant vectors each filled in all +// lanes with a given ASCII character of interest, the table lookup gives us +// one vector to compare against where each lane can have a different ASCII +// character of interest to compare with. +// +// This requires the ASCII characters of interest to have mutually distinct +// low 4 bits. 
This is true for U+0000, &, <, LF, CR, ", and ', but, +// unfortunately, CR, ] and - share the low 4 bits, so cases where we need +// to include a check for ] or - need to do a separate check, since CR is +// always in the lookup table. (Checks for ", ', ], and - are not here at +// this time but will come in follow-up patches.) +// +// From these operations, we get a vector of 16 8-bit mask lanes where a +// lane is 0xFF if the low 8 bits of the UTF-16 code unit matched an ASCII +// character that terminates the loop and 0x00 otherwise. We lane-wise +// compare the high halves with zero and AND the resulting mask vector +// together with the mask vector that resulted from processing the low 8 +// bits to confirm which low 8 bits had 0 as the high 8 bits, i.e. the +// UTF-16 code unit really was Basic Latin. +// +// If we have a configuration that requires terminating the loop on +// surrogates, we check the vector containing the high halves of the UTF-16 +// code units for surrogates (by masking certain high bits to compare them +// with a constant) and OR the resulting mask vector together with the +// vector computed above. +// +// Now we have a vector of 16 8-bit mask lanes that corresponds to the input +// of 16 UTF-16 code units to indicate which code units in the run of 16 +// UTF-16 code units require terminating the loop (i.e. must not be skipped +// over). At this point, the handling diverges for x86/x86_64 and aarch64. +// +// ## x86/x86_64 +// +// We convert the SIMD mask into bits in an ALU register. The operation +// returns a 32-bit type, but only the low 16 bits can be non-zero. If the +// integer is non-zero, the loop terminates, since some lane in the mask was +// non-zero. In this case, we return the number of trailing zeros in the +// integer. (We already know we must have a non-zero bit somewhere in the low +// 16 bits, so we can’t end up counting to the high half of the 32-bit type.) 
+// Due to the little-endian semantics, the first UTF-16 code unit in the +// input corresponds to the least-significant bit in the integer, so when the +// first UTF-16 code unit in the input is unskippable, the least-significant +// bit in the integer is 1, so there are 0 trailing zeros, i.e. 0 skippable +// UTF-16 code units. +// +// ## aarch64 +// +// We want to know if any lane in the mask is non-zero to decide whether to +// terminate the loop. If there is a non-zero lane, we want to know the +// position of the first (in the content order of the input UTF-16 text) +// non-zero lane. To accomplish these goals, we bitwise AND the mask vector +// with a vector of 16 constants. Since ANDing with a mask lane set to zero +// results in zero, we need all 16 constants to be non-zero. Yet, we need to +// be able to accommodate the possibility of the first lane in content order +// being set, which means we need to compute 0 as the result. To be able to +// compute 0 but have the constants be non-zero, the constants are numbers +// that need to be subtracted from 16. That is, the constant vector has lanes +// set to numbers from 16 to 1 (inclusive). We do the reduction of the +// resulting SIMD vector to an ALU integer by taking the value of the lane +// with the largest value. +// +// If no mask lane was set, the max operation results in 0, so if the +// integer is zero, the loop continues. Otherwise, we get the number of +// skippable UTF-16 code units by subtracting the integer from 16. That is, +// if the first UTF-16 unit is unskippable, we get 16 as the max lane value +// and 16-16=0. +// +// # Alignment +// +// These functions use unaligned SIMD loads, because alignment +// doesn't matter on aarch64 CPUs or on x86_64 CPUs from the most +// recent decade or so. It's not worthwhile to add complexity for +// old CPUs. +// +// # Inlining +// +// The public functions here are expected to be called from a loop. 
To give +// LICM the opportunity to hoist the SIMD constants out of the loop, make +// sure that every function on the path from the loop to here is declared +// MOZ_ALWAYS_INLINE_EVEN_DEBUG and that all these and the loop itself are +// compiled with the same instruction set extension flags (if applicable). +// +// # Acknowledments +// +// https://lemire.me/blog/2024/06/08/scan-html-faster-with-simd-instructions-chrome-edition/ + +#if defined(__aarch64__) + +# include <arm_neon.h> + +#else // x86/x86_64 + +# include <tmmintrin.h> +// Using syntax that clang-tidy doesn't like to match GCC guidance. +typedef uint8_t uint8x16_t __attribute__((vector_size(16))); + +#endif + +namespace mozilla::htmlaccel { + +namespace detail { + +#if defined(__aarch64__) +// The idea is that when this is ANDed with the mask, we get 0 in the +// non-match positions and the leftmost match ends up with higest number. +// This way, taking the max value of the result is zero if all positions +// are non-match, and otherwise we get a value that when subtracted from +// 16 indicates the index of the leftmost match. +const uint8x16_t INVERTED_ADVANCES = {16, 15, 14, 13, 12, 11, 10, 9, + 8, 7, 6, 5, 4, 3, 2, 1}; + +MOZ_ALWAYS_INLINE_EVEN_DEBUG uint8x16_t TableLookup(uint8x16_t aTable, + uint8x16_t aNibbles) { + return vqtbl1q_u8(aTable, aNibbles); +} + +#else // x86/x86_64 + +MOZ_ALWAYS_INLINE_EVEN_DEBUG uint8x16_t TableLookup(uint8x16_t aTable, + uint8x16_t aNibbles) { + // GCC wants reinterpret_cast + return reinterpret_cast<uint8x16_t>(_mm_shuffle_epi8(aTable, aNibbles)); +} + +#endif + +// These formulations optimize nicely, so no point in trying something fancier +// to fill all lanes with the same byte. 
+const uint8x16_t ALL_ZEROS = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; +const uint8x16_t NIBBLE_MASK = {0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, + 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF}; +const uint8x16_t SURROGATE_MASK = {0xF8, 0xF8, 0xF8, 0xF8, 0xF8, 0xF8, + 0xF8, 0xF8, 0xF8, 0xF8, 0xF8, 0xF8, + 0xF8, 0xF8, 0xF8, 0xF8}; +const uint8x16_t SURROGATE_MATCH = {0xD8, 0xD8, 0xD8, 0xD8, 0xD8, 0xD8, + 0xD8, 0xD8, 0xD8, 0xD8, 0xD8, 0xD8, + 0xD8, 0xD8, 0xD8, 0xD8}; + +// The approach here supports disallowing up to 16 different +// characters that 1) are in the Latin1 range, i.e. U+00FF or +// below, and 2) do not have the lowest 4 bits in common with +// each other. +// +// The code point value of each disallowed character needs +// to be placed in the vector at the position indexed by the +// low 4 bits of the character (low four bits 0 is the leftmost +// position and low four bits 15 is the rightmost position). +// +// U+0001 neither occurs in typical HTML nor is one of the +// code points we care about, so use 1 as the non-matching +// value. We do care about U+0000, unfortunately. +// We use U+0002 at position 1 to make sure it doesn't +// match, either. That is, we put 1 in the positions we +// don't care about except we put 2 at position 1. + +/// Disallow U+0000, less-than, ampersand, and carriage return. +const uint8x16_t ZERO_LT_AMP_CR = {0, 2, 1, 1, 1, 1, '&', 1, + 1, 1, 1, 1, '<', '\r', 1, 1}; +/// Disallow U+0000, less-than, ampersand, carriage return, and line feed. +const uint8x16_t ZERO_LT_AMP_CR_LF = {0, 2, 1, 1, 1, 1, '&', 1, + 1, 1, '\n', 1, '<', '\r', 1, 1}; + +/// Compute a 16-lane mask for for 16 UTF-16 code units, where a lane +/// is 0x00 if OK to skip and 0xFF in not OK to skip. +MOZ_ALWAYS_INLINE_EVEN_DEBUG uint8x16_t +StrideToMask(const char16_t* aArr /* len = 16 */, uint8x16_t aTable, + bool aAllowSurrogates) { + uint8x16_t first; + uint8x16_t second; + // memcpy generates a single unaligned load instruction with both ISAs. 
+ memcpy(&first, aArr, 16); + memcpy(&second, aArr + 8, 16); + // Each shuffle maps to a single instruction on aarch64. + // On x86/x86_64, how efficiently these shuffles map to instructions + // depends on the level of instruction set extensions chosen, which + // is the main reason that we compile this file at a higher extension + // level than the minimum SSSE3 (and the main reason why this file + // uses GNU C portable SIMD instead of sticking to what's in the + // Intel-defined headers). + uint8x16_t low_halves = __builtin_shufflevector( + first, second, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); + uint8x16_t high_halves = __builtin_shufflevector( + first, second, 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31); + uint8x16_t high_half_matches = high_halves == ALL_ZEROS; + uint8x16_t low_half_matches = + low_halves == TableLookup(aTable, low_halves & NIBBLE_MASK); + uint8x16_t ret = low_half_matches & high_half_matches; + if (!aAllowSurrogates) { // Assumed to be constant-propagated + ret |= (high_halves & SURROGATE_MASK) == SURROGATE_MATCH; + } + return ret; +} + +MOZ_ALWAYS_INLINE_EVEN_DEBUG int32_t AccelerateTextNode(const char16_t* aInput, + const char16_t* aEnd, + uint8x16_t aTable, + bool aAllowSurrogates) { + const char16_t* current = aInput; + while (aEnd - current >= 16) { + uint8x16_t mask = StrideToMask(current, aTable, aAllowSurrogates); +#if defined(__aarch64__) + uint8_t max = vmaxvq_u8(mask & INVERTED_ADVANCES); + if (max != 0) { + return int32_t((current - aInput) + 16 - max); + } +#else // x86/x86_64 + int int_mask = _mm_movemask_epi8(mask); + if (int_mask != 0) { + // The least-significant bit in the integer corresponds to + // the first SIMD lane in text order. Hence, we need to count + // trailing zeros. We already checked that the bits are not + // all zeros, so __builtin_ctz isn't UB. 
+ return int32_t((current - aInput) + __builtin_ctz(int_mask)); + } +#endif + current += 16; + } + return int32_t(current - aInput); +} + +} // namespace detail + +// Public entry points are in htmlaccelNotInline.h for now. + +} // namespace mozilla::htmlaccel + +#endif // mozilla_htmlaccel_htmlaccel_h diff --git a/parser/htmlaccel/htmlaccelEnabled.h b/parser/htmlaccel/htmlaccelEnabled.h @@ -0,0 +1,30 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozilla_htmlaccel_htmlaccelEnabled_h +#define mozilla_htmlaccel_htmlaccelEnabled_h + +#if defined(__x86_64__) +# include "mozilla/SSE.h" +#endif + +namespace mozilla::htmlaccel { + +/// This function is appropriate to call when the SIMD path is compiled +/// with `HTML_ACCEL_FLAGS`. +/// +/// Keep this in sync with `HTML_ACCEL_FLAGS` in `toolchain.configure`. +inline bool htmlaccelEnabled() { +#if defined(__aarch64__) && defined(__LITTLE_ENDIAN__) + return true; +#elif defined(__x86_64__) + return mozilla::supports_bmi() && mozilla::supports_avx(); +#else + return false; +#endif +} + +} // namespace mozilla::htmlaccel + +#endif // mozilla_htmlaccel_htmlaccelEnabled_h diff --git a/parser/htmlaccel/htmlaccelNotInline.cpp b/parser/htmlaccel/htmlaccelNotInline.cpp @@ -0,0 +1,30 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "mozilla/htmlaccel/htmlaccel.h" +#include "mozilla/htmlaccel/htmlaccelNotInline.h" + +namespace mozilla::htmlaccel { + +/// The innerHTML / DOMParser case for the data state in the HTML parser +MOZ_NEVER_INLINE int32_t AccelerateDataFastest(const char16_t* aPtr, + const char16_t* aEnd) { + return detail::AccelerateTextNode(aPtr, aEnd, detail::ZERO_LT_AMP_CR, true); +} + +/// View Source case for the data state in the HTML parser +MOZ_NEVER_INLINE int32_t AccelerateDataViewSource(const char16_t* aPtr, + const char16_t* aEnd) { + return detail::AccelerateTextNode(aPtr, aEnd, detail::ZERO_LT_AMP_CR_LF, + true); +} + +/// Normal network case for the data state in the HTML parser +MOZ_NEVER_INLINE int32_t AccelerateDataLineCol(const char16_t* aPtr, + const char16_t* aEnd) { + return detail::AccelerateTextNode(aPtr, aEnd, detail::ZERO_LT_AMP_CR_LF, + false); +} + +} // namespace mozilla::htmlaccel diff --git a/parser/htmlaccel/htmlaccelNotInline.h b/parser/htmlaccel/htmlaccelNotInline.h @@ -0,0 +1,34 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozilla_htmlaccel_htmlaccelNotInline_h +#define mozilla_htmlaccel_htmlaccelNotInline_h + +#include "mozilla/Attributes.h" + +namespace mozilla::htmlaccel { +// Logically these should be MOZ_ALWAYS_INLINE_EVEN_DEBUG if LLVM was working +// as expected. However, these are MOZ_NEVER_INLINE to work around +// https://github.com/llvm/llvm-project/issues/160886 . This way, we get +// a little bit of LICM for the SIMD constants that need to be loaded +// from the constant pool instead of getting materialized by splatting +// an immediate. Once the LLVM bug is fixed, these should be changed +// to MOZ_ALWAYS_INLINE_EVEN_DEBUG to allow the constants to move further +// up to the top of nsHtml5Tokenizer::stateLoop. 
+ +/// The innerHTML / DOMParser case for the data state in the HTML parser +MOZ_NEVER_INLINE int32_t AccelerateDataFastest(const char16_t* aPtr, + const char16_t* aEnd); + +/// View Source case for the data state in the HTML parser +MOZ_NEVER_INLINE int32_t AccelerateDataViewSource(const char16_t* aPtr, + const char16_t* aEnd); + +/// Normal network case for the data state in the HTML parser +MOZ_NEVER_INLINE int32_t AccelerateDataLineCol(const char16_t* aPtr, + const char16_t* aEnd); + +} // namespace mozilla::htmlaccel + +#endif // mozilla_htmlaccel_htmlaccelNotInline_h diff --git a/parser/htmlaccel/moz.build b/parser/htmlaccel/moz.build @@ -0,0 +1,30 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +EXPORTS.mozilla.htmlaccel += [ + "htmlaccel.h", + "htmlaccelEnabled.h", + "htmlaccelNotInline.h", +] + +# Make sure the result is consistent with mozilla::htmlaccel::htmlaccelEnabled(). +# +# Due to https://github.com/llvm/llvm-project/issues/160886, the entry points +# need to be _not_ inline and, therefore, need a compilation unit. This should +# go away once the LLVM bug is fixed. 
+ +if ( + (CONFIG["TARGET_CPU"] == "x86_64") + or (CONFIG["TARGET_CPU"] == "aarch64" and CONFIG["TARGET_ENDIANNESS"] == "little") +) and (CONFIG["CC_TYPE"] != "gcc" or int(CONFIG["CC_VERSION"].split(".")[0]) >= 12): + SOURCES += [ + "htmlaccelNotInline.cpp", + ] + SOURCES["htmlaccelNotInline.cpp"].flags += CONFIG["HTML_ACCEL_FLAGS"] + +TEST_DIRS += ["gtest"] + +FINAL_LIBRARY = "xul" diff --git a/parser/moz.build b/parser/moz.build @@ -7,7 +7,7 @@ with Files("**"): BUG_COMPONENT = ("Core", "DOM: HTML Parser") -DIRS += ["expat", "prototype", "xml", "htmlparser", "html"] +DIRS += ["expat", "prototype", "xml", "htmlaccel", "htmlparser", "html"] EXPORTS += [ "nsCharsetSource.h",