commit aec3d65abbe8a0bea7f7bdd1f033e1148e87e6b0
parent 703157f328c5b800349a2b9b13a5150e556ae55e
Author: Atila Butkovits <abutkovits@mozilla.com>
Date: Mon, 20 Oct 2025 15:37:49 +0300
Revert "Bug 1499682 - SIMD-accelerate the data state in the HTML tokenizer. r=smaug,sergesanspaille" for causing failures at test_html5_tree_construction.html.
This reverts commit 96fdec23a214937120ab575f6bd9e41a96706d40.
Diffstat:
21 files changed, 4483 insertions(+), 5723 deletions(-)
diff --git a/build/moz.configure/toolchain.configure b/build/moz.configure/toolchain.configure
@@ -3893,35 +3893,6 @@ set_config(
),
)
-
-@depends(target, c_compiler)
-def htmlaccel_config(target, c_compiler):
- # Keep this is sync with the mozilla::htmlaccel::htmlaccelEnabled function.
- #
- # The code compiles on SSSE3, but AVX+BMI generates better code
- # and has been available for 12 years at the time of landing this,
- # so let's give the best code to users with reasonably recent hardware.
- #
- # Not enabled on 32-bit x86, due to lack of insight into what hardware is
- # representative at this point in time and due to lack of such hardware
- # for testing to see what config would actually be an optimization.
- #
- # aarch64 does not need extra flags.
- #
- # clang-cl doesn't tolerate -flax-vector-conversions but GCC requires it.
- #
- # -mavx2 doesn't change codegen vs. -mavx. AVX2 and BMI always co-occur
- # in Intel CPUs, but there are AMD CPUs that have AVX and BMI without
- # AVX2.
- if target.cpu != "x86_64":
- return []
- if c_compiler.type == "gcc":
- return ["-mavx", "-mbmi", "-flax-vector-conversions"]
- return ["-mavx", "-mbmi"]
-
-
-set_config("HTML_ACCEL_FLAGS", htmlaccel_config)
-
# dtrace support
##
option("--enable-dtrace", help="Build with dtrace support")
diff --git a/parser/html/javasrc/Tokenizer.java b/parser/html/javasrc/Tokenizer.java
@@ -932,7 +932,7 @@ public class Tokenizer implements Locator, Locator2 {
// ]NOCPP]
- @Inline HtmlAttributes emptyAttributes() {
+ HtmlAttributes emptyAttributes() {
// [NOCPP[
if (newAttributesEachTime) {
return new HtmlAttributes(mappingLangToXmlLang);
@@ -944,7 +944,7 @@ public class Tokenizer implements Locator, Locator2 {
// ]NOCPP]
}
- private void appendCharRefBuf(char c) {
+ @Inline private void appendCharRefBuf(char c) {
// CPPONLY: assert charRefBufLen < charRefBuf.length:
// CPPONLY: "RELEASE: Attempted to overrun charRefBuf!";
charRefBuf[charRefBufLen++] = c;
@@ -982,7 +982,7 @@ public class Tokenizer implements Locator, Locator2 {
* @param c
* the UTF-16 code unit to append
*/
- private void appendStrBuf(char c) {
+ @Inline private void appendStrBuf(char c) {
// CPPONLY: assert strBufLen < strBuf.length: "Previous buffer length insufficient.";
// CPPONLY: if (strBufLen == strBuf.length) {
// CPPONLY: if (!EnsureBufferSpace(1)) {
@@ -1000,7 +1000,7 @@ public class Tokenizer implements Locator, Locator2 {
*
* @return the buffer as a string
*/
- @Inline protected String strBufToString() {
+ protected String strBufToString() {
String str = Portability.newStringFromBuffer(strBuf, 0, strBufLen
// CPPONLY: , tokenHandler, !newAttributesEachTime && attributeName == AttributeName.CLASS
);
@@ -1014,7 +1014,7 @@ public class Tokenizer implements Locator, Locator2 {
*
* @return the buffer as local name
*/
- @Inline private void strBufToDoctypeName() {
+ private void strBufToDoctypeName() {
doctypeName = Portability.newLocalNameFromBuffer(strBuf, strBufLen, interner);
clearStrBufAfterUse();
}
@@ -1025,7 +1025,7 @@ public class Tokenizer implements Locator, Locator2 {
* @throws SAXException
* if the token handler threw
*/
- @Inline private void emitStrBuf() throws SAXException {
+ private void emitStrBuf() throws SAXException {
if (strBufLen > 0) {
tokenHandler.characters(strBuf, 0, strBufLen);
clearStrBufAfterUse();
@@ -1455,6 +1455,12 @@ public class Tokenizer implements Locator, Locator2 {
*/
int pos = start - 1;
+ /**
+ * The index of the first <code>char</code> in <code>buf</code> that is
+ * part of a coalesced run of character tokens or
+ * <code>Integer.MAX_VALUE</code> if there is not a current run being
+ * coalesced.
+ */
switch (state) {
case DATA:
case RCDATA:
@@ -1480,24 +1486,19 @@ public class Tokenizer implements Locator, Locator2 {
break;
}
+ /**
+ * The number of <code>char</code>s in <code>buf</code> that have
+ * meaning. (The rest of the array is garbage and should not be
+ * examined.)
+ */
// CPPONLY: if (mViewSource) {
// CPPONLY: mViewSource.SetBuffer(buffer);
- // CPPONLY: if (htmlaccelEnabled()) {
- // CPPONLY: pos = StateLoopViewSourceSIMD(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd());
- // CPPONLY: } else {
- // CPPONLY: pos = StateLoopViewSourceALU(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd());
- // CPPONLY: }
+ // CPPONLY: pos = stateLoop(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd());
// CPPONLY: mViewSource.DropBuffer((pos == buffer.getEnd()) ? pos : pos + 1);
// CPPONLY: } else if (tokenHandler.WantsLineAndColumn()) {
- // CPPONLY: if (htmlaccelEnabled()) {
- // CPPONLY: pos = StateLoopLineColSIMD(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd());
- // CPPONLY: } else {
- // CPPONLY: pos = StateLoopLineColALU(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd());
- // CPPONLY: }
- // CPPONLY: } else if (htmlaccelEnabled()) {
- // CPPONLY: pos = StateLoopFastestSIMD(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd());
+ // CPPONLY: pos = stateLoop(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd());
// CPPONLY: } else {
- // CPPONLY: pos = StateLoopFastestALU(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd());
+ // CPPONLY: pos = stateLoop(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd());
// CPPONLY: }
// [NOCPP[
pos = stateLoop(state, c, pos, buffer.getBuffer(), false, returnState,
@@ -1546,7 +1547,7 @@ public class Tokenizer implements Locator, Locator2 {
}
// ]NOCPP]
- @SuppressWarnings("unused") @Inline private int stateLoop(int state, char c,
+ @SuppressWarnings("unused") private int stateLoop(int state, char c,
int pos, @NoLength char[] buf, boolean reconsume, int returnState,
int endPos) throws SAXException {
boolean reportedConsecutiveHyphens = false;
@@ -1622,127 +1623,54 @@ public class Tokenizer implements Locator, Locator2 {
switch (state) {
case DATA:
dataloop: for (;;) {
- // Ideally this reconsume block would be a separate state, DATA_RECONSUME above this one
- // with fallthrough into this state. However, such a change would be disruptive to
- // TransitionHandler and everything that works with returnState.
if (reconsume) {
reconsume = false;
- // This is a manual copy of the switch below with break/continue
- // adjusted as relevant. Make sure to keep in sync with the switch below!
- switch (c) {
- case '&':
- /*
- * U+0026 AMPERSAND (&) Switch to the character
- * reference in data state.
- */
- flushChars(buf, pos);
- assert charRefBufLen == 0: "charRefBufLen not reset after previous use!";
- appendCharRefBuf(c);
- setAdditionalAndRememberAmpersandLocation('\u0000');
- returnState = state;
- state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
- continue stateloop;
- case '<':
- /*
- * U+003C LESS-THAN SIGN (<) Switch to the tag
- * open state.
- */
- flushChars(buf, pos);
-
- state = transition(state, Tokenizer.TAG_OPEN, reconsume, pos);
- // `break` optimizes; `continue stateloop;` would be valid
- break dataloop;
- case '\u0000':
- maybeEmitReplacementCharacter(buf, pos);
- break;
- case '\r':
- emitCarriageReturn(buf, pos);
- break stateloop;
- case '\n':
- silentLineFeed();
- // CPPONLY: MOZ_FALLTHROUGH;
- default:
- /*
- * Anything else Emit the input character as a
- * character token.
- *
- * Stay in the data state.
- */
- break;
+ } else {
+ if (++pos == endPos) {
+ break stateloop;
}
+ c = checkChar(buf, pos);
}
- datamiddle: for (;;) {
- ++pos;
- // Perhaps at some point, it will be appropriate to do SIMD in Java, but not today.
- // The line below advances pos by some number of code units that this state is indifferent to.
- // CPPONLY: pos += accelerateAdvancementData(buf, pos, endPos);
- for (;;) {
- if (pos == endPos) {
- break stateloop;
- }
- c = checkChar(buf, pos);
- // Make sure to keep in sync with the switch above in the reconsume block!
- switch (c) {
- case '&':
- /*
- * U+0026 AMPERSAND (&) Switch to the character
- * reference in data state.
- */
- flushChars(buf, pos);
- assert charRefBufLen == 0: "charRefBufLen not reset after previous use!";
- appendCharRefBuf(c);
- setAdditionalAndRememberAmpersandLocation('\u0000');
- returnState = state;
- state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
- continue stateloop;
- case '<':
- /*
- * U+003C LESS-THAN SIGN (<) Switch to the tag
- * open state.
- */
- flushChars(buf, pos);
-
- state = transition(state, Tokenizer.TAG_OPEN, reconsume, pos);
- // `break` optimizes; `continue stateloop;` would be valid
- break dataloop;
- case '\u0000':
- maybeEmitReplacementCharacter(buf, pos);
- // Continue from above the accelerateAdvancementData call.
- continue datamiddle;
- case '\r':
- emitCarriageReturn(buf, pos);
- break stateloop;
- case '\n':
- silentLineFeed();
- // Continue from above the accelerateAdvancementData call.
- continue datamiddle;
- default:
- /*
- * Anything else Emit the input character as a
- * character token.
- *
- * Stay in the data state.
- */
- // Don't go back to accelerateAdvancementData to avoid
- // bouncing back and forth in a way that doesn't make good
- // use of SIMD when we have less than a SIMD stride to go
- // or when we come here due to a non-BMP characters.
- // The SIMD code doesn't have ALU handling for the remainder
- // that is shorter than a SIMD stride, because this case
- // in this switch has to exist anyway (for SIMD-unavailable
- // and for non-BMP cases) and this innermost loop can serve
- // that purpose, too. In the non-BMP case we stay on the
- // ALU path until we end up in one of the other cases in this
- // switch (e.g. end of line) in order to avoid bouncing back
- // and forth when we have text in a non-BMP script instead
- // of an isolated emoji.
- //
- // We need to increment pos when staying in this innermost
- // loop!
- ++pos;
- continue;
- }
- }
+ switch (c) {
+ case '&':
+ /*
+ * U+0026 AMPERSAND (&) Switch to the character
+ * reference in data state.
+ */
+ flushChars(buf, pos);
+ assert charRefBufLen == 0: "charRefBufLen not reset after previous use!";
+ appendCharRefBuf(c);
+ setAdditionalAndRememberAmpersandLocation('\u0000');
+ returnState = state;
+ state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
+ continue stateloop;
+ case '<':
+ /*
+ * U+003C LESS-THAN SIGN (<) Switch to the tag
+ * open state.
+ */
+ flushChars(buf, pos);
+
+ state = transition(state, Tokenizer.TAG_OPEN, reconsume, pos);
+ // `break` optimizes; `continue stateloop;` would be valid
+ break dataloop;
+ case '\u0000':
+ maybeEmitReplacementCharacter(buf, pos);
+ continue;
+ case '\r':
+ emitCarriageReturn(buf, pos);
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ // CPPONLY: MOZ_FALLTHROUGH;
+ default:
+ /*
+ * Anything else Emit the input character as a
+ * character token.
+ *
+ * Stay in the data state.
+ */
+ continue;
}
}
// CPPONLY: MOZ_FALLTHROUGH;
@@ -4074,122 +4002,52 @@ public class Tokenizer implements Locator, Locator2 {
// no fallthrough, reordering opportunity
case RCDATA:
rcdataloop: for (;;) {
- // Ideally this reconsume block would be a separate state, RCDATA_RECONSUME above this one
- // with fallthrough into this state. However, such a change would be disruptive to
- // TransitionHandler and everything that works with returnState.
if (reconsume) {
reconsume = false;
- // This is a manual copy of the switch below with break/continue
- // adjusted as relevant. Make sure to keep in sync with the switch below!
- switch (c) {
- case '&':
- /*
- * U+0026 AMPERSAND (&) Switch to the character
- * reference in RCDATA state.
- */
- flushChars(buf, pos);
- assert charRefBufLen == 0: "charRefBufLen not reset after previous use!";
- appendCharRefBuf(c);
- setAdditionalAndRememberAmpersandLocation('\u0000');
- returnState = state;
- state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
- continue stateloop;
- case '<':
- /*
- * U+003C LESS-THAN SIGN (<) Switch to the
- * RCDATA less-than sign state.
- */
- flushChars(buf, pos);
-
- state = transition(state, Tokenizer.RAWTEXT_RCDATA_LESS_THAN_SIGN, reconsume, pos);
- continue stateloop;
- case '\u0000':
- maybeEmitReplacementCharacter(buf, pos);
- break;
- case '\r':
- emitCarriageReturn(buf, pos);
- break stateloop;
- case '\n':
- silentLineFeed();
- // CPPONLY: MOZ_FALLTHROUGH;
- default:
- /*
- * Emit the current input character as a
- * character token. Stay in the RCDATA state.
- */
- break;
+ } else {
+ if (++pos == endPos) {
+ break stateloop;
}
+ c = checkChar(buf, pos);
}
- rcdatamiddle: for (;;) {
- ++pos;
- // Perhaps at some point, it will be appropriate to do SIMD in Java, but not today.
- // The line below advances pos by some number of code units that this state is indifferent to.
- // RCDATA and DATA have the same set of characters that they are indifferent to, hence accelerateData.
- // CPPONLY: pos += accelerateAdvancementData(buf, pos, endPos);
- for (;;) {
- if (pos == endPos) {
- break stateloop;
- }
- c = checkChar(buf, pos);
- // Make sure to keep in sync with the switch above in the reconsume block!
- switch (c) {
- case '&':
- /*
- * U+0026 AMPERSAND (&) Switch to the character
- * reference in RCDATA state.
- */
- flushChars(buf, pos);
- assert charRefBufLen == 0: "charRefBufLen not reset after previous use!";
- appendCharRefBuf(c);
- setAdditionalAndRememberAmpersandLocation('\u0000');
- returnState = state;
- state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
- continue stateloop;
- case '<':
- /*
- * U+003C LESS-THAN SIGN (<) Switch to the
- * RCDATA less-than sign state.
- */
- flushChars(buf, pos);
-
- state = transition(state, Tokenizer.RAWTEXT_RCDATA_LESS_THAN_SIGN, reconsume, pos);
- continue stateloop;
- case '\u0000':
- maybeEmitReplacementCharacter(buf, pos);
- // Continue from above the accelerateAdvancementData call.
- continue rcdatamiddle;
- case '\r':
- emitCarriageReturn(buf, pos);
- break stateloop;
- case '\n':
- silentLineFeed();
- // Continue from above the accelerateAdvancementData call.
- continue rcdatamiddle;
- default:
- /*
- * Emit the current input character as a
- * character token. Stay in the RCDATA state.
- */
- // Don't go back to accelerateAdvancementData to avoid
- // bouncing back and forth in a way that doesn't make good
- // use of SIMD when we have less than a SIMD stride to go
- // or when we come here due to a non-BMP characters.
- // The SIMD code doesn't have ALU handling for the remainder
- // that is shorter than a SIMD stride, because this case
- // in this switch has to exist anyway (for SIMD-unavailable
- // and for non-BMP cases) and this innermost loop can serve
- // that purpose, too. In the non-BMP case we stay on the
- // ALU path until we end up in one of the other cases in this
- // switch (e.g. end of line) in order to avoid bouncing back
- // and forth when we have text in a non-BMP script instead
- // of an isolated emoji.
- //
- // We need to increment pos when staying in this innermost
- // loop!
- ++pos;
- continue;
- }
- }
+ switch (c) {
+ case '&':
+ /*
+ * U+0026 AMPERSAND (&) Switch to the character
+ * reference in RCDATA state.
+ */
+ flushChars(buf, pos);
+ assert charRefBufLen == 0: "charRefBufLen not reset after previous use!";
+ appendCharRefBuf(c);
+ setAdditionalAndRememberAmpersandLocation('\u0000');
+ returnState = state;
+ state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
+ continue stateloop;
+ case '<':
+ /*
+ * U+003C LESS-THAN SIGN (<) Switch to the
+ * RCDATA less-than sign state.
+ */
+ flushChars(buf, pos);
+
+ returnState = state;
+ state = transition(state, Tokenizer.RAWTEXT_RCDATA_LESS_THAN_SIGN, reconsume, pos);
+ continue stateloop;
+ case '\u0000':
+ emitReplacementCharacter(buf, pos);
+ continue;
+ case '\r':
+ emitCarriageReturn(buf, pos);
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ // CPPONLY: MOZ_FALLTHROUGH;
+ default:
+ /*
+ * Emit the current input character as a
+ * character token. Stay in the RCDATA state.
+ */
+ continue;
}
}
// no fallthrough, reordering opportunity
@@ -6490,24 +6348,24 @@ public class Tokenizer implements Locator, Locator2 {
forceQuirks = false;
}
- @Inline private void adjustDoubleHyphenAndAppendToStrBufCarriageReturn()
+ private void adjustDoubleHyphenAndAppendToStrBufCarriageReturn()
throws SAXException {
silentCarriageReturn();
adjustDoubleHyphenAndAppendToStrBufAndErr('\n', false);
}
- @Inline private void adjustDoubleHyphenAndAppendToStrBufLineFeed()
+ private void adjustDoubleHyphenAndAppendToStrBufLineFeed()
throws SAXException {
silentLineFeed();
adjustDoubleHyphenAndAppendToStrBufAndErr('\n', false);
}
- @Inline private void appendStrBufLineFeed() {
+ private void appendStrBufLineFeed() {
silentLineFeed();
appendStrBuf('\n');
}
- @Inline private void appendStrBufCarriageReturn() {
+ private void appendStrBufCarriageReturn() {
silentCarriageReturn();
appendStrBuf('\n');
}
@@ -6525,7 +6383,7 @@ public class Tokenizer implements Locator, Locator2 {
// ]NOCPP]
- @Inline private void emitCarriageReturn(@NoLength char[] buf, int pos)
+ private void emitCarriageReturn(@NoLength char[] buf, int pos)
throws SAXException {
silentCarriageReturn();
flushChars(buf, pos);
@@ -6554,7 +6412,7 @@ public class Tokenizer implements Locator, Locator2 {
cstart = pos + 1;
}
- @Inline private void setAdditionalAndRememberAmpersandLocation(char add) {
+ private void setAdditionalAndRememberAmpersandLocation(char add) {
additional = add;
// [NOCPP[
ampersandLocation = new LocatorImpl(this);
@@ -7219,7 +7077,7 @@ public class Tokenizer implements Locator, Locator2 {
* happened in a non-text context, this method turns that deferred suspension
* request into an immediately-pending suspension request.
*/
- @Inline private void suspendIfRequestedAfterCurrentNonTextToken() {
+ private void suspendIfRequestedAfterCurrentNonTextToken() {
if (suspendAfterCurrentNonTextToken) {
suspendAfterCurrentNonTextToken = false;
shouldSuspend = true;
@@ -7363,7 +7221,7 @@ public class Tokenizer implements Locator, Locator2 {
* @param val
* @throws SAXException
*/
- @Inline private void emitOrAppendTwo(@Const @NoLength char[] val, int returnState)
+ private void emitOrAppendTwo(@Const @NoLength char[] val, int returnState)
throws SAXException {
if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
appendStrBuf(val[0]);
@@ -7373,7 +7231,7 @@ public class Tokenizer implements Locator, Locator2 {
}
}
- @Inline private void emitOrAppendOne(@Const @NoLength char[] val, int returnState)
+ private void emitOrAppendOne(@Const @NoLength char[] val, int returnState)
throws SAXException {
if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
appendStrBuf(val[0]);
@@ -7410,7 +7268,7 @@ public class Tokenizer implements Locator, Locator2 {
}
}
- @Inline public void requestSuspension() {
+ public void requestSuspension() {
shouldSuspend = true;
}
@@ -7453,7 +7311,7 @@ public class Tokenizer implements Locator, Locator2 {
// ]NOCPP]
- @Inline public boolean isInDataState() {
+ public boolean isInDataState() {
return (stateSave == DATA);
}
diff --git a/parser/html/moz.build b/parser/html/moz.build
@@ -85,38 +85,6 @@ UNIFIED_SOURCES += [
"nsParserUtils.cpp",
]
-# Each target needs to compile:
-# (nsHtml5TokenizerALU.cpp XOR nsHtml5TokenizerALUStubs.cpp)
-# AND
-# (nsHtml5TokenizerSIMD.cpp XOR nsHtml5TokenizerSIMDStubs.cpp)
-# AND
-# (nsHtml5TokenizerALU.cpp OR nsHtml5TokenizerSIMD.cpp)
-#
-# Make sure the result is consistent with mozilla::htmlaccel::htmlaccelEnabled().
-#
-# Due to https://github.com/llvm/llvm-project/issues/160886, none of the
-# code here actually ends up with SIMD instructions, and SIMD stays in
-# htmlaccelNotInline.cpp instead. Once the LLVM bug is fixed, the functions
-# in htmlaccelNotInline.cpp should becomed always inlined and
-# nsHtml5TokenizerSIMD.cpp should be built with HTML_ACCEL_FLAGS.
-
-if CONFIG["TARGET_CPU"] == "x86_64":
- UNIFIED_SOURCES += [
- "nsHtml5TokenizerALU.cpp",
- "nsHtml5TokenizerSIMD.cpp",
- ]
-elif CONFIG["TARGET_CPU"] == "aarch64" and CONFIG["TARGET_ENDIANNESS"] == "little":
- # aarch64 doesn't need special flags for SIMD.
- UNIFIED_SOURCES += [
- "nsHtml5TokenizerALUStubs.cpp",
- "nsHtml5TokenizerSIMD.cpp",
- ]
-else:
- UNIFIED_SOURCES += [
- "nsHtml5TokenizerALU.cpp",
- "nsHtml5TokenizerSIMDStubs.cpp",
- ]
-
FINAL_LIBRARY = "xul"
LOCAL_INCLUDES += [
diff --git a/parser/html/nsHtml5Tokenizer.cpp b/parser/html/nsHtml5Tokenizer.cpp
@@ -40,6 +40,8 @@
#include "nsHtml5Tokenizer.h"
+#include "nsHtml5TokenizerLoopPolicies.h"
+
char16_t nsHtml5Tokenizer::LT_GT[] = {'<', '>'};
char16_t nsHtml5Tokenizer::LT_SOLIDUS[] = {'<', '/'};
char16_t nsHtml5Tokenizer::RSQB_RSQB[] = {']', ']'};
@@ -224,10 +226,8 @@ void nsHtml5Tokenizer::setLineNumber(int32_t line) {
this->line = line;
}
-void nsHtml5Tokenizer::appendCharRefBuf(char16_t c) {
- MOZ_RELEASE_ASSERT(charRefBufLen < charRefBuf.length,
- "Attempted to overrun charRefBuf!");
- charRefBuf[charRefBufLen++] = c;
+nsHtml5HtmlAttributes* nsHtml5Tokenizer::emptyAttributes() {  // Out-of-line definition restored by this revert.
+ return nsHtml5HtmlAttributes::EMPTY_ATTRIBUTES;  // Always the static EMPTY_ATTRIBUTES member; nothing is allocated here.
}
void nsHtml5Tokenizer::emitOrAppendCharRefBuf(int32_t returnState) {
@@ -241,14 +241,26 @@ void nsHtml5Tokenizer::emitOrAppendCharRefBuf(int32_t returnState) {
}
}
-void nsHtml5Tokenizer::appendStrBuf(char16_t c) {
- MOZ_ASSERT(strBufLen < strBuf.length, "Previous buffer length insufficient.");
- if (MOZ_UNLIKELY(strBufLen == strBuf.length)) {
- if (MOZ_UNLIKELY(!EnsureBufferSpace(1))) {
- MOZ_CRASH("Unable to recover from buffer reallocation failure");
- }
+nsHtml5String nsHtml5Tokenizer::strBufToString() {  // Returns the first strBufLen code units of strBuf as an nsHtml5String.
+ nsHtml5String str = nsHtml5Portability::newStringFromBuffer(
+ strBuf, 0, strBufLen, tokenHandler,
+ !newAttributesEachTime &&  // Final flag: attribute holders are reused AND ...
+ attributeName == nsHtml5AttributeName::ATTR_CLASS);  // ... the pending attribute is `class`. NOTE(review): how newStringFromBuffer uses the flag is not visible here.
+ clearStrBufAfterUse();  // Done only after the string has been built from the buffer.
+ return str;
+}
+
+void nsHtml5Tokenizer::strBufToDoctypeName() {  // Stores the buffered text into doctypeName; returns nothing.
+ doctypeName =
+ nsHtml5Portability::newLocalNameFromBuffer(strBuf, strBufLen, interner);  // Built as a local name using the tokenizer's interner.
+ clearStrBufAfterUse();
+}
+
+void nsHtml5Tokenizer::emitStrBuf() {  // Hands the buffered text to the token handler as characters.
+ if (strBufLen > 0) {  // Empty buffer: no handler call and no clearStrBufAfterUse().
+ tokenHandler->characters(strBuf, 0, strBufLen);
+ clearStrBufAfterUse();
}
- strBuf[strBufLen++] = c;
}
void nsHtml5Tokenizer::appendStrBuf(char16_t* buffer, int32_t offset,
@@ -331,156 +343,4169 @@ int32_t nsHtml5Tokenizer::emitCurrentTagToken(bool selfClosing, int32_t pos) {
tokenHandler->startTag(tagName, attrs, selfClosing);
}
}
- tagName = nullptr;
- if (newAttributesEachTime) {
- attributes = nullptr;
- } else {
- attributes->clear(0);
- }
- suspendIfRequestedAfterCurrentNonTextToken();
- return stateSave;
+ tagName = nullptr;
+ if (newAttributesEachTime) {
+ attributes = nullptr;
+ } else {
+ attributes->clear(0);
+ }
+ suspendIfRequestedAfterCurrentNonTextToken();
+ return stateSave;
+}
+
+void nsHtml5Tokenizer::attributeNameComplete() {  // Resolves the attribute name held in strBuf and flags duplicates.
+ attributeName =
+ nsHtml5AttributeName::nameByBuffer(strBuf, strBufLen, interner);
+ if (!attributeName) {  // Lookup returned null: fall back to the tokenizer's nonInternedAttributeName object.
+ nonInternedAttributeName->setNameForNonInterned(
+ nsHtml5Portability::newLocalNameFromBuffer(strBuf, strBufLen,
+ interner));
+ attributeName = nonInternedAttributeName;
+ }
+ clearStrBufAfterUse();  // The name has been read out of strBuf by this point.
+ if (!attributes) {  // Create the attribute holder on first use.
+ attributes = new nsHtml5HtmlAttributes(0);
+ }
+ if (attributes->contains(attributeName)) {
+ errDuplicateAttribute();
+ attributeName = nullptr;  // Null name makes addAttributeWithValue/WithoutValue skip the add and only clear strBuf.
+ }
+}
+
+void nsHtml5Tokenizer::addAttributeWithoutValue() {  // Adds the pending attribute with an empty-string value.
+ if (attributeName) {
+ attributes->addAttribute(
+ attributeName, nsHtml5Portability::newEmptyString(), attributeLine);  // attributeLine is recorded along with the attribute.
+ attributeName = nullptr;  // No attribute is pending any more.
+ } else {  // No pending name, e.g. attributeNameComplete() nulled it for a duplicate.
+ clearStrBufAfterUse();
+ }
+}
+
+void nsHtml5Tokenizer::addAttributeWithValue() {  // Adds the pending attribute, taking its value from strBuf.
+ if (attributeName) {
+ nsHtml5String val = strBufToString();  // strBufToString() also calls clearStrBufAfterUse().
+ if (mViewSource) {  // Only when a view-source object is attached.
+ mViewSource->MaybeLinkifyAttributeValue(attributeName, val);
+ }
+ attributes->addAttribute(attributeName, val, attributeLine);
+ attributeName = nullptr;  // No attribute is pending any more.
+ } else {  // No pending name, e.g. attributeNameComplete() nulled it for a duplicate; the buffered value is not added.
+ clearStrBufAfterUse();
+ }
+}
+
+void nsHtml5Tokenizer::start() {  // Initializes the tokenizer, notifies the token handler, then seeds line/col for the active mode.
+ initializeWithoutStarting();
+ tokenHandler->startTokenization(this);
+ if (mViewSource) {  // Mode 1: a view-source object is attached.
+ line = 1;
+ col = -1;
+ nextCharOnNewLine = false;
+ } else if (tokenHandler->WantsLineAndColumn()) {  // Mode 2: the handler asked for line/column information.
+ line = 0;
+ col = 1;
+ nextCharOnNewLine = true;
+ } else {  // Mode 3: neither of the above.
+ line = -1;  // NOTE(review): -1 presumably means "not tracked" - confirm against the policy classes.
+ col = -1;
+ nextCharOnNewLine = false;
+ }
+}
+
+bool nsHtml5Tokenizer::tokenizeBuffer(nsHtml5UTF16Buffer* buffer) {  // Runs the state loop over buffer's [start, end) range, moves the buffer's start forward, and returns lastCR.
+ int32_t state = stateSave;  // Continue from the state stored in stateSave/returnStateSave.
+ int32_t returnState = returnStateSave;
+ char16_t c = '\0';
+ shouldSuspend = false;
+ lastCR = false;  // Reset on every call; the value after the state loop is the return value.
+ int32_t start = buffer->getStart();
+ int32_t end = buffer->getEnd();
+ int32_t pos = start - 1;  // One before the first unread code unit: the DATA loop pre-increments pos before reading.
+ switch (state) {  // Seeds cstart according to the state being resumed.
+ case DATA:
+ case RCDATA:
+ case SCRIPT_DATA:
+ case PLAINTEXT:
+ case RAWTEXT:
+ case CDATA_SECTION:
+ case SCRIPT_DATA_ESCAPED:
+ case SCRIPT_DATA_ESCAPE_START:
+ case SCRIPT_DATA_ESCAPE_START_DASH:
+ case SCRIPT_DATA_ESCAPED_DASH:
+ case SCRIPT_DATA_ESCAPED_DASH_DASH:
+ case SCRIPT_DATA_DOUBLE_ESCAPE_START:
+ case SCRIPT_DATA_DOUBLE_ESCAPED:
+ case SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN:
+ case SCRIPT_DATA_DOUBLE_ESCAPED_DASH:
+ case SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH:
+ case SCRIPT_DATA_DOUBLE_ESCAPE_END: {
+ cstart = start;  // For these states the pending character run begins at the buffer start (presumably cstart marks the run's first index - confirm).
+ break;
+ }
+ default: {
+ cstart = INT32_MAX;  // Every other state: sentinel for "no character run in progress".
+ break;
+ }
+ }
+ if (mViewSource) {  // Choose the stateLoop instantiation: view-source, line/column, or fastest.
+ mViewSource->SetBuffer(buffer);
+ pos = stateLoop<nsHtml5ViewSourcePolicy>(state, c, pos, buffer->getBuffer(),
+ false, returnState,
+ buffer->getEnd());
+ mViewSource->DropBuffer((pos == buffer->getEnd()) ? pos : pos + 1);  // Same index that becomes the buffer's new start below.
+ } else if (tokenHandler->WantsLineAndColumn()) {
+ pos = stateLoop<nsHtml5LineColPolicy>(state, c, pos, buffer->getBuffer(),
+ false, returnState, buffer->getEnd());
+ } else {
+ pos = stateLoop<nsHtml5FastestPolicy>(state, c, pos, buffer->getBuffer(),
+ false, returnState, buffer->getEnd());
+ }
+ if (pos == end) {  // The loop reached the end of the buffer.
+ buffer->setStart(pos);
+ } else {  // The loop stopped early at pos, so the next call resumes at pos + 1.
+ buffer->setStart(pos + 1);
+ }
+ return lastCR;
+}
+
+template <class P>
+int32_t nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos,
+ char16_t* buf, bool reconsume,
+ int32_t returnState, int32_t endPos) {
+ bool reportedConsecutiveHyphens = false;
+stateloop:
+ for (;;) {
+ switch (state) {
+ case DATA: {
+ for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ }
+ switch (c) {
+ case '&': {
+ flushChars(buf, pos);
+ MOZ_ASSERT(!charRefBufLen,
+ "charRefBufLen not reset after previous use!");
+ appendCharRefBuf(c);
+ setAdditionalAndRememberAmpersandLocation('\0');
+ returnState = state;
+ state =
+ P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '<': {
+ flushChars(buf, pos);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::TAG_OPEN, reconsume, pos);
+ NS_HTML5_BREAK(dataloop);
+ }
+ case '\0': {
+ maybeEmitReplacementCharacter(buf, pos);
+ continue;
+ }
+ case '\r': {
+ emitCarriageReturn<P>(buf, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ default: {
+ continue;
+ }
+ }
+ }
+ dataloop_end:;
+ [[fallthrough]];
+ }
+ case TAG_OPEN: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ if (c >= 'A' && c <= 'Z') {
+ endTag = false;
+ clearStrBufBeforeUse();
+ appendStrBuf((char16_t)(c + 0x20));
+ containsHyphen = false;
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::TAG_NAME,
+ reconsume, pos);
+ NS_HTML5_BREAK(tagopenloop);
+ } else if (c >= 'a' && c <= 'z') {
+ endTag = false;
+ clearStrBufBeforeUse();
+ appendStrBuf(c);
+ containsHyphen = false;
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::TAG_NAME,
+ reconsume, pos);
+ NS_HTML5_BREAK(tagopenloop);
+ }
+ switch (c) {
+ case '!': {
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::MARKUP_DECLARATION_OPEN,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '/': {
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::CLOSE_TAG_OPEN, reconsume,
+ pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\?': {
+ if (viewingXmlSource) {
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::PROCESSING_INSTRUCTION,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ if (P::reportErrors) {
+ errProcessingInstruction();
+ }
+ clearStrBufBeforeUse();
+ appendStrBuf(c);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BOGUS_COMMENT, reconsume,
+ pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '>': {
+ if (P::reportErrors) {
+ errLtGt();
+ }
+ tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 2);
+ cstart = pos + 1;
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ default: {
+ if (P::reportErrors) {
+ errBadCharAfterLt(c);
+ }
+ tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 1);
+ cstart = pos;
+ reconsume = true;
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ tagopenloop_end:;
+ [[fallthrough]];
+ }
+ case TAG_NAME: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '\r': {
+ P::silentCarriageReturn(this);
+ strBufToElementNameString();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME,
+ reconsume, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ case ' ':
+ case '\t':
+ case '\f': {
+ strBufToElementNameString();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME,
+ reconsume, pos);
+ NS_HTML5_BREAK(tagnameloop);
+ }
+ case '/': {
+ strBufToElementNameString();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::SELF_CLOSING_START_TAG,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '>': {
+ strBufToElementNameString();
+ state = P::transition(mViewSource.get(),
+ emitCurrentTagToken(false, pos), reconsume,
+ pos);
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\0': {
+ c = 0xfffd;
+ [[fallthrough]];
+ }
+ default: {
+ if (c >= 'A' && c <= 'Z') {
+ c += 0x20;
+ } else if (c == '-') {
+ containsHyphen = true;
+ }
+ appendStrBuf(c);
+ continue;
+ }
+ }
+ }
+ tagnameloop_end:;
+ [[fallthrough]];
+ }
+ case BEFORE_ATTRIBUTE_NAME: {
+ for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ }
+ switch (c) {
+ case '\r': {
+ P::silentCarriageReturn(this);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ case ' ':
+ case '\t':
+ case '\f': {
+ continue;
+ }
+ case '/': {
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::SELF_CLOSING_START_TAG,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '>': {
+ state = P::transition(mViewSource.get(),
+ emitCurrentTagToken(false, pos), reconsume,
+ pos);
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\0': {
+ c = 0xfffd;
+ [[fallthrough]];
+ }
+ case '\"':
+ case '\'':
+ case '<':
+ case '=': {
+ if (P::reportErrors) {
+ errBadCharBeforeAttributeNameOrNull(c);
+ }
+ [[fallthrough]];
+ }
+ default: {
+ if (c >= 'A' && c <= 'Z') {
+ c += 0x20;
+ }
+ attributeLine = line;
+ clearStrBufBeforeUse();
+ appendStrBuf(c);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::ATTRIBUTE_NAME, reconsume,
+ pos);
+ NS_HTML5_BREAK(beforeattributenameloop);
+ }
+ }
+ }
+ beforeattributenameloop_end:;
+ [[fallthrough]];
+ }
+ case ATTRIBUTE_NAME: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '\r': {
+ P::silentCarriageReturn(this);
+ attributeNameComplete();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::AFTER_ATTRIBUTE_NAME,
+ reconsume, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ case ' ':
+ case '\t':
+ case '\f': {
+ attributeNameComplete();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::AFTER_ATTRIBUTE_NAME,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '/': {
+ attributeNameComplete();
+ addAttributeWithoutValue();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::SELF_CLOSING_START_TAG,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '=': {
+ attributeNameComplete();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BEFORE_ATTRIBUTE_VALUE,
+ reconsume, pos);
+ NS_HTML5_BREAK(attributenameloop);
+ }
+ case '>': {
+ attributeNameComplete();
+ addAttributeWithoutValue();
+ state = P::transition(mViewSource.get(),
+ emitCurrentTagToken(false, pos), reconsume,
+ pos);
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\0': {
+ c = 0xfffd;
+ [[fallthrough]];
+ }
+ case '\"':
+ case '\'':
+ case '<': {
+ if (P::reportErrors) {
+ errQuoteOrLtInAttributeNameOrNull(c);
+ }
+ [[fallthrough]];
+ }
+ default: {
+ if (c >= 'A' && c <= 'Z') {
+ c += 0x20;
+ }
+ appendStrBuf(c);
+ continue;
+ }
+ }
+ }
+ attributenameloop_end:;
+ [[fallthrough]];
+ }
+ case BEFORE_ATTRIBUTE_VALUE: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '\r': {
+ P::silentCarriageReturn(this);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ case ' ':
+ case '\t':
+ case '\f': {
+ continue;
+ }
+ case '\"': {
+ attributeLine = line;
+ clearStrBufBeforeUse();
+ state =
+ P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::ATTRIBUTE_VALUE_DOUBLE_QUOTED,
+ reconsume, pos);
+ NS_HTML5_BREAK(beforeattributevalueloop);
+ }
+ case '&': {
+ attributeLine = line;
+ clearStrBufBeforeUse();
+ reconsume = true;
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::ATTRIBUTE_VALUE_UNQUOTED,
+ reconsume, pos);
+
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\'': {
+ attributeLine = line;
+ clearStrBufBeforeUse();
+ state =
+ P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::ATTRIBUTE_VALUE_SINGLE_QUOTED,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '>': {
+ if (P::reportErrors) {
+ errAttributeValueMissing();
+ }
+ addAttributeWithoutValue();
+ state = P::transition(mViewSource.get(),
+ emitCurrentTagToken(false, pos), reconsume,
+ pos);
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\0': {
+ c = 0xfffd;
+ [[fallthrough]];
+ }
+ case '<':
+ case '=':
+ case '`': {
+ if (P::reportErrors) {
+ errLtOrEqualsOrGraveInUnquotedAttributeOrNull(c);
+ }
+ [[fallthrough]];
+ }
+ default: {
+ attributeLine = line;
+ clearStrBufBeforeUse();
+ appendStrBuf(c);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::ATTRIBUTE_VALUE_UNQUOTED,
+ reconsume, pos);
+
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ beforeattributevalueloop_end:;
+ [[fallthrough]];
+ }
+ case ATTRIBUTE_VALUE_DOUBLE_QUOTED: {
+ for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ }
+ switch (c) {
+ case '\"': {
+ addAttributeWithValue();
+ state =
+ P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::AFTER_ATTRIBUTE_VALUE_QUOTED,
+ reconsume, pos);
+ NS_HTML5_BREAK(attributevaluedoublequotedloop);
+ }
+ case '&': {
+ MOZ_ASSERT(!charRefBufLen,
+ "charRefBufLen not reset after previous use!");
+ appendCharRefBuf(c);
+ setAdditionalAndRememberAmpersandLocation('\"');
+ returnState = state;
+ state =
+ P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\r': {
+ appendStrBufCarriageReturn<P>();
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ appendStrBufLineFeed<P>();
+ continue;
+ }
+ case '\0': {
+ c = 0xfffd;
+ [[fallthrough]];
+ }
+ default: {
+ appendStrBuf(c);
+ continue;
+ }
+ }
+ }
+ attributevaluedoublequotedloop_end:;
+ [[fallthrough]];
+ }
+ case AFTER_ATTRIBUTE_VALUE_QUOTED: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '\r': {
+ P::silentCarriageReturn(this);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME,
+ reconsume, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ case ' ':
+ case '\t':
+ case '\f': {
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '/': {
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::SELF_CLOSING_START_TAG,
+ reconsume, pos);
+ NS_HTML5_BREAK(afterattributevaluequotedloop);
+ }
+ case '>': {
+ state = P::transition(mViewSource.get(),
+ emitCurrentTagToken(false, pos), reconsume,
+ pos);
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ default: {
+ if (P::reportErrors) {
+ errNoSpaceBetweenAttributes();
+ }
+ reconsume = true;
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ afterattributevaluequotedloop_end:;
+ [[fallthrough]];
+ }
+ case SELF_CLOSING_START_TAG: {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '>': {
+ state =
+ P::transition(mViewSource.get(), emitCurrentTagToken(true, pos),
+ reconsume, pos);
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ default: {
+ if (P::reportErrors) {
+ errSlashNotFollowedByGt();
+ }
+ reconsume = true;
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ case ATTRIBUTE_VALUE_UNQUOTED: {
+ for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ }
+ switch (c) {
+ case '\r': {
+ P::silentCarriageReturn(this);
+ addAttributeWithValue();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME,
+ reconsume, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ case ' ':
+ case '\t':
+ case '\f': {
+ addAttributeWithValue();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '&': {
+ MOZ_ASSERT(!charRefBufLen,
+ "charRefBufLen not reset after previous use!");
+ appendCharRefBuf(c);
+ setAdditionalAndRememberAmpersandLocation('>');
+ returnState = state;
+ state =
+ P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '>': {
+ addAttributeWithValue();
+ state = P::transition(mViewSource.get(),
+ emitCurrentTagToken(false, pos), reconsume,
+ pos);
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\0': {
+ c = 0xfffd;
+ [[fallthrough]];
+ }
+ case '<':
+ case '\"':
+ case '\'':
+ case '=':
+ case '`': {
+ if (P::reportErrors) {
+ errUnquotedAttributeValOrNull(c);
+ }
+ [[fallthrough]];
+ }
+ default: {
+ appendStrBuf(c);
+ continue;
+ }
+ }
+ }
+ }
+ case AFTER_ATTRIBUTE_NAME: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '\r': {
+ P::silentCarriageReturn(this);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ case ' ':
+ case '\t':
+ case '\f': {
+ continue;
+ }
+ case '/': {
+ addAttributeWithoutValue();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::SELF_CLOSING_START_TAG,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '=': {
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BEFORE_ATTRIBUTE_VALUE,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '>': {
+ addAttributeWithoutValue();
+ state = P::transition(mViewSource.get(),
+ emitCurrentTagToken(false, pos), reconsume,
+ pos);
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\0': {
+ c = 0xfffd;
+ [[fallthrough]];
+ }
+ case '\"':
+ case '\'':
+ case '<': {
+ if (P::reportErrors) {
+ errQuoteOrLtInAttributeNameOrNull(c);
+ }
+ [[fallthrough]];
+ }
+ default: {
+ addAttributeWithoutValue();
+ if (c >= 'A' && c <= 'Z') {
+ c += 0x20;
+ }
+ clearStrBufBeforeUse();
+ appendStrBuf(c);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::ATTRIBUTE_NAME, reconsume,
+ pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ }
+ case MARKUP_DECLARATION_OPEN: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '-': {
+ clearStrBufBeforeUse();
+ appendStrBuf(c);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::MARKUP_DECLARATION_HYPHEN,
+ reconsume, pos);
+ NS_HTML5_BREAK(markupdeclarationopenloop);
+ }
+ case 'd':
+ case 'D': {
+ clearStrBufBeforeUse();
+ appendStrBuf(c);
+ index = 0;
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::MARKUP_DECLARATION_OCTYPE,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '[': {
+ if (tokenHandler->cdataSectionAllowed()) {
+ clearStrBufBeforeUse();
+ appendStrBuf(c);
+ index = 0;
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::CDATA_START, reconsume,
+ pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ [[fallthrough]];
+ }
+ default: {
+ if (P::reportErrors) {
+ errBogusComment();
+ }
+ clearStrBufBeforeUse();
+ reconsume = true;
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BOGUS_COMMENT, reconsume,
+ pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ markupdeclarationopenloop_end:;
+ [[fallthrough]];
+ }
+ case MARKUP_DECLARATION_HYPHEN: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '-': {
+ clearStrBufAfterOneHyphen();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::COMMENT_START, reconsume,
+ pos);
+ NS_HTML5_BREAK(markupdeclarationhyphenloop);
+ }
+ default: {
+ if (P::reportErrors) {
+ errBogusComment();
+ }
+ reconsume = true;
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BOGUS_COMMENT, reconsume,
+ pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ markupdeclarationhyphenloop_end:;
+ [[fallthrough]];
+ }
+ case COMMENT_START: {
+ reportedConsecutiveHyphens = false;
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '-': {
+ appendStrBuf(c);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::COMMENT_START_DASH,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '>': {
+ if (P::reportErrors) {
+ errPrematureEndOfComment();
+ }
+ emitComment(0, pos);
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
+ reconsume, pos);
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '<': {
+ appendStrBuf(c);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::COMMENT_LESSTHAN,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\r': {
+ appendStrBufCarriageReturn<P>();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::COMMENT, reconsume, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ appendStrBufLineFeed<P>();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::COMMENT, reconsume, pos);
+ NS_HTML5_BREAK(commentstartloop);
+ }
+ case '\0': {
+ c = 0xfffd;
+ [[fallthrough]];
+ }
+ default: {
+ appendStrBuf(c);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::COMMENT, reconsume, pos);
+ NS_HTML5_BREAK(commentstartloop);
+ }
+ }
+ }
+ commentstartloop_end:;
+ [[fallthrough]];
+ }
+ case COMMENT: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '-': {
+ appendStrBuf(c);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::COMMENT_END_DASH,
+ reconsume, pos);
+ NS_HTML5_BREAK(commentloop);
+ }
+ case '<': {
+ appendStrBuf(c);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::COMMENT_LESSTHAN,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\r': {
+ appendStrBufCarriageReturn<P>();
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ appendStrBufLineFeed<P>();
+ continue;
+ }
+ case '\0': {
+ c = 0xfffd;
+ [[fallthrough]];
+ }
+ default: {
+ appendStrBuf(c);
+ continue;
+ }
+ }
+ }
+ commentloop_end:;
+ [[fallthrough]];
+ }
+ case COMMENT_END_DASH: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '-': {
+ appendStrBuf(c);
+ state =
+ P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::COMMENT_END, reconsume, pos);
+ NS_HTML5_BREAK(commentenddashloop);
+ }
+ case '<': {
+ appendStrBuf(c);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::COMMENT_LESSTHAN,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\r': {
+ appendStrBufCarriageReturn<P>();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::COMMENT, reconsume, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ appendStrBufLineFeed<P>();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::COMMENT, reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\0': {
+ c = 0xfffd;
+ [[fallthrough]];
+ }
+ default: {
+ appendStrBuf(c);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::COMMENT, reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ commentenddashloop_end:;
+ [[fallthrough]];
+ }
+ case COMMENT_END: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '>': {
+ emitComment(2, pos);
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
+ reconsume, pos);
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '-': {
+ adjustDoubleHyphenAndAppendToStrBufAndErr(
+ c, reportedConsecutiveHyphens);
+ reportedConsecutiveHyphens = true;
+ continue;
+ }
+ case '<': {
+ appendStrBuf(c);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::COMMENT_LESSTHAN,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\r': {
+ adjustDoubleHyphenAndAppendToStrBufCarriageReturn<P>();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::COMMENT, reconsume, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ adjustDoubleHyphenAndAppendToStrBufLineFeed<P>();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::COMMENT, reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '!': {
+ appendStrBuf(c);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::COMMENT_END_BANG,
+ reconsume, pos);
+ NS_HTML5_BREAK(commentendloop);
+ }
+ case '\0': {
+ c = 0xfffd;
+ [[fallthrough]];
+ }
+ default: {
+ adjustDoubleHyphenAndAppendToStrBufAndErr(
+ c, reportedConsecutiveHyphens);
+ reportedConsecutiveHyphens = true;
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::COMMENT, reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ commentendloop_end:;
+ [[fallthrough]];
+ }
+ case COMMENT_END_BANG: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '>': {
+ emitComment(3, pos);
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
+ reconsume, pos);
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '-': {
+ appendStrBuf(c);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::COMMENT_END_DASH,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\r': {
+ appendStrBufCarriageReturn<P>();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::COMMENT, reconsume, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ appendStrBufLineFeed<P>();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::COMMENT, reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\0': {
+ c = 0xfffd;
+ [[fallthrough]];
+ }
+ default: {
+ appendStrBuf(c);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::COMMENT, reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ }
+ case COMMENT_LESSTHAN: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '!': {
+ appendStrBuf(c);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::COMMENT_LESSTHAN_BANG,
+ reconsume, pos);
+ NS_HTML5_BREAK(commentlessthanloop);
+ }
+ case '<': {
+ appendStrBuf(c);
+ continue;
+ }
+ case '-': {
+ appendStrBuf(c);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::COMMENT_END_DASH,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\r': {
+ appendStrBufCarriageReturn<P>();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::COMMENT, reconsume, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ appendStrBufLineFeed<P>();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::COMMENT, reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\0': {
+ c = 0xfffd;
+ [[fallthrough]];
+ }
+ default: {
+ appendStrBuf(c);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::COMMENT, reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ commentlessthanloop_end:;
+ [[fallthrough]];
+ }
+ case COMMENT_LESSTHAN_BANG: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '-': {
+ appendStrBuf(c);
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::COMMENT_LESSTHAN_BANG_DASH, reconsume, pos);
+ NS_HTML5_BREAK(commentlessthanbangloop);
+ }
+ case '<': {
+ appendStrBuf(c);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::COMMENT_LESSTHAN,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\r': {
+ appendStrBufCarriageReturn<P>();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::COMMENT, reconsume, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ appendStrBufLineFeed<P>();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::COMMENT, reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\0': {
+ c = 0xfffd;
+ [[fallthrough]];
+ }
+ default: {
+ appendStrBuf(c);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::COMMENT, reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ commentlessthanbangloop_end:;
+ [[fallthrough]];
+ }
+ case COMMENT_LESSTHAN_BANG_DASH: {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '-': {
+ appendStrBuf(c);
+ state =
+ P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::COMMENT_LESSTHAN_BANG_DASH_DASH,
+ reconsume, pos);
+ break;
+ }
+ case '<': {
+ appendStrBuf(c);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::COMMENT_LESSTHAN, reconsume,
+ pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\r': {
+ appendStrBufCarriageReturn<P>();
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT,
+ reconsume, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ appendStrBufLineFeed<P>();
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\0': {
+ c = 0xfffd;
+ [[fallthrough]];
+ }
+ default: {
+ appendStrBuf(c);
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ [[fallthrough]];
+ }
+ case COMMENT_LESSTHAN_BANG_DASH_DASH: {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '>': {
+ appendStrBuf(c);
+ emitComment(3, pos);
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
+ reconsume, pos);
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '-': {
+ if (P::reportErrors) {
+ errNestedComment();
+ }
+ adjustDoubleHyphenAndAppendToStrBufAndErr(
+ c, reportedConsecutiveHyphens);
+ reportedConsecutiveHyphens = true;
+ state =
+ P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT_END,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\r': {
+ c = '\n';
+ P::silentCarriageReturn(this);
+ if (P::reportErrors) {
+ errNestedComment();
+ }
+ adjustDoubleHyphenAndAppendToStrBufAndErr(
+ c, reportedConsecutiveHyphens);
+ reportedConsecutiveHyphens = true;
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT,
+ reconsume, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ if (P::reportErrors) {
+ errNestedComment();
+ }
+ adjustDoubleHyphenAndAppendToStrBufAndErr(
+ c, reportedConsecutiveHyphens);
+ reportedConsecutiveHyphens = true;
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '!': {
+ if (P::reportErrors) {
+ errNestedComment();
+ }
+ adjustDoubleHyphenAndAppendToStrBufAndErr(
+ c, reportedConsecutiveHyphens);
+ reportedConsecutiveHyphens = true;
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::COMMENT_END_BANG, reconsume,
+ pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\0': {
+ c = 0xfffd;
+ [[fallthrough]];
+ }
+ default: {
+ if (P::reportErrors) {
+ errNestedComment();
+ }
+ adjustDoubleHyphenAndAppendToStrBufAndErr(
+ c, reportedConsecutiveHyphens);
+ reportedConsecutiveHyphens = true;
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ case COMMENT_START_DASH: {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '-': {
+ appendStrBuf(c);
+ state =
+ P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT_END,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '>': {
+ if (P::reportErrors) {
+ errPrematureEndOfComment();
+ }
+ emitComment(1, pos);
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
+ reconsume, pos);
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '<': {
+ appendStrBuf(c);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::COMMENT_LESSTHAN, reconsume,
+ pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\r': {
+ appendStrBufCarriageReturn<P>();
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT,
+ reconsume, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ appendStrBufLineFeed<P>();
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\0': {
+ c = 0xfffd;
+ [[fallthrough]];
+ }
+ default: {
+ appendStrBuf(c);
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ case CDATA_START: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ if (index < 6) {
+ if (c == nsHtml5Tokenizer::CDATA_LSQB[index]) {
+ appendStrBuf(c);
+ } else {
+ if (P::reportErrors) {
+ errBogusComment();
+ }
+ reconsume = true;
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BOGUS_COMMENT, reconsume,
+ pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ index++;
+ continue;
+ } else {
+ clearStrBufAfterUse();
+ cstart = pos;
+ reconsume = true;
+ state =
+ P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::CDATA_SECTION, reconsume, pos);
+ break;
+ }
+ }
+ [[fallthrough]];
+ }
+ case CDATA_SECTION: {
+ for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ }
+ switch (c) {
+ case ']': {
+ flushChars(buf, pos);
+ state =
+ P::transition(mViewSource.get(), nsHtml5Tokenizer::CDATA_RSQB,
+ reconsume, pos);
+ NS_HTML5_BREAK(cdatasectionloop);
+ }
+ case '\0': {
+ maybeEmitReplacementCharacter(buf, pos);
+ continue;
+ }
+ case '\r': {
+ emitCarriageReturn<P>(buf, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ default: {
+ continue;
+ }
+ }
+ }
+ cdatasectionloop_end:;
+ [[fallthrough]];
+ }
+ case CDATA_RSQB: {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case ']': {
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::CDATA_RSQB_RSQB, reconsume,
+ pos);
+ break;
+ }
+ default: {
+ tokenHandler->characters(nsHtml5Tokenizer::RSQB_RSQB, 0, 1);
+ cstart = pos;
+ reconsume = true;
+ state =
+ P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::CDATA_SECTION, reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ [[fallthrough]];
+ }
+ case CDATA_RSQB_RSQB: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case ']': {
+ tokenHandler->characters(nsHtml5Tokenizer::RSQB_RSQB, 0, 1);
+ continue;
+ }
+ case '>': {
+ cstart = pos + 1;
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
+ reconsume, pos);
+ suspendIfRequestedAfterCurrentNonTextToken();
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ default: {
+ tokenHandler->characters(nsHtml5Tokenizer::RSQB_RSQB, 0, 2);
+ cstart = pos;
+ reconsume = true;
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::CDATA_SECTION, reconsume,
+ pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ }
+ case ATTRIBUTE_VALUE_SINGLE_QUOTED: {
+ for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ }
+ switch (c) {
+ case '\'': {
+ addAttributeWithValue();
+ state =
+ P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::AFTER_ATTRIBUTE_VALUE_QUOTED,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '&': {
+ MOZ_ASSERT(!charRefBufLen,
+ "charRefBufLen not reset after previous use!");
+ appendCharRefBuf(c);
+ setAdditionalAndRememberAmpersandLocation('\'');
+ returnState = state;
+ state =
+ P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE,
+ reconsume, pos);
+ NS_HTML5_BREAK(attributevaluesinglequotedloop);
+ }
+ case '\r': {
+ appendStrBufCarriageReturn<P>();
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ appendStrBufLineFeed<P>();
+ continue;
+ }
+ case '\0': {
+ c = 0xfffd;
+ [[fallthrough]];
+ }
+ default: {
+ appendStrBuf(c);
+ continue;
+ }
+ }
+ }
+ attributevaluesinglequotedloop_end:;
+ [[fallthrough]];
+ }
+ case CONSUME_CHARACTER_REFERENCE: {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case ' ':
+ case '\t':
+ case '\n':
+ case '\r':
+ case '\f':
+ case '<':
+ case '&':
+ case '\0':
+ case ';': {
+ emitOrAppendCharRefBuf(returnState);
+ if (!(returnState & DATA_AND_RCDATA_MASK)) {
+ cstart = pos;
+ }
+ reconsume = true;
+ state =
+ P::transition(mViewSource.get(), returnState, reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '#': {
+ appendCharRefBuf('#');
+ state =
+ P::transition(mViewSource.get(), nsHtml5Tokenizer::CONSUME_NCR,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ default: {
+ if (c == additional) {
+ emitOrAppendCharRefBuf(returnState);
+ reconsume = true;
+ state =
+ P::transition(mViewSource.get(), returnState, reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ if (c >= 'a' && c <= 'z') {
+ firstCharKey = c - 'a' + 26;
+ } else if (c >= 'A' && c <= 'Z') {
+ firstCharKey = c - 'A';
+ } else {
+ if (c == ';') {
+ if (P::reportErrors) {
+ errNoNamedCharacterMatch();
+ }
+ }
+ emitOrAppendCharRefBuf(returnState);
+ if (!(returnState & DATA_AND_RCDATA_MASK)) {
+ cstart = pos;
+ }
+ reconsume = true;
+ state =
+ P::transition(mViewSource.get(), returnState, reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ appendCharRefBuf(c);
+ state =
+ P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::CHARACTER_REFERENCE_HILO_LOOKUP,
+ reconsume, pos);
+ break;
+ }
+ }
+ [[fallthrough]];
+ }
+ case CHARACTER_REFERENCE_HILO_LOOKUP: {
+ {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ int32_t hilo = 0;
+ if (c <= 'z') {
+ const int32_t* row = nsHtml5NamedCharactersAccel::HILO_ACCEL[c];
+ if (row) {
+ hilo = row[firstCharKey];
+ }
+ }
+ if (!hilo) {
+ if (c == ';') {
+ if (P::reportErrors) {
+ errNoNamedCharacterMatch();
+ }
+ }
+ emitOrAppendCharRefBuf(returnState);
+ if (!(returnState & DATA_AND_RCDATA_MASK)) {
+ cstart = pos;
+ }
+ reconsume = true;
+ state =
+ P::transition(mViewSource.get(), returnState, reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ appendCharRefBuf(c);
+ lo = hilo & 0xFFFF;
+ hi = hilo >> 16;
+ entCol = -1;
+ candidate = -1;
+ charRefBufMark = 0;
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::CHARACTER_REFERENCE_TAIL,
+ reconsume, pos);
+ }
+ [[fallthrough]];
+ }
+ case CHARACTER_REFERENCE_TAIL: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ entCol++;
+ for (;;) {
+ if (hi < lo) {
+ NS_HTML5_BREAK(outer);
+ }
+ if (entCol == nsHtml5NamedCharacters::NAMES[lo].length()) {
+ candidate = lo;
+ charRefBufMark = charRefBufLen;
+ lo++;
+ } else if (entCol > nsHtml5NamedCharacters::NAMES[lo].length()) {
+ NS_HTML5_BREAK(outer);
+ } else if (c > nsHtml5NamedCharacters::NAMES[lo].charAt(entCol)) {
+ lo++;
+ } else {
+ NS_HTML5_BREAK(loloop);
+ }
+ }
+ loloop_end:;
+ for (;;) {
+ if (hi < lo) {
+ NS_HTML5_BREAK(outer);
+ }
+ if (entCol == nsHtml5NamedCharacters::NAMES[hi].length()) {
+ NS_HTML5_BREAK(hiloop);
+ }
+ if (entCol > nsHtml5NamedCharacters::NAMES[hi].length()) {
+ NS_HTML5_BREAK(outer);
+ } else if (c < nsHtml5NamedCharacters::NAMES[hi].charAt(entCol)) {
+ hi--;
+ } else {
+ NS_HTML5_BREAK(hiloop);
+ }
+ }
+ hiloop_end:;
+ if (c == ';') {
+ if (entCol + 1 == nsHtml5NamedCharacters::NAMES[lo].length()) {
+ candidate = lo;
+ charRefBufMark = charRefBufLen;
+ }
+ NS_HTML5_BREAK(outer);
+ }
+ if (hi < lo) {
+ NS_HTML5_BREAK(outer);
+ }
+ appendCharRefBuf(c);
+ continue;
+ }
+ outer_end:;
+ if (candidate == -1) {
+ if (c == ';') {
+ if (P::reportErrors) {
+ errNoNamedCharacterMatch();
+ }
+ }
+ emitOrAppendCharRefBuf(returnState);
+ if (!(returnState & DATA_AND_RCDATA_MASK)) {
+ cstart = pos;
+ }
+ reconsume = true;
+ state = P::transition(mViewSource.get(), returnState, reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ } else {
+ const nsHtml5CharacterName& candidateName =
+ nsHtml5NamedCharacters::NAMES[candidate];
+ if (!candidateName.length() ||
+ candidateName.charAt(candidateName.length() - 1) != ';') {
+ if ((returnState & DATA_AND_RCDATA_MASK)) {
+ char16_t ch;
+ if (charRefBufMark == charRefBufLen) {
+ ch = c;
+ } else {
+ ch = charRefBuf[charRefBufMark];
+ }
+ if (ch == '=' || (ch >= '0' && ch <= '9') ||
+ (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z')) {
+ if (c == ';') {
+ if (P::reportErrors) {
+ errNoNamedCharacterMatch();
+ }
+ }
+ appendCharRefBufToStrBuf();
+ reconsume = true;
+ state = P::transition(mViewSource.get(), returnState, reconsume,
+ pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ if ((returnState & DATA_AND_RCDATA_MASK)) {
+ if (P::reportErrors) {
+ errUnescapedAmpersandInterpretedAsCharacterReference();
+ }
+ } else {
+ if (P::reportErrors) {
+ errNotSemicolonTerminated();
+ }
+ }
+ }
+ P::completedNamedCharacterReference(mViewSource.get());
+ const char16_t* val = nsHtml5NamedCharacters::VALUES[candidate];
+ if (!val[1]) {
+ emitOrAppendOne(val, returnState);
+ } else {
+ emitOrAppendTwo(val, returnState);
+ }
+ if (charRefBufMark < charRefBufLen) {
+ if ((returnState & DATA_AND_RCDATA_MASK)) {
+ appendStrBuf(charRefBuf, charRefBufMark,
+ charRefBufLen - charRefBufMark);
+ } else {
+ tokenHandler->characters(charRefBuf, charRefBufMark,
+ charRefBufLen - charRefBufMark);
+ }
+ }
+ bool earlyBreak = (c == ';' && charRefBufMark == charRefBufLen);
+ charRefBufLen = 0;
+ if (!(returnState & DATA_AND_RCDATA_MASK)) {
+ cstart = earlyBreak ? pos + 1 : pos;
+ }
+ reconsume = !earlyBreak;
+ state = P::transition(mViewSource.get(), returnState, reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ case CONSUME_NCR: {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ value = 0;
+ seenDigits = false;
+ switch (c) {
+ case 'x':
+ case 'X': {
+ appendCharRefBuf(c);
+ state =
+ P::transition(mViewSource.get(), nsHtml5Tokenizer::HEX_NCR_LOOP,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ default: {
+ reconsume = true;
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::DECIMAL_NRC_LOOP, reconsume,
+ pos);
+ break;
+ }
+ }
+ [[fallthrough]];
+ }
+ case DECIMAL_NRC_LOOP: {
+ for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ }
+ MOZ_ASSERT(value >= 0, "value must not become negative.");
+ if (c >= '0' && c <= '9') {
+ seenDigits = true;
+ if (value <= 0x10FFFF) {
+ value *= 10;
+ value += c - '0';
+ }
+ continue;
+ } else if (c == ';') {
+ if (seenDigits) {
+ if (!(returnState & DATA_AND_RCDATA_MASK)) {
+ cstart = pos + 1;
+ }
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::HANDLE_NCR_VALUE,
+ reconsume, pos);
+ NS_HTML5_BREAK(decimalloop);
+ } else {
+ if (P::reportErrors) {
+ errNoDigitsInNCR();
+ }
+ appendCharRefBuf(';');
+ emitOrAppendCharRefBuf(returnState);
+ if (!(returnState & DATA_AND_RCDATA_MASK)) {
+ cstart = pos + 1;
+ }
+ state =
+ P::transition(mViewSource.get(), returnState, reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ } else {
+ if (!seenDigits) {
+ if (P::reportErrors) {
+ errNoDigitsInNCR();
+ }
+ emitOrAppendCharRefBuf(returnState);
+ if (!(returnState & DATA_AND_RCDATA_MASK)) {
+ cstart = pos;
+ }
+ reconsume = true;
+ state =
+ P::transition(mViewSource.get(), returnState, reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ } else {
+ if (P::reportErrors) {
+ errCharRefLacksSemicolon();
+ }
+ if (!(returnState & DATA_AND_RCDATA_MASK)) {
+ cstart = pos;
+ }
+ reconsume = true;
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::HANDLE_NCR_VALUE,
+ reconsume, pos);
+ NS_HTML5_BREAK(decimalloop);
+ }
+ }
+ }
+ decimalloop_end:;
+ [[fallthrough]];
+ }
+ case HANDLE_NCR_VALUE: {
+ charRefBufLen = 0;
+ handleNcrValue(returnState);
+ state = P::transition(mViewSource.get(), returnState, reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case HEX_NCR_LOOP: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ MOZ_ASSERT(value >= 0, "value must not become negative.");
+ if (c >= '0' && c <= '9') {
+ seenDigits = true;
+ if (value <= 0x10FFFF) {
+ value *= 16;
+ value += c - '0';
+ }
+ continue;
+ } else if (c >= 'A' && c <= 'F') {
+ seenDigits = true;
+ if (value <= 0x10FFFF) {
+ value *= 16;
+ value += c - 'A' + 10;
+ }
+ continue;
+ } else if (c >= 'a' && c <= 'f') {
+ seenDigits = true;
+ if (value <= 0x10FFFF) {
+ value *= 16;
+ value += c - 'a' + 10;
+ }
+ continue;
+ } else if (c == ';') {
+ if (seenDigits) {
+ if (!(returnState & DATA_AND_RCDATA_MASK)) {
+ cstart = pos + 1;
+ }
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::HANDLE_NCR_VALUE,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ } else {
+ if (P::reportErrors) {
+ errNoDigitsInNCR();
+ }
+ appendCharRefBuf(';');
+ emitOrAppendCharRefBuf(returnState);
+ if (!(returnState & DATA_AND_RCDATA_MASK)) {
+ cstart = pos + 1;
+ }
+ state =
+ P::transition(mViewSource.get(), returnState, reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ } else {
+ if (!seenDigits) {
+ if (P::reportErrors) {
+ errNoDigitsInNCR();
+ }
+ emitOrAppendCharRefBuf(returnState);
+ if (!(returnState & DATA_AND_RCDATA_MASK)) {
+ cstart = pos;
+ }
+ reconsume = true;
+ state =
+ P::transition(mViewSource.get(), returnState, reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ } else {
+ if (P::reportErrors) {
+ errCharRefLacksSemicolon();
+ }
+ if (!(returnState & DATA_AND_RCDATA_MASK)) {
+ cstart = pos;
+ }
+ reconsume = true;
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::HANDLE_NCR_VALUE,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ }
+ case PLAINTEXT: {
+ for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ }
+ switch (c) {
+ case '\0': {
+ emitPlaintextReplacementCharacter(buf, pos);
+ continue;
+ }
+ case '\r': {
+ emitCarriageReturn<P>(buf, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ default: {
+ continue;
+ }
+ }
+ }
+ }
+ case CLOSE_TAG_OPEN: {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '>': {
+ if (P::reportErrors) {
+ errLtSlashGt();
+ }
+ cstart = pos + 1;
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\r': {
+ P::silentCarriageReturn(this);
+ if (P::reportErrors) {
+ errGarbageAfterLtSlash();
+ }
+ clearStrBufBeforeUse();
+ appendStrBuf('\n');
+ state =
+ P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BOGUS_COMMENT, reconsume, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ if (P::reportErrors) {
+ errGarbageAfterLtSlash();
+ }
+ clearStrBufBeforeUse();
+ appendStrBuf(c);
+ state =
+ P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BOGUS_COMMENT, reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\0': {
+ c = 0xfffd;
+ [[fallthrough]];
+ }
+ default: {
+ if (c >= 'A' && c <= 'Z') {
+ c += 0x20;
+ }
+ if (c >= 'a' && c <= 'z') {
+ endTag = true;
+ clearStrBufBeforeUse();
+ appendStrBuf(c);
+ containsHyphen = false;
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::TAG_NAME, reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ } else {
+ if (P::reportErrors) {
+ errGarbageAfterLtSlash();
+ }
+ clearStrBufBeforeUse();
+ appendStrBuf(c);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BOGUS_COMMENT, reconsume,
+ pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ }
+ case RCDATA: {
+ for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ }
+ switch (c) {
+ case '&': {
+ flushChars(buf, pos);
+ MOZ_ASSERT(!charRefBufLen,
+ "charRefBufLen not reset after previous use!");
+ appendCharRefBuf(c);
+ setAdditionalAndRememberAmpersandLocation('\0');
+ returnState = state;
+ state =
+ P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '<': {
+ flushChars(buf, pos);
+ returnState = state;
+ state =
+ P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::RAWTEXT_RCDATA_LESS_THAN_SIGN,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\0': {
+ emitReplacementCharacter(buf, pos);
+ continue;
+ }
+ case '\r': {
+ emitCarriageReturn<P>(buf, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ default: {
+ continue;
+ }
+ }
+ }
+ }
+ case RAWTEXT: {
+ for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ }
+ switch (c) {
+ case '<': {
+ flushChars(buf, pos);
+ returnState = state;
+ state =
+ P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::RAWTEXT_RCDATA_LESS_THAN_SIGN,
+ reconsume, pos);
+ NS_HTML5_BREAK(rawtextloop);
+ }
+ case '\0': {
+ emitReplacementCharacter(buf, pos);
+ continue;
+ }
+ case '\r': {
+ emitCarriageReturn<P>(buf, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ default: {
+ continue;
+ }
+ }
+ }
+ rawtextloop_end:;
+ [[fallthrough]];
+ }
+ case RAWTEXT_RCDATA_LESS_THAN_SIGN: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '/': {
+ index = 0;
+ clearStrBufBeforeUse();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::NON_DATA_END_TAG_NAME,
+ reconsume, pos);
+ NS_HTML5_BREAK(rawtextrcdatalessthansignloop);
+ }
+ default: {
+ tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 1);
+ cstart = pos;
+ reconsume = true;
+ state =
+ P::transition(mViewSource.get(), returnState, reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ rawtextrcdatalessthansignloop_end:;
+ [[fallthrough]];
+ }
+ case NON_DATA_END_TAG_NAME: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ if (!endTagExpectationAsArray) {
+ tokenHandler->characters(nsHtml5Tokenizer::LT_SOLIDUS, 0, 2);
+ cstart = pos;
+ reconsume = true;
+ state =
+ P::transition(mViewSource.get(), returnState, reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ } else if (index < endTagExpectationAsArray.length) {
+ char16_t e = endTagExpectationAsArray[index];
+ char16_t folded = c;
+ if (c >= 'A' && c <= 'Z') {
+ folded += 0x20;
+ }
+ if (folded != e) {
+ tokenHandler->characters(nsHtml5Tokenizer::LT_SOLIDUS, 0, 2);
+ emitStrBuf();
+ cstart = pos;
+ reconsume = true;
+ state =
+ P::transition(mViewSource.get(), returnState, reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ appendStrBuf(c);
+ index++;
+ continue;
+ } else {
+ endTag = true;
+ tagName = endTagExpectation;
+ switch (c) {
+ case '\r': {
+ P::silentCarriageReturn(this);
+ clearStrBufAfterUse();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME,
+ reconsume, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ case ' ':
+ case '\t':
+ case '\f': {
+ clearStrBufAfterUse();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '/': {
+ clearStrBufAfterUse();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::SELF_CLOSING_START_TAG,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '>': {
+ clearStrBufAfterUse();
+ state = P::transition(mViewSource.get(),
+ emitCurrentTagToken(false, pos),
+ reconsume, pos);
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ default: {
+ tokenHandler->characters(nsHtml5Tokenizer::LT_SOLIDUS, 0, 2);
+ emitStrBuf();
+ cstart = pos;
+ reconsume = true;
+ state = P::transition(mViewSource.get(), returnState, reconsume,
+ pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ }
+ }
+ case BOGUS_COMMENT: {
+ for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ }
+ switch (c) {
+ case '>': {
+ emitComment(0, pos);
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
+ reconsume, pos);
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '-': {
+ appendStrBuf(c);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BOGUS_COMMENT_HYPHEN,
+ reconsume, pos);
+ NS_HTML5_BREAK(boguscommentloop);
+ }
+ case '\r': {
+ appendStrBufCarriageReturn<P>();
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ appendStrBufLineFeed<P>();
+ continue;
+ }
+ case '\0': {
+ c = 0xfffd;
+ [[fallthrough]];
+ }
+ default: {
+ appendStrBuf(c);
+ continue;
+ }
+ }
+ }
+ boguscommentloop_end:;
+ [[fallthrough]];
+ }
+ case BOGUS_COMMENT_HYPHEN: {
+ boguscommenthyphenloop:
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '>': {
+ emitComment(0, pos);
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
+ reconsume, pos);
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '-': {
+ appendSecondHyphenToBogusComment();
+ NS_HTML5_CONTINUE(boguscommenthyphenloop);
+ }
+ case '\r': {
+ appendStrBufCarriageReturn<P>();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BOGUS_COMMENT, reconsume,
+ pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ appendStrBufLineFeed<P>();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BOGUS_COMMENT, reconsume,
+ pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\0': {
+ c = 0xfffd;
+ [[fallthrough]];
+ }
+ default: {
+ appendStrBuf(c);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BOGUS_COMMENT, reconsume,
+ pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ }
+ case SCRIPT_DATA: {
+ for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ }
+ switch (c) {
+ case '<': {
+ flushChars(buf, pos);
+ returnState = state;
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_LESS_THAN_SIGN, reconsume, pos);
+ NS_HTML5_BREAK(scriptdataloop);
+ }
+ case '\0': {
+ emitReplacementCharacter(buf, pos);
+ continue;
+ }
+ case '\r': {
+ emitCarriageReturn<P>(buf, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ default: {
+ continue;
+ }
+ }
+ }
+ scriptdataloop_end:;
+ [[fallthrough]];
+ }
+ case SCRIPT_DATA_LESS_THAN_SIGN: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '/': {
+ index = 0;
+ clearStrBufBeforeUse();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::NON_DATA_END_TAG_NAME,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '!': {
+ tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 1);
+ cstart = pos;
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_ESCAPE_START,
+ reconsume, pos);
+ NS_HTML5_BREAK(scriptdatalessthansignloop);
+ }
+ default: {
+ tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 1);
+ cstart = pos;
+ reconsume = true;
+ state =
+ P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA, reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ scriptdatalessthansignloop_end:;
+ [[fallthrough]];
+ }
+ case SCRIPT_DATA_ESCAPE_START: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '-': {
+ state =
+ P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_ESCAPE_START_DASH,
+ reconsume, pos);
+ NS_HTML5_BREAK(scriptdataescapestartloop);
+ }
+ default: {
+ reconsume = true;
+ state =
+ P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA, reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ scriptdataescapestartloop_end:;
+ [[fallthrough]];
+ }
+ case SCRIPT_DATA_ESCAPE_START_DASH: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '-': {
+ state =
+ P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_DASH_DASH,
+ reconsume, pos);
+ NS_HTML5_BREAK(scriptdataescapestartdashloop);
+ }
+ default: {
+ reconsume = true;
+ state =
+ P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA, reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ scriptdataescapestartdashloop_end:;
+ [[fallthrough]];
+ }
+ case SCRIPT_DATA_ESCAPED_DASH_DASH: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '-': {
+ continue;
+ }
+ case '<': {
+ flushChars(buf, pos);
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '>': {
+ state =
+ P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA, reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\0': {
+ emitReplacementCharacter(buf, pos);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED,
+ reconsume, pos);
+ NS_HTML5_BREAK(scriptdataescapeddashdashloop);
+ }
+ case '\r': {
+ emitCarriageReturn<P>(buf, pos);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED,
+ reconsume, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ default: {
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED,
+ reconsume, pos);
+ NS_HTML5_BREAK(scriptdataescapeddashdashloop);
+ }
+ }
+ }
+ scriptdataescapeddashdashloop_end:;
+ [[fallthrough]];
+ }
+ case SCRIPT_DATA_ESCAPED: {
+ for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ }
+ switch (c) {
+ case '-': {
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_DASH,
+ reconsume, pos);
+ NS_HTML5_BREAK(scriptdataescapedloop);
+ }
+ case '<': {
+ flushChars(buf, pos);
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\0': {
+ emitReplacementCharacter(buf, pos);
+ continue;
+ }
+ case '\r': {
+ emitCarriageReturn<P>(buf, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ default: {
+ continue;
+ }
+ }
+ }
+ scriptdataescapedloop_end:;
+ [[fallthrough]];
+ }
+ case SCRIPT_DATA_ESCAPED_DASH: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '-': {
+ state =
+ P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_DASH_DASH,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '<': {
+ flushChars(buf, pos);
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN,
+ reconsume, pos);
+ NS_HTML5_BREAK(scriptdataescapeddashloop);
+ }
+ case '\0': {
+ emitReplacementCharacter(buf, pos);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\r': {
+ emitCarriageReturn<P>(buf, pos);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED,
+ reconsume, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ default: {
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ scriptdataescapeddashloop_end:;
+ [[fallthrough]];
+ }
+ case SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '/': {
+ index = 0;
+ clearStrBufBeforeUse();
+ returnState = nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED;
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::NON_DATA_END_TAG_NAME,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case 'S':
+ case 's': {
+ tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 1);
+ cstart = pos;
+ index = 1;
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPE_START, reconsume,
+ pos);
+ NS_HTML5_BREAK(scriptdataescapedlessthanloop);
+ }
+ default: {
+ tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 1);
+ cstart = pos;
+ reconsume = true;
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ scriptdataescapedlessthanloop_end:;
+ [[fallthrough]];
+ }
+ case SCRIPT_DATA_DOUBLE_ESCAPE_START: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ MOZ_ASSERT(index > 0);
+ if (index < 6) {
+ char16_t folded = c;
+ if (c >= 'A' && c <= 'Z') {
+ folded += 0x20;
+ }
+ if (folded != nsHtml5Tokenizer::SCRIPT_ARR[index]) {
+ reconsume = true;
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ index++;
+ continue;
+ }
+ switch (c) {
+ case '\r': {
+ emitCarriageReturn<P>(buf, pos);
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ case ' ':
+ case '\t':
+ case '\f':
+ case '/':
+ case '>': {
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
+ NS_HTML5_BREAK(scriptdatadoubleescapestartloop);
+ }
+ default: {
+ reconsume = true;
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ scriptdatadoubleescapestartloop_end:;
+ [[fallthrough]];
+ }
+ case SCRIPT_DATA_DOUBLE_ESCAPED: {
+ for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ }
+ switch (c) {
+ case '-': {
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED_DASH, reconsume,
+ pos);
+ NS_HTML5_BREAK(scriptdatadoubleescapedloop);
+ }
+ case '<': {
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\0': {
+ emitReplacementCharacter(buf, pos);
+ continue;
+ }
+ case '\r': {
+ emitCarriageReturn<P>(buf, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ default: {
+ continue;
+ }
+ }
+ }
+ scriptdatadoubleescapedloop_end:;
+ [[fallthrough]];
+ }
+ case SCRIPT_DATA_DOUBLE_ESCAPED_DASH: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '-': {
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH,
+ reconsume, pos);
+ NS_HTML5_BREAK(scriptdatadoubleescapeddashloop);
+ }
+ case '<': {
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\0': {
+ emitReplacementCharacter(buf, pos);
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\r': {
+ emitCarriageReturn<P>(buf, pos);
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ default: {
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ scriptdatadoubleescapeddashloop_end:;
+ [[fallthrough]];
+ }
+ case SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '-': {
+ continue;
+ }
+ case '<': {
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN,
+ reconsume, pos);
+ NS_HTML5_BREAK(scriptdatadoubleescapeddashdashloop);
+ }
+ case '>': {
+ state =
+ P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA, reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\0': {
+ emitReplacementCharacter(buf, pos);
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\r': {
+ emitCarriageReturn<P>(buf, pos);
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ default: {
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ scriptdatadoubleescapeddashdashloop_end:;
+ [[fallthrough]];
+ }
+ case SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '/': {
+ index = 0;
+ state =
+ P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPE_END,
+ reconsume, pos);
+ NS_HTML5_BREAK(scriptdatadoubleescapedlessthanloop);
+ }
+ default: {
+ reconsume = true;
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ scriptdatadoubleescapedlessthanloop_end:;
+ [[fallthrough]];
+ }
+ case SCRIPT_DATA_DOUBLE_ESCAPE_END: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ if (index < 6) {
+ char16_t folded = c;
+ if (c >= 'A' && c <= 'Z') {
+ folded += 0x20;
+ }
+ if (folded != nsHtml5Tokenizer::SCRIPT_ARR[index]) {
+ reconsume = true;
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ index++;
+ continue;
+ }
+ switch (c) {
+ case '\r': {
+ emitCarriageReturn<P>(buf, pos);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED,
+ reconsume, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ case ' ':
+ case '\t':
+ case '\f':
+ case '/':
+ case '>': {
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ default: {
+ reconsume = true;
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ }
+ case MARKUP_DECLARATION_OCTYPE: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ if (index < 6) {
+ char16_t folded = c;
+ if (c >= 'A' && c <= 'Z') {
+ folded += 0x20;
+ }
+ if (folded == nsHtml5Tokenizer::OCTYPE[index]) {
+ appendStrBuf(c);
+ } else {
+ if (P::reportErrors) {
+ errBogusComment();
+ }
+ reconsume = true;
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BOGUS_COMMENT, reconsume,
+ pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ index++;
+ continue;
+ } else {
+ reconsume = true;
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DOCTYPE,
+ reconsume, pos);
+ NS_HTML5_BREAK(markupdeclarationdoctypeloop);
+ }
+ }
+ markupdeclarationdoctypeloop_end:;
+ [[fallthrough]];
+ }
+ case DOCTYPE: {
+ for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ }
+ initDoctypeFields();
+ switch (c) {
+ case '\r': {
+ P::silentCarriageReturn(this);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BEFORE_DOCTYPE_NAME,
+ reconsume, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ case ' ':
+ case '\t':
+ case '\f': {
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BEFORE_DOCTYPE_NAME,
+ reconsume, pos);
+ NS_HTML5_BREAK(doctypeloop);
+ }
+ default: {
+ if (P::reportErrors) {
+ errMissingSpaceBeforeDoctypeName();
+ }
+ reconsume = true;
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BEFORE_DOCTYPE_NAME,
+ reconsume, pos);
+ NS_HTML5_BREAK(doctypeloop);
+ }
+ }
+ }
+ doctypeloop_end:;
+ [[fallthrough]];
+ }
+ case BEFORE_DOCTYPE_NAME: {
+ for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ }
+ switch (c) {
+ case '\r': {
+ P::silentCarriageReturn(this);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ case ' ':
+ case '\t':
+ case '\f': {
+ continue;
+ }
+ case '>': {
+ if (P::reportErrors) {
+ errNamelessDoctype();
+ }
+ forceQuirks = true;
+ emitDoctypeToken(pos);
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
+ reconsume, pos);
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\0': {
+ c = 0xfffd;
+ [[fallthrough]];
+ }
+ default: {
+ if (c >= 'A' && c <= 'Z') {
+ c += 0x20;
+ }
+ clearStrBufBeforeUse();
+ appendStrBuf(c);
+ state =
+ P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::DOCTYPE_NAME, reconsume, pos);
+ NS_HTML5_BREAK(beforedoctypenameloop);
+ }
+ }
+ }
+ beforedoctypenameloop_end:;
+ [[fallthrough]];
+ }
+ case DOCTYPE_NAME: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '\r': {
+ P::silentCarriageReturn(this);
+ strBufToDoctypeName();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::AFTER_DOCTYPE_NAME,
+ reconsume, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ case ' ':
+ case '\t':
+ case '\f': {
+ strBufToDoctypeName();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::AFTER_DOCTYPE_NAME,
+ reconsume, pos);
+ NS_HTML5_BREAK(doctypenameloop);
+ }
+ case '>': {
+ strBufToDoctypeName();
+ emitDoctypeToken(pos);
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
+ reconsume, pos);
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\0': {
+ c = 0xfffd;
+ [[fallthrough]];
+ }
+ default: {
+ if (c >= 'A' && c <= 'Z') {
+ c += 0x0020;
+ }
+ appendStrBuf(c);
+ continue;
+ }
+ }
+ }
+ doctypenameloop_end:;
+ [[fallthrough]];
+ }
+ case AFTER_DOCTYPE_NAME: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '\r': {
+ P::silentCarriageReturn(this);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ case ' ':
+ case '\t':
+ case '\f': {
+ continue;
+ }
+ case '>': {
+ emitDoctypeToken(pos);
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
+ reconsume, pos);
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case 'p':
+ case 'P': {
+ index = 0;
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::DOCTYPE_UBLIC, reconsume,
+ pos);
+ NS_HTML5_BREAK(afterdoctypenameloop);
+ }
+ case 's':
+ case 'S': {
+ index = 0;
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::DOCTYPE_YSTEM, reconsume,
+ pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ default: {
+ bogusDoctype();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BOGUS_DOCTYPE, reconsume,
+ pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ afterdoctypenameloop_end:;
+ [[fallthrough]];
+ }
+ case DOCTYPE_UBLIC: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ if (index < 5) {
+ char16_t folded = c;
+ if (c >= 'A' && c <= 'Z') {
+ folded += 0x20;
+ }
+ if (folded != nsHtml5Tokenizer::UBLIC[index]) {
+ bogusDoctype();
+ reconsume = true;
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BOGUS_DOCTYPE, reconsume,
+ pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ index++;
+ continue;
+ } else {
+ reconsume = true;
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::AFTER_DOCTYPE_PUBLIC_KEYWORD, reconsume, pos);
+ NS_HTML5_BREAK(doctypeublicloop);
+ }
+ }
+ doctypeublicloop_end:;
+ [[fallthrough]];
+ }
+ case AFTER_DOCTYPE_PUBLIC_KEYWORD: {
+ for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ }
+ switch (c) {
+ case '\r': {
+ P::silentCarriageReturn(this);
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::BEFORE_DOCTYPE_PUBLIC_IDENTIFIER, reconsume,
+ pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ case ' ':
+ case '\t':
+ case '\f': {
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::BEFORE_DOCTYPE_PUBLIC_IDENTIFIER, reconsume,
+ pos);
+ NS_HTML5_BREAK(afterdoctypepublickeywordloop);
+ }
+ case '\"': {
+ if (P::reportErrors) {
+ errNoSpaceBetweenDoctypePublicKeywordAndQuote();
+ }
+ clearStrBufBeforeUse();
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\'': {
+ if (P::reportErrors) {
+ errNoSpaceBetweenDoctypePublicKeywordAndQuote();
+ }
+ clearStrBufBeforeUse();
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '>': {
+ if (P::reportErrors) {
+ errExpectedPublicId();
+ }
+ forceQuirks = true;
+ emitDoctypeToken(pos);
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
+ reconsume, pos);
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ default: {
+ bogusDoctype();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BOGUS_DOCTYPE, reconsume,
+ pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ afterdoctypepublickeywordloop_end:;
+ [[fallthrough]];
+ }
+ case BEFORE_DOCTYPE_PUBLIC_IDENTIFIER: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '\r': {
+ P::silentCarriageReturn(this);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ case ' ':
+ case '\t':
+ case '\f': {
+ continue;
+ }
+ case '\"': {
+ clearStrBufBeforeUse();
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED,
+ reconsume, pos);
+ NS_HTML5_BREAK(beforedoctypepublicidentifierloop);
+ }
+ case '\'': {
+ clearStrBufBeforeUse();
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '>': {
+ if (P::reportErrors) {
+ errExpectedPublicId();
+ }
+ forceQuirks = true;
+ emitDoctypeToken(pos);
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
+ reconsume, pos);
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ default: {
+ bogusDoctype();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BOGUS_DOCTYPE, reconsume,
+ pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ beforedoctypepublicidentifierloop_end:;
+ [[fallthrough]];
+ }
+ case DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '\"': {
+ publicIdentifier = strBufToString();
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::AFTER_DOCTYPE_PUBLIC_IDENTIFIER, reconsume,
+ pos);
+ NS_HTML5_BREAK(doctypepublicidentifierdoublequotedloop);
+ }
+ case '>': {
+ if (P::reportErrors) {
+ errGtInPublicId();
+ }
+ forceQuirks = true;
+ publicIdentifier = strBufToString();
+ emitDoctypeToken(pos);
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
+ reconsume, pos);
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\r': {
+ appendStrBufCarriageReturn<P>();
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ appendStrBufLineFeed<P>();
+ continue;
+ }
+ case '\0': {
+ c = 0xfffd;
+ [[fallthrough]];
+ }
+ default: {
+ appendStrBuf(c);
+ continue;
+ }
+ }
+ }
+ doctypepublicidentifierdoublequotedloop_end:;
+ [[fallthrough]];
+ }
+ case AFTER_DOCTYPE_PUBLIC_IDENTIFIER: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '\r': {
+ P::silentCarriageReturn(this);
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::
+ BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS,
+ reconsume, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ case ' ':
+ case '\t':
+ case '\f': {
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::
+ BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS,
+ reconsume, pos);
+ NS_HTML5_BREAK(afterdoctypepublicidentifierloop);
+ }
+ case '>': {
+ emitDoctypeToken(pos);
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
+ reconsume, pos);
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\"': {
+ if (P::reportErrors) {
+ errNoSpaceBetweenPublicAndSystemIds();
+ }
+ clearStrBufBeforeUse();
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\'': {
+ if (P::reportErrors) {
+ errNoSpaceBetweenPublicAndSystemIds();
+ }
+ clearStrBufBeforeUse();
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ default: {
+ bogusDoctype();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BOGUS_DOCTYPE, reconsume,
+ pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ afterdoctypepublicidentifierloop_end:;
+ [[fallthrough]];
+ }
+ case BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '\r': {
+ P::silentCarriageReturn(this);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ case ' ':
+ case '\t':
+ case '\f': {
+ continue;
+ }
+ case '>': {
+ emitDoctypeToken(pos);
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
+ reconsume, pos);
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\"': {
+ clearStrBufBeforeUse();
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED,
+ reconsume, pos);
+ NS_HTML5_BREAK(betweendoctypepublicandsystemidentifiersloop);
+ }
+ case '\'': {
+ clearStrBufBeforeUse();
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ default: {
+ bogusDoctype();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BOGUS_DOCTYPE, reconsume,
+ pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ betweendoctypepublicandsystemidentifiersloop_end:;
+ [[fallthrough]];
+ }
+ case DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '\"': {
+ systemIdentifier = strBufToString();
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::AFTER_DOCTYPE_SYSTEM_IDENTIFIER, reconsume,
+ pos);
+ NS_HTML5_BREAK(doctypesystemidentifierdoublequotedloop);
+ }
+ case '>': {
+ if (P::reportErrors) {
+ errGtInSystemId();
+ }
+ forceQuirks = true;
+ systemIdentifier = strBufToString();
+ emitDoctypeToken(pos);
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
+ reconsume, pos);
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\r': {
+ appendStrBufCarriageReturn<P>();
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ appendStrBufLineFeed<P>();
+ continue;
+ }
+ case '\0': {
+ c = 0xfffd;
+ [[fallthrough]];
+ }
+ default: {
+ appendStrBuf(c);
+ continue;
+ }
+ }
+ }
+ doctypesystemidentifierdoublequotedloop_end:;
+ [[fallthrough]];
+ }
+ case AFTER_DOCTYPE_SYSTEM_IDENTIFIER: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '\r': {
+ P::silentCarriageReturn(this);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ case ' ':
+ case '\t':
+ case '\f': {
+ continue;
+ }
+ case '>': {
+ emitDoctypeToken(pos);
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
+ reconsume, pos);
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ default: {
+ bogusDoctypeWithoutQuirks();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BOGUS_DOCTYPE, reconsume,
+ pos);
+ NS_HTML5_BREAK(afterdoctypesystemidentifierloop);
+ }
+ }
+ }
+ afterdoctypesystemidentifierloop_end:;
+ [[fallthrough]];
+ }
+ case BOGUS_DOCTYPE: {
+ for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ }
+ switch (c) {
+ case '>': {
+ emitDoctypeToken(pos);
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
+ reconsume, pos);
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\r': {
+ P::silentCarriageReturn(this);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ default: {
+ continue;
+ }
+ }
+ }
+ }
+ case DOCTYPE_YSTEM: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ if (index < 5) {
+ char16_t folded = c;
+ if (c >= 'A' && c <= 'Z') {
+ folded += 0x20;
+ }
+ if (folded != nsHtml5Tokenizer::YSTEM[index]) {
+ bogusDoctype();
+ reconsume = true;
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BOGUS_DOCTYPE, reconsume,
+ pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ index++;
+ NS_HTML5_CONTINUE(stateloop);
+ } else {
+ reconsume = true;
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::AFTER_DOCTYPE_SYSTEM_KEYWORD, reconsume, pos);
+ NS_HTML5_BREAK(doctypeystemloop);
+ }
+ }
+ doctypeystemloop_end:;
+ [[fallthrough]];
+ }
+ case AFTER_DOCTYPE_SYSTEM_KEYWORD: {
+ for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ }
+ switch (c) {
+ case '\r': {
+ P::silentCarriageReturn(this);
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::BEFORE_DOCTYPE_SYSTEM_IDENTIFIER, reconsume,
+ pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ case ' ':
+ case '\t':
+ case '\f': {
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::BEFORE_DOCTYPE_SYSTEM_IDENTIFIER, reconsume,
+ pos);
+ NS_HTML5_BREAK(afterdoctypesystemkeywordloop);
+ }
+ case '\"': {
+ if (P::reportErrors) {
+ errNoSpaceBetweenDoctypeSystemKeywordAndQuote();
+ }
+ clearStrBufBeforeUse();
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\'': {
+ if (P::reportErrors) {
+ errNoSpaceBetweenDoctypeSystemKeywordAndQuote();
+ }
+ clearStrBufBeforeUse();
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '>': {
+ if (P::reportErrors) {
+ errExpectedPublicId();
+ }
+ forceQuirks = true;
+ emitDoctypeToken(pos);
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
+ reconsume, pos);
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ default: {
+ bogusDoctype();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BOGUS_DOCTYPE, reconsume,
+ pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ afterdoctypesystemkeywordloop_end:;
+ [[fallthrough]];
+ }
+ case BEFORE_DOCTYPE_SYSTEM_IDENTIFIER: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '\r': {
+ P::silentCarriageReturn(this);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ case ' ':
+ case '\t':
+ case '\f': {
+ continue;
+ }
+ case '\"': {
+ clearStrBufBeforeUse();
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\'': {
+ clearStrBufBeforeUse();
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED,
+ reconsume, pos);
+ NS_HTML5_BREAK(beforedoctypesystemidentifierloop);
+ }
+ case '>': {
+ if (P::reportErrors) {
+ errExpectedSystemId();
+ }
+ forceQuirks = true;
+ emitDoctypeToken(pos);
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
+ reconsume, pos);
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ default: {
+ bogusDoctype();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BOGUS_DOCTYPE, reconsume,
+ pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ beforedoctypesystemidentifierloop_end:;
+ [[fallthrough]];
+ }
+ case DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '\'': {
+ systemIdentifier = strBufToString();
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::AFTER_DOCTYPE_SYSTEM_IDENTIFIER, reconsume,
+ pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '>': {
+ if (P::reportErrors) {
+ errGtInSystemId();
+ }
+ forceQuirks = true;
+ systemIdentifier = strBufToString();
+ emitDoctypeToken(pos);
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
+ reconsume, pos);
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\r': {
+ appendStrBufCarriageReturn<P>();
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ appendStrBufLineFeed<P>();
+ continue;
+ }
+ case '\0': {
+ c = 0xfffd;
+ [[fallthrough]];
+ }
+ default: {
+ appendStrBuf(c);
+ continue;
+ }
+ }
+ }
+ }
+ case DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '\'': {
+ publicIdentifier = strBufToString();
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::AFTER_DOCTYPE_PUBLIC_IDENTIFIER, reconsume,
+ pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '>': {
+ if (P::reportErrors) {
+ errGtInPublicId();
+ }
+ forceQuirks = true;
+ publicIdentifier = strBufToString();
+ emitDoctypeToken(pos);
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
+ reconsume, pos);
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\r': {
+ appendStrBufCarriageReturn<P>();
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ appendStrBufLineFeed<P>();
+ continue;
+ }
+ case '\0': {
+ c = 0xfffd;
+ [[fallthrough]];
+ }
+ default: {
+ appendStrBuf(c);
+ continue;
+ }
+ }
+ }
+ }
+ case PROCESSING_INSTRUCTION: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '\?': {
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::PROCESSING_INSTRUCTION_QUESTION_MARK,
+ reconsume, pos);
+ NS_HTML5_BREAK(processinginstructionloop);
+ }
+ default: {
+ continue;
+ }
+ }
+ }
+ processinginstructionloop_end:;
+ [[fallthrough]];
+ }
+ case PROCESSING_INSTRUCTION_QUESTION_MARK: {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '>': {
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
+ reconsume, pos);
+ suspendIfRequestedAfterCurrentNonTextToken();
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ default: {
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::PROCESSING_INSTRUCTION,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ }
+ }
+stateloop_end:;
+ flushChars(buf, pos);
+ stateSave = state;
+ returnStateSave = returnState;
+ return pos;
}
-void nsHtml5Tokenizer::attributeNameComplete() {
- attributeName =
- nsHtml5AttributeName::nameByBuffer(strBuf, strBufLen, interner);
- if (!attributeName) {
- nonInternedAttributeName->setNameForNonInterned(
- nsHtml5Portability::newLocalNameFromBuffer(strBuf, strBufLen,
- interner));
- attributeName = nonInternedAttributeName;
- }
+void nsHtml5Tokenizer::initDoctypeFields() {  // Resets the doctype-related members to their empty/default values.
clearStrBufAfterUse();
- if (!attributes) {
- attributes = new nsHtml5HtmlAttributes(0);
+ doctypeName = nullptr;  // forget any previously stored doctype name
+ if (systemIdentifier) {  // only a non-null identifier needs releasing
+ systemIdentifier.Release();  // release first, then clear the handle on the next line
+ systemIdentifier = nullptr;
}
- if (attributes->contains(attributeName)) {
- errDuplicateAttribute();
- attributeName = nullptr;
+ if (publicIdentifier) {  // same release-then-clear sequence as systemIdentifier
+ publicIdentifier.Release();
+ publicIdentifier = nullptr;
}
+ forceQuirks = false;  // default; other code in this file (e.g. bogusDoctype()) sets it to true
}
-void nsHtml5Tokenizer::addAttributeWithoutValue() {
- if (attributeName) {
- attributes->addAttribute(
- attributeName, nsHtml5Portability::newEmptyString(), attributeLine);
- attributeName = nullptr;
- } else {
- clearStrBufAfterUse();
- }
+template <class P>  // CR variant: runs P's carriage-return hook, then forwards a '\n' to the double-hyphen append helper
+void nsHtml5Tokenizer::adjustDoubleHyphenAndAppendToStrBufCarriageReturn() {
+ P::silentCarriageReturn(this);  // P supplies this static hook; it receives the tokenizer itself
+ adjustDoubleHyphenAndAppendToStrBufAndErr('\n', false);  // note: the character passed on is '\n', not '\r'
+}
-void nsHtml5Tokenizer::addAttributeWithValue() {
- if (attributeName) {
- nsHtml5String val = strBufToString();
- if (mViewSource) {
- mViewSource->MaybeLinkifyAttributeValue(attributeName, val);
- }
- attributes->addAttribute(attributeName, val, attributeLine);
- attributeName = nullptr;
- } else {
- clearStrBufAfterUse();
- }
+template <class P>  // LF variant: runs P's line-feed hook, then forwards '\n' to the double-hyphen append helper
+void nsHtml5Tokenizer::adjustDoubleHyphenAndAppendToStrBufLineFeed() {
+ P::silentLineFeed(this);  // P supplies this static hook; it receives the tokenizer itself
+ adjustDoubleHyphenAndAppendToStrBufAndErr('\n', false);  // same arguments as the carriage-return variant
+}
-void nsHtml5Tokenizer::start() {
- initializeWithoutStarting();
- tokenHandler->startTokenization(this);
- if (mViewSource) {
- line = 1;
- col = -1;
- nextCharOnNewLine = false;
- } else if (tokenHandler->WantsLineAndColumn()) {
- line = 0;
- col = 1;
- nextCharOnNewLine = true;
- } else {
- line = -1;
- col = -1;
- nextCharOnNewLine = false;
- }
+template <class P>  // Handles a line feed that must be kept in strBuf
+void nsHtml5Tokenizer::appendStrBufLineFeed() {
+ P::silentLineFeed(this);  // run P's static line-feed hook before touching the buffer
+ appendStrBuf('\n');  // then store the line feed itself
+}
-bool nsHtml5Tokenizer::tokenizeBuffer(nsHtml5UTF16Buffer* buffer) {
- int32_t state = stateSave;
- int32_t returnState = returnStateSave;
- char16_t c = '\0';
- shouldSuspend = false;
- lastCR = false;
- int32_t start = buffer->getStart();
- int32_t end = buffer->getEnd();
- int32_t pos = start - 1;
- switch (state) {
- case DATA:
- case RCDATA:
- case SCRIPT_DATA:
- case PLAINTEXT:
- case RAWTEXT:
- case CDATA_SECTION:
- case SCRIPT_DATA_ESCAPED:
- case SCRIPT_DATA_ESCAPE_START:
- case SCRIPT_DATA_ESCAPE_START_DASH:
- case SCRIPT_DATA_ESCAPED_DASH:
- case SCRIPT_DATA_ESCAPED_DASH_DASH:
- case SCRIPT_DATA_DOUBLE_ESCAPE_START:
- case SCRIPT_DATA_DOUBLE_ESCAPED:
- case SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN:
- case SCRIPT_DATA_DOUBLE_ESCAPED_DASH:
- case SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH:
- case SCRIPT_DATA_DOUBLE_ESCAPE_END: {
- cstart = start;
- break;
- }
- default: {
- cstart = INT32_MAX;
- break;
- }
- }
- if (mViewSource) {
- mViewSource->SetBuffer(buffer);
- if (mozilla::htmlaccel::htmlaccelEnabled()) {
- pos = StateLoopViewSourceSIMD(state, c, pos, buffer->getBuffer(), false,
- returnState, buffer->getEnd());
- } else {
- pos = StateLoopViewSourceALU(state, c, pos, buffer->getBuffer(), false,
- returnState, buffer->getEnd());
- }
- mViewSource->DropBuffer((pos == buffer->getEnd()) ? pos : pos + 1);
- } else if (tokenHandler->WantsLineAndColumn()) {
- if (mozilla::htmlaccel::htmlaccelEnabled()) {
- pos = StateLoopLineColSIMD(state, c, pos, buffer->getBuffer(), false,
- returnState, buffer->getEnd());
- } else {
- pos = StateLoopLineColALU(state, c, pos, buffer->getBuffer(), false,
- returnState, buffer->getEnd());
- }
- } else if (mozilla::htmlaccel::htmlaccelEnabled()) {
- pos = StateLoopFastestSIMD(state, c, pos, buffer->getBuffer(), false,
- returnState, buffer->getEnd());
- } else {
- pos = StateLoopFastestALU(state, c, pos, buffer->getBuffer(), false,
- returnState, buffer->getEnd());
- }
- if (pos == end) {
- buffer->setStart(pos);
- } else {
- buffer->setStart(pos + 1);
- }
- return lastCR;
+template <class P>  // Handles a carriage return that must be kept in strBuf
+void nsHtml5Tokenizer::appendStrBufCarriageReturn() {
+ P::silentCarriageReturn(this);  // run P's static carriage-return hook before touching the buffer
+ appendStrBuf('\n');  // the character stored is '\n', not '\r'
+}
-void nsHtml5Tokenizer::initDoctypeFields() {
- clearStrBufAfterUse();
- doctypeName = nullptr;
- if (systemIdentifier) {
- systemIdentifier.Release();
- systemIdentifier = nullptr;
- }
- if (publicIdentifier) {
- publicIdentifier.Release();
- publicIdentifier = nullptr;
- }
- forceQuirks = false;
+template <class P>  // Handles a carriage return in character data: flush, then emit one character from the LF constant
+void nsHtml5Tokenizer::emitCarriageReturn(char16_t* buf, int32_t pos) {
+ P::silentCarriageReturn(this);  // P's static carriage-return hook runs first
+ flushChars(buf, pos);  // flushChars is defined outside this view; called with the current buffer and position
+ tokenHandler->characters(nsHtml5Tokenizer::LF, 0, 1);  // offset 0, length 1 of the LF constant goes to the token handler
+ cstart = INT32_MAX;  // NOTE(review): INT32_MAX looks like a "no pending character run" sentinel - confirm
+}
void nsHtml5Tokenizer::emitReplacementCharacter(char16_t* buf, int32_t pos) {
@@ -503,6 +4528,10 @@ void nsHtml5Tokenizer::emitPlaintextReplacementCharacter(char16_t* buf,
cstart = pos + 1;
}
+void nsHtml5Tokenizer::setAdditionalAndRememberAmpersandLocation(char16_t add) {  // Stores `add` in the `additional` member.
+ additional = add;  // despite the name, this body records nothing else; no location is saved here
+}
+
void nsHtml5Tokenizer::bogusDoctype() {
errBogusDoctype();
forceQuirks = true;
@@ -868,6 +4897,13 @@ void nsHtml5Tokenizer::emitDoctypeToken(int32_t pos) {
suspendIfRequestedAfterCurrentNonTextToken();
}
+void nsHtml5Tokenizer::suspendIfRequestedAfterCurrentNonTextToken() {  // Turns a pending suspend-after-token request into shouldSuspend.
+ if (suspendAfterCurrentNonTextToken) {  // nothing happens unless a request is pending
+ suspendAfterCurrentNonTextToken = false;  // consume the request so it fires only once
+ shouldSuspend = true;  // the state loop checks this flag after emitting a token and breaks out
+ }
+}
+
void nsHtml5Tokenizer::suspendAfterCurrentTokenIfNotInText() {
switch (stateSave) {
case DATA:
@@ -979,6 +5015,25 @@ bool nsHtml5Tokenizer::internalEncodingDeclaration(
return false;
}
+void nsHtml5Tokenizer::emitOrAppendTwo(const char16_t* val,  // Routes val[0..1] either into strBuf or to the token handler.
+ int32_t returnState) {  // returnState selects the route via DATA_AND_RCDATA_MASK
+ if ((returnState & DATA_AND_RCDATA_MASK)) {  // any masked bit set -> buffer; NOTE(review): exact meaning of the mask is defined elsewhere - confirm
+ appendStrBuf(val[0]);
+ appendStrBuf(val[1]);
+ } else {  // no masked bit set -> hand both characters to the token handler
+ tokenHandler->characters(val, 0, 2);  // offset 0, length 2
+ }
+}
+
+void nsHtml5Tokenizer::emitOrAppendOne(const char16_t* val,  // Routes val[0] either into strBuf or to the token handler.
+ int32_t returnState) {  // returnState selects the route via DATA_AND_RCDATA_MASK
+ if ((returnState & DATA_AND_RCDATA_MASK)) {  // any masked bit set -> buffer; NOTE(review): exact meaning of the mask is defined elsewhere - confirm
+ appendStrBuf(val[0]);
+ } else {  // no masked bit set -> hand the character to the token handler
+ tokenHandler->characters(val, 0, 1);  // offset 0, length 1
+ }
+}
+
void nsHtml5Tokenizer::end() {
if (!keepBuffer) {
strBuf = nullptr;
@@ -1002,6 +5057,10 @@ void nsHtml5Tokenizer::end() {
}
}
+void nsHtml5Tokenizer::requestSuspension() { shouldSuspend = true; }  // Sets shouldSuspend unconditionally.
+
+bool nsHtml5Tokenizer::isInDataState() { return (stateSave == DATA); }  // True when the saved state (stateSave) is DATA.
+
void nsHtml5Tokenizer::resetToDataState() {
clearStrBufAfterUse();
charRefBufLen = 0;
diff --git a/parser/html/nsHtml5Tokenizer.h b/parser/html/nsHtml5Tokenizer.h
@@ -43,10 +43,8 @@
#include "nsHtml5NamedCharacters.h"
#include "nsHtml5NamedCharactersAccel.h"
#include "nsHtml5String.h"
-#include "nsHtml5TreeBuilder.h"
#include "nsIContent.h"
#include "nsTraceRefcnt.h"
-#include "mozilla/htmlaccel/htmlaccelEnabled.h"
class nsHtml5StreamParser;
@@ -339,12 +337,15 @@ class nsHtml5Tokenizer {
void setLineNumber(int32_t line);
inline int32_t getLineNumber() { return line; }
- inline nsHtml5HtmlAttributes* emptyAttributes() {
- return nsHtml5HtmlAttributes::EMPTY_ATTRIBUTES;
- }
+ nsHtml5HtmlAttributes* emptyAttributes();
private:
- void appendCharRefBuf(char16_t c);
+ inline void appendCharRefBuf(char16_t c) {  // Appends c to charRefBuf and advances charRefBufLen.
+ MOZ_RELEASE_ASSERT(charRefBufLen < charRefBuf.length,
+ "Attempted to overrun charRefBuf!");  // a full buffer trips this release assertion; the buffer is never grown here
+ charRefBuf[charRefBufLen++] = c;  // store at the current length, then bump the length
+ }
+
void emitOrAppendCharRefBuf(int32_t returnState);
inline void clearStrBufAfterUse() { strBufLen = 0; }
@@ -359,32 +360,23 @@ class nsHtml5Tokenizer {
strBufLen = 0;
}
- void appendStrBuf(char16_t c);
+ inline void appendStrBuf(char16_t c) {  // Appends c to strBuf, growing the buffer if it is already full.
+ MOZ_ASSERT(strBufLen < strBuf.length,
+ "Previous buffer length insufficient.");  // callers are expected to have reserved space already
+ if (MOZ_UNLIKELY(strBufLen == strBuf.length)) {  // fallback for when the assertion above does not stop execution; presumably non-debug builds - confirm
+ if (MOZ_UNLIKELY(!EnsureBufferSpace(1))) {  // ask for room for one more character
+ MOZ_CRASH("Unable to recover from buffer reallocation failure");  // growth failed: crash rather than write past the end
+ }
+ }
+ strBuf[strBufLen++] = c;  // store at the current length, then bump the length
+ }
protected:
- inline nsHtml5String strBufToString() {
- nsHtml5String str = nsHtml5Portability::newStringFromBuffer(
- strBuf, 0, strBufLen, tokenHandler,
- !newAttributesEachTime &&
- attributeName == nsHtml5AttributeName::ATTR_CLASS);
- clearStrBufAfterUse();
- return str;
- }
+ nsHtml5String strBufToString();
private:
- inline void strBufToDoctypeName() {
- doctypeName =
- nsHtml5Portability::newLocalNameFromBuffer(strBuf, strBufLen, interner);
- clearStrBufAfterUse();
- }
-
- inline void emitStrBuf() {
- if (strBufLen > 0) {
- tokenHandler->characters(strBuf, 0, strBufLen);
- clearStrBufAfterUse();
- }
- }
-
+ void strBufToDoctypeName();
+ void emitStrBuf();
inline void appendSecondHyphenToBogusComment() { appendStrBuf('-'); }
inline void adjustDoubleHyphenAndAppendToStrBufAndErr(
@@ -416,4165 +408,23 @@ class nsHtml5Tokenizer {
private:
template <class P>
- inline int32_t stateLoop(int32_t state, char16_t c, int32_t pos,
- char16_t* buf, bool reconsume, int32_t returnState,
- int32_t endPos) {
- bool reportedConsecutiveHyphens = false;
- stateloop:
- for (;;) {
- switch (state) {
- case DATA: {
- for (;;) {
- if (reconsume) {
- reconsume = false;
- switch (c) {
- case '&': {
- flushChars(buf, pos);
- MOZ_ASSERT(!charRefBufLen,
- "charRefBufLen not reset after previous use!");
- appendCharRefBuf(c);
- setAdditionalAndRememberAmpersandLocation('\0');
- returnState = state;
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE, reconsume,
- pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '<': {
- flushChars(buf, pos);
- state =
- P::transition(mViewSource.get(),
- nsHtml5Tokenizer::TAG_OPEN, reconsume, pos);
- NS_HTML5_BREAK(dataloop);
- }
- case '\0': {
- maybeEmitReplacementCharacter(buf, pos);
- break;
- }
- case '\r': {
- emitCarriageReturn<P>(buf, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- default: {
- break;
- }
- }
- }
- datamiddle:
- for (;;) {
- ++pos;
- pos += P::accelerateAdvancementData(this, buf, pos, endPos);
- for (;;) {
- if (pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '&': {
- flushChars(buf, pos);
- MOZ_ASSERT(!charRefBufLen,
- "charRefBufLen not reset after previous use!");
- appendCharRefBuf(c);
- setAdditionalAndRememberAmpersandLocation('\0');
- returnState = state;
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '<': {
- flushChars(buf, pos);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::TAG_OPEN, reconsume,
- pos);
- NS_HTML5_BREAK(dataloop);
- }
- case '\0': {
- maybeEmitReplacementCharacter(buf, pos);
- NS_HTML5_CONTINUE(datamiddle);
- }
- case '\r': {
- emitCarriageReturn<P>(buf, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- NS_HTML5_CONTINUE(datamiddle);
- }
- default: {
- ++pos;
- continue;
- }
- }
- }
- }
- }
- dataloop_end:;
- [[fallthrough]];
- }
- case TAG_OPEN: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- if (c >= 'A' && c <= 'Z') {
- endTag = false;
- clearStrBufBeforeUse();
- appendStrBuf((char16_t)(c + 0x20));
- containsHyphen = false;
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::TAG_NAME, reconsume, pos);
- NS_HTML5_BREAK(tagopenloop);
- } else if (c >= 'a' && c <= 'z') {
- endTag = false;
- clearStrBufBeforeUse();
- appendStrBuf(c);
- containsHyphen = false;
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::TAG_NAME, reconsume, pos);
- NS_HTML5_BREAK(tagopenloop);
- }
- switch (c) {
- case '!': {
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::MARKUP_DECLARATION_OPEN,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '/': {
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::CLOSE_TAG_OPEN,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\?': {
- if (viewingXmlSource) {
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::PROCESSING_INSTRUCTION, reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- if (P::reportErrors) {
- errProcessingInstruction();
- }
- clearStrBufBeforeUse();
- appendStrBuf(c);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BOGUS_COMMENT,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '>': {
- if (P::reportErrors) {
- errLtGt();
- }
- tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 2);
- cstart = pos + 1;
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- default: {
- if (P::reportErrors) {
- errBadCharAfterLt(c);
- }
- tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 1);
- cstart = pos;
- reconsume = true;
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- tagopenloop_end:;
- [[fallthrough]];
- }
- case TAG_NAME: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '\r': {
- P::silentCarriageReturn(this);
- strBufToElementNameString();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME,
- reconsume, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- case ' ':
- case '\t':
- case '\f': {
- strBufToElementNameString();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME,
- reconsume, pos);
- NS_HTML5_BREAK(tagnameloop);
- }
- case '/': {
- strBufToElementNameString();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::SELF_CLOSING_START_TAG,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '>': {
- strBufToElementNameString();
- state = P::transition(mViewSource.get(),
- emitCurrentTagToken(false, pos),
- reconsume, pos);
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\0': {
- c = 0xfffd;
- [[fallthrough]];
- }
- default: {
- if (c >= 'A' && c <= 'Z') {
- c += 0x20;
- } else if (c == '-') {
- containsHyphen = true;
- }
- appendStrBuf(c);
- continue;
- }
- }
- }
- tagnameloop_end:;
- [[fallthrough]];
- }
- case BEFORE_ATTRIBUTE_NAME: {
- for (;;) {
- if (reconsume) {
- reconsume = false;
- } else {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- }
- switch (c) {
- case '\r': {
- P::silentCarriageReturn(this);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- case ' ':
- case '\t':
- case '\f': {
- continue;
- }
- case '/': {
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::SELF_CLOSING_START_TAG,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '>': {
- state = P::transition(mViewSource.get(),
- emitCurrentTagToken(false, pos),
- reconsume, pos);
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\0': {
- c = 0xfffd;
- [[fallthrough]];
- }
- case '\"':
- case '\'':
- case '<':
- case '=': {
- if (P::reportErrors) {
- errBadCharBeforeAttributeNameOrNull(c);
- }
- [[fallthrough]];
- }
- default: {
- if (c >= 'A' && c <= 'Z') {
- c += 0x20;
- }
- attributeLine = line;
- clearStrBufBeforeUse();
- appendStrBuf(c);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::ATTRIBUTE_NAME,
- reconsume, pos);
- NS_HTML5_BREAK(beforeattributenameloop);
- }
- }
- }
- beforeattributenameloop_end:;
- [[fallthrough]];
- }
- case ATTRIBUTE_NAME: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '\r': {
- P::silentCarriageReturn(this);
- attributeNameComplete();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::AFTER_ATTRIBUTE_NAME,
- reconsume, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- case ' ':
- case '\t':
- case '\f': {
- attributeNameComplete();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::AFTER_ATTRIBUTE_NAME,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '/': {
- attributeNameComplete();
- addAttributeWithoutValue();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::SELF_CLOSING_START_TAG,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '=': {
- attributeNameComplete();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BEFORE_ATTRIBUTE_VALUE,
- reconsume, pos);
- NS_HTML5_BREAK(attributenameloop);
- }
- case '>': {
- attributeNameComplete();
- addAttributeWithoutValue();
- state = P::transition(mViewSource.get(),
- emitCurrentTagToken(false, pos),
- reconsume, pos);
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\0': {
- c = 0xfffd;
- [[fallthrough]];
- }
- case '\"':
- case '\'':
- case '<': {
- if (P::reportErrors) {
- errQuoteOrLtInAttributeNameOrNull(c);
- }
- [[fallthrough]];
- }
- default: {
- if (c >= 'A' && c <= 'Z') {
- c += 0x20;
- }
- appendStrBuf(c);
- continue;
- }
- }
- }
- attributenameloop_end:;
- [[fallthrough]];
- }
- case BEFORE_ATTRIBUTE_VALUE: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '\r': {
- P::silentCarriageReturn(this);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- case ' ':
- case '\t':
- case '\f': {
- continue;
- }
- case '\"': {
- attributeLine = line;
- clearStrBufBeforeUse();
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::ATTRIBUTE_VALUE_DOUBLE_QUOTED, reconsume,
- pos);
- NS_HTML5_BREAK(beforeattributevalueloop);
- }
- case '&': {
- attributeLine = line;
- clearStrBufBeforeUse();
- reconsume = true;
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::ATTRIBUTE_VALUE_UNQUOTED, reconsume, pos);
-
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\'': {
- attributeLine = line;
- clearStrBufBeforeUse();
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::ATTRIBUTE_VALUE_SINGLE_QUOTED, reconsume,
- pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '>': {
- if (P::reportErrors) {
- errAttributeValueMissing();
- }
- addAttributeWithoutValue();
- state = P::transition(mViewSource.get(),
- emitCurrentTagToken(false, pos),
- reconsume, pos);
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\0': {
- c = 0xfffd;
- [[fallthrough]];
- }
- case '<':
- case '=':
- case '`': {
- if (P::reportErrors) {
- errLtOrEqualsOrGraveInUnquotedAttributeOrNull(c);
- }
- [[fallthrough]];
- }
- default: {
- attributeLine = line;
- clearStrBufBeforeUse();
- appendStrBuf(c);
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::ATTRIBUTE_VALUE_UNQUOTED, reconsume, pos);
-
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- beforeattributevalueloop_end:;
- [[fallthrough]];
- }
- case ATTRIBUTE_VALUE_DOUBLE_QUOTED: {
- for (;;) {
- if (reconsume) {
- reconsume = false;
- } else {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- }
- switch (c) {
- case '\"': {
- addAttributeWithValue();
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::AFTER_ATTRIBUTE_VALUE_QUOTED, reconsume,
- pos);
- NS_HTML5_BREAK(attributevaluedoublequotedloop);
- }
- case '&': {
- MOZ_ASSERT(!charRefBufLen,
- "charRefBufLen not reset after previous use!");
- appendCharRefBuf(c);
- setAdditionalAndRememberAmpersandLocation('\"');
- returnState = state;
- state =
- P::transition(mViewSource.get(),
- nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\r': {
- appendStrBufCarriageReturn<P>();
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- appendStrBufLineFeed<P>();
- continue;
- }
- case '\0': {
- c = 0xfffd;
- [[fallthrough]];
- }
- default: {
- appendStrBuf(c);
- continue;
- }
- }
- }
- attributevaluedoublequotedloop_end:;
- [[fallthrough]];
- }
- case AFTER_ATTRIBUTE_VALUE_QUOTED: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '\r': {
- P::silentCarriageReturn(this);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME,
- reconsume, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- case ' ':
- case '\t':
- case '\f': {
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '/': {
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::SELF_CLOSING_START_TAG,
- reconsume, pos);
- NS_HTML5_BREAK(afterattributevaluequotedloop);
- }
- case '>': {
- state = P::transition(mViewSource.get(),
- emitCurrentTagToken(false, pos),
- reconsume, pos);
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- default: {
- if (P::reportErrors) {
- errNoSpaceBetweenAttributes();
- }
- reconsume = true;
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- afterattributevaluequotedloop_end:;
- [[fallthrough]];
- }
- case SELF_CLOSING_START_TAG: {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '>': {
- state =
- P::transition(mViewSource.get(),
- emitCurrentTagToken(true, pos), reconsume, pos);
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- default: {
- if (P::reportErrors) {
- errSlashNotFollowedByGt();
- }
- reconsume = true;
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- case ATTRIBUTE_VALUE_UNQUOTED: {
- for (;;) {
- if (reconsume) {
- reconsume = false;
- } else {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- }
- switch (c) {
- case '\r': {
- P::silentCarriageReturn(this);
- addAttributeWithValue();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME,
- reconsume, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- case ' ':
- case '\t':
- case '\f': {
- addAttributeWithValue();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '&': {
- MOZ_ASSERT(!charRefBufLen,
- "charRefBufLen not reset after previous use!");
- appendCharRefBuf(c);
- setAdditionalAndRememberAmpersandLocation('>');
- returnState = state;
- state =
- P::transition(mViewSource.get(),
- nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '>': {
- addAttributeWithValue();
- state = P::transition(mViewSource.get(),
- emitCurrentTagToken(false, pos),
- reconsume, pos);
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\0': {
- c = 0xfffd;
- [[fallthrough]];
- }
- case '<':
- case '\"':
- case '\'':
- case '=':
- case '`': {
- if (P::reportErrors) {
- errUnquotedAttributeValOrNull(c);
- }
- [[fallthrough]];
- }
- default: {
- appendStrBuf(c);
- continue;
- }
- }
- }
- }
- case AFTER_ATTRIBUTE_NAME: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '\r': {
- P::silentCarriageReturn(this);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- case ' ':
- case '\t':
- case '\f': {
- continue;
- }
- case '/': {
- addAttributeWithoutValue();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::SELF_CLOSING_START_TAG,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '=': {
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BEFORE_ATTRIBUTE_VALUE,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '>': {
- addAttributeWithoutValue();
- state = P::transition(mViewSource.get(),
- emitCurrentTagToken(false, pos),
- reconsume, pos);
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\0': {
- c = 0xfffd;
- [[fallthrough]];
- }
- case '\"':
- case '\'':
- case '<': {
- if (P::reportErrors) {
- errQuoteOrLtInAttributeNameOrNull(c);
- }
- [[fallthrough]];
- }
- default: {
- addAttributeWithoutValue();
- if (c >= 'A' && c <= 'Z') {
- c += 0x20;
- }
- clearStrBufBeforeUse();
- appendStrBuf(c);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::ATTRIBUTE_NAME,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- }
- case MARKUP_DECLARATION_OPEN: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '-': {
- clearStrBufBeforeUse();
- appendStrBuf(c);
- state =
- P::transition(mViewSource.get(),
- nsHtml5Tokenizer::MARKUP_DECLARATION_HYPHEN,
- reconsume, pos);
- NS_HTML5_BREAK(markupdeclarationopenloop);
- }
- case 'd':
- case 'D': {
- clearStrBufBeforeUse();
- appendStrBuf(c);
- index = 0;
- state =
- P::transition(mViewSource.get(),
- nsHtml5Tokenizer::MARKUP_DECLARATION_OCTYPE,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '[': {
- if (tokenHandler->cdataSectionAllowed()) {
- clearStrBufBeforeUse();
- appendStrBuf(c);
- index = 0;
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::CDATA_START,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- [[fallthrough]];
- }
- default: {
- if (P::reportErrors) {
- errBogusComment();
- }
- clearStrBufBeforeUse();
- reconsume = true;
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BOGUS_COMMENT,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- markupdeclarationopenloop_end:;
- [[fallthrough]];
- }
- case MARKUP_DECLARATION_HYPHEN: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '-': {
- clearStrBufAfterOneHyphen();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::COMMENT_START,
- reconsume, pos);
- NS_HTML5_BREAK(markupdeclarationhyphenloop);
- }
- default: {
- if (P::reportErrors) {
- errBogusComment();
- }
- reconsume = true;
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BOGUS_COMMENT,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- markupdeclarationhyphenloop_end:;
- [[fallthrough]];
- }
- case COMMENT_START: {
- reportedConsecutiveHyphens = false;
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '-': {
- appendStrBuf(c);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::COMMENT_START_DASH,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '>': {
- if (P::reportErrors) {
- errPrematureEndOfComment();
- }
- emitComment(0, pos);
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
- reconsume, pos);
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- case '<': {
- appendStrBuf(c);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::COMMENT_LESSTHAN,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\r': {
- appendStrBufCarriageReturn<P>();
- state =
- P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT,
- reconsume, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- appendStrBufLineFeed<P>();
- state =
- P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT,
- reconsume, pos);
- NS_HTML5_BREAK(commentstartloop);
- }
- case '\0': {
- c = 0xfffd;
- [[fallthrough]];
- }
- default: {
- appendStrBuf(c);
- state =
- P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT,
- reconsume, pos);
- NS_HTML5_BREAK(commentstartloop);
- }
- }
- }
- commentstartloop_end:;
- [[fallthrough]];
- }
- case COMMENT: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '-': {
- appendStrBuf(c);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::COMMENT_END_DASH,
- reconsume, pos);
- NS_HTML5_BREAK(commentloop);
- }
- case '<': {
- appendStrBuf(c);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::COMMENT_LESSTHAN,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\r': {
- appendStrBufCarriageReturn<P>();
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- appendStrBufLineFeed<P>();
- continue;
- }
- case '\0': {
- c = 0xfffd;
- [[fallthrough]];
- }
- default: {
- appendStrBuf(c);
- continue;
- }
- }
- }
- commentloop_end:;
- [[fallthrough]];
- }
- case COMMENT_END_DASH: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '-': {
- appendStrBuf(c);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::COMMENT_END, reconsume,
- pos);
- NS_HTML5_BREAK(commentenddashloop);
- }
- case '<': {
- appendStrBuf(c);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::COMMENT_LESSTHAN,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\r': {
- appendStrBufCarriageReturn<P>();
- state =
- P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT,
- reconsume, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- appendStrBufLineFeed<P>();
- state =
- P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\0': {
- c = 0xfffd;
- [[fallthrough]];
- }
- default: {
- appendStrBuf(c);
- state =
- P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- commentenddashloop_end:;
- [[fallthrough]];
- }
- case COMMENT_END: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '>': {
- emitComment(2, pos);
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
- reconsume, pos);
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- case '-': {
- adjustDoubleHyphenAndAppendToStrBufAndErr(
- c, reportedConsecutiveHyphens);
- reportedConsecutiveHyphens = true;
- continue;
- }
- case '<': {
- appendStrBuf(c);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::COMMENT_LESSTHAN,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\r': {
- adjustDoubleHyphenAndAppendToStrBufCarriageReturn<P>();
- state =
- P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT,
- reconsume, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- adjustDoubleHyphenAndAppendToStrBufLineFeed<P>();
- state =
- P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '!': {
- appendStrBuf(c);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::COMMENT_END_BANG,
- reconsume, pos);
- NS_HTML5_BREAK(commentendloop);
- }
- case '\0': {
- c = 0xfffd;
- [[fallthrough]];
- }
- default: {
- adjustDoubleHyphenAndAppendToStrBufAndErr(
- c, reportedConsecutiveHyphens);
- reportedConsecutiveHyphens = true;
- state =
- P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- commentendloop_end:;
- [[fallthrough]];
- }
- case COMMENT_END_BANG: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '>': {
- emitComment(3, pos);
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
- reconsume, pos);
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- case '-': {
- appendStrBuf(c);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::COMMENT_END_DASH,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\r': {
- appendStrBufCarriageReturn<P>();
- state =
- P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT,
- reconsume, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- appendStrBufLineFeed<P>();
- state =
- P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\0': {
- c = 0xfffd;
- [[fallthrough]];
- }
- default: {
- appendStrBuf(c);
- state =
- P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- }
- case COMMENT_LESSTHAN: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '!': {
- appendStrBuf(c);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::COMMENT_LESSTHAN_BANG,
- reconsume, pos);
- NS_HTML5_BREAK(commentlessthanloop);
- }
- case '<': {
- appendStrBuf(c);
- continue;
- }
- case '-': {
- appendStrBuf(c);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::COMMENT_END_DASH,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\r': {
- appendStrBufCarriageReturn<P>();
- state =
- P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT,
- reconsume, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- appendStrBufLineFeed<P>();
- state =
- P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\0': {
- c = 0xfffd;
- [[fallthrough]];
- }
- default: {
- appendStrBuf(c);
- state =
- P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- commentlessthanloop_end:;
- [[fallthrough]];
- }
- case COMMENT_LESSTHAN_BANG: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '-': {
- appendStrBuf(c);
- state =
- P::transition(mViewSource.get(),
- nsHtml5Tokenizer::COMMENT_LESSTHAN_BANG_DASH,
- reconsume, pos);
- NS_HTML5_BREAK(commentlessthanbangloop);
- }
- case '<': {
- appendStrBuf(c);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::COMMENT_LESSTHAN,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\r': {
- appendStrBufCarriageReturn<P>();
- state =
- P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT,
- reconsume, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- appendStrBufLineFeed<P>();
- state =
- P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\0': {
- c = 0xfffd;
- [[fallthrough]];
- }
- default: {
- appendStrBuf(c);
- state =
- P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- commentlessthanbangloop_end:;
- [[fallthrough]];
- }
- case COMMENT_LESSTHAN_BANG_DASH: {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '-': {
- appendStrBuf(c);
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::COMMENT_LESSTHAN_BANG_DASH_DASH, reconsume,
- pos);
- break;
- }
- case '<': {
- appendStrBuf(c);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::COMMENT_LESSTHAN,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\r': {
- appendStrBufCarriageReturn<P>();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::COMMENT, reconsume, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- appendStrBufLineFeed<P>();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::COMMENT, reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\0': {
- c = 0xfffd;
- [[fallthrough]];
- }
- default: {
- appendStrBuf(c);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::COMMENT, reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- [[fallthrough]];
- }
- case COMMENT_LESSTHAN_BANG_DASH_DASH: {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '>': {
- appendStrBuf(c);
- emitComment(3, pos);
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
- reconsume, pos);
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- case '-': {
- if (P::reportErrors) {
- errNestedComment();
- }
- adjustDoubleHyphenAndAppendToStrBufAndErr(
- c, reportedConsecutiveHyphens);
- reportedConsecutiveHyphens = true;
- state =
- P::transition(mViewSource.get(),
- nsHtml5Tokenizer::COMMENT_END, reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\r': {
- c = '\n';
- P::silentCarriageReturn(this);
- if (P::reportErrors) {
- errNestedComment();
- }
- adjustDoubleHyphenAndAppendToStrBufAndErr(
- c, reportedConsecutiveHyphens);
- reportedConsecutiveHyphens = true;
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::COMMENT, reconsume, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- if (P::reportErrors) {
- errNestedComment();
- }
- adjustDoubleHyphenAndAppendToStrBufAndErr(
- c, reportedConsecutiveHyphens);
- reportedConsecutiveHyphens = true;
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::COMMENT, reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '!': {
- if (P::reportErrors) {
- errNestedComment();
- }
- adjustDoubleHyphenAndAppendToStrBufAndErr(
- c, reportedConsecutiveHyphens);
- reportedConsecutiveHyphens = true;
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::COMMENT_END_BANG,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\0': {
- c = 0xfffd;
- [[fallthrough]];
- }
- default: {
- if (P::reportErrors) {
- errNestedComment();
- }
- adjustDoubleHyphenAndAppendToStrBufAndErr(
- c, reportedConsecutiveHyphens);
- reportedConsecutiveHyphens = true;
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::COMMENT, reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- case COMMENT_START_DASH: {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '-': {
- appendStrBuf(c);
- state =
- P::transition(mViewSource.get(),
- nsHtml5Tokenizer::COMMENT_END, reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '>': {
- if (P::reportErrors) {
- errPrematureEndOfComment();
- }
- emitComment(1, pos);
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
- reconsume, pos);
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- case '<': {
- appendStrBuf(c);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::COMMENT_LESSTHAN,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\r': {
- appendStrBufCarriageReturn<P>();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::COMMENT, reconsume, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- appendStrBufLineFeed<P>();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::COMMENT, reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\0': {
- c = 0xfffd;
- [[fallthrough]];
- }
- default: {
- appendStrBuf(c);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::COMMENT, reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- case CDATA_START: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- if (index < 6) {
- if (c == nsHtml5Tokenizer::CDATA_LSQB[index]) {
- appendStrBuf(c);
- } else {
- if (P::reportErrors) {
- errBogusComment();
- }
- reconsume = true;
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BOGUS_COMMENT,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- index++;
- continue;
- } else {
- clearStrBufAfterUse();
- cstart = pos;
- reconsume = true;
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::CDATA_SECTION, reconsume,
- pos);
- break;
- }
- }
- [[fallthrough]];
- }
- case CDATA_SECTION: {
- for (;;) {
- if (reconsume) {
- reconsume = false;
- } else {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- }
- switch (c) {
- case ']': {
- flushChars(buf, pos);
- state =
- P::transition(mViewSource.get(),
- nsHtml5Tokenizer::CDATA_RSQB, reconsume, pos);
- NS_HTML5_BREAK(cdatasectionloop);
- }
- case '\0': {
- maybeEmitReplacementCharacter(buf, pos);
- continue;
- }
- case '\r': {
- emitCarriageReturn<P>(buf, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- default: {
- continue;
- }
- }
- }
- cdatasectionloop_end:;
- [[fallthrough]];
- }
- case CDATA_RSQB: {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case ']': {
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::CDATA_RSQB_RSQB,
- reconsume, pos);
- break;
- }
- default: {
- tokenHandler->characters(nsHtml5Tokenizer::RSQB_RSQB, 0, 1);
- cstart = pos;
- reconsume = true;
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::CDATA_SECTION, reconsume,
- pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- [[fallthrough]];
- }
- case CDATA_RSQB_RSQB: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case ']': {
- tokenHandler->characters(nsHtml5Tokenizer::RSQB_RSQB, 0, 1);
- continue;
- }
- case '>': {
- cstart = pos + 1;
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
- reconsume, pos);
- suspendIfRequestedAfterCurrentNonTextToken();
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- default: {
- tokenHandler->characters(nsHtml5Tokenizer::RSQB_RSQB, 0, 2);
- cstart = pos;
- reconsume = true;
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::CDATA_SECTION,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- }
- case ATTRIBUTE_VALUE_SINGLE_QUOTED: {
- for (;;) {
- if (reconsume) {
- reconsume = false;
- } else {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- }
- switch (c) {
- case '\'': {
- addAttributeWithValue();
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::AFTER_ATTRIBUTE_VALUE_QUOTED, reconsume,
- pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '&': {
- MOZ_ASSERT(!charRefBufLen,
- "charRefBufLen not reset after previous use!");
- appendCharRefBuf(c);
- setAdditionalAndRememberAmpersandLocation('\'');
- returnState = state;
- state =
- P::transition(mViewSource.get(),
- nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE,
- reconsume, pos);
- NS_HTML5_BREAK(attributevaluesinglequotedloop);
- }
- case '\r': {
- appendStrBufCarriageReturn<P>();
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- appendStrBufLineFeed<P>();
- continue;
- }
- case '\0': {
- c = 0xfffd;
- [[fallthrough]];
- }
- default: {
- appendStrBuf(c);
- continue;
- }
- }
- }
- attributevaluesinglequotedloop_end:;
- [[fallthrough]];
- }
- case CONSUME_CHARACTER_REFERENCE: {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case ' ':
- case '\t':
- case '\n':
- case '\r':
- case '\f':
- case '<':
- case '&':
- case '\0':
- case ';': {
- emitOrAppendCharRefBuf(returnState);
- if (!(returnState & DATA_AND_RCDATA_MASK)) {
- cstart = pos;
- }
- reconsume = true;
- state =
- P::transition(mViewSource.get(), returnState, reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '#': {
- appendCharRefBuf('#');
- state =
- P::transition(mViewSource.get(),
- nsHtml5Tokenizer::CONSUME_NCR, reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- default: {
- if (c == additional) {
- emitOrAppendCharRefBuf(returnState);
- reconsume = true;
- state = P::transition(mViewSource.get(), returnState, reconsume,
- pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- if (c >= 'a' && c <= 'z') {
- firstCharKey = c - 'a' + 26;
- } else if (c >= 'A' && c <= 'Z') {
- firstCharKey = c - 'A';
- } else {
- if (c == ';') {
- if (P::reportErrors) {
- errNoNamedCharacterMatch();
- }
- }
- emitOrAppendCharRefBuf(returnState);
- if (!(returnState & DATA_AND_RCDATA_MASK)) {
- cstart = pos;
- }
- reconsume = true;
- state = P::transition(mViewSource.get(), returnState, reconsume,
- pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- appendCharRefBuf(c);
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::CHARACTER_REFERENCE_HILO_LOOKUP, reconsume,
- pos);
- break;
- }
- }
- [[fallthrough]];
- }
- case CHARACTER_REFERENCE_HILO_LOOKUP: {
- {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- int32_t hilo = 0;
- if (c <= 'z') {
- const int32_t* row = nsHtml5NamedCharactersAccel::HILO_ACCEL[c];
- if (row) {
- hilo = row[firstCharKey];
- }
- }
- if (!hilo) {
- if (c == ';') {
- if (P::reportErrors) {
- errNoNamedCharacterMatch();
- }
- }
- emitOrAppendCharRefBuf(returnState);
- if (!(returnState & DATA_AND_RCDATA_MASK)) {
- cstart = pos;
- }
- reconsume = true;
- state =
- P::transition(mViewSource.get(), returnState, reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- appendCharRefBuf(c);
- lo = hilo & 0xFFFF;
- hi = hilo >> 16;
- entCol = -1;
- candidate = -1;
- charRefBufMark = 0;
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::CHARACTER_REFERENCE_TAIL,
- reconsume, pos);
- }
- [[fallthrough]];
- }
- case CHARACTER_REFERENCE_TAIL: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- entCol++;
- for (;;) {
- if (hi < lo) {
- NS_HTML5_BREAK(outer);
- }
- if (entCol == nsHtml5NamedCharacters::NAMES[lo].length()) {
- candidate = lo;
- charRefBufMark = charRefBufLen;
- lo++;
- } else if (entCol > nsHtml5NamedCharacters::NAMES[lo].length()) {
- NS_HTML5_BREAK(outer);
- } else if (c > nsHtml5NamedCharacters::NAMES[lo].charAt(entCol)) {
- lo++;
- } else {
- NS_HTML5_BREAK(loloop);
- }
- }
- loloop_end:;
- for (;;) {
- if (hi < lo) {
- NS_HTML5_BREAK(outer);
- }
- if (entCol == nsHtml5NamedCharacters::NAMES[hi].length()) {
- NS_HTML5_BREAK(hiloop);
- }
- if (entCol > nsHtml5NamedCharacters::NAMES[hi].length()) {
- NS_HTML5_BREAK(outer);
- } else if (c < nsHtml5NamedCharacters::NAMES[hi].charAt(entCol)) {
- hi--;
- } else {
- NS_HTML5_BREAK(hiloop);
- }
- }
- hiloop_end:;
- if (c == ';') {
- if (entCol + 1 == nsHtml5NamedCharacters::NAMES[lo].length()) {
- candidate = lo;
- charRefBufMark = charRefBufLen;
- }
- NS_HTML5_BREAK(outer);
- }
- if (hi < lo) {
- NS_HTML5_BREAK(outer);
- }
- appendCharRefBuf(c);
- continue;
- }
- outer_end:;
- if (candidate == -1) {
- if (c == ';') {
- if (P::reportErrors) {
- errNoNamedCharacterMatch();
- }
- }
- emitOrAppendCharRefBuf(returnState);
- if (!(returnState & DATA_AND_RCDATA_MASK)) {
- cstart = pos;
- }
- reconsume = true;
- state =
- P::transition(mViewSource.get(), returnState, reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- } else {
- const nsHtml5CharacterName& candidateName =
- nsHtml5NamedCharacters::NAMES[candidate];
- if (!candidateName.length() ||
- candidateName.charAt(candidateName.length() - 1) != ';') {
- if ((returnState & DATA_AND_RCDATA_MASK)) {
- char16_t ch;
- if (charRefBufMark == charRefBufLen) {
- ch = c;
- } else {
- ch = charRefBuf[charRefBufMark];
- }
- if (ch == '=' || (ch >= '0' && ch <= '9') ||
- (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z')) {
- if (c == ';') {
- if (P::reportErrors) {
- errNoNamedCharacterMatch();
- }
- }
- appendCharRefBufToStrBuf();
- reconsume = true;
- state = P::transition(mViewSource.get(), returnState,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- if ((returnState & DATA_AND_RCDATA_MASK)) {
- if (P::reportErrors) {
- errUnescapedAmpersandInterpretedAsCharacterReference();
- }
- } else {
- if (P::reportErrors) {
- errNotSemicolonTerminated();
- }
- }
- }
- P::completedNamedCharacterReference(mViewSource.get());
- const char16_t* val = nsHtml5NamedCharacters::VALUES[candidate];
- if (!val[1]) {
- emitOrAppendOne(val, returnState);
- } else {
- emitOrAppendTwo(val, returnState);
- }
- if (charRefBufMark < charRefBufLen) {
- if ((returnState & DATA_AND_RCDATA_MASK)) {
- appendStrBuf(charRefBuf, charRefBufMark,
- charRefBufLen - charRefBufMark);
- } else {
- tokenHandler->characters(charRefBuf, charRefBufMark,
- charRefBufLen - charRefBufMark);
- }
- }
- bool earlyBreak = (c == ';' && charRefBufMark == charRefBufLen);
- charRefBufLen = 0;
- if (!(returnState & DATA_AND_RCDATA_MASK)) {
- cstart = earlyBreak ? pos + 1 : pos;
- }
- reconsume = !earlyBreak;
- state =
- P::transition(mViewSource.get(), returnState, reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- case CONSUME_NCR: {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- value = 0;
- seenDigits = false;
- switch (c) {
- case 'x':
- case 'X': {
- appendCharRefBuf(c);
- state =
- P::transition(mViewSource.get(),
- nsHtml5Tokenizer::HEX_NCR_LOOP, reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- default: {
- reconsume = true;
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::DECIMAL_NRC_LOOP,
- reconsume, pos);
- break;
- }
- }
- [[fallthrough]];
- }
- case DECIMAL_NRC_LOOP: {
- for (;;) {
- if (reconsume) {
- reconsume = false;
- } else {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- }
- MOZ_ASSERT(value >= 0, "value must not become negative.");
- if (c >= '0' && c <= '9') {
- seenDigits = true;
- if (value <= 0x10FFFF) {
- value *= 10;
- value += c - '0';
- }
- continue;
- } else if (c == ';') {
- if (seenDigits) {
- if (!(returnState & DATA_AND_RCDATA_MASK)) {
- cstart = pos + 1;
- }
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::HANDLE_NCR_VALUE,
- reconsume, pos);
- NS_HTML5_BREAK(decimalloop);
- } else {
- if (P::reportErrors) {
- errNoDigitsInNCR();
- }
- appendCharRefBuf(';');
- emitOrAppendCharRefBuf(returnState);
- if (!(returnState & DATA_AND_RCDATA_MASK)) {
- cstart = pos + 1;
- }
- state = P::transition(mViewSource.get(), returnState, reconsume,
- pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- } else {
- if (!seenDigits) {
- if (P::reportErrors) {
- errNoDigitsInNCR();
- }
- emitOrAppendCharRefBuf(returnState);
- if (!(returnState & DATA_AND_RCDATA_MASK)) {
- cstart = pos;
- }
- reconsume = true;
- state = P::transition(mViewSource.get(), returnState, reconsume,
- pos);
- NS_HTML5_CONTINUE(stateloop);
- } else {
- if (P::reportErrors) {
- errCharRefLacksSemicolon();
- }
- if (!(returnState & DATA_AND_RCDATA_MASK)) {
- cstart = pos;
- }
- reconsume = true;
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::HANDLE_NCR_VALUE,
- reconsume, pos);
- NS_HTML5_BREAK(decimalloop);
- }
- }
- }
- decimalloop_end:;
- [[fallthrough]];
- }
- case HANDLE_NCR_VALUE: {
- charRefBufLen = 0;
- handleNcrValue(returnState);
- state = P::transition(mViewSource.get(), returnState, reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case HEX_NCR_LOOP: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- MOZ_ASSERT(value >= 0, "value must not become negative.");
- if (c >= '0' && c <= '9') {
- seenDigits = true;
- if (value <= 0x10FFFF) {
- value *= 16;
- value += c - '0';
- }
- continue;
- } else if (c >= 'A' && c <= 'F') {
- seenDigits = true;
- if (value <= 0x10FFFF) {
- value *= 16;
- value += c - 'A' + 10;
- }
- continue;
- } else if (c >= 'a' && c <= 'f') {
- seenDigits = true;
- if (value <= 0x10FFFF) {
- value *= 16;
- value += c - 'a' + 10;
- }
- continue;
- } else if (c == ';') {
- if (seenDigits) {
- if (!(returnState & DATA_AND_RCDATA_MASK)) {
- cstart = pos + 1;
- }
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::HANDLE_NCR_VALUE,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- } else {
- if (P::reportErrors) {
- errNoDigitsInNCR();
- }
- appendCharRefBuf(';');
- emitOrAppendCharRefBuf(returnState);
- if (!(returnState & DATA_AND_RCDATA_MASK)) {
- cstart = pos + 1;
- }
- state = P::transition(mViewSource.get(), returnState, reconsume,
- pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- } else {
- if (!seenDigits) {
- if (P::reportErrors) {
- errNoDigitsInNCR();
- }
- emitOrAppendCharRefBuf(returnState);
- if (!(returnState & DATA_AND_RCDATA_MASK)) {
- cstart = pos;
- }
- reconsume = true;
- state = P::transition(mViewSource.get(), returnState, reconsume,
- pos);
- NS_HTML5_CONTINUE(stateloop);
- } else {
- if (P::reportErrors) {
- errCharRefLacksSemicolon();
- }
- if (!(returnState & DATA_AND_RCDATA_MASK)) {
- cstart = pos;
- }
- reconsume = true;
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::HANDLE_NCR_VALUE,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- }
- case PLAINTEXT: {
- for (;;) {
- if (reconsume) {
- reconsume = false;
- } else {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- }
- switch (c) {
- case '\0': {
- emitPlaintextReplacementCharacter(buf, pos);
- continue;
- }
- case '\r': {
- emitCarriageReturn<P>(buf, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- default: {
- continue;
- }
- }
- }
- }
- case CLOSE_TAG_OPEN: {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '>': {
- if (P::reportErrors) {
- errLtSlashGt();
- }
- cstart = pos + 1;
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\r': {
- P::silentCarriageReturn(this);
- if (P::reportErrors) {
- errGarbageAfterLtSlash();
- }
- clearStrBufBeforeUse();
- appendStrBuf('\n');
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BOGUS_COMMENT, reconsume,
- pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- if (P::reportErrors) {
- errGarbageAfterLtSlash();
- }
- clearStrBufBeforeUse();
- appendStrBuf(c);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BOGUS_COMMENT, reconsume,
- pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\0': {
- c = 0xfffd;
- [[fallthrough]];
- }
- default: {
- if (c >= 'A' && c <= 'Z') {
- c += 0x20;
- }
- if (c >= 'a' && c <= 'z') {
- endTag = true;
- clearStrBufBeforeUse();
- appendStrBuf(c);
- containsHyphen = false;
- state =
- P::transition(mViewSource.get(), nsHtml5Tokenizer::TAG_NAME,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- } else {
- if (P::reportErrors) {
- errGarbageAfterLtSlash();
- }
- clearStrBufBeforeUse();
- appendStrBuf(c);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BOGUS_COMMENT,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- }
- case RCDATA: {
- for (;;) {
- if (reconsume) {
- reconsume = false;
- switch (c) {
- case '&': {
- flushChars(buf, pos);
- MOZ_ASSERT(!charRefBufLen,
- "charRefBufLen not reset after previous use!");
- appendCharRefBuf(c);
- setAdditionalAndRememberAmpersandLocation('\0');
- returnState = state;
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE, reconsume,
- pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '<': {
- flushChars(buf, pos);
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::RAWTEXT_RCDATA_LESS_THAN_SIGN,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\0': {
- maybeEmitReplacementCharacter(buf, pos);
- break;
- }
- case '\r': {
- emitCarriageReturn<P>(buf, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- default: {
- break;
- }
- }
- }
- rcdatamiddle:
- for (;;) {
- ++pos;
- pos += P::accelerateAdvancementData(this, buf, pos, endPos);
- for (;;) {
- if (pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '&': {
- flushChars(buf, pos);
- MOZ_ASSERT(!charRefBufLen,
- "charRefBufLen not reset after previous use!");
- appendCharRefBuf(c);
- setAdditionalAndRememberAmpersandLocation('\0');
- returnState = state;
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '<': {
- flushChars(buf, pos);
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::RAWTEXT_RCDATA_LESS_THAN_SIGN,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\0': {
- maybeEmitReplacementCharacter(buf, pos);
- NS_HTML5_CONTINUE(rcdatamiddle);
- }
- case '\r': {
- emitCarriageReturn<P>(buf, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- NS_HTML5_CONTINUE(rcdatamiddle);
- }
- default: {
- ++pos;
- continue;
- }
- }
- }
- }
- }
- }
- case RAWTEXT: {
- for (;;) {
- if (reconsume) {
- reconsume = false;
- } else {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- }
- switch (c) {
- case '<': {
- flushChars(buf, pos);
- returnState = state;
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::RAWTEXT_RCDATA_LESS_THAN_SIGN, reconsume,
- pos);
- NS_HTML5_BREAK(rawtextloop);
- }
- case '\0': {
- emitReplacementCharacter(buf, pos);
- continue;
- }
- case '\r': {
- emitCarriageReturn<P>(buf, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- default: {
- continue;
- }
- }
- }
- rawtextloop_end:;
- [[fallthrough]];
- }
- case RAWTEXT_RCDATA_LESS_THAN_SIGN: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '/': {
- index = 0;
- clearStrBufBeforeUse();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::NON_DATA_END_TAG_NAME,
- reconsume, pos);
- NS_HTML5_BREAK(rawtextrcdatalessthansignloop);
- }
- default: {
- tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 1);
- cstart = pos;
- reconsume = true;
- state = P::transition(mViewSource.get(), returnState, reconsume,
- pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- rawtextrcdatalessthansignloop_end:;
- [[fallthrough]];
- }
- case NON_DATA_END_TAG_NAME: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- if (!endTagExpectationAsArray) {
- tokenHandler->characters(nsHtml5Tokenizer::LT_SOLIDUS, 0, 2);
- cstart = pos;
- reconsume = true;
- state =
- P::transition(mViewSource.get(), returnState, reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- } else if (index < endTagExpectationAsArray.length) {
- char16_t e = endTagExpectationAsArray[index];
- char16_t folded = c;
- if (c >= 'A' && c <= 'Z') {
- folded += 0x20;
- }
- if (folded != e) {
- tokenHandler->characters(nsHtml5Tokenizer::LT_SOLIDUS, 0, 2);
- emitStrBuf();
- cstart = pos;
- reconsume = true;
- state = P::transition(mViewSource.get(), returnState, reconsume,
- pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- appendStrBuf(c);
- index++;
- continue;
- } else {
- endTag = true;
- tagName = endTagExpectation;
- switch (c) {
- case '\r': {
- P::silentCarriageReturn(this);
- clearStrBufAfterUse();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME,
- reconsume, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- case ' ':
- case '\t':
- case '\f': {
- clearStrBufAfterUse();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '/': {
- clearStrBufAfterUse();
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::SELF_CLOSING_START_TAG, reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '>': {
- clearStrBufAfterUse();
- state = P::transition(mViewSource.get(),
- emitCurrentTagToken(false, pos),
- reconsume, pos);
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- default: {
- tokenHandler->characters(nsHtml5Tokenizer::LT_SOLIDUS, 0, 2);
- emitStrBuf();
- cstart = pos;
- reconsume = true;
- state = P::transition(mViewSource.get(), returnState,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- }
- }
- case BOGUS_COMMENT: {
- for (;;) {
- if (reconsume) {
- reconsume = false;
- } else {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- }
- switch (c) {
- case '>': {
- emitComment(0, pos);
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
- reconsume, pos);
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- case '-': {
- appendStrBuf(c);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BOGUS_COMMENT_HYPHEN,
- reconsume, pos);
- NS_HTML5_BREAK(boguscommentloop);
- }
- case '\r': {
- appendStrBufCarriageReturn<P>();
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- appendStrBufLineFeed<P>();
- continue;
- }
- case '\0': {
- c = 0xfffd;
- [[fallthrough]];
- }
- default: {
- appendStrBuf(c);
- continue;
- }
- }
- }
- boguscommentloop_end:;
- [[fallthrough]];
- }
- case BOGUS_COMMENT_HYPHEN: {
- boguscommenthyphenloop:
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '>': {
- emitComment(0, pos);
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
- reconsume, pos);
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- case '-': {
- appendSecondHyphenToBogusComment();
- NS_HTML5_CONTINUE(boguscommenthyphenloop);
- }
- case '\r': {
- appendStrBufCarriageReturn<P>();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BOGUS_COMMENT,
- reconsume, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- appendStrBufLineFeed<P>();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BOGUS_COMMENT,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\0': {
- c = 0xfffd;
- [[fallthrough]];
- }
- default: {
- appendStrBuf(c);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BOGUS_COMMENT,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- }
- case SCRIPT_DATA: {
- for (;;) {
- if (reconsume) {
- reconsume = false;
- } else {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- }
- switch (c) {
- case '<': {
- flushChars(buf, pos);
- returnState = state;
- state =
- P::transition(mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_LESS_THAN_SIGN,
- reconsume, pos);
- NS_HTML5_BREAK(scriptdataloop);
- }
- case '\0': {
- emitReplacementCharacter(buf, pos);
- continue;
- }
- case '\r': {
- emitCarriageReturn<P>(buf, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- default: {
- continue;
- }
- }
- }
- scriptdataloop_end:;
- [[fallthrough]];
- }
- case SCRIPT_DATA_LESS_THAN_SIGN: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '/': {
- index = 0;
- clearStrBufBeforeUse();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::NON_DATA_END_TAG_NAME,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '!': {
- tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 1);
- cstart = pos;
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_ESCAPE_START, reconsume, pos);
- NS_HTML5_BREAK(scriptdatalessthansignloop);
- }
- default: {
- tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 1);
- cstart = pos;
- reconsume = true;
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA, reconsume,
- pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- scriptdatalessthansignloop_end:;
- [[fallthrough]];
- }
- case SCRIPT_DATA_ESCAPE_START: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '-': {
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_ESCAPE_START_DASH, reconsume,
- pos);
- NS_HTML5_BREAK(scriptdataescapestartloop);
- }
- default: {
- reconsume = true;
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA, reconsume,
- pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- scriptdataescapestartloop_end:;
- [[fallthrough]];
- }
- case SCRIPT_DATA_ESCAPE_START_DASH: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '-': {
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_DASH_DASH, reconsume,
- pos);
- NS_HTML5_BREAK(scriptdataescapestartdashloop);
- }
- default: {
- reconsume = true;
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA, reconsume,
- pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- scriptdataescapestartdashloop_end:;
- [[fallthrough]];
- }
- case SCRIPT_DATA_ESCAPED_DASH_DASH: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '-': {
- continue;
- }
- case '<': {
- flushChars(buf, pos);
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '>': {
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA, reconsume,
- pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\0': {
- emitReplacementCharacter(buf, pos);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED,
- reconsume, pos);
- NS_HTML5_BREAK(scriptdataescapeddashdashloop);
- }
- case '\r': {
- emitCarriageReturn<P>(buf, pos);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED,
- reconsume, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- default: {
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED,
- reconsume, pos);
- NS_HTML5_BREAK(scriptdataescapeddashdashloop);
- }
- }
- }
- scriptdataescapeddashdashloop_end:;
- [[fallthrough]];
- }
- case SCRIPT_DATA_ESCAPED: {
- for (;;) {
- if (reconsume) {
- reconsume = false;
- } else {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- }
- switch (c) {
- case '-': {
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_DASH, reconsume, pos);
- NS_HTML5_BREAK(scriptdataescapedloop);
- }
- case '<': {
- flushChars(buf, pos);
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\0': {
- emitReplacementCharacter(buf, pos);
- continue;
- }
- case '\r': {
- emitCarriageReturn<P>(buf, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- default: {
- continue;
- }
- }
- }
- scriptdataescapedloop_end:;
- [[fallthrough]];
- }
- case SCRIPT_DATA_ESCAPED_DASH: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '-': {
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_DASH_DASH, reconsume,
- pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '<': {
- flushChars(buf, pos);
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN,
- reconsume, pos);
- NS_HTML5_BREAK(scriptdataescapeddashloop);
- }
- case '\0': {
- emitReplacementCharacter(buf, pos);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\r': {
- emitCarriageReturn<P>(buf, pos);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED,
- reconsume, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- default: {
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- scriptdataescapeddashloop_end:;
- [[fallthrough]];
- }
- case SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '/': {
- index = 0;
- clearStrBufBeforeUse();
- returnState = nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED;
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::NON_DATA_END_TAG_NAME,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case 'S':
- case 's': {
- tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 1);
- cstart = pos;
- index = 1;
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPE_START,
- reconsume, pos);
- NS_HTML5_BREAK(scriptdataescapedlessthanloop);
- }
- default: {
- tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 1);
- cstart = pos;
- reconsume = true;
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- scriptdataescapedlessthanloop_end:;
- [[fallthrough]];
- }
- case SCRIPT_DATA_DOUBLE_ESCAPE_START: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- MOZ_ASSERT(index > 0);
- if (index < 6) {
- char16_t folded = c;
- if (c >= 'A' && c <= 'Z') {
- folded += 0x20;
- }
- if (folded != nsHtml5Tokenizer::SCRIPT_ARR[index]) {
- reconsume = true;
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- index++;
- continue;
- }
- switch (c) {
- case '\r': {
- emitCarriageReturn<P>(buf, pos);
- state =
- P::transition(mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED,
- reconsume, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- case ' ':
- case '\t':
- case '\f':
- case '/':
- case '>': {
- state =
- P::transition(mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED,
- reconsume, pos);
- NS_HTML5_BREAK(scriptdatadoubleescapestartloop);
- }
- default: {
- reconsume = true;
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- scriptdatadoubleescapestartloop_end:;
- [[fallthrough]];
- }
- case SCRIPT_DATA_DOUBLE_ESCAPED: {
- for (;;) {
- if (reconsume) {
- reconsume = false;
- } else {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- }
- switch (c) {
- case '-': {
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED_DASH,
- reconsume, pos);
- NS_HTML5_BREAK(scriptdatadoubleescapedloop);
- }
- case '<': {
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\0': {
- emitReplacementCharacter(buf, pos);
- continue;
- }
- case '\r': {
- emitCarriageReturn<P>(buf, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- default: {
- continue;
- }
- }
- }
- scriptdatadoubleescapedloop_end:;
- [[fallthrough]];
- }
- case SCRIPT_DATA_DOUBLE_ESCAPED_DASH: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '-': {
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH,
- reconsume, pos);
- NS_HTML5_BREAK(scriptdatadoubleescapeddashloop);
- }
- case '<': {
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\0': {
- emitReplacementCharacter(buf, pos);
- state =
- P::transition(mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\r': {
- emitCarriageReturn<P>(buf, pos);
- state =
- P::transition(mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED,
- reconsume, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- default: {
- state =
- P::transition(mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- scriptdatadoubleescapeddashloop_end:;
- [[fallthrough]];
- }
- case SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '-': {
- continue;
- }
- case '<': {
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN,
- reconsume, pos);
- NS_HTML5_BREAK(scriptdatadoubleescapeddashdashloop);
- }
- case '>': {
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA, reconsume,
- pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\0': {
- emitReplacementCharacter(buf, pos);
- state =
- P::transition(mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\r': {
- emitCarriageReturn<P>(buf, pos);
- state =
- P::transition(mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED,
- reconsume, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- default: {
- state =
- P::transition(mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- scriptdatadoubleescapeddashdashloop_end:;
- [[fallthrough]];
- }
- case SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '/': {
- index = 0;
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPE_END, reconsume,
- pos);
- NS_HTML5_BREAK(scriptdatadoubleescapedlessthanloop);
- }
- default: {
- reconsume = true;
- state =
- P::transition(mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- scriptdatadoubleescapedlessthanloop_end:;
- [[fallthrough]];
- }
- case SCRIPT_DATA_DOUBLE_ESCAPE_END: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- if (index < 6) {
- char16_t folded = c;
- if (c >= 'A' && c <= 'Z') {
- folded += 0x20;
- }
- if (folded != nsHtml5Tokenizer::SCRIPT_ARR[index]) {
- reconsume = true;
- state =
- P::transition(mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- index++;
- continue;
- }
- switch (c) {
- case '\r': {
- emitCarriageReturn<P>(buf, pos);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED,
- reconsume, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- case ' ':
- case '\t':
- case '\f':
- case '/':
- case '>': {
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- default: {
- reconsume = true;
- state =
- P::transition(mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- }
- case MARKUP_DECLARATION_OCTYPE: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- if (index < 6) {
- char16_t folded = c;
- if (c >= 'A' && c <= 'Z') {
- folded += 0x20;
- }
- if (folded == nsHtml5Tokenizer::OCTYPE[index]) {
- appendStrBuf(c);
- } else {
- if (P::reportErrors) {
- errBogusComment();
- }
- reconsume = true;
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BOGUS_COMMENT,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- index++;
- continue;
- } else {
- reconsume = true;
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::DOCTYPE, reconsume, pos);
- NS_HTML5_BREAK(markupdeclarationdoctypeloop);
- }
- }
- markupdeclarationdoctypeloop_end:;
- [[fallthrough]];
- }
- case DOCTYPE: {
- for (;;) {
- if (reconsume) {
- reconsume = false;
- } else {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- }
- initDoctypeFields();
- switch (c) {
- case '\r': {
- P::silentCarriageReturn(this);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BEFORE_DOCTYPE_NAME,
- reconsume, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- case ' ':
- case '\t':
- case '\f': {
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BEFORE_DOCTYPE_NAME,
- reconsume, pos);
- NS_HTML5_BREAK(doctypeloop);
- }
- default: {
- if (P::reportErrors) {
- errMissingSpaceBeforeDoctypeName();
- }
- reconsume = true;
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BEFORE_DOCTYPE_NAME,
- reconsume, pos);
- NS_HTML5_BREAK(doctypeloop);
- }
- }
- }
- doctypeloop_end:;
- [[fallthrough]];
- }
- case BEFORE_DOCTYPE_NAME: {
- for (;;) {
- if (reconsume) {
- reconsume = false;
- } else {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- }
- switch (c) {
- case '\r': {
- P::silentCarriageReturn(this);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- case ' ':
- case '\t':
- case '\f': {
- continue;
- }
- case '>': {
- if (P::reportErrors) {
- errNamelessDoctype();
- }
- forceQuirks = true;
- emitDoctypeToken(pos);
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
- reconsume, pos);
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\0': {
- c = 0xfffd;
- [[fallthrough]];
- }
- default: {
- if (c >= 'A' && c <= 'Z') {
- c += 0x20;
- }
- clearStrBufBeforeUse();
- appendStrBuf(c);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::DOCTYPE_NAME, reconsume,
- pos);
- NS_HTML5_BREAK(beforedoctypenameloop);
- }
- }
- }
- beforedoctypenameloop_end:;
- [[fallthrough]];
- }
- case DOCTYPE_NAME: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '\r': {
- P::silentCarriageReturn(this);
- strBufToDoctypeName();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::AFTER_DOCTYPE_NAME,
- reconsume, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- case ' ':
- case '\t':
- case '\f': {
- strBufToDoctypeName();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::AFTER_DOCTYPE_NAME,
- reconsume, pos);
- NS_HTML5_BREAK(doctypenameloop);
- }
- case '>': {
- strBufToDoctypeName();
- emitDoctypeToken(pos);
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
- reconsume, pos);
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\0': {
- c = 0xfffd;
- [[fallthrough]];
- }
- default: {
- if (c >= 'A' && c <= 'Z') {
- c += 0x0020;
- }
- appendStrBuf(c);
- continue;
- }
- }
- }
- doctypenameloop_end:;
- [[fallthrough]];
- }
- case AFTER_DOCTYPE_NAME: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '\r': {
- P::silentCarriageReturn(this);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- case ' ':
- case '\t':
- case '\f': {
- continue;
- }
- case '>': {
- emitDoctypeToken(pos);
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
- reconsume, pos);
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- case 'p':
- case 'P': {
- index = 0;
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::DOCTYPE_UBLIC,
- reconsume, pos);
- NS_HTML5_BREAK(afterdoctypenameloop);
- }
- case 's':
- case 'S': {
- index = 0;
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::DOCTYPE_YSTEM,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- default: {
- bogusDoctype();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BOGUS_DOCTYPE,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- afterdoctypenameloop_end:;
- [[fallthrough]];
- }
- case DOCTYPE_UBLIC: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- if (index < 5) {
- char16_t folded = c;
- if (c >= 'A' && c <= 'Z') {
- folded += 0x20;
- }
- if (folded != nsHtml5Tokenizer::UBLIC[index]) {
- bogusDoctype();
- reconsume = true;
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BOGUS_DOCTYPE,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- index++;
- continue;
- } else {
- reconsume = true;
- state =
- P::transition(mViewSource.get(),
- nsHtml5Tokenizer::AFTER_DOCTYPE_PUBLIC_KEYWORD,
- reconsume, pos);
- NS_HTML5_BREAK(doctypeublicloop);
- }
- }
- doctypeublicloop_end:;
- [[fallthrough]];
- }
- case AFTER_DOCTYPE_PUBLIC_KEYWORD: {
- for (;;) {
- if (reconsume) {
- reconsume = false;
- } else {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- }
- switch (c) {
- case '\r': {
- P::silentCarriageReturn(this);
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::BEFORE_DOCTYPE_PUBLIC_IDENTIFIER,
- reconsume, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- case ' ':
- case '\t':
- case '\f': {
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::BEFORE_DOCTYPE_PUBLIC_IDENTIFIER,
- reconsume, pos);
- NS_HTML5_BREAK(afterdoctypepublickeywordloop);
- }
- case '\"': {
- if (P::reportErrors) {
- errNoSpaceBetweenDoctypePublicKeywordAndQuote();
- }
- clearStrBufBeforeUse();
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\'': {
- if (P::reportErrors) {
- errNoSpaceBetweenDoctypePublicKeywordAndQuote();
- }
- clearStrBufBeforeUse();
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '>': {
- if (P::reportErrors) {
- errExpectedPublicId();
- }
- forceQuirks = true;
- emitDoctypeToken(pos);
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
- reconsume, pos);
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- default: {
- bogusDoctype();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BOGUS_DOCTYPE,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- afterdoctypepublickeywordloop_end:;
- [[fallthrough]];
- }
- case BEFORE_DOCTYPE_PUBLIC_IDENTIFIER: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '\r': {
- P::silentCarriageReturn(this);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- case ' ':
- case '\t':
- case '\f': {
- continue;
- }
- case '\"': {
- clearStrBufBeforeUse();
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED,
- reconsume, pos);
- NS_HTML5_BREAK(beforedoctypepublicidentifierloop);
- }
- case '\'': {
- clearStrBufBeforeUse();
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '>': {
- if (P::reportErrors) {
- errExpectedPublicId();
- }
- forceQuirks = true;
- emitDoctypeToken(pos);
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
- reconsume, pos);
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- default: {
- bogusDoctype();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BOGUS_DOCTYPE,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- beforedoctypepublicidentifierloop_end:;
- [[fallthrough]];
- }
- case DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '\"': {
- publicIdentifier = strBufToString();
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::AFTER_DOCTYPE_PUBLIC_IDENTIFIER,
- reconsume, pos);
- NS_HTML5_BREAK(doctypepublicidentifierdoublequotedloop);
- }
- case '>': {
- if (P::reportErrors) {
- errGtInPublicId();
- }
- forceQuirks = true;
- publicIdentifier = strBufToString();
- emitDoctypeToken(pos);
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
- reconsume, pos);
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\r': {
- appendStrBufCarriageReturn<P>();
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- appendStrBufLineFeed<P>();
- continue;
- }
- case '\0': {
- c = 0xfffd;
- [[fallthrough]];
- }
- default: {
- appendStrBuf(c);
- continue;
- }
- }
- }
- doctypepublicidentifierdoublequotedloop_end:;
- [[fallthrough]];
- }
- case AFTER_DOCTYPE_PUBLIC_IDENTIFIER: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '\r': {
- P::silentCarriageReturn(this);
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::
- BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS,
- reconsume, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- case ' ':
- case '\t':
- case '\f': {
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::
- BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS,
- reconsume, pos);
- NS_HTML5_BREAK(afterdoctypepublicidentifierloop);
- }
- case '>': {
- emitDoctypeToken(pos);
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
- reconsume, pos);
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\"': {
- if (P::reportErrors) {
- errNoSpaceBetweenPublicAndSystemIds();
- }
- clearStrBufBeforeUse();
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\'': {
- if (P::reportErrors) {
- errNoSpaceBetweenPublicAndSystemIds();
- }
- clearStrBufBeforeUse();
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- default: {
- bogusDoctype();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BOGUS_DOCTYPE,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- afterdoctypepublicidentifierloop_end:;
- [[fallthrough]];
- }
- case BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '\r': {
- P::silentCarriageReturn(this);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- case ' ':
- case '\t':
- case '\f': {
- continue;
- }
- case '>': {
- emitDoctypeToken(pos);
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
- reconsume, pos);
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\"': {
- clearStrBufBeforeUse();
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED,
- reconsume, pos);
- NS_HTML5_BREAK(betweendoctypepublicandsystemidentifiersloop);
- }
- case '\'': {
- clearStrBufBeforeUse();
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- default: {
- bogusDoctype();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BOGUS_DOCTYPE,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- betweendoctypepublicandsystemidentifiersloop_end:;
- [[fallthrough]];
- }
- case DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '\"': {
- systemIdentifier = strBufToString();
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::AFTER_DOCTYPE_SYSTEM_IDENTIFIER,
- reconsume, pos);
- NS_HTML5_BREAK(doctypesystemidentifierdoublequotedloop);
- }
- case '>': {
- if (P::reportErrors) {
- errGtInSystemId();
- }
- forceQuirks = true;
- systemIdentifier = strBufToString();
- emitDoctypeToken(pos);
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
- reconsume, pos);
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\r': {
- appendStrBufCarriageReturn<P>();
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- appendStrBufLineFeed<P>();
- continue;
- }
- case '\0': {
- c = 0xfffd;
- [[fallthrough]];
- }
- default: {
- appendStrBuf(c);
- continue;
- }
- }
- }
- doctypesystemidentifierdoublequotedloop_end:;
- [[fallthrough]];
- }
- case AFTER_DOCTYPE_SYSTEM_IDENTIFIER: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '\r': {
- P::silentCarriageReturn(this);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- case ' ':
- case '\t':
- case '\f': {
- continue;
- }
- case '>': {
- emitDoctypeToken(pos);
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
- reconsume, pos);
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- default: {
- bogusDoctypeWithoutQuirks();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BOGUS_DOCTYPE,
- reconsume, pos);
- NS_HTML5_BREAK(afterdoctypesystemidentifierloop);
- }
- }
- }
- afterdoctypesystemidentifierloop_end:;
- [[fallthrough]];
- }
- case BOGUS_DOCTYPE: {
- for (;;) {
- if (reconsume) {
- reconsume = false;
- } else {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- }
- switch (c) {
- case '>': {
- emitDoctypeToken(pos);
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
- reconsume, pos);
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\r': {
- P::silentCarriageReturn(this);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- default: {
- continue;
- }
- }
- }
- }
- case DOCTYPE_YSTEM: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- if (index < 5) {
- char16_t folded = c;
- if (c >= 'A' && c <= 'Z') {
- folded += 0x20;
- }
- if (folded != nsHtml5Tokenizer::YSTEM[index]) {
- bogusDoctype();
- reconsume = true;
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BOGUS_DOCTYPE,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- index++;
- NS_HTML5_CONTINUE(stateloop);
- } else {
- reconsume = true;
- state =
- P::transition(mViewSource.get(),
- nsHtml5Tokenizer::AFTER_DOCTYPE_SYSTEM_KEYWORD,
- reconsume, pos);
- NS_HTML5_BREAK(doctypeystemloop);
- }
- }
- doctypeystemloop_end:;
- [[fallthrough]];
- }
- case AFTER_DOCTYPE_SYSTEM_KEYWORD: {
- for (;;) {
- if (reconsume) {
- reconsume = false;
- } else {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- }
- switch (c) {
- case '\r': {
- P::silentCarriageReturn(this);
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::BEFORE_DOCTYPE_SYSTEM_IDENTIFIER,
- reconsume, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- case ' ':
- case '\t':
- case '\f': {
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::BEFORE_DOCTYPE_SYSTEM_IDENTIFIER,
- reconsume, pos);
- NS_HTML5_BREAK(afterdoctypesystemkeywordloop);
- }
- case '\"': {
- if (P::reportErrors) {
- errNoSpaceBetweenDoctypeSystemKeywordAndQuote();
- }
- clearStrBufBeforeUse();
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\'': {
- if (P::reportErrors) {
- errNoSpaceBetweenDoctypeSystemKeywordAndQuote();
- }
- clearStrBufBeforeUse();
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '>': {
- if (P::reportErrors) {
- errExpectedPublicId();
- }
- forceQuirks = true;
- emitDoctypeToken(pos);
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
- reconsume, pos);
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- default: {
- bogusDoctype();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BOGUS_DOCTYPE,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- afterdoctypesystemkeywordloop_end:;
- [[fallthrough]];
- }
- case BEFORE_DOCTYPE_SYSTEM_IDENTIFIER: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '\r': {
- P::silentCarriageReturn(this);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- case ' ':
- case '\t':
- case '\f': {
- continue;
- }
- case '\"': {
- clearStrBufBeforeUse();
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\'': {
- clearStrBufBeforeUse();
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED,
- reconsume, pos);
- NS_HTML5_BREAK(beforedoctypesystemidentifierloop);
- }
- case '>': {
- if (P::reportErrors) {
- errExpectedSystemId();
- }
- forceQuirks = true;
- emitDoctypeToken(pos);
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
- reconsume, pos);
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- default: {
- bogusDoctype();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BOGUS_DOCTYPE,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- beforedoctypesystemidentifierloop_end:;
- [[fallthrough]];
- }
- case DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '\'': {
- systemIdentifier = strBufToString();
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::AFTER_DOCTYPE_SYSTEM_IDENTIFIER,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '>': {
- if (P::reportErrors) {
- errGtInSystemId();
- }
- forceQuirks = true;
- systemIdentifier = strBufToString();
- emitDoctypeToken(pos);
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
- reconsume, pos);
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\r': {
- appendStrBufCarriageReturn<P>();
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- appendStrBufLineFeed<P>();
- continue;
- }
- case '\0': {
- c = 0xfffd;
- [[fallthrough]];
- }
- default: {
- appendStrBuf(c);
- continue;
- }
- }
- }
- }
- case DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '\'': {
- publicIdentifier = strBufToString();
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::AFTER_DOCTYPE_PUBLIC_IDENTIFIER,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '>': {
- if (P::reportErrors) {
- errGtInPublicId();
- }
- forceQuirks = true;
- publicIdentifier = strBufToString();
- emitDoctypeToken(pos);
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
- reconsume, pos);
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\r': {
- appendStrBufCarriageReturn<P>();
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- appendStrBufLineFeed<P>();
- continue;
- }
- case '\0': {
- c = 0xfffd;
- [[fallthrough]];
- }
- default: {
- appendStrBuf(c);
- continue;
- }
- }
- }
- }
- case PROCESSING_INSTRUCTION: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '\?': {
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::PROCESSING_INSTRUCTION_QUESTION_MARK,
- reconsume, pos);
- NS_HTML5_BREAK(processinginstructionloop);
- }
- default: {
- continue;
- }
- }
- }
- processinginstructionloop_end:;
- [[fallthrough]];
- }
- case PROCESSING_INSTRUCTION_QUESTION_MARK: {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '>': {
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
- reconsume, pos);
- suspendIfRequestedAfterCurrentNonTextToken();
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- default: {
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::PROCESSING_INSTRUCTION,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- }
- }
- stateloop_end:;
- flushChars(buf, pos);
- stateSave = state;
- returnStateSave = returnState;
- return pos;
- }
-
+ int32_t stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* buf,
+ bool reconsume, int32_t returnState, int32_t endPos);
void initDoctypeFields();
template <class P>
- inline void adjustDoubleHyphenAndAppendToStrBufCarriageReturn() {
- P::silentCarriageReturn(this);
- adjustDoubleHyphenAndAppendToStrBufAndErr('\n', false);
- }
-
+ void adjustDoubleHyphenAndAppendToStrBufCarriageReturn();
template <class P>
- inline void adjustDoubleHyphenAndAppendToStrBufLineFeed() {
- P::silentLineFeed(this);
- adjustDoubleHyphenAndAppendToStrBufAndErr('\n', false);
- }
-
+ void adjustDoubleHyphenAndAppendToStrBufLineFeed();
template <class P>
- inline void appendStrBufLineFeed() {
- P::silentLineFeed(this);
- appendStrBuf('\n');
- }
-
+ void appendStrBufLineFeed();
template <class P>
- inline void appendStrBufCarriageReturn() {
- P::silentCarriageReturn(this);
- appendStrBuf('\n');
- }
-
+ void appendStrBufCarriageReturn();
template <class P>
- inline void emitCarriageReturn(char16_t* buf, int32_t pos) {
- P::silentCarriageReturn(this);
- flushChars(buf, pos);
- tokenHandler->characters(nsHtml5Tokenizer::LF, 0, 1);
- cstart = INT32_MAX;
- }
-
+ void emitCarriageReturn(char16_t* buf, int32_t pos);
void emitReplacementCharacter(char16_t* buf, int32_t pos);
void maybeEmitReplacementCharacter(char16_t* buf, int32_t pos);
void emitPlaintextReplacementCharacter(char16_t* buf, int32_t pos);
- inline void setAdditionalAndRememberAmpersandLocation(char16_t add) {
- additional = add;
- }
-
+ void setAdditionalAndRememberAmpersandLocation(char16_t add);
void bogusDoctype();
void bogusDoctypeWithoutQuirks();
void handleNcrValue(int32_t returnState);
@@ -4584,13 +434,7 @@ class nsHtml5Tokenizer {
private:
void emitDoctypeToken(int32_t pos);
- inline void suspendIfRequestedAfterCurrentNonTextToken() {
- if (suspendAfterCurrentNonTextToken) {
- suspendAfterCurrentNonTextToken = false;
- shouldSuspend = true;
- }
- }
-
+ void suspendIfRequestedAfterCurrentNonTextToken();
void suspendAfterCurrentTokenIfNotInText();
bool suspensionAfterCurrentNonTextTokenPending();
@@ -4598,29 +442,13 @@ class nsHtml5Tokenizer {
bool internalEncodingDeclaration(nsHtml5String internalCharset);
private:
- inline void emitOrAppendTwo(const char16_t* val, int32_t returnState) {
- if ((returnState & DATA_AND_RCDATA_MASK)) {
- appendStrBuf(val[0]);
- appendStrBuf(val[1]);
- } else {
- tokenHandler->characters(val, 0, 2);
- }
- }
-
- inline void emitOrAppendOne(const char16_t* val, int32_t returnState) {
- if ((returnState & DATA_AND_RCDATA_MASK)) {
- appendStrBuf(val[0]);
- } else {
- tokenHandler->characters(val, 0, 1);
- }
- }
+ void emitOrAppendTwo(const char16_t* val, int32_t returnState);
+ void emitOrAppendOne(const char16_t* val, int32_t returnState);
public:
void end();
- inline void requestSuspension() { shouldSuspend = true; }
-
- inline bool isInDataState() { return (stateSave == DATA); }
-
+ void requestSuspension();
+ bool isInDataState();
void resetToDataState();
void loadState(nsHtml5Tokenizer* other);
void initializeWithoutStarting();
diff --git a/parser/html/nsHtml5TokenizerALU.cpp b/parser/html/nsHtml5TokenizerALU.cpp
@@ -1,33 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-#include "nsHtml5Tokenizer.h"
-#include "nsHtml5TokenizerLoopPoliciesALU.h"
-
-int32_t nsHtml5Tokenizer::StateLoopFastestALU(int32_t state, char16_t c,
- int32_t pos, char16_t* buf,
- bool reconsume,
- int32_t returnState,
- int32_t endPos) {
- return stateLoop<nsHtml5FastestPolicyALU>(state, c, pos, buf, reconsume,
- returnState, endPos);
-}
-
-int32_t nsHtml5Tokenizer::StateLoopLineColALU(int32_t state, char16_t c,
- int32_t pos, char16_t* buf,
- bool reconsume,
- int32_t returnState,
- int32_t endPos) {
- return stateLoop<nsHtml5LineColPolicyALU>(state, c, pos, buf, reconsume,
- returnState, endPos);
-}
-
-int32_t nsHtml5Tokenizer::StateLoopViewSourceALU(int32_t state, char16_t c,
- int32_t pos, char16_t* buf,
- bool reconsume,
- int32_t returnState,
- int32_t endPos) {
- return stateLoop<nsHtml5ViewSourcePolicyALU>(state, c, pos, buf, reconsume,
- returnState, endPos);
-}
diff --git a/parser/html/nsHtml5TokenizerALUStubs.cpp b/parser/html/nsHtml5TokenizerALUStubs.cpp
@@ -1,32 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-#include "nsHtml5Tokenizer.h"
-
-int32_t nsHtml5Tokenizer::StateLoopFastestALU(int32_t state, char16_t c,
- int32_t pos, char16_t* buf,
- bool reconsume,
- int32_t returnState,
- int32_t endPos) {
- MOZ_RELEASE_ASSERT(false, "Inconsistent build config");
- return 0;
-}
-
-int32_t nsHtml5Tokenizer::StateLoopLineColALU(int32_t state, char16_t c,
- int32_t pos, char16_t* buf,
- bool reconsume,
- int32_t returnState,
- int32_t endPos) {
- MOZ_RELEASE_ASSERT(false, "Inconsistent build config");
- return 0;
-}
-
-int32_t nsHtml5Tokenizer::StateLoopViewSourceALU(int32_t state, char16_t c,
- int32_t pos, char16_t* buf,
- bool reconsume,
- int32_t returnState,
- int32_t endPos) {
- MOZ_RELEASE_ASSERT(false, "Inconsistent build config");
- return 0;
-}
diff --git a/parser/html/nsHtml5TokenizerHSupplement.h b/parser/html/nsHtml5TokenizerHSupplement.h
@@ -2,48 +2,14 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-friend struct nsHtml5ViewSourcePolicySIMD;
-friend struct nsHtml5ViewSourcePolicyALU;
-friend struct nsHtml5LineColPolicySIMD;
-friend struct nsHtml5LineColPolicyALU;
-friend struct nsHtml5FastestPolicySIMD;
-friend struct nsHtml5FastestPolicyALU;
+friend struct nsHtml5ViewSourcePolicy;
+friend struct nsHtml5LineColPolicy;
+friend struct nsHtml5FastestPolicy;
private:
int32_t col;
bool nextCharOnNewLine;
-// These functions are wrappers for template parametrized stateLoop and
-// stateLoopCompilerWorkaround so that the instantiations can go into
-// separate compilation units both to allow different compiler flags
-// and to make LLVM perform LICM on SIMD constants in functions whose size
-// isn't too large for LLVM to perform LICM before LLVM looks for inlining
-// opportunities.
-
-int32_t StateLoopFastestSIMD(int32_t state, char16_t c, int32_t pos,
- char16_t* buf, bool reconsume, int32_t returnState,
- int32_t endPos);
-
-int32_t StateLoopFastestALU(int32_t state, char16_t c, int32_t pos,
- char16_t* buf, bool reconsume, int32_t returnState,
- int32_t endPos);
-
-int32_t StateLoopLineColSIMD(int32_t state, char16_t c, int32_t pos,
- char16_t* buf, bool reconsume, int32_t returnState,
- int32_t endPos);
-
-int32_t StateLoopLineColALU(int32_t state, char16_t c, int32_t pos,
- char16_t* buf, bool reconsume, int32_t returnState,
- int32_t endPos);
-
-int32_t StateLoopViewSourceSIMD(int32_t state, char16_t c, int32_t pos,
- char16_t* buf, bool reconsume,
- int32_t returnState, int32_t endPos);
-
-int32_t StateLoopViewSourceALU(int32_t state, char16_t c, int32_t pos,
- char16_t* buf, bool reconsume,
- int32_t returnState, int32_t endPos);
-
public:
inline int32_t getColumnNumber() { return col; }
diff --git a/parser/html/nsHtml5TokenizerLoopPolicies.h b/parser/html/nsHtml5TokenizerLoopPolicies.h
@@ -0,0 +1,123 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef nsHtml5TokenizerLoopPolicies_h
+#define nsHtml5TokenizerLoopPolicies_h
+
+/**
+ * This policy does not report tokenizer transitions anywhere and does not
+ * track line and column numbers. To be used for innerHTML.
+ */
+struct nsHtml5FastestPolicy {
+ static const bool reportErrors = false;
+ static int32_t transition(nsHtml5Highlighter* aHighlighter, int32_t aState,
+ bool aReconsume, int32_t aPos) {
+ return aState;  // Pass-through: aHighlighter is never consulted.
+ }
+ static void completedNamedCharacterReference(
+ nsHtml5Highlighter* aHighlighter) {}  // No-op in this policy.
+
+ static char16_t checkChar(nsHtml5Tokenizer* aTokenizer, char16_t* buf,
+ int32_t pos) {
+ return buf[pos];  // Plain read; no line/column bookkeeping.
+ }
+
+ static void silentCarriageReturn(nsHtml5Tokenizer* aTokenizer) {
+ aTokenizer->lastCR = true;  // Only the CR flag is set; line is untouched.
+ }
+
+ static void silentLineFeed(nsHtml5Tokenizer* aTokenizer) {}  // No-op.
+};
+
+/**
+ * This policy tracks line and column numbers but does not report tokenizer
+ * transitions anywhere. To be used when _not_ viewing source and when not
+ * parsing innerHTML (or other script execution-preventing fragment).
+ */
+struct nsHtml5LineColPolicy {
+ static const bool reportErrors = false;
+ static int32_t transition(nsHtml5Highlighter* aHighlighter, int32_t aState,
+ bool aReconsume, int32_t aPos) {
+ return aState;  // Pass-through: aHighlighter is never consulted.
+ }
+ static void completedNamedCharacterReference(
+ nsHtml5Highlighter* aHighlighter) {}  // No-op in this policy.
+
+ static char16_t checkChar(nsHtml5Tokenizer* aTokenizer, char16_t* buf,
+ int32_t pos) {
+ // The name of this method comes from the validator.
+ // We aren't checking a char here. We read the next
+ // UTF-16 code unit and, before returning it, adjust
+ // the line and column numbers.
+ char16_t c = buf[pos];
+ if (MOZ_UNLIKELY(aTokenizer->nextCharOnNewLine)) {
+ // Changing the line and column here, instead
+ // of doing so eagerly when seeing the line break,
+ // causes the line break itself to be considered
+ // column-wise at the end of a line.
+ aTokenizer->line++;
+ aTokenizer->col = 1;
+ aTokenizer->nextCharOnNewLine = false;
+ } else if (MOZ_LIKELY(!NS_IS_LOW_SURROGATE(c))) {
+ // SpiderMonkey wants to count scalar values
+ // instead of UTF-16 code units. We omit low
+ // surrogates from the count so that only the
+ // high surrogate increments the count for
+ // two-code-unit scalar values.
+ //
+ // It's somewhat questionable from the performance
+ // perspective to make the human-perceivable column
+ // count correct for non-BMP characters in the case
+ // where there is a single scalar value per extended
+ // grapheme cluster when even on the BMP there are
+ // various cases where the scalar count doesn't make
+ // much sense as a human-perceived "column count" due
+ // to extended grapheme clusters consisting of more
+ // than one scalar value.
+ aTokenizer->col++;
+ }
+ return c;
+ }
+
+ static void silentCarriageReturn(nsHtml5Tokenizer* aTokenizer) {
+ aTokenizer->nextCharOnNewLine = true;  // Applied lazily by checkChar().
+ aTokenizer->lastCR = true;
+ }
+
+ static void silentLineFeed(nsHtml5Tokenizer* aTokenizer) {
+ aTokenizer->nextCharOnNewLine = true;  // Applied lazily by checkChar().
+ }
+};
+
+/**
+ * This policy reports the tokenizer transitions to a highlighter. To be used
+ * when viewing source. Line numbers are counted eagerly; columns are not.
+ */
+struct nsHtml5ViewSourcePolicy {
+ static const bool reportErrors = true;
+ static int32_t transition(nsHtml5Highlighter* aHighlighter, int32_t aState,
+ bool aReconsume, int32_t aPos) {
+ return aHighlighter->Transition(aState, aReconsume, aPos);
+ }
+ static void completedNamedCharacterReference(
+ nsHtml5Highlighter* aHighlighter) {
+ aHighlighter->CompletedNamedCharacterReference();
+ }
+
+ static char16_t checkChar(nsHtml5Tokenizer* aTokenizer, char16_t* buf,
+ int32_t pos) {
+ return buf[pos];  // Plain read; no column bookkeeping here.
+ }
+
+ static void silentCarriageReturn(nsHtml5Tokenizer* aTokenizer) {
+ aTokenizer->line++;  // Bumped immediately, not deferred to checkChar().
+ aTokenizer->lastCR = true;
+ }
+
+ static void silentLineFeed(nsHtml5Tokenizer* aTokenizer) {
+ aTokenizer->line++;  // Bumped immediately, not deferred to checkChar().
+ }
+};
+
+#endif // nsHtml5TokenizerLoopPolicies_h
diff --git a/parser/html/nsHtml5TokenizerLoopPoliciesALU.h b/parser/html/nsHtml5TokenizerLoopPoliciesALU.h
@@ -1,150 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-#ifndef nsHtml5TokenizerLoopPoliciesALU_h
-#define nsHtml5TokenizerLoopPoliciesALU_h
-
-/**
- * This policy does not report tokenizer transitions anywhere and does not
- * track line and column numbers. To be used for innerHTML. Non-SIMD version.
- */
-struct nsHtml5FastestPolicyALU {
- static const bool reportErrors = false;
- MOZ_ALWAYS_INLINE_EVEN_DEBUG static int32_t transition(
- nsHtml5Highlighter* aHighlighter, int32_t aState, bool aReconsume,
- int32_t aPos) {
- return aState;
- }
- MOZ_ALWAYS_INLINE_EVEN_DEBUG static void completedNamedCharacterReference(
- nsHtml5Highlighter* aHighlighter) {}
-
- MOZ_ALWAYS_INLINE_EVEN_DEBUG static int32_t accelerateAdvancementData(
- nsHtml5Tokenizer* aTokenizer, char16_t* buf, int32_t pos,
- int32_t endPos) {
- return 0;
- }
-
- MOZ_ALWAYS_INLINE_EVEN_DEBUG static char16_t checkChar(
- nsHtml5Tokenizer* aTokenizer, char16_t* buf, int32_t pos) {
- return buf[pos];
- }
-
- MOZ_ALWAYS_INLINE_EVEN_DEBUG static void silentCarriageReturn(
- nsHtml5Tokenizer* aTokenizer) {
- aTokenizer->lastCR = true;
- }
-
- MOZ_ALWAYS_INLINE_EVEN_DEBUG static void silentLineFeed(
- nsHtml5Tokenizer* aTokenizer) {}
-};
-
-/**
- * This policy does not report tokenizer transitions anywhere. To be used
- * when _not_ viewing source and when not parsing innerHTML (or other
- * script execution-preventing fragment).
- */
-struct nsHtml5LineColPolicyALU {
- static const bool reportErrors = false;
- MOZ_ALWAYS_INLINE_EVEN_DEBUG static int32_t transition(
- nsHtml5Highlighter* aHighlighter, int32_t aState, bool aReconsume,
- int32_t aPos) {
- return aState;
- }
- MOZ_ALWAYS_INLINE_EVEN_DEBUG static void completedNamedCharacterReference(
- nsHtml5Highlighter* aHighlighter) {}
-
- MOZ_ALWAYS_INLINE_EVEN_DEBUG static int32_t accelerateAdvancementData(
- nsHtml5Tokenizer* aTokenizer, char16_t* buf, int32_t pos,
- int32_t endPos) {
- return 0;
- }
-
- MOZ_ALWAYS_INLINE_EVEN_DEBUG static char16_t checkChar(
- nsHtml5Tokenizer* aTokenizer, char16_t* buf, int32_t pos) {
- // The name of this method comes from the validator.
- // We aren't checking a char here. We read the next
- // UTF-16 code unit and, before returning it, adjust
- // the line and column numbers.
- char16_t c = buf[pos];
- if (MOZ_UNLIKELY(aTokenizer->nextCharOnNewLine)) {
- // By changing the line and column here instead
- // of doing so eagerly when seeing the line break
- // causes the line break itself to be considered
- // column-wise at the end of a line.
- aTokenizer->line++;
- aTokenizer->col = 1;
- aTokenizer->nextCharOnNewLine = false;
- } else if (MOZ_LIKELY(!NS_IS_LOW_SURROGATE(c))) {
- // SpiderMonkey wants to count scalar values
- // instead of UTF-16 code units. We omit low
- // surrogates from the count so that only the
- // high surrogate increments the count for
- // two-code-unit scalar values.
- //
- // It's somewhat questionable from the performance
- // perspective to make the human-perceivable column
- // count correct for non-BMP characters in the case
- // where there is a single scalar value per extended
- // grapheme cluster when even on the BMP there are
- // various cases where the scalar count doesn't make
- // much sense as a human-perceived "column count" due
- // to extended grapheme clusters consisting of more
- // than one scalar value.
- aTokenizer->col++;
- }
- return c;
- }
-
- MOZ_ALWAYS_INLINE_EVEN_DEBUG static void silentCarriageReturn(
- nsHtml5Tokenizer* aTokenizer) {
- aTokenizer->nextCharOnNewLine = true;
- aTokenizer->lastCR = true;
- }
-
- MOZ_ALWAYS_INLINE_EVEN_DEBUG static void silentLineFeed(
- nsHtml5Tokenizer* aTokenizer) {
- aTokenizer->nextCharOnNewLine = true;
- }
-};
-
-/**
- * This policy reports the tokenizer transitions to a highlighter. To be used
- * when viewing source.
- */
-struct nsHtml5ViewSourcePolicyALU {
- static const bool reportErrors = true;
- MOZ_ALWAYS_INLINE_EVEN_DEBUG static int32_t transition(
- nsHtml5Highlighter* aHighlighter, int32_t aState, bool aReconsume,
- int32_t aPos) {
- return aHighlighter->Transition(aState, aReconsume, aPos);
- }
- MOZ_ALWAYS_INLINE_EVEN_DEBUG static void completedNamedCharacterReference(
- nsHtml5Highlighter* aHighlighter) {
- aHighlighter->CompletedNamedCharacterReference();
- }
-
- MOZ_ALWAYS_INLINE_EVEN_DEBUG static int32_t accelerateAdvancementData(
- nsHtml5Tokenizer* aTokenizer, char16_t* buf, int32_t pos,
- int32_t endPos) {
- return 0;
- }
-
- MOZ_ALWAYS_INLINE_EVEN_DEBUG static char16_t checkChar(
- nsHtml5Tokenizer* aTokenizer, char16_t* buf, int32_t pos) {
- return buf[pos];
- }
-
- MOZ_ALWAYS_INLINE_EVEN_DEBUG static void silentCarriageReturn(
- nsHtml5Tokenizer* aTokenizer) {
- aTokenizer->line++;
- aTokenizer->lastCR = true;
- }
-
- MOZ_ALWAYS_INLINE_EVEN_DEBUG static void silentLineFeed(
- nsHtml5Tokenizer* aTokenizer) {
- aTokenizer->line++;
- }
-};
-
-#endif // nsHtml5TokenizerLoopPoliciesALU_h
diff --git a/parser/html/nsHtml5TokenizerLoopPoliciesSIMD.h b/parser/html/nsHtml5TokenizerLoopPoliciesSIMD.h
@@ -1,211 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-#ifndef nsHtml5TokenizerLoopPoliciesSIMD_h
-#define nsHtml5TokenizerLoopPoliciesSIMD_h
-
-#include "mozilla/Attributes.h"
-#include "mozilla/htmlaccel/htmlaccelNotInline.h"
-
-/**
- * This policy does not report tokenizer transitions anywhere and does not
- * track line and column numbers. To be used for innerHTML.
- *
- * This the SIMD version for aarch64 and SSSE3-enabled x86/x86_64.
- */
-struct nsHtml5FastestPolicySIMD {
- static const bool reportErrors = false;
- MOZ_ALWAYS_INLINE_EVEN_DEBUG static int32_t transition(
- nsHtml5Highlighter* aHighlighter, int32_t aState, bool aReconsume,
- int32_t aPos) {
- return aState;
- }
- MOZ_ALWAYS_INLINE_EVEN_DEBUG static void completedNamedCharacterReference(
- nsHtml5Highlighter* aHighlighter) {}
-
- MOZ_ALWAYS_INLINE_EVEN_DEBUG static int32_t accelerateAdvancementData(
- nsHtml5Tokenizer* aTokenizer, char16_t* buf, int32_t pos,
- int32_t endPos) {
- // We need to check bounds for the `buf[pos]` access below to be OK.
- // Instead of just checking that `pos` isn't equal to `endPos`, let's
- // check that have at least one SIMD stride of data in the same branch,
- // since if we don't have at least one SIMD stride of data, we don't
- // need to proceed.
- if (endPos - pos < 16) {
- return 0;
- }
- if (buf[pos] == '<') {
- // Quickly handle the case where there is one tag immediately
- // after another and the very first thing in the data state is a
- // less-than sign.
- return 0;
- }
- return mozilla::htmlaccel::AccelerateDataFastest(buf + pos, buf + endPos);
- }
-
- MOZ_ALWAYS_INLINE_EVEN_DEBUG static char16_t checkChar(
- nsHtml5Tokenizer* aTokenizer, char16_t* buf, int32_t pos) {
- return buf[pos];
- }
-
- MOZ_ALWAYS_INLINE_EVEN_DEBUG static void silentCarriageReturn(
- nsHtml5Tokenizer* aTokenizer) {
- aTokenizer->lastCR = true;
- }
-
- MOZ_ALWAYS_INLINE_EVEN_DEBUG static void silentLineFeed(
- nsHtml5Tokenizer* aTokenizer) {}
-};
-
-/**
- * This policy does not report tokenizer transitions anywhere. To be used
- * when _not_ viewing source and when not parsing innerHTML (or other
- * script execution-preventing fragment).
- */
-struct nsHtml5LineColPolicySIMD {
- static const bool reportErrors = false;
- MOZ_ALWAYS_INLINE_EVEN_DEBUG static int32_t transition(
- nsHtml5Highlighter* aHighlighter, int32_t aState, bool aReconsume,
- int32_t aPos) {
- return aState;
- }
- MOZ_ALWAYS_INLINE_EVEN_DEBUG static void completedNamedCharacterReference(
- nsHtml5Highlighter* aHighlighter) {}
-
- MOZ_ALWAYS_INLINE_EVEN_DEBUG static int32_t accelerateAdvancementData(
- nsHtml5Tokenizer* aTokenizer, char16_t* buf, int32_t pos,
- int32_t endPos) {
- // We need to check bounds for the `buf[pos]` access below to be OK.
- // Instead of just checking that `pos` isn't equal to `endPos`, let's
- // check that have at least one SIMD stride of data in the same branch,
- // since if we don't have at least one SIMD stride of data, we don't
- // need to proceed.
- if (endPos - pos < 16) {
- return 0;
- }
- char16_t c = buf[pos];
- if (c == '<' || c == '\n') {
- // Quickly handle the case where there is one tag immediately
- // after another and the very first thing in the data state is a
- // less-than sign and the case where a tag is immediately followed
- // by a line feed.
- return 0;
- }
- if (MOZ_UNLIKELY(aTokenizer->nextCharOnNewLine)) {
- // By changing the line and column here instead
- // of doing so eagerly when seeing the line break
- // causes the line break itself to be considered
- // column-wise at the end of a line.
- aTokenizer->line++;
- aTokenizer->col = 1;
- aTokenizer->nextCharOnNewLine = false;
- }
- return mozilla::htmlaccel::AccelerateDataLineCol(buf + pos, buf + endPos);
- }
-
- MOZ_ALWAYS_INLINE_EVEN_DEBUG static char16_t checkChar(
- nsHtml5Tokenizer* aTokenizer, char16_t* buf, int32_t pos) {
- // The name of this method comes from the validator.
- // We aren't checking a char here. We read the next
- // UTF-16 code unit and, before returning it, adjust
- // the line and column numbers.
- char16_t c = buf[pos];
- if (MOZ_UNLIKELY(aTokenizer->nextCharOnNewLine)) {
- // By changing the line and column here instead
- // of doing so eagerly when seeing the line break
- // causes the line break itself to be considered
- // column-wise at the end of a line.
- aTokenizer->line++;
- aTokenizer->col = 1;
- aTokenizer->nextCharOnNewLine = false;
- } else if (MOZ_LIKELY(!NS_IS_LOW_SURROGATE(c))) {
- // SpiderMonkey wants to count scalar values
- // instead of UTF-16 code units. We omit low
- // surrogates from the count so that only the
- // high surrogate increments the count for
- // two-code-unit scalar values.
- //
- // It's somewhat questionable from the performance
- // perspective to make the human-perceivable column
- // count correct for non-BMP characters in the case
- // where there is a single scalar value per extended
- // grapheme cluster when even on the BMP there are
- // various cases where the scalar count doesn't make
- // much sense as a human-perceived "column count" due
- // to extended grapheme clusters consisting of more
- // than one scalar value.
- aTokenizer->col++;
- }
- return c;
- }
-
- MOZ_ALWAYS_INLINE_EVEN_DEBUG static void silentCarriageReturn(
- nsHtml5Tokenizer* aTokenizer) {
- aTokenizer->nextCharOnNewLine = true;
- aTokenizer->lastCR = true;
- }
-
- MOZ_ALWAYS_INLINE_EVEN_DEBUG static void silentLineFeed(
- nsHtml5Tokenizer* aTokenizer) {
- aTokenizer->nextCharOnNewLine = true;
- }
-};
-
-/**
- * This policy reports the tokenizer transitions to a highlighter. To be used
- * when viewing source.
- */
-struct nsHtml5ViewSourcePolicySIMD {
- static const bool reportErrors = true;
- MOZ_ALWAYS_INLINE_EVEN_DEBUG static int32_t transition(
- nsHtml5Highlighter* aHighlighter, int32_t aState, bool aReconsume,
- int32_t aPos) {
- return aHighlighter->Transition(aState, aReconsume, aPos);
- }
- MOZ_ALWAYS_INLINE_EVEN_DEBUG static void completedNamedCharacterReference(
- nsHtml5Highlighter* aHighlighter) {
- aHighlighter->CompletedNamedCharacterReference();
- }
-
- MOZ_ALWAYS_INLINE_EVEN_DEBUG static int32_t accelerateAdvancementData(
- nsHtml5Tokenizer* aTokenizer, char16_t* buf, int32_t pos,
- int32_t endPos) {
- // We need to check bounds for the `buf[pos]` access below to be OK.
- // Instead of just checking that `pos` isn't equal to `endPos`, let's
- // check that have at least one SIMD stride of data in the same branch,
- // since if we don't have at least one SIMD stride of data, we don't
- // need to proceed.
- if (endPos - pos < 16) {
- return 0;
- }
- char16_t c = buf[pos];
- if (c == '<' || c == '\n') {
- // Quickly handle the case where there is one tag immediately
- // after another and the very first thing in the data state is a
- // less-than sign and the case where a tag is immediately followed
- // by a line feed.
- return 0;
- }
- return mozilla::htmlaccel::AccelerateDataViewSource(buf + pos,
- buf + endPos);
- }
-
- MOZ_ALWAYS_INLINE_EVEN_DEBUG static char16_t checkChar(
- nsHtml5Tokenizer* aTokenizer, char16_t* buf, int32_t pos) {
- return buf[pos];
- }
-
- MOZ_ALWAYS_INLINE_EVEN_DEBUG static void silentCarriageReturn(
- nsHtml5Tokenizer* aTokenizer) {
- aTokenizer->line++;
- aTokenizer->lastCR = true;
- }
-
- MOZ_ALWAYS_INLINE_EVEN_DEBUG static void silentLineFeed(
- nsHtml5Tokenizer* aTokenizer) {
- aTokenizer->line++;
- }
-};
-
-#endif // nsHtml5TokenizerLoopPoliciesSIMD_h
diff --git a/parser/html/nsHtml5TokenizerSIMD.cpp b/parser/html/nsHtml5TokenizerSIMD.cpp
@@ -1,33 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-#include "nsHtml5Tokenizer.h"
-#include "nsHtml5TokenizerLoopPoliciesSIMD.h"
-
-int32_t nsHtml5Tokenizer::StateLoopFastestSIMD(int32_t state, char16_t c,
- int32_t pos, char16_t* buf,
- bool reconsume,
- int32_t returnState,
- int32_t endPos) {
- return stateLoop<nsHtml5FastestPolicySIMD>(state, c, pos, buf, reconsume,
- returnState, endPos);
-}
-
-int32_t nsHtml5Tokenizer::StateLoopLineColSIMD(int32_t state, char16_t c,
- int32_t pos, char16_t* buf,
- bool reconsume,
- int32_t returnState,
- int32_t endPos) {
- return stateLoop<nsHtml5LineColPolicySIMD>(state, c, pos, buf, reconsume,
- returnState, endPos);
-}
-
-int32_t nsHtml5Tokenizer::StateLoopViewSourceSIMD(int32_t state, char16_t c,
- int32_t pos, char16_t* buf,
- bool reconsume,
- int32_t returnState,
- int32_t endPos) {
- return stateLoop<nsHtml5ViewSourcePolicySIMD>(state, c, pos, buf, reconsume,
- returnState, endPos);
-}
diff --git a/parser/html/nsHtml5TokenizerSIMDStubs.cpp b/parser/html/nsHtml5TokenizerSIMDStubs.cpp
@@ -1,32 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-#include "nsHtml5Tokenizer.h"
-
-int32_t nsHtml5Tokenizer::StateLoopFastestSIMD(int32_t state, char16_t c,
- int32_t pos, char16_t* buf,
- bool reconsume,
- int32_t returnState,
- int32_t endPos) {
- MOZ_RELEASE_ASSERT(false, "Inconsistent build config");
- return 0;
-}
-
-int32_t nsHtml5Tokenizer::StateLoopLineColSIMD(int32_t state, char16_t c,
- int32_t pos, char16_t* buf,
- bool reconsume,
- int32_t returnState,
- int32_t endPos) {
- MOZ_RELEASE_ASSERT(false, "Inconsistent build config");
- return 0;
-}
-
-int32_t nsHtml5Tokenizer::StateLoopViewSourceSIMD(int32_t state, char16_t c,
- int32_t pos, char16_t* buf,
- bool reconsume,
- int32_t returnState,
- int32_t endPos) {
- MOZ_RELEASE_ASSERT(false, "Inconsistent build config");
- return 0;
-}
diff --git a/parser/htmlaccel/gtest/TestHtmlSimd.cpp b/parser/htmlaccel/gtest/TestHtmlSimd.cpp
@@ -1,62 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-#include "gtest/gtest.h"
-#include "mozilla/htmlaccel/htmlaccelNotInline.h"
-
-// Match in the first half
-const char16_t HTML_SIMD_TEST_INPUT_LOW[16] = {
- 'a',
- 0xD834, // Surrogate pair
- 0xDD65, '\n', '<', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p',
-};
-
-// Match in the second half
-const char16_t HTML_SIMD_TEST_INPUT_HIGH[16] = {
- 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'a',
- 0xD834, // Surrogate pair
- 0xDD65, '\n', '<', 'f', 'g', 'h',
-};
-
-TEST(HtmlSimd, TestTextNodeAllowSurrogatesAndLf)
-{
- int32_t index = mozilla::htmlaccel::AccelerateDataFastest(
- HTML_SIMD_TEST_INPUT_LOW, HTML_SIMD_TEST_INPUT_LOW + 16);
- ASSERT_EQ(index, 4);
-}
-
-TEST(HtmlSimd, TestTextNodeAllowSurrogatesDisallowLf)
-{
- int32_t index = mozilla::htmlaccel::AccelerateDataViewSource(
- HTML_SIMD_TEST_INPUT_LOW, HTML_SIMD_TEST_INPUT_LOW + 16);
- ASSERT_EQ(index, 3);
-}
-
-TEST(HtmlSimd, TestTextNodeDisallowSurrogatesAndLf)
-{
- int32_t index = mozilla::htmlaccel::AccelerateDataLineCol(
- HTML_SIMD_TEST_INPUT_LOW, HTML_SIMD_TEST_INPUT_LOW + 16);
- ASSERT_EQ(index, 1);
-}
-
-TEST(HtmlSimd, TestTextNodeAllowSurrogatesAndLfHigh)
-{
- int32_t index = mozilla::htmlaccel::AccelerateDataFastest(
- HTML_SIMD_TEST_INPUT_HIGH, HTML_SIMD_TEST_INPUT_HIGH + 16);
- ASSERT_EQ(index, 4 + 8);
-}
-
-TEST(HtmlSimd, TestTextNodeAllowSurrogatesDisallowLfHigh)
-{
- int32_t index = mozilla::htmlaccel::AccelerateDataViewSource(
- HTML_SIMD_TEST_INPUT_HIGH, HTML_SIMD_TEST_INPUT_HIGH + 16);
- ASSERT_EQ(index, 3 + 8);
-}
-
-TEST(HtmlSimd, TestTextNodeDisallowSurrogatesAndLfHigh)
-{
- int32_t index = mozilla::htmlaccel::AccelerateDataLineCol(
- HTML_SIMD_TEST_INPUT_HIGH, HTML_SIMD_TEST_INPUT_HIGH + 16);
- ASSERT_EQ(index, 1 + 8);
-}
diff --git a/parser/htmlaccel/gtest/moz.build b/parser/htmlaccel/gtest/moz.build
@@ -1,15 +0,0 @@
-# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
-# vim: set filetype=python:
-# This Source Code Form is subject to the terms of the Mozilla Public
-# License, v. 2.0. If a copy of the MPL was not distributed with this
-# file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-if CONFIG["TARGET_CPU"] == "x86_64" or (
- CONFIG["TARGET_CPU"] == "aarch64" and CONFIG["TARGET_ENDIANNESS"] == "little"
-):
- SOURCES += {
- "TestHtmlSimd.cpp",
- }
- SOURCES["TestHtmlSimd.cpp"].flags += CONFIG["HTML_ACCEL_FLAGS"]
-
-FINAL_LIBRARY = "xul-gtest"
diff --git a/parser/htmlaccel/htmlaccel.h b/parser/htmlaccel/htmlaccel.h
@@ -1,322 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-#ifndef mozilla_htmlaccel_htmlaccel_h
-#define mozilla_htmlaccel_htmlaccel_h
-
-#include <string.h>
-#include <stdint.h>
-
-// Avoid adding more Gecko-specific headers to keep it easy enough to
-// copy and paste the contents of this file to Compiler Explorer.
-#include "mozilla/Attributes.h"
-
-// This file provides SIMD code for skipping over characters that
-// the caller doesn't need to act upon. For example, this code can
-// skip over characters that the HTML tokenizer doesn't need to handle
-// specially in a given state or this code could be used to skip over
-// characters that don't need to be escaped in an HTML serializer.
-
-// ISA SUPPORT: Do not include this file unless the compilation unit is
-// being compiled either for little-endian aarch64 or for x86/x86_64 with
-// at least SSSE3 enabled.
-//
-// It's probably feasible to extend this to support little-endian POWER
-// by defining
-// MOZ_ALWAYS_INLINE_EVEN_DEBUG uint8x16_t TableLookup(uint8x16_t table,
-// uint8x16_t nibbles) {
-// return vec_perm(table, table, nibbles);
-// }
-// but since I don't have a little-endian POWER system to test with,
-// this is left as an exercise to the reader. (The x86/x86_64 reduction
-// code should be portable to POWER10 using vec_extractm and the aarch64
-// reduction code should be portable to older POWER using vec_max.)
-//
-// ARMv7 is deliberately not supported due to vqtbl1q_u8 being a newer
-// addition to NEON.
-#if !defined(__LITTLE_ENDIAN__)
-# error "A little-endian target is required."
-#endif
-#if !(defined(__aarch64__) || defined(__SSSE3__))
-# error "Must be targeting aarch64 or SSSE3."
-#endif
-
-// NOTE: This file uses GCC/clang built-ins that provide SIMD portability.
-// Compared to pretending unawareness of what arm_neon.h and tmmintrin.h
-// map to in GCC and clang, this has the benefit that the code is not stuck
-// at an SSSE3 local maximum but adapts maximally to upgrades to SSE 4.2,
-// AVX2, and BMI. (Yes, enabling BMI seems to affect more than just
-// __builtin_ctz!)
-// (We need to check for __clang__, because clang-cl does not define __GNUC__.)
-#if !(defined(__GNUC__) || defined(__clang__))
-# error "A compiler that supports GCC-style portable SIMD is required."
-#endif
-
-// # General
-//
-// There is an entry point per combination of what characters terminate
-// the acceleration loop (i.e. characters that the HTML tokenizer would not
-// simply skip over). The shared implementation code is inlined into these
-// FFI entry point functions, so the parametrization made inside the FFI
-// functions constant-propagates through the implementation internals.
-//
-// The code examines 16 UTF-16 code units at a time as two 128-bit SIMD
-// vectors. First, the bytes are regrouped to so that one SIMD vector
-// contains the high halves of the UTF-16 code units (zeros for ASCII/Basic
-// Latin) and another one contains the low halves.
-//
-// In the case of the low half, we mask the vector to take the low 4 bits of
-// each 8-bit value and do a lookup from a lookup table contained in a SIMD
-// vector. The 4 bits index into 16 lanes of the other SIMD vector such that
-// we get a vector where the positions corresponding to positions of the
-// original code units contain the 8-bit value looked up from by the 4-bit
-// index.
-//
-// The lookup operation is available unconditionally on aarch64. On
-// x86/x86_64, it is part of the SSSE3 instruction set extension, which is
-// why on x86/x86_64 we must not call into this code unless SSSE3 is
-// available. (Each additional level of compiling this code with SSE4.2,
-// AVX2, or AVX2 + BMI makes this code shorter, which presumably means more
-// efficient, so instead of compiling this just with SSSE3, we compile this
-// with AVX2+BMI on x86_64, considering that CPUs with such capabilities
-// have been available for 12 years at the time of landing this code.)
-//
-// The lookup table contains the loop-terminating ASCII characters in the
-// positions given by their low 4 bits. For example, the less-than sign is
-// U+003C, so the value 0x3C is at index 0xC (decimal 12). Positions that
-// don’t correspond to a character of interest have the value 1, except lane
-// 1 has the placeholder value 2. This way, characters that we don’t want to
-// match anything in the lookup table get a non-matching placeholder: U+0001
-// gets compared with 2 (semantically U+0002) and everything else not of
-// interest gets compared with 1 (semantically U+0001) to produce a
-// non-matching lane.
-//
-// This means that instead of comparing the vector of the low halves of the
-// UTF-16 code units against multiple constant vectors each filled in all
-// lanes with a given ASCII character of interest, the table lookup gives us
-// one vector to compare against where each lane can have a different ASCII
-// character of interest to compare with.
-//
-// This requires the ASCII characters of interest to have mutually distinct
-// low 4 bits. This is true for U+0000, &, <, LF, CR, ", and ', but,
-// unfortunately, CR, ] and - share the low 4 bits, so cases where we need
-// to include a check for ] or - needs to do a separate check, since CR is
-// always in the lookup table. (Checks for ", ', ], and - are not here at
-// this time but will come in follow-up patches.)
-//
-// From these operations, we get a vector of 16 8-bit mask lanes where a
-// lane is 0xFF if the low 8 bits of the UTF-16 code unit matched an ASCII
-// character that terminates the loop and 0x00 otherwise. We lane-wise
-// compare the high halves with zero and AND the resulting mask vector
-// together with the mask vector that resulted from processing the low 8
-// bits to confirm which low 8 bits had 0 as the high 8 bits, i.e. the
-// UTF-16 code unit really was Basic Latin.
-//
-// If we have a configuration that requires terminating the loop on
-// surrogates, we check the vector containing the high halves of the UTF-16
-// code units for surrogates (by masking certain high bits to compare them
-// with a constant) and OR the resulting mask vector together with the
-// vector computed above.
-//
-// Now we have a vector of 16 8-bit mask lanes that corresponds to the input
-// of 16 UTF-16 code units to indicate which code units in the run of 16
-// UTF-16 code units require terminating the loop (i.e. must not be skipped
-// over). At this point, the handling diverges for x86/x86_64 and aarch64.
-//
-// ## x86/x86_64
-//
-// We convert the SIMD mask into bits in an ALU register. The operation
-// returns a 32-bit type, but only the low 16 bits can be non-zero. If the
-// integer is non-zero, the loop terminates, since some lane in the mask was
-// non-zero. In this case, we return the number of trailing zeros in the
-// integer. (We already know must have a non-zero bit somewhere in the low
-// 16 bits, so we can’t end up counting to the high half of the 32-bit type.)
-// Due to the little-endian semantics, the first UTF-16 code unit in the
-// input corresponds to the least-significant bit in the integer, so when the
-// first UTF-16 code unit in the input is unskippable, the least-significant
-// bit in the integer is 1, so there are 0 trailing zeros, i.e. 0 skippable
-// UTF-16 code units.
-//
-// ## aarch64
-//
-// We want to know if any lane is the mask is non-zero to decide whether to
-// terminate the loop. If there is a non-zero lane, we want to know the
-// position of the first (in the content order of the input UTF-16 text)
-// non-zero lane. To accomplish these goals, we bitwise AND the mask vector
-// with a vector of 16 constants. Since ANDing with a mask lane set to zero
-// results in zero, we need all 16 constants to be non-zero. Yet, we need to
-// be able to accommodate the possibility of first lane in content order
-// being set, which means we need to compute 0 as the result. To be able to
-// compute 0 but have the constants be non-zero, the constants are numbers
-// that need be subtracted from 16. That is, the constant vector has lanes
-// set to numbers from 16 to 1 (inclusive). We do the reduction of the
-// resulting SIMD vector to an ALU integer by taking the value of the lane
-// with the largest value.
-//
-// If no mask lane was set, the max operation results in 0, so if the
-// integer is zero, the loop continues. Otherwise, we get the number of
-// skippable UTF-16 code units by subtracting the integer from 16. That is,
-// if the first UTF-16 unit is unstoppable, we get 16 as the max lane value
-// and 16-16=0.
-//
-// # Alignment
-//
-// These functions use unaligned SIMD loads, because alignment
-// doesn't matter on aarch64 CPUs or on x86_64 CPUs from the most
-// recent decade or so. It's not worthwhile to add complexity for
-// old CPUs.
-//
-// # Inlining
-//
-// The public functions here are expected to be called from a loop. To give
-// LICM the opportunity to hoist the SIMD constants out of the loop, make
-// sure that every function on the path from the loop to here is declared
-// MOZ_ALWAYS_INLINE_EVEN_DEBUG and that all these and the loop itself are
-// compiled with the same instruction set extension flags (if applicable).
-//
-// # Acknowledments
-//
-// https://lemire.me/blog/2024/06/08/scan-html-faster-with-simd-instructions-chrome-edition/
-
-#if defined(__aarch64__)
-
-# include <arm_neon.h>
-
-#else // x86/x86_64
-
-# include <tmmintrin.h>
-// Using syntax that clang-tidy doesn't like to match GCC guidance.
-typedef uint8_t uint8x16_t __attribute__((vector_size(16)));
-
-#endif
-
-namespace mozilla::htmlaccel {
-
-namespace detail {
-
-#if defined(__aarch64__)
-// The idea is that when this is ANDed with the mask, we get 0 in the
-// non-match positions and the leftmost match ends up with the highest number.
-// This way, taking the max value of the result is zero if all positions
-// are non-match, and otherwise we get a value that when subtracted from
-// 16 indicates the index of the leftmost match.
-const uint8x16_t INVERTED_ADVANCES = {16, 15, 14, 13, 12, 11, 10, 9,
- 8, 7, 6, 5, 4, 3, 2, 1};
-
-MOZ_ALWAYS_INLINE_EVEN_DEBUG uint8x16_t TableLookup(uint8x16_t aTable,
- uint8x16_t aNibbles) {
- return vqtbl1q_u8(aTable, aNibbles);
-}
-
-#else // x86/x86_64
-
-MOZ_ALWAYS_INLINE_EVEN_DEBUG uint8x16_t TableLookup(uint8x16_t aTable,
- uint8x16_t aNibbles) {
- // GCC wants reinterpret_cast
- return reinterpret_cast<uint8x16_t>(_mm_shuffle_epi8(aTable, aNibbles));
-}
-
-#endif
-
-// These formulations optimize nicely, so no point in trying something fancier
-// to fill all lanes with the same byte.
-const uint8x16_t ALL_ZEROS = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
-const uint8x16_t NIBBLE_MASK = {0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF,
- 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF};
-const uint8x16_t SURROGATE_MASK = {0xF8, 0xF8, 0xF8, 0xF8, 0xF8, 0xF8,
- 0xF8, 0xF8, 0xF8, 0xF8, 0xF8, 0xF8,
- 0xF8, 0xF8, 0xF8, 0xF8};
-const uint8x16_t SURROGATE_MATCH = {0xD8, 0xD8, 0xD8, 0xD8, 0xD8, 0xD8,
- 0xD8, 0xD8, 0xD8, 0xD8, 0xD8, 0xD8,
- 0xD8, 0xD8, 0xD8, 0xD8};
-
-// The approach here supports disallowing up to 16 different
-// characters that 1) are in the Latin1 range, i.e. U+00FF or
-// below, and 2) do not have the lowest 4 bits in common with
-// each other.
-//
-// The code point value of each disallowed character needs
-// to be placed in the vector at the position indexed by the
-// low 4 bits of the character (low four bits 0 is the leftmost
-// position and low four bits 15 is the rightmost position).
-//
-// U+0001 neither occurs in typical HTML nor is one of the
-// code points we care about, so use 1 as the non-matching
-// value. We do care about U+0000, unfortunately.
-// We use U+0002 at position 1 to make sure it doesn't
-// match, either. That is, we put 1 in the positions we
-// don't care about except we put 2 at position 1.
-
-/// Disallow U+0000, less-than, ampersand, and carriage return.
-const uint8x16_t ZERO_LT_AMP_CR = {0, 2, 1, 1, 1, 1, '&', 1,
- 1, 1, 1, 1, '<', '\r', 1, 1};
-/// Disallow U+0000, less-than, ampersand, carriage return, and line feed.
-const uint8x16_t ZERO_LT_AMP_CR_LF = {0, 2, 1, 1, 1, 1, '&', 1,
- 1, 1, '\n', 1, '<', '\r', 1, 1};
-
-/// Compute a 16-lane mask for 16 UTF-16 code units, where a lane
-/// is 0x00 if OK to skip and 0xFF if not OK to skip.
-MOZ_ALWAYS_INLINE_EVEN_DEBUG uint8x16_t
-StrideToMask(const char16_t* aArr /* len = 16 */, uint8x16_t aTable,
- bool aAllowSurrogates) {
- uint8x16_t first;
- uint8x16_t second;
- // memcpy generates a single unaligned load instruction with both ISAs.
- memcpy(&first, aArr, 16);
- memcpy(&second, aArr + 8, 16);
- // Each shuffle maps to a single instruction on aarch64.
- // On x86/x86_64, how efficiently these shuffles map to instructions
- // depends on the level of instruction set extensions chosen, which
- // is the main reason that we compile this file at a higher extension
- // level than the minimum SSSE3 (and the main reason why this file
- // uses GNU C portable SIMD instead of sticking to what's in the
- // Intel-defined headers).
- uint8x16_t low_halves = __builtin_shufflevector(
- first, second, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30);
- uint8x16_t high_halves = __builtin_shufflevector(
- first, second, 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31);
- uint8x16_t high_half_matches = high_halves == ALL_ZEROS;
- uint8x16_t low_half_matches =
- low_halves == TableLookup(aTable, low_halves & NIBBLE_MASK);
- uint8x16_t ret = low_half_matches & high_half_matches;
- if (!aAllowSurrogates) { // Assumed to be constant-propagated
- ret |= (high_halves & SURROGATE_MASK) == SURROGATE_MATCH;
- }
- return ret;
-}
-
-MOZ_ALWAYS_INLINE_EVEN_DEBUG int32_t AccelerateTextNode(const char16_t* aInput,
- const char16_t* aEnd,
- uint8x16_t aTable,
- bool aAllowSurrogates) {
- const char16_t* current = aInput;
- while (aEnd - current >= 16) {
- uint8x16_t mask = StrideToMask(current, aTable, aAllowSurrogates);
-#if defined(__aarch64__)
- uint8_t max = vmaxvq_u8(mask & INVERTED_ADVANCES);
- if (max != 0) {
- return int32_t((current - aInput) + 16 - max);
- }
-#else // x86/x86_64
- int int_mask = _mm_movemask_epi8(mask);
- if (int_mask != 0) {
- // The least-significant bit in the integer corresponds to
- // the first SIMD lane in text order. Hence, we need to count
- // trailing zeros. We already checked that the bits are not
- // all zeros, so __builtin_ctz isn't UB.
- return int32_t((current - aInput) + __builtin_ctz(int_mask));
- }
-#endif
- current += 16;
- }
- return int32_t(current - aInput);
-}
-
-} // namespace detail
-
-// Public entry points are in htmlaccelNotInline.h for now.
-
-} // namespace mozilla::htmlaccel
-
-#endif // mozilla_htmlaccel_htmlaccel_h
diff --git a/parser/htmlaccel/htmlaccelEnabled.h b/parser/htmlaccel/htmlaccelEnabled.h
@@ -1,30 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-#ifndef mozilla_htmlaccel_htmlaccelEnabled_h
-#define mozilla_htmlaccel_htmlaccelEnabled_h
-
-#if defined(__x86_64__)
-# include "mozilla/SSE.h"
-#endif
-
-namespace mozilla::htmlaccel {
-
-/// This function is appropriate to call when the SIMD path is compiled
-/// with `HTML_ACCEL_FLAGS`.
-///
-/// Keep this in sync with `HTML_ACCEL_FLAGS` in `toolchain.configure`.
-inline bool htmlaccelEnabled() {
-#if defined(__aarch64__) && defined(__LITTLE_ENDIAN__)
- return true;
-#elif defined(__x86_64__)
- return mozilla::supports_bmi() && mozilla::supports_avx();
-#else
- return false;
-#endif
-}
-
-} // namespace mozilla::htmlaccel
-
-#endif // mozilla_htmlaccel_htmlaccelEnabled_h
diff --git a/parser/htmlaccel/htmlaccelNotInline.cpp b/parser/htmlaccel/htmlaccelNotInline.cpp
@@ -1,30 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-#include "mozilla/htmlaccel/htmlaccel.h"
-#include "mozilla/htmlaccel/htmlaccelNotInline.h"
-
-namespace mozilla::htmlaccel {
-
-/// The innerHTML / DOMParser case for the data state in the HTML parser
-MOZ_NEVER_INLINE int32_t AccelerateDataFastest(const char16_t* aPtr,
- const char16_t* aEnd) {
- return detail::AccelerateTextNode(aPtr, aEnd, detail::ZERO_LT_AMP_CR, true);
-}
-
-/// View Source case for the data state in the HTML parser
-MOZ_NEVER_INLINE int32_t AccelerateDataViewSource(const char16_t* aPtr,
- const char16_t* aEnd) {
- return detail::AccelerateTextNode(aPtr, aEnd, detail::ZERO_LT_AMP_CR_LF,
- true);
-}
-
-/// Normal network case for the data state in the HTML parser
-MOZ_NEVER_INLINE int32_t AccelerateDataLineCol(const char16_t* aPtr,
- const char16_t* aEnd) {
- return detail::AccelerateTextNode(aPtr, aEnd, detail::ZERO_LT_AMP_CR_LF,
- false);
-}
-
-} // namespace mozilla::htmlaccel
diff --git a/parser/htmlaccel/htmlaccelNotInline.h b/parser/htmlaccel/htmlaccelNotInline.h
@@ -1,34 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-#ifndef mozilla_htmlaccel_htmlaccelNotInline_h
-#define mozilla_htmlaccel_htmlaccelNotInline_h
-
-#include "mozilla/Attributes.h"
-
-namespace mozilla::htmlaccel {
-// Logically these should be MOZ_ALWAYS_INLINE_EVEN_DEBUG if LLVM was working
-// as expected. However, these are MOZ_NEVER_INLINE to work around
-// https://github.com/llvm/llvm-project/issues/160886 . This way, we get
-// a little bit of LICM for the SIMD constants that need to be loaded
-// from the constant pool instead of getting materialized by splatting
-// an immediate. Once the LLVM bug is fixed, these should be changed
-// to MOZ_ALWAYS_INLINE_EVEN_DEBUG to allow the constants to move further
-// up to the top of nsHtml5Tokenizer::stateLoop.
-
-/// The innerHTML / DOMParser case for the data state in the HTML parser
-MOZ_NEVER_INLINE int32_t AccelerateDataFastest(const char16_t* aPtr,
- const char16_t* aEnd);
-
-/// View Source case for the data state in the HTML parser
-MOZ_NEVER_INLINE int32_t AccelerateDataViewSource(const char16_t* aPtr,
- const char16_t* aEnd);
-
-/// Normal network case for the data state in the HTML parser
-MOZ_NEVER_INLINE int32_t AccelerateDataLineCol(const char16_t* aPtr,
- const char16_t* aEnd);
-
-} // namespace mozilla::htmlaccel
-
-#endif // mozilla_htmlaccel_htmlaccelNotInline_h
diff --git a/parser/htmlaccel/moz.build b/parser/htmlaccel/moz.build
@@ -1,29 +0,0 @@
-# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
-# vim: set filetype=python:
-# This Source Code Form is subject to the terms of the Mozilla Public
-# License, v. 2.0. If a copy of the MPL was not distributed with this
-# file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-EXPORTS.mozilla.htmlaccel += [
- "htmlaccel.h",
- "htmlaccelEnabled.h",
- "htmlaccelNotInline.h",
-]
-
-# Make sure the result is consistent with mozilla::htmlaccel::htmlaccelEnabled().
-#
-# Due to https://github.com/llvm/llvm-project/issues/160886, the entry points
-# need to be _not_ inline and, therefore, need a compilation unit. This should
-# go away once the LLVM bug is fixed.
-
-if (CONFIG["TARGET_CPU"] == "x86_64") or (
- CONFIG["TARGET_CPU"] == "aarch64" and CONFIG["TARGET_ENDIANNESS"] == "little"
-):
- SOURCES += [
- "htmlaccelNotInline.cpp",
- ]
- SOURCES["htmlaccelNotInline.cpp"].flags += CONFIG["HTML_ACCEL_FLAGS"]
-
-TEST_DIRS += ["gtest"]
-
-FINAL_LIBRARY = "xul"
diff --git a/parser/moz.build b/parser/moz.build
@@ -7,7 +7,7 @@
with Files("**"):
BUG_COMPONENT = ("Core", "DOM: HTML Parser")
-DIRS += ["expat", "prototype", "xml", "htmlaccel", "htmlparser", "html"]
+DIRS += ["expat", "prototype", "xml", "htmlparser", "html"]
EXPORTS += [
"nsCharsetSource.h",