commit d6d702dc2929971a654c48e63d773e36b1d7abf7
parent 36bc89c79adec3c31a97a6c5a235fd57e94f397e
Author: Henri Sivonen <hsivonen@hsivonen.fi>
Date: Mon, 27 Oct 2025 13:45:40 +0000
Bug 1499682 - SIMD-accelerate the data state in the HTML tokenizer. r=smaug,sergesanspaille
Other tokenizer states and the serializer are potential follow-ups.
The code movement from nsHtml5Tokenizer.cpp to nsHtml5Tokenizer.h is
for enabling the eventual non-unified build of nsHtml5TokenizerSIMD.cpp
once the LLVM bug has been fixed.
Differential Revision: https://phabricator.services.mozilla.com/D227317
Diffstat:
21 files changed, 5830 insertions(+), 4588 deletions(-)
diff --git a/build/moz.configure/toolchain.configure b/build/moz.configure/toolchain.configure
@@ -3902,6 +3902,35 @@ set_config(
),
)
+
+@depends(target, c_compiler)
+def htmlaccel_config(target, c_compiler):
+ # Keep this in sync with the mozilla::htmlaccel::htmlaccelEnabled function.
+ #
+ # The code compiles on SSSE3, but AVX+BMI generates better code
+ # and has been available for 12 years at the time of landing this,
+ # so let's give the best code to users with reasonably recent hardware.
+ #
+ # Not enabled on 32-bit x86, due to lack of insight into what hardware is
+ # representative at this point in time and due to lack of such hardware
+ # for testing to see what config would actually be an optimization.
+ #
+ # aarch64 does not need extra flags.
+ #
+ # clang-cl doesn't tolerate -flax-vector-conversions but GCC requires it.
+ #
+ # -mavx2 doesn't change codegen vs. -mavx. AVX2 and BMI always co-occur
+ # in Intel CPUs, but there are AMD CPUs that have AVX and BMI without
+ # AVX2.
+ if target.cpu != "x86_64":
+ return []
+ if c_compiler.type == "gcc":
+ return ["-mavx", "-mbmi", "-flax-vector-conversions"]
+ return ["-mavx", "-mbmi"]
+
+
+set_config("HTML_ACCEL_FLAGS", htmlaccel_config)
+
# dtrace support
##
option("--enable-dtrace", help="Build with dtrace support")
diff --git a/parser/html/javasrc/Tokenizer.java b/parser/html/javasrc/Tokenizer.java
@@ -932,7 +932,7 @@ public class Tokenizer implements Locator, Locator2 {
// ]NOCPP]
- HtmlAttributes emptyAttributes() {
+ @Inline HtmlAttributes emptyAttributes() {
// [NOCPP[
if (newAttributesEachTime) {
return new HtmlAttributes(mappingLangToXmlLang);
@@ -944,7 +944,7 @@ public class Tokenizer implements Locator, Locator2 {
// ]NOCPP]
}
- @Inline private void appendCharRefBuf(char c) {
+ private void appendCharRefBuf(char c) {
// CPPONLY: assert charRefBufLen < charRefBuf.length:
// CPPONLY: "RELEASE: Attempted to overrun charRefBuf!";
charRefBuf[charRefBufLen++] = c;
@@ -982,7 +982,7 @@ public class Tokenizer implements Locator, Locator2 {
* @param c
* the UTF-16 code unit to append
*/
- @Inline private void appendStrBuf(char c) {
+ private void appendStrBuf(char c) {
// CPPONLY: assert strBufLen < strBuf.length: "Previous buffer length insufficient.";
// CPPONLY: if (strBufLen == strBuf.length) {
// CPPONLY: if (!EnsureBufferSpace(1)) {
@@ -1000,7 +1000,7 @@ public class Tokenizer implements Locator, Locator2 {
*
* @return the buffer as a string
*/
- protected String strBufToString() {
+ @Inline protected String strBufToString() {
String str = Portability.newStringFromBuffer(strBuf, 0, strBufLen
// CPPONLY: , tokenHandler, !newAttributesEachTime && attributeName == AttributeName.CLASS
);
@@ -1014,7 +1014,7 @@ public class Tokenizer implements Locator, Locator2 {
*
* @return the buffer as local name
*/
- private void strBufToDoctypeName() {
+ @Inline private void strBufToDoctypeName() {
doctypeName = Portability.newLocalNameFromBuffer(strBuf, strBufLen, interner);
clearStrBufAfterUse();
}
@@ -1025,7 +1025,7 @@ public class Tokenizer implements Locator, Locator2 {
* @throws SAXException
* if the token handler threw
*/
- private void emitStrBuf() throws SAXException {
+ @Inline private void emitStrBuf() throws SAXException {
if (strBufLen > 0) {
tokenHandler.characters(strBuf, 0, strBufLen);
clearStrBufAfterUse();
@@ -1455,12 +1455,6 @@ public class Tokenizer implements Locator, Locator2 {
*/
int pos = start - 1;
- /**
- * The index of the first <code>char</code> in <code>buf</code> that is
- * part of a coalesced run of character tokens or
- * <code>Integer.MAX_VALUE</code> if there is not a current run being
- * coalesced.
- */
switch (state) {
case DATA:
case RCDATA:
@@ -1486,19 +1480,24 @@ public class Tokenizer implements Locator, Locator2 {
break;
}
- /**
- * The number of <code>char</code>s in <code>buf</code> that have
- * meaning. (The rest of the array is garbage and should not be
- * examined.)
- */
// CPPONLY: if (mViewSource) {
// CPPONLY: mViewSource.SetBuffer(buffer);
- // CPPONLY: pos = stateLoop(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd());
+ // CPPONLY: if (htmlaccelEnabled()) {
+ // CPPONLY: pos = StateLoopViewSourceSIMD(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd());
+ // CPPONLY: } else {
+ // CPPONLY: pos = StateLoopViewSourceALU(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd());
+ // CPPONLY: }
// CPPONLY: mViewSource.DropBuffer((pos == buffer.getEnd()) ? pos : pos + 1);
// CPPONLY: } else if (tokenHandler.WantsLineAndColumn()) {
- // CPPONLY: pos = stateLoop(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd());
+ // CPPONLY: if (htmlaccelEnabled()) {
+ // CPPONLY: pos = StateLoopLineColSIMD(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd());
+ // CPPONLY: } else {
+ // CPPONLY: pos = StateLoopLineColALU(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd());
+ // CPPONLY: }
+ // CPPONLY: } else if (htmlaccelEnabled()) {
+ // CPPONLY: pos = StateLoopFastestSIMD(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd());
// CPPONLY: } else {
- // CPPONLY: pos = stateLoop(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd());
+ // CPPONLY: pos = StateLoopFastestALU(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd());
// CPPONLY: }
// [NOCPP[
pos = stateLoop(state, c, pos, buffer.getBuffer(), false, returnState,
@@ -1547,7 +1546,7 @@ public class Tokenizer implements Locator, Locator2 {
}
// ]NOCPP]
- @SuppressWarnings("unused") private int stateLoop(int state, char c,
+ @SuppressWarnings("unused") @Inline private int stateLoop(int state, char c,
int pos, @NoLength char[] buf, boolean reconsume, int returnState,
int endPos) throws SAXException {
boolean reportedConsecutiveHyphens = false;
@@ -1623,54 +1622,127 @@ public class Tokenizer implements Locator, Locator2 {
switch (state) {
case DATA:
dataloop: for (;;) {
+ // Ideally this reconsume block would be a separate state, DATA_RECONSUME above this one
+ // with fallthrough into this state. However, such a change would be disruptive to
+ // TransitionHandler and everything that works with returnState.
if (reconsume) {
reconsume = false;
- } else {
- if (++pos == endPos) {
- break stateloop;
+ // This is a manual copy of the switch below with break/continue
+ // adjusted as relevant. Make sure to keep in sync with the switch below!
+ switch (c) {
+ case '&':
+ /*
+ * U+0026 AMPERSAND (&) Switch to the character
+ * reference in data state.
+ */
+ flushChars(buf, pos);
+ assert charRefBufLen == 0: "charRefBufLen not reset after previous use!";
+ appendCharRefBuf(c);
+ setAdditionalAndRememberAmpersandLocation('\u0000');
+ returnState = state;
+ state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
+ continue stateloop;
+ case '<':
+ /*
+ * U+003C LESS-THAN SIGN (<) Switch to the tag
+ * open state.
+ */
+ flushChars(buf, pos);
+
+ state = transition(state, Tokenizer.TAG_OPEN, reconsume, pos);
+ // `break` optimizes; `continue stateloop;` would be valid
+ break dataloop;
+ case '\u0000':
+ maybeEmitReplacementCharacter(buf, pos);
+ break;
+ case '\r':
+ emitCarriageReturn(buf, pos);
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ // CPPONLY: MOZ_FALLTHROUGH;
+ default:
+ /*
+ * Anything else Emit the input character as a
+ * character token.
+ *
+ * Stay in the data state.
+ */
+ break;
}
- c = checkChar(buf, pos);
}
- switch (c) {
- case '&':
- /*
- * U+0026 AMPERSAND (&) Switch to the character
- * reference in data state.
- */
- flushChars(buf, pos);
- assert charRefBufLen == 0: "charRefBufLen not reset after previous use!";
- appendCharRefBuf(c);
- setAdditionalAndRememberAmpersandLocation('\u0000');
- returnState = state;
- state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
- continue stateloop;
- case '<':
- /*
- * U+003C LESS-THAN SIGN (<) Switch to the tag
- * open state.
- */
- flushChars(buf, pos);
-
- state = transition(state, Tokenizer.TAG_OPEN, reconsume, pos);
- // `break` optimizes; `continue stateloop;` would be valid
- break dataloop;
- case '\u0000':
- maybeEmitReplacementCharacter(buf, pos);
- continue;
- case '\r':
- emitCarriageReturn(buf, pos);
- break stateloop;
- case '\n':
- silentLineFeed();
- // CPPONLY: MOZ_FALLTHROUGH;
- default:
- /*
- * Anything else Emit the input character as a
- * character token.
- *
- * Stay in the data state.
- */
- continue;
+ datamiddle: for (;;) {
+ ++pos;
+ // Perhaps at some point, it will be appropriate to do SIMD in Java, but not today.
+ // The line below advances pos by some number of code units that this state is indifferent to.
+ // CPPONLY: pos += accelerateAdvancementData(buf, pos, endPos);
+ for (;;) {
+ if (pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ // Make sure to keep in sync with the switch above in the reconsume block!
+ switch (c) {
+ case '&':
+ /*
+ * U+0026 AMPERSAND (&) Switch to the character
+ * reference in data state.
+ */
+ flushChars(buf, pos);
+ assert charRefBufLen == 0: "charRefBufLen not reset after previous use!";
+ appendCharRefBuf(c);
+ setAdditionalAndRememberAmpersandLocation('\u0000');
+ returnState = state;
+ state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
+ continue stateloop;
+ case '<':
+ /*
+ * U+003C LESS-THAN SIGN (<) Switch to the tag
+ * open state.
+ */
+ flushChars(buf, pos);
+
+ state = transition(state, Tokenizer.TAG_OPEN, reconsume, pos);
+ // `break` optimizes; `continue stateloop;` would be valid
+ break dataloop;
+ case '\u0000':
+ maybeEmitReplacementCharacter(buf, pos);
+ // Continue from above the accelerateAdvancementData call.
+ continue datamiddle;
+ case '\r':
+ emitCarriageReturn(buf, pos);
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ // Continue from above the accelerateAdvancementData call.
+ continue datamiddle;
+ default:
+ /*
+ * Anything else Emit the input character as a
+ * character token.
+ *
+ * Stay in the data state.
+ */
+ // Don't go back to accelerateAdvancementData to avoid
+ // bouncing back and forth in a way that doesn't make good
+ // use of SIMD when we have less than a SIMD stride to go
+ // or when we come here due to a non-BMP character.
+ // The SIMD code doesn't have ALU handling for the remainder
+ // that is shorter than a SIMD stride, because this case
+ // in this switch has to exist anyway (for SIMD-unavailable
+ // and for non-BMP cases) and this innermost loop can serve
+ // that purpose, too. In the non-BMP case we stay on the
+ // ALU path until we end up in one of the other cases in this
+ // switch (e.g. end of line) in order to avoid bouncing back
+ // and forth when we have text in a non-BMP script instead
+ // of an isolated emoji.
+ //
+ // We need to increment pos when staying in this innermost
+ // loop!
+ ++pos;
+ continue;
+ }
+ }
}
}
// CPPONLY: MOZ_FALLTHROUGH;
@@ -4002,52 +4074,122 @@ public class Tokenizer implements Locator, Locator2 {
// no fallthrough, reordering opportunity
case RCDATA:
rcdataloop: for (;;) {
+ // Ideally this reconsume block would be a separate state, RCDATA_RECONSUME above this one
+ // with fallthrough into this state. However, such a change would be disruptive to
+ // TransitionHandler and everything that works with returnState.
if (reconsume) {
reconsume = false;
- } else {
- if (++pos == endPos) {
- break stateloop;
+ // This is a manual copy of the switch below with break/continue
+ // adjusted as relevant. Make sure to keep in sync with the switch below!
+ switch (c) {
+ case '&':
+ /*
+ * U+0026 AMPERSAND (&) Switch to the character
+ * reference in RCDATA state.
+ */
+ flushChars(buf, pos);
+ assert charRefBufLen == 0: "charRefBufLen not reset after previous use!";
+ appendCharRefBuf(c);
+ setAdditionalAndRememberAmpersandLocation('\u0000');
+ returnState = state;
+ state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
+ continue stateloop;
+ case '<':
+ /*
+ * U+003C LESS-THAN SIGN (<) Switch to the
+ * RCDATA less-than sign state.
+ */
+ flushChars(buf, pos);
+ returnState = state;
+ state = transition(state, Tokenizer.RAWTEXT_RCDATA_LESS_THAN_SIGN, reconsume, pos);
+ continue stateloop;
+ case '\u0000':
+ emitReplacementCharacter(buf, pos); // RCDATA keeps emitReplacementCharacter (not the DATA state's maybe* variant).
+ break;
+ case '\r':
+ emitCarriageReturn(buf, pos);
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ // CPPONLY: MOZ_FALLTHROUGH;
+ default:
+ /*
+ * Emit the current input character as a
+ * character token. Stay in the RCDATA state.
+ */
+ break;
}
- c = checkChar(buf, pos);
}
- switch (c) {
- case '&':
- /*
- * U+0026 AMPERSAND (&) Switch to the character
- * reference in RCDATA state.
- */
- flushChars(buf, pos);
- assert charRefBufLen == 0: "charRefBufLen not reset after previous use!";
- appendCharRefBuf(c);
- setAdditionalAndRememberAmpersandLocation('\u0000');
- returnState = state;
- state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
- continue stateloop;
- case '<':
- /*
- * U+003C LESS-THAN SIGN (<) Switch to the
- * RCDATA less-than sign state.
- */
- flushChars(buf, pos);
-
- returnState = state;
- state = transition(state, Tokenizer.RAWTEXT_RCDATA_LESS_THAN_SIGN, reconsume, pos);
- continue stateloop;
- case '\u0000':
- emitReplacementCharacter(buf, pos);
- continue;
- case '\r':
- emitCarriageReturn(buf, pos);
- break stateloop;
- case '\n':
- silentLineFeed();
- // CPPONLY: MOZ_FALLTHROUGH;
- default:
- /*
- * Emit the current input character as a
- * character token. Stay in the RCDATA state.
- */
- continue;
+ rcdatamiddle: for (;;) {
+ ++pos;
+ // Perhaps at some point, it will be appropriate to do SIMD in Java, but not today.
+ // The line below advances pos by some number of code units that this state is indifferent to.
+ // RCDATA and DATA have the same set of characters that they are indifferent to, hence accelerateAdvancementData.
+ // CPPONLY: pos += accelerateAdvancementData(buf, pos, endPos);
+ for (;;) {
+ if (pos == endPos) {
+ break stateloop;
+ }
+ c = checkChar(buf, pos);
+ // Make sure to keep in sync with the switch above in the reconsume block!
+ switch (c) {
+ case '&':
+ /*
+ * U+0026 AMPERSAND (&) Switch to the character
+ * reference in RCDATA state.
+ */
+ flushChars(buf, pos);
+ assert charRefBufLen == 0: "charRefBufLen not reset after previous use!";
+ appendCharRefBuf(c);
+ setAdditionalAndRememberAmpersandLocation('\u0000');
+ returnState = state;
+ state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
+ continue stateloop;
+ case '<':
+ /*
+ * U+003C LESS-THAN SIGN (<) Switch to the
+ * RCDATA less-than sign state.
+ */
+ flushChars(buf, pos);
+ returnState = state;
+ state = transition(state, Tokenizer.RAWTEXT_RCDATA_LESS_THAN_SIGN, reconsume, pos);
+ continue stateloop;
+ case '\u0000':
+ emitReplacementCharacter(buf, pos); // RCDATA keeps emitReplacementCharacter (not the DATA state's maybe* variant).
+ // Continue from above the accelerateAdvancementData call.
+ continue rcdatamiddle;
+ case '\r':
+ emitCarriageReturn(buf, pos);
+ break stateloop;
+ case '\n':
+ silentLineFeed();
+ // Continue from above the accelerateAdvancementData call.
+ continue rcdatamiddle;
+ default:
+ /*
+ * Emit the current input character as a
+ * character token. Stay in the RCDATA state.
+ */
+ // Don't go back to accelerateAdvancementData to avoid
+ // bouncing back and forth in a way that doesn't make good
+ // use of SIMD when we have less than a SIMD stride to go
+ // or when we come here due to a non-BMP character.
+ // The SIMD code doesn't have ALU handling for the remainder
+ // that is shorter than a SIMD stride, because this case
+ // in this switch has to exist anyway (for SIMD-unavailable
+ // and for non-BMP cases) and this innermost loop can serve
+ // that purpose, too. In the non-BMP case we stay on the
+ // ALU path until we end up in one of the other cases in this
+ // switch (e.g. end of line) in order to avoid bouncing back
+ // and forth when we have text in a non-BMP script instead
+ // of an isolated emoji.
+ //
+ // We need to increment pos when staying in this innermost
+ // loop!
+ ++pos;
+ continue;
+ }
+ }
}
}
// no fallthrough, reordering opportunity
@@ -6348,24 +6490,24 @@ public class Tokenizer implements Locator, Locator2 {
forceQuirks = false;
}
- private void adjustDoubleHyphenAndAppendToStrBufCarriageReturn()
+ @Inline private void adjustDoubleHyphenAndAppendToStrBufCarriageReturn()
throws SAXException {
silentCarriageReturn();
adjustDoubleHyphenAndAppendToStrBufAndErr('\n', false);
}
- private void adjustDoubleHyphenAndAppendToStrBufLineFeed()
+ @Inline private void adjustDoubleHyphenAndAppendToStrBufLineFeed()
throws SAXException {
silentLineFeed();
adjustDoubleHyphenAndAppendToStrBufAndErr('\n', false);
}
- private void appendStrBufLineFeed() {
+ @Inline private void appendStrBufLineFeed() {
silentLineFeed();
appendStrBuf('\n');
}
- private void appendStrBufCarriageReturn() {
+ @Inline private void appendStrBufCarriageReturn() {
silentCarriageReturn();
appendStrBuf('\n');
}
@@ -6383,7 +6525,7 @@ public class Tokenizer implements Locator, Locator2 {
// ]NOCPP]
- private void emitCarriageReturn(@NoLength char[] buf, int pos)
+ @Inline private void emitCarriageReturn(@NoLength char[] buf, int pos)
throws SAXException {
silentCarriageReturn();
flushChars(buf, pos);
@@ -6412,7 +6554,7 @@ public class Tokenizer implements Locator, Locator2 {
cstart = pos + 1;
}
- private void setAdditionalAndRememberAmpersandLocation(char add) {
+ @Inline private void setAdditionalAndRememberAmpersandLocation(char add) {
additional = add;
// [NOCPP[
ampersandLocation = new LocatorImpl(this);
@@ -7077,7 +7219,7 @@ public class Tokenizer implements Locator, Locator2 {
* happened in a non-text context, this method turns that deferred suspension
* request into an immediately-pending suspension request.
*/
- private void suspendIfRequestedAfterCurrentNonTextToken() {
+ @Inline private void suspendIfRequestedAfterCurrentNonTextToken() {
if (suspendAfterCurrentNonTextToken) {
suspendAfterCurrentNonTextToken = false;
shouldSuspend = true;
@@ -7221,7 +7363,7 @@ public class Tokenizer implements Locator, Locator2 {
* @param val
* @throws SAXException
*/
- private void emitOrAppendTwo(@Const @NoLength char[] val, int returnState)
+ @Inline private void emitOrAppendTwo(@Const @NoLength char[] val, int returnState)
throws SAXException {
if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
appendStrBuf(val[0]);
@@ -7231,7 +7373,7 @@ public class Tokenizer implements Locator, Locator2 {
}
}
- private void emitOrAppendOne(@Const @NoLength char[] val, int returnState)
+ @Inline private void emitOrAppendOne(@Const @NoLength char[] val, int returnState)
throws SAXException {
if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
appendStrBuf(val[0]);
@@ -7268,7 +7410,7 @@ public class Tokenizer implements Locator, Locator2 {
}
}
- public void requestSuspension() {
+ @Inline public void requestSuspension() {
shouldSuspend = true;
}
@@ -7311,7 +7453,7 @@ public class Tokenizer implements Locator, Locator2 {
// ]NOCPP]
- public boolean isInDataState() {
+ @Inline public boolean isInDataState() {
return (stateSave == DATA);
}
diff --git a/parser/html/moz.build b/parser/html/moz.build
@@ -85,6 +85,38 @@ UNIFIED_SOURCES += [
"nsParserUtils.cpp",
]
+# Each target needs to compile:
+# (nsHtml5TokenizerALU.cpp XOR nsHtml5TokenizerALUStubs.cpp)
+# AND
+# (nsHtml5TokenizerSIMD.cpp XOR nsHtml5TokenizerSIMDStubs.cpp)
+# AND
+# (nsHtml5TokenizerALU.cpp OR nsHtml5TokenizerSIMD.cpp)
+#
+# Make sure the result is consistent with mozilla::htmlaccel::htmlaccelEnabled().
+#
+# Due to https://github.com/llvm/llvm-project/issues/160886, none of the
+# code here actually ends up with SIMD instructions, and SIMD stays in
+# htmlaccelNotInline.cpp instead. Once the LLVM bug is fixed, the functions
+# in htmlaccelNotInline.cpp should become always inlined and
+# nsHtml5TokenizerSIMD.cpp should be built with HTML_ACCEL_FLAGS.
+
+if CONFIG["TARGET_CPU"] == "x86_64":
+ UNIFIED_SOURCES += [
+ "nsHtml5TokenizerALU.cpp",
+ "nsHtml5TokenizerSIMD.cpp",
+ ]
+elif CONFIG["TARGET_CPU"] == "aarch64" and CONFIG["TARGET_ENDIANNESS"] == "little":
+ # aarch64 doesn't need special flags for SIMD.
+ UNIFIED_SOURCES += [
+ "nsHtml5TokenizerALUStubs.cpp",
+ "nsHtml5TokenizerSIMD.cpp",
+ ]
+else:
+ UNIFIED_SOURCES += [
+ "nsHtml5TokenizerALU.cpp",
+ "nsHtml5TokenizerSIMDStubs.cpp",
+ ]
+
FINAL_LIBRARY = "xul"
LOCAL_INCLUDES += [
diff --git a/parser/html/nsHtml5Tokenizer.cpp b/parser/html/nsHtml5Tokenizer.cpp
@@ -40,8 +40,6 @@
#include "nsHtml5Tokenizer.h"
-#include "nsHtml5TokenizerLoopPolicies.h"
-
char16_t nsHtml5Tokenizer::LT_GT[] = {'<', '>'};
char16_t nsHtml5Tokenizer::LT_SOLIDUS[] = {'<', '/'};
char16_t nsHtml5Tokenizer::RSQB_RSQB[] = {']', ']'};
@@ -215,4297 +213,274 @@ void nsHtml5Tokenizer::endTagExpectationToArray() {
return;
}
default: {
- MOZ_ASSERT(false, "Bad end tag expectation.");
- return;
- }
- }
-}
-
-void nsHtml5Tokenizer::setLineNumber(int32_t line) {
- this->attributeLine = line;
- this->line = line;
-}
-
-nsHtml5HtmlAttributes* nsHtml5Tokenizer::emptyAttributes() {
- return nsHtml5HtmlAttributes::EMPTY_ATTRIBUTES;
-}
-
-void nsHtml5Tokenizer::emitOrAppendCharRefBuf(int32_t returnState) {
- if ((returnState & DATA_AND_RCDATA_MASK)) {
- appendCharRefBufToStrBuf();
- } else {
- if (charRefBufLen > 0) {
- tokenHandler->characters(charRefBuf, 0, charRefBufLen);
- charRefBufLen = 0;
- }
- }
-}
-
-nsHtml5String nsHtml5Tokenizer::strBufToString() {
- nsHtml5String str = nsHtml5Portability::newStringFromBuffer(
- strBuf, 0, strBufLen, tokenHandler,
- !newAttributesEachTime &&
- attributeName == nsHtml5AttributeName::ATTR_CLASS);
- clearStrBufAfterUse();
- return str;
-}
-
-void nsHtml5Tokenizer::strBufToDoctypeName() {
- doctypeName =
- nsHtml5Portability::newLocalNameFromBuffer(strBuf, strBufLen, interner);
- clearStrBufAfterUse();
-}
-
-void nsHtml5Tokenizer::emitStrBuf() {
- if (strBufLen > 0) {
- tokenHandler->characters(strBuf, 0, strBufLen);
- clearStrBufAfterUse();
- }
-}
-
-void nsHtml5Tokenizer::appendStrBuf(char16_t* buffer, int32_t offset,
- int32_t length) {
- int32_t newLen = nsHtml5Portability::checkedAdd(strBufLen, length);
- MOZ_ASSERT(newLen <= strBuf.length, "Previous buffer length insufficient.");
- if (MOZ_UNLIKELY(strBuf.length < newLen)) {
- if (MOZ_UNLIKELY(!EnsureBufferSpace(length))) {
- MOZ_CRASH("Unable to recover from buffer reallocation failure");
- }
- }
- nsHtml5ArrayCopy::arraycopy(buffer, offset, strBuf, strBufLen, length);
- strBufLen = newLen;
-}
-
-void nsHtml5Tokenizer::emitComment(int32_t provisionalHyphens, int32_t pos) {
- RememberGt(pos);
- tokenHandler->comment(strBuf, 0, strBufLen - provisionalHyphens);
- clearStrBufAfterUse();
- cstart = pos + 1;
- suspendIfRequestedAfterCurrentNonTextToken();
-}
-
-void nsHtml5Tokenizer::flushChars(char16_t* buf, int32_t pos) {
- if (pos > cstart) {
- tokenHandler->characters(buf, cstart, pos - cstart);
- }
- cstart = INT32_MAX;
-}
-
-void nsHtml5Tokenizer::strBufToElementNameString() {
- if (containsHyphen) {
- nsAtom* annotationName = nsHtml5ElementName::ELT_ANNOTATION_XML->getName();
- if (nsHtml5Portability::localEqualsBuffer(annotationName, strBuf,
- strBufLen)) {
- tagName = nsHtml5ElementName::ELT_ANNOTATION_XML;
- } else {
- nonInternedTagName->setNameForNonInterned(
- nsHtml5Portability::newLocalNameFromBuffer(strBuf, strBufLen,
- interner),
- true);
- tagName = nonInternedTagName;
- }
- } else {
- tagName = nsHtml5ElementName::elementNameByBuffer(strBuf, strBufLen);
- if (!tagName) {
- nonInternedTagName->setNameForNonInterned(
- nsHtml5Portability::newLocalNameFromBuffer(strBuf, strBufLen,
- interner),
- false);
- tagName = nonInternedTagName;
- }
- }
- containsHyphen = false;
- clearStrBufAfterUse();
-}
-
-int32_t nsHtml5Tokenizer::emitCurrentTagToken(bool selfClosing, int32_t pos) {
- RememberGt(pos);
- cstart = pos + 1;
- maybeErrSlashInEndTag(selfClosing);
- stateSave = nsHtml5Tokenizer::DATA;
- nsHtml5HtmlAttributes* attrs =
- (!attributes ? nsHtml5HtmlAttributes::EMPTY_ATTRIBUTES : attributes);
- if (endTag) {
- maybeErrAttributesOnEndTag(attrs);
- if (!viewingXmlSource) {
- tokenHandler->endTag(tagName);
- }
- if (newAttributesEachTime) {
- delete attributes;
- attributes = nullptr;
- }
- } else {
- if (viewingXmlSource) {
- MOZ_ASSERT(newAttributesEachTime);
- delete attributes;
- attributes = nullptr;
- } else {
- tokenHandler->startTag(tagName, attrs, selfClosing);
- }
- }
- tagName = nullptr;
- if (newAttributesEachTime) {
- attributes = nullptr;
- } else {
- attributes->clear(0);
- }
- suspendIfRequestedAfterCurrentNonTextToken();
- return stateSave;
-}
-
-void nsHtml5Tokenizer::attributeNameComplete() {
- attributeName =
- nsHtml5AttributeName::nameByBuffer(strBuf, strBufLen, interner);
- if (!attributeName) {
- nonInternedAttributeName->setNameForNonInterned(
- nsHtml5Portability::newLocalNameFromBuffer(strBuf, strBufLen,
- interner));
- attributeName = nonInternedAttributeName;
- }
- clearStrBufAfterUse();
- if (!attributes) {
- attributes = new nsHtml5HtmlAttributes(0);
- }
- if (attributes->contains(attributeName)) {
- errDuplicateAttribute();
- attributeName = nullptr;
- }
-}
-
-void nsHtml5Tokenizer::addAttributeWithoutValue() {
- if (attributeName) {
- attributes->addAttribute(
- attributeName, nsHtml5Portability::newEmptyString(), attributeLine);
- attributeName = nullptr;
- } else {
- clearStrBufAfterUse();
- }
-}
-
-void nsHtml5Tokenizer::addAttributeWithValue() {
- if (attributeName) {
- nsHtml5String val = strBufToString();
- if (mViewSource) {
- mViewSource->MaybeLinkifyAttributeValue(attributeName, val);
- }
- attributes->addAttribute(attributeName, val, attributeLine);
- attributeName = nullptr;
- } else {
- clearStrBufAfterUse();
- }
-}
-
-void nsHtml5Tokenizer::start() {
- initializeWithoutStarting();
- tokenHandler->startTokenization(this);
- if (mViewSource) {
- line = 1;
- col = -1;
- nextCharOnNewLine = false;
- } else if (tokenHandler->WantsLineAndColumn()) {
- line = 0;
- col = 1;
- nextCharOnNewLine = true;
- } else {
- line = -1;
- col = -1;
- nextCharOnNewLine = false;
- }
-}
-
-bool nsHtml5Tokenizer::tokenizeBuffer(nsHtml5UTF16Buffer* buffer) {
- int32_t state = stateSave;
- int32_t returnState = returnStateSave;
- char16_t c = '\0';
- shouldSuspend = false;
- lastCR = false;
- int32_t start = buffer->getStart();
- int32_t end = buffer->getEnd();
- int32_t pos = start - 1;
- switch (state) {
- case DATA:
- case RCDATA:
- case SCRIPT_DATA:
- case PLAINTEXT:
- case RAWTEXT:
- case CDATA_SECTION:
- case SCRIPT_DATA_ESCAPED:
- case SCRIPT_DATA_ESCAPE_START:
- case SCRIPT_DATA_ESCAPE_START_DASH:
- case SCRIPT_DATA_ESCAPED_DASH:
- case SCRIPT_DATA_ESCAPED_DASH_DASH:
- case SCRIPT_DATA_DOUBLE_ESCAPE_START:
- case SCRIPT_DATA_DOUBLE_ESCAPED:
- case SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN:
- case SCRIPT_DATA_DOUBLE_ESCAPED_DASH:
- case SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH:
- case SCRIPT_DATA_DOUBLE_ESCAPE_END: {
- cstart = start;
- break;
- }
- default: {
- cstart = INT32_MAX;
- break;
- }
- }
- if (mViewSource) {
- mViewSource->SetBuffer(buffer);
- pos = stateLoop<nsHtml5ViewSourcePolicy>(state, c, pos, buffer->getBuffer(),
- false, returnState,
- buffer->getEnd());
- mViewSource->DropBuffer((pos == buffer->getEnd()) ? pos : pos + 1);
- } else if (tokenHandler->WantsLineAndColumn()) {
- pos = stateLoop<nsHtml5LineColPolicy>(state, c, pos, buffer->getBuffer(),
- false, returnState, buffer->getEnd());
- } else {
- pos = stateLoop<nsHtml5FastestPolicy>(state, c, pos, buffer->getBuffer(),
- false, returnState, buffer->getEnd());
- }
- if (pos == end) {
- buffer->setStart(pos);
- } else {
- buffer->setStart(pos + 1);
- }
- return lastCR;
-}
-
-template <class P>
-int32_t nsHtml5Tokenizer::stateLoop(int32_t state, char16_t c, int32_t pos,
- char16_t* buf, bool reconsume,
- int32_t returnState, int32_t endPos) {
- bool reportedConsecutiveHyphens = false;
-stateloop:
- for (;;) {
- switch (state) {
- case DATA: {
- for (;;) {
- if (reconsume) {
- reconsume = false;
- } else {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- }
- switch (c) {
- case '&': {
- flushChars(buf, pos);
- MOZ_ASSERT(!charRefBufLen,
- "charRefBufLen not reset after previous use!");
- appendCharRefBuf(c);
- setAdditionalAndRememberAmpersandLocation('\0');
- returnState = state;
- state =
- P::transition(mViewSource.get(),
- nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '<': {
- flushChars(buf, pos);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::TAG_OPEN, reconsume, pos);
- NS_HTML5_BREAK(dataloop);
- }
- case '\0': {
- maybeEmitReplacementCharacter(buf, pos);
- continue;
- }
- case '\r': {
- emitCarriageReturn<P>(buf, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- default: {
- continue;
- }
- }
- }
- dataloop_end:;
- [[fallthrough]];
- }
- case TAG_OPEN: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- if (c >= 'A' && c <= 'Z') {
- endTag = false;
- clearStrBufBeforeUse();
- appendStrBuf((char16_t)(c + 0x20));
- containsHyphen = false;
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::TAG_NAME,
- reconsume, pos);
- NS_HTML5_BREAK(tagopenloop);
- } else if (c >= 'a' && c <= 'z') {
- endTag = false;
- clearStrBufBeforeUse();
- appendStrBuf(c);
- containsHyphen = false;
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::TAG_NAME,
- reconsume, pos);
- NS_HTML5_BREAK(tagopenloop);
- }
- switch (c) {
- case '!': {
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::MARKUP_DECLARATION_OPEN,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '/': {
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::CLOSE_TAG_OPEN, reconsume,
- pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\?': {
- if (viewingXmlSource) {
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::PROCESSING_INSTRUCTION,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- if (P::reportErrors) {
- errProcessingInstruction();
- }
- clearStrBufBeforeUse();
- appendStrBuf(c);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BOGUS_COMMENT, reconsume,
- pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '>': {
- if (P::reportErrors) {
- errLtGt();
- }
- tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 2);
- cstart = pos + 1;
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- default: {
- if (P::reportErrors) {
- errBadCharAfterLt(c);
- }
- tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 1);
- cstart = pos;
- reconsume = true;
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- tagopenloop_end:;
- [[fallthrough]];
- }
- case TAG_NAME: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '\r': {
- P::silentCarriageReturn(this);
- strBufToElementNameString();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME,
- reconsume, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- case ' ':
- case '\t':
- case '\f': {
- strBufToElementNameString();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME,
- reconsume, pos);
- NS_HTML5_BREAK(tagnameloop);
- }
- case '/': {
- strBufToElementNameString();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::SELF_CLOSING_START_TAG,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '>': {
- strBufToElementNameString();
- state = P::transition(mViewSource.get(),
- emitCurrentTagToken(false, pos), reconsume,
- pos);
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\0': {
- c = 0xfffd;
- [[fallthrough]];
- }
- default: {
- if (c >= 'A' && c <= 'Z') {
- c += 0x20;
- } else if (c == '-') {
- containsHyphen = true;
- }
- appendStrBuf(c);
- continue;
- }
- }
- }
- tagnameloop_end:;
- [[fallthrough]];
- }
- case BEFORE_ATTRIBUTE_NAME: {
- for (;;) {
- if (reconsume) {
- reconsume = false;
- } else {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- }
- switch (c) {
- case '\r': {
- P::silentCarriageReturn(this);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- case ' ':
- case '\t':
- case '\f': {
- continue;
- }
- case '/': {
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::SELF_CLOSING_START_TAG,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '>': {
- state = P::transition(mViewSource.get(),
- emitCurrentTagToken(false, pos), reconsume,
- pos);
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\0': {
- c = 0xfffd;
- [[fallthrough]];
- }
- case '\"':
- case '\'':
- case '<':
- case '=': {
- if (P::reportErrors) {
- errBadCharBeforeAttributeNameOrNull(c);
- }
- [[fallthrough]];
- }
- default: {
- if (c >= 'A' && c <= 'Z') {
- c += 0x20;
- }
- attributeLine = line;
- clearStrBufBeforeUse();
- appendStrBuf(c);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::ATTRIBUTE_NAME, reconsume,
- pos);
- NS_HTML5_BREAK(beforeattributenameloop);
- }
- }
- }
- beforeattributenameloop_end:;
- [[fallthrough]];
- }
- case ATTRIBUTE_NAME: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '\r': {
- P::silentCarriageReturn(this);
- attributeNameComplete();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::AFTER_ATTRIBUTE_NAME,
- reconsume, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- case ' ':
- case '\t':
- case '\f': {
- attributeNameComplete();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::AFTER_ATTRIBUTE_NAME,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '/': {
- attributeNameComplete();
- addAttributeWithoutValue();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::SELF_CLOSING_START_TAG,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '=': {
- attributeNameComplete();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BEFORE_ATTRIBUTE_VALUE,
- reconsume, pos);
- NS_HTML5_BREAK(attributenameloop);
- }
- case '>': {
- attributeNameComplete();
- addAttributeWithoutValue();
- state = P::transition(mViewSource.get(),
- emitCurrentTagToken(false, pos), reconsume,
- pos);
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\0': {
- c = 0xfffd;
- [[fallthrough]];
- }
- case '\"':
- case '\'':
- case '<': {
- if (P::reportErrors) {
- errQuoteOrLtInAttributeNameOrNull(c);
- }
- [[fallthrough]];
- }
- default: {
- if (c >= 'A' && c <= 'Z') {
- c += 0x20;
- }
- appendStrBuf(c);
- continue;
- }
- }
- }
- attributenameloop_end:;
- [[fallthrough]];
- }
- case BEFORE_ATTRIBUTE_VALUE: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '\r': {
- P::silentCarriageReturn(this);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- case ' ':
- case '\t':
- case '\f': {
- continue;
- }
- case '\"': {
- attributeLine = line;
- clearStrBufBeforeUse();
- state =
- P::transition(mViewSource.get(),
- nsHtml5Tokenizer::ATTRIBUTE_VALUE_DOUBLE_QUOTED,
- reconsume, pos);
- NS_HTML5_BREAK(beforeattributevalueloop);
- }
- case '&': {
- attributeLine = line;
- clearStrBufBeforeUse();
- reconsume = true;
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::ATTRIBUTE_VALUE_UNQUOTED,
- reconsume, pos);
-
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\'': {
- attributeLine = line;
- clearStrBufBeforeUse();
- state =
- P::transition(mViewSource.get(),
- nsHtml5Tokenizer::ATTRIBUTE_VALUE_SINGLE_QUOTED,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '>': {
- if (P::reportErrors) {
- errAttributeValueMissing();
- }
- addAttributeWithoutValue();
- state = P::transition(mViewSource.get(),
- emitCurrentTagToken(false, pos), reconsume,
- pos);
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\0': {
- c = 0xfffd;
- [[fallthrough]];
- }
- case '<':
- case '=':
- case '`': {
- if (P::reportErrors) {
- errLtOrEqualsOrGraveInUnquotedAttributeOrNull(c);
- }
- [[fallthrough]];
- }
- default: {
- attributeLine = line;
- clearStrBufBeforeUse();
- appendStrBuf(c);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::ATTRIBUTE_VALUE_UNQUOTED,
- reconsume, pos);
-
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- beforeattributevalueloop_end:;
- [[fallthrough]];
- }
- case ATTRIBUTE_VALUE_DOUBLE_QUOTED: {
- for (;;) {
- if (reconsume) {
- reconsume = false;
- } else {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- }
- switch (c) {
- case '\"': {
- addAttributeWithValue();
- state =
- P::transition(mViewSource.get(),
- nsHtml5Tokenizer::AFTER_ATTRIBUTE_VALUE_QUOTED,
- reconsume, pos);
- NS_HTML5_BREAK(attributevaluedoublequotedloop);
- }
- case '&': {
- MOZ_ASSERT(!charRefBufLen,
- "charRefBufLen not reset after previous use!");
- appendCharRefBuf(c);
- setAdditionalAndRememberAmpersandLocation('\"');
- returnState = state;
- state =
- P::transition(mViewSource.get(),
- nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\r': {
- appendStrBufCarriageReturn<P>();
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- appendStrBufLineFeed<P>();
- continue;
- }
- case '\0': {
- c = 0xfffd;
- [[fallthrough]];
- }
- default: {
- appendStrBuf(c);
- continue;
- }
- }
- }
- attributevaluedoublequotedloop_end:;
- [[fallthrough]];
- }
- case AFTER_ATTRIBUTE_VALUE_QUOTED: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '\r': {
- P::silentCarriageReturn(this);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME,
- reconsume, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- case ' ':
- case '\t':
- case '\f': {
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '/': {
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::SELF_CLOSING_START_TAG,
- reconsume, pos);
- NS_HTML5_BREAK(afterattributevaluequotedloop);
- }
- case '>': {
- state = P::transition(mViewSource.get(),
- emitCurrentTagToken(false, pos), reconsume,
- pos);
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- default: {
- if (P::reportErrors) {
- errNoSpaceBetweenAttributes();
- }
- reconsume = true;
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- afterattributevaluequotedloop_end:;
- [[fallthrough]];
- }
- case SELF_CLOSING_START_TAG: {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '>': {
- state =
- P::transition(mViewSource.get(), emitCurrentTagToken(true, pos),
- reconsume, pos);
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- default: {
- if (P::reportErrors) {
- errSlashNotFollowedByGt();
- }
- reconsume = true;
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- case ATTRIBUTE_VALUE_UNQUOTED: {
- for (;;) {
- if (reconsume) {
- reconsume = false;
- } else {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- }
- switch (c) {
- case '\r': {
- P::silentCarriageReturn(this);
- addAttributeWithValue();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME,
- reconsume, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- case ' ':
- case '\t':
- case '\f': {
- addAttributeWithValue();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '&': {
- MOZ_ASSERT(!charRefBufLen,
- "charRefBufLen not reset after previous use!");
- appendCharRefBuf(c);
- setAdditionalAndRememberAmpersandLocation('>');
- returnState = state;
- state =
- P::transition(mViewSource.get(),
- nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '>': {
- addAttributeWithValue();
- state = P::transition(mViewSource.get(),
- emitCurrentTagToken(false, pos), reconsume,
- pos);
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\0': {
- c = 0xfffd;
- [[fallthrough]];
- }
- case '<':
- case '\"':
- case '\'':
- case '=':
- case '`': {
- if (P::reportErrors) {
- errUnquotedAttributeValOrNull(c);
- }
- [[fallthrough]];
- }
- default: {
- appendStrBuf(c);
- continue;
- }
- }
- }
- }
- case AFTER_ATTRIBUTE_NAME: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '\r': {
- P::silentCarriageReturn(this);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- case ' ':
- case '\t':
- case '\f': {
- continue;
- }
- case '/': {
- addAttributeWithoutValue();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::SELF_CLOSING_START_TAG,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '=': {
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BEFORE_ATTRIBUTE_VALUE,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '>': {
- addAttributeWithoutValue();
- state = P::transition(mViewSource.get(),
- emitCurrentTagToken(false, pos), reconsume,
- pos);
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\0': {
- c = 0xfffd;
- [[fallthrough]];
- }
- case '\"':
- case '\'':
- case '<': {
- if (P::reportErrors) {
- errQuoteOrLtInAttributeNameOrNull(c);
- }
- [[fallthrough]];
- }
- default: {
- addAttributeWithoutValue();
- if (c >= 'A' && c <= 'Z') {
- c += 0x20;
- }
- clearStrBufBeforeUse();
- appendStrBuf(c);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::ATTRIBUTE_NAME, reconsume,
- pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- }
- case MARKUP_DECLARATION_OPEN: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '-': {
- clearStrBufBeforeUse();
- appendStrBuf(c);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::MARKUP_DECLARATION_HYPHEN,
- reconsume, pos);
- NS_HTML5_BREAK(markupdeclarationopenloop);
- }
- case 'd':
- case 'D': {
- clearStrBufBeforeUse();
- appendStrBuf(c);
- index = 0;
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::MARKUP_DECLARATION_OCTYPE,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '[': {
- if (tokenHandler->cdataSectionAllowed()) {
- clearStrBufBeforeUse();
- appendStrBuf(c);
- index = 0;
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::CDATA_START, reconsume,
- pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- [[fallthrough]];
- }
- default: {
- if (P::reportErrors) {
- errBogusComment();
- }
- clearStrBufBeforeUse();
- reconsume = true;
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BOGUS_COMMENT, reconsume,
- pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- markupdeclarationopenloop_end:;
- [[fallthrough]];
- }
- case MARKUP_DECLARATION_HYPHEN: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '-': {
- clearStrBufAfterOneHyphen();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::COMMENT_START, reconsume,
- pos);
- NS_HTML5_BREAK(markupdeclarationhyphenloop);
- }
- default: {
- if (P::reportErrors) {
- errBogusComment();
- }
- reconsume = true;
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BOGUS_COMMENT, reconsume,
- pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- markupdeclarationhyphenloop_end:;
- [[fallthrough]];
- }
- case COMMENT_START: {
- reportedConsecutiveHyphens = false;
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '-': {
- appendStrBuf(c);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::COMMENT_START_DASH,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '>': {
- if (P::reportErrors) {
- errPrematureEndOfComment();
- }
- emitComment(0, pos);
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
- reconsume, pos);
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- case '<': {
- appendStrBuf(c);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::COMMENT_LESSTHAN,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\r': {
- appendStrBufCarriageReturn<P>();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::COMMENT, reconsume, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- appendStrBufLineFeed<P>();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::COMMENT, reconsume, pos);
- NS_HTML5_BREAK(commentstartloop);
- }
- case '\0': {
- c = 0xfffd;
- [[fallthrough]];
- }
- default: {
- appendStrBuf(c);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::COMMENT, reconsume, pos);
- NS_HTML5_BREAK(commentstartloop);
- }
- }
- }
- commentstartloop_end:;
- [[fallthrough]];
- }
- case COMMENT: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '-': {
- appendStrBuf(c);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::COMMENT_END_DASH,
- reconsume, pos);
- NS_HTML5_BREAK(commentloop);
- }
- case '<': {
- appendStrBuf(c);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::COMMENT_LESSTHAN,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\r': {
- appendStrBufCarriageReturn<P>();
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- appendStrBufLineFeed<P>();
- continue;
- }
- case '\0': {
- c = 0xfffd;
- [[fallthrough]];
- }
- default: {
- appendStrBuf(c);
- continue;
- }
- }
- }
- commentloop_end:;
- [[fallthrough]];
- }
- case COMMENT_END_DASH: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '-': {
- appendStrBuf(c);
- state =
- P::transition(mViewSource.get(),
- nsHtml5Tokenizer::COMMENT_END, reconsume, pos);
- NS_HTML5_BREAK(commentenddashloop);
- }
- case '<': {
- appendStrBuf(c);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::COMMENT_LESSTHAN,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\r': {
- appendStrBufCarriageReturn<P>();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::COMMENT, reconsume, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- appendStrBufLineFeed<P>();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::COMMENT, reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\0': {
- c = 0xfffd;
- [[fallthrough]];
- }
- default: {
- appendStrBuf(c);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::COMMENT, reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- commentenddashloop_end:;
- [[fallthrough]];
- }
- case COMMENT_END: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '>': {
- emitComment(2, pos);
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
- reconsume, pos);
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- case '-': {
- adjustDoubleHyphenAndAppendToStrBufAndErr(
- c, reportedConsecutiveHyphens);
- reportedConsecutiveHyphens = true;
- continue;
- }
- case '<': {
- appendStrBuf(c);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::COMMENT_LESSTHAN,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\r': {
- adjustDoubleHyphenAndAppendToStrBufCarriageReturn<P>();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::COMMENT, reconsume, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- adjustDoubleHyphenAndAppendToStrBufLineFeed<P>();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::COMMENT, reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '!': {
- appendStrBuf(c);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::COMMENT_END_BANG,
- reconsume, pos);
- NS_HTML5_BREAK(commentendloop);
- }
- case '\0': {
- c = 0xfffd;
- [[fallthrough]];
- }
- default: {
- adjustDoubleHyphenAndAppendToStrBufAndErr(
- c, reportedConsecutiveHyphens);
- reportedConsecutiveHyphens = true;
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::COMMENT, reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- commentendloop_end:;
- [[fallthrough]];
- }
- case COMMENT_END_BANG: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '>': {
- emitComment(3, pos);
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
- reconsume, pos);
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- case '-': {
- appendStrBuf(c);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::COMMENT_END_DASH,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\r': {
- appendStrBufCarriageReturn<P>();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::COMMENT, reconsume, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- appendStrBufLineFeed<P>();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::COMMENT, reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\0': {
- c = 0xfffd;
- [[fallthrough]];
- }
- default: {
- appendStrBuf(c);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::COMMENT, reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- }
- case COMMENT_LESSTHAN: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '!': {
- appendStrBuf(c);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::COMMENT_LESSTHAN_BANG,
- reconsume, pos);
- NS_HTML5_BREAK(commentlessthanloop);
- }
- case '<': {
- appendStrBuf(c);
- continue;
- }
- case '-': {
- appendStrBuf(c);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::COMMENT_END_DASH,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\r': {
- appendStrBufCarriageReturn<P>();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::COMMENT, reconsume, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- appendStrBufLineFeed<P>();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::COMMENT, reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\0': {
- c = 0xfffd;
- [[fallthrough]];
- }
- default: {
- appendStrBuf(c);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::COMMENT, reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- commentlessthanloop_end:;
- [[fallthrough]];
- }
- case COMMENT_LESSTHAN_BANG: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '-': {
- appendStrBuf(c);
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::COMMENT_LESSTHAN_BANG_DASH, reconsume, pos);
- NS_HTML5_BREAK(commentlessthanbangloop);
- }
- case '<': {
- appendStrBuf(c);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::COMMENT_LESSTHAN,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\r': {
- appendStrBufCarriageReturn<P>();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::COMMENT, reconsume, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- appendStrBufLineFeed<P>();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::COMMENT, reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\0': {
- c = 0xfffd;
- [[fallthrough]];
- }
- default: {
- appendStrBuf(c);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::COMMENT, reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- commentlessthanbangloop_end:;
- [[fallthrough]];
- }
- case COMMENT_LESSTHAN_BANG_DASH: {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '-': {
- appendStrBuf(c);
- state =
- P::transition(mViewSource.get(),
- nsHtml5Tokenizer::COMMENT_LESSTHAN_BANG_DASH_DASH,
- reconsume, pos);
- break;
- }
- case '<': {
- appendStrBuf(c);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::COMMENT_LESSTHAN, reconsume,
- pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\r': {
- appendStrBufCarriageReturn<P>();
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT,
- reconsume, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- appendStrBufLineFeed<P>();
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\0': {
- c = 0xfffd;
- [[fallthrough]];
- }
- default: {
- appendStrBuf(c);
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- [[fallthrough]];
- }
- case COMMENT_LESSTHAN_BANG_DASH_DASH: {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '>': {
- appendStrBuf(c);
- emitComment(3, pos);
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
- reconsume, pos);
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- case '-': {
- if (P::reportErrors) {
- errNestedComment();
- }
- adjustDoubleHyphenAndAppendToStrBufAndErr(
- c, reportedConsecutiveHyphens);
- reportedConsecutiveHyphens = true;
- state =
- P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT_END,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\r': {
- c = '\n';
- P::silentCarriageReturn(this);
- if (P::reportErrors) {
- errNestedComment();
- }
- adjustDoubleHyphenAndAppendToStrBufAndErr(
- c, reportedConsecutiveHyphens);
- reportedConsecutiveHyphens = true;
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT,
- reconsume, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- if (P::reportErrors) {
- errNestedComment();
- }
- adjustDoubleHyphenAndAppendToStrBufAndErr(
- c, reportedConsecutiveHyphens);
- reportedConsecutiveHyphens = true;
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '!': {
- if (P::reportErrors) {
- errNestedComment();
- }
- adjustDoubleHyphenAndAppendToStrBufAndErr(
- c, reportedConsecutiveHyphens);
- reportedConsecutiveHyphens = true;
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::COMMENT_END_BANG, reconsume,
- pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\0': {
- c = 0xfffd;
- [[fallthrough]];
- }
- default: {
- if (P::reportErrors) {
- errNestedComment();
- }
- adjustDoubleHyphenAndAppendToStrBufAndErr(
- c, reportedConsecutiveHyphens);
- reportedConsecutiveHyphens = true;
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- case COMMENT_START_DASH: {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '-': {
- appendStrBuf(c);
- state =
- P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT_END,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '>': {
- if (P::reportErrors) {
- errPrematureEndOfComment();
- }
- emitComment(1, pos);
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
- reconsume, pos);
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- case '<': {
- appendStrBuf(c);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::COMMENT_LESSTHAN, reconsume,
- pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\r': {
- appendStrBufCarriageReturn<P>();
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT,
- reconsume, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- appendStrBufLineFeed<P>();
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\0': {
- c = 0xfffd;
- [[fallthrough]];
- }
- default: {
- appendStrBuf(c);
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- case CDATA_START: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- if (index < 6) {
- if (c == nsHtml5Tokenizer::CDATA_LSQB[index]) {
- appendStrBuf(c);
- } else {
- if (P::reportErrors) {
- errBogusComment();
- }
- reconsume = true;
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BOGUS_COMMENT, reconsume,
- pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- index++;
- continue;
- } else {
- clearStrBufAfterUse();
- cstart = pos;
- reconsume = true;
- state =
- P::transition(mViewSource.get(),
- nsHtml5Tokenizer::CDATA_SECTION, reconsume, pos);
- break;
- }
- }
- [[fallthrough]];
- }
- case CDATA_SECTION: {
- for (;;) {
- if (reconsume) {
- reconsume = false;
- } else {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- }
- switch (c) {
- case ']': {
- flushChars(buf, pos);
- state =
- P::transition(mViewSource.get(), nsHtml5Tokenizer::CDATA_RSQB,
- reconsume, pos);
- NS_HTML5_BREAK(cdatasectionloop);
- }
- case '\0': {
- maybeEmitReplacementCharacter(buf, pos);
- continue;
- }
- case '\r': {
- emitCarriageReturn<P>(buf, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- default: {
- continue;
- }
- }
- }
- cdatasectionloop_end:;
- [[fallthrough]];
- }
- case CDATA_RSQB: {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case ']': {
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::CDATA_RSQB_RSQB, reconsume,
- pos);
- break;
- }
- default: {
- tokenHandler->characters(nsHtml5Tokenizer::RSQB_RSQB, 0, 1);
- cstart = pos;
- reconsume = true;
- state =
- P::transition(mViewSource.get(),
- nsHtml5Tokenizer::CDATA_SECTION, reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- [[fallthrough]];
- }
- case CDATA_RSQB_RSQB: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case ']': {
- tokenHandler->characters(nsHtml5Tokenizer::RSQB_RSQB, 0, 1);
- continue;
- }
- case '>': {
- cstart = pos + 1;
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
- reconsume, pos);
- suspendIfRequestedAfterCurrentNonTextToken();
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- default: {
- tokenHandler->characters(nsHtml5Tokenizer::RSQB_RSQB, 0, 2);
- cstart = pos;
- reconsume = true;
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::CDATA_SECTION, reconsume,
- pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- }
- case ATTRIBUTE_VALUE_SINGLE_QUOTED: {
- for (;;) {
- if (reconsume) {
- reconsume = false;
- } else {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- }
- switch (c) {
- case '\'': {
- addAttributeWithValue();
- state =
- P::transition(mViewSource.get(),
- nsHtml5Tokenizer::AFTER_ATTRIBUTE_VALUE_QUOTED,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '&': {
- MOZ_ASSERT(!charRefBufLen,
- "charRefBufLen not reset after previous use!");
- appendCharRefBuf(c);
- setAdditionalAndRememberAmpersandLocation('\'');
- returnState = state;
- state =
- P::transition(mViewSource.get(),
- nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE,
- reconsume, pos);
- NS_HTML5_BREAK(attributevaluesinglequotedloop);
- }
- case '\r': {
- appendStrBufCarriageReturn<P>();
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- appendStrBufLineFeed<P>();
- continue;
- }
- case '\0': {
- c = 0xfffd;
- [[fallthrough]];
- }
- default: {
- appendStrBuf(c);
- continue;
- }
- }
- }
- attributevaluesinglequotedloop_end:;
- [[fallthrough]];
- }
- case CONSUME_CHARACTER_REFERENCE: {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case ' ':
- case '\t':
- case '\n':
- case '\r':
- case '\f':
- case '<':
- case '&':
- case '\0':
- case ';': {
- emitOrAppendCharRefBuf(returnState);
- if (!(returnState & DATA_AND_RCDATA_MASK)) {
- cstart = pos;
- }
- reconsume = true;
- state =
- P::transition(mViewSource.get(), returnState, reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '#': {
- appendCharRefBuf('#');
- state =
- P::transition(mViewSource.get(), nsHtml5Tokenizer::CONSUME_NCR,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- default: {
- if (c == additional) {
- emitOrAppendCharRefBuf(returnState);
- reconsume = true;
- state =
- P::transition(mViewSource.get(), returnState, reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- if (c >= 'a' && c <= 'z') {
- firstCharKey = c - 'a' + 26;
- } else if (c >= 'A' && c <= 'Z') {
- firstCharKey = c - 'A';
- } else {
- if (c == ';') {
- if (P::reportErrors) {
- errNoNamedCharacterMatch();
- }
- }
- emitOrAppendCharRefBuf(returnState);
- if (!(returnState & DATA_AND_RCDATA_MASK)) {
- cstart = pos;
- }
- reconsume = true;
- state =
- P::transition(mViewSource.get(), returnState, reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- appendCharRefBuf(c);
- state =
- P::transition(mViewSource.get(),
- nsHtml5Tokenizer::CHARACTER_REFERENCE_HILO_LOOKUP,
- reconsume, pos);
- break;
- }
- }
- [[fallthrough]];
- }
- case CHARACTER_REFERENCE_HILO_LOOKUP: {
- {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- int32_t hilo = 0;
- if (c <= 'z') {
- const int32_t* row = nsHtml5NamedCharactersAccel::HILO_ACCEL[c];
- if (row) {
- hilo = row[firstCharKey];
- }
- }
- if (!hilo) {
- if (c == ';') {
- if (P::reportErrors) {
- errNoNamedCharacterMatch();
- }
- }
- emitOrAppendCharRefBuf(returnState);
- if (!(returnState & DATA_AND_RCDATA_MASK)) {
- cstart = pos;
- }
- reconsume = true;
- state =
- P::transition(mViewSource.get(), returnState, reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- appendCharRefBuf(c);
- lo = hilo & 0xFFFF;
- hi = hilo >> 16;
- entCol = -1;
- candidate = -1;
- charRefBufMark = 0;
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::CHARACTER_REFERENCE_TAIL,
- reconsume, pos);
- }
- [[fallthrough]];
- }
- case CHARACTER_REFERENCE_TAIL: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- entCol++;
- for (;;) {
- if (hi < lo) {
- NS_HTML5_BREAK(outer);
- }
- if (entCol == nsHtml5NamedCharacters::NAMES[lo].length()) {
- candidate = lo;
- charRefBufMark = charRefBufLen;
- lo++;
- } else if (entCol > nsHtml5NamedCharacters::NAMES[lo].length()) {
- NS_HTML5_BREAK(outer);
- } else if (c > nsHtml5NamedCharacters::NAMES[lo].charAt(entCol)) {
- lo++;
- } else {
- NS_HTML5_BREAK(loloop);
- }
- }
- loloop_end:;
- for (;;) {
- if (hi < lo) {
- NS_HTML5_BREAK(outer);
- }
- if (entCol == nsHtml5NamedCharacters::NAMES[hi].length()) {
- NS_HTML5_BREAK(hiloop);
- }
- if (entCol > nsHtml5NamedCharacters::NAMES[hi].length()) {
- NS_HTML5_BREAK(outer);
- } else if (c < nsHtml5NamedCharacters::NAMES[hi].charAt(entCol)) {
- hi--;
- } else {
- NS_HTML5_BREAK(hiloop);
- }
- }
- hiloop_end:;
- if (c == ';') {
- if (entCol + 1 == nsHtml5NamedCharacters::NAMES[lo].length()) {
- candidate = lo;
- charRefBufMark = charRefBufLen;
- }
- NS_HTML5_BREAK(outer);
- }
- if (hi < lo) {
- NS_HTML5_BREAK(outer);
- }
- appendCharRefBuf(c);
- continue;
- }
- outer_end:;
- if (candidate == -1) {
- if (c == ';') {
- if (P::reportErrors) {
- errNoNamedCharacterMatch();
- }
- }
- emitOrAppendCharRefBuf(returnState);
- if (!(returnState & DATA_AND_RCDATA_MASK)) {
- cstart = pos;
- }
- reconsume = true;
- state = P::transition(mViewSource.get(), returnState, reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- } else {
- const nsHtml5CharacterName& candidateName =
- nsHtml5NamedCharacters::NAMES[candidate];
- if (!candidateName.length() ||
- candidateName.charAt(candidateName.length() - 1) != ';') {
- if ((returnState & DATA_AND_RCDATA_MASK)) {
- char16_t ch;
- if (charRefBufMark == charRefBufLen) {
- ch = c;
- } else {
- ch = charRefBuf[charRefBufMark];
- }
- if (ch == '=' || (ch >= '0' && ch <= '9') ||
- (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z')) {
- if (c == ';') {
- if (P::reportErrors) {
- errNoNamedCharacterMatch();
- }
- }
- appendCharRefBufToStrBuf();
- reconsume = true;
- state = P::transition(mViewSource.get(), returnState, reconsume,
- pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- if ((returnState & DATA_AND_RCDATA_MASK)) {
- if (P::reportErrors) {
- errUnescapedAmpersandInterpretedAsCharacterReference();
- }
- } else {
- if (P::reportErrors) {
- errNotSemicolonTerminated();
- }
- }
- }
- P::completedNamedCharacterReference(mViewSource.get());
- const char16_t* val = nsHtml5NamedCharacters::VALUES[candidate];
- if (!val[1]) {
- emitOrAppendOne(val, returnState);
- } else {
- emitOrAppendTwo(val, returnState);
- }
- if (charRefBufMark < charRefBufLen) {
- if ((returnState & DATA_AND_RCDATA_MASK)) {
- appendStrBuf(charRefBuf, charRefBufMark,
- charRefBufLen - charRefBufMark);
- } else {
- tokenHandler->characters(charRefBuf, charRefBufMark,
- charRefBufLen - charRefBufMark);
- }
- }
- bool earlyBreak = (c == ';' && charRefBufMark == charRefBufLen);
- charRefBufLen = 0;
- if (!(returnState & DATA_AND_RCDATA_MASK)) {
- cstart = earlyBreak ? pos + 1 : pos;
- }
- reconsume = !earlyBreak;
- state = P::transition(mViewSource.get(), returnState, reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- case CONSUME_NCR: {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- value = 0;
- seenDigits = false;
- switch (c) {
- case 'x':
- case 'X': {
- appendCharRefBuf(c);
- state =
- P::transition(mViewSource.get(), nsHtml5Tokenizer::HEX_NCR_LOOP,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- default: {
- reconsume = true;
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::DECIMAL_NRC_LOOP, reconsume,
- pos);
- break;
- }
- }
- [[fallthrough]];
- }
- case DECIMAL_NRC_LOOP: {
- for (;;) {
- if (reconsume) {
- reconsume = false;
- } else {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- }
- MOZ_ASSERT(value >= 0, "value must not become negative.");
- if (c >= '0' && c <= '9') {
- seenDigits = true;
- if (value <= 0x10FFFF) {
- value *= 10;
- value += c - '0';
- }
- continue;
- } else if (c == ';') {
- if (seenDigits) {
- if (!(returnState & DATA_AND_RCDATA_MASK)) {
- cstart = pos + 1;
- }
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::HANDLE_NCR_VALUE,
- reconsume, pos);
- NS_HTML5_BREAK(decimalloop);
- } else {
- if (P::reportErrors) {
- errNoDigitsInNCR();
- }
- appendCharRefBuf(';');
- emitOrAppendCharRefBuf(returnState);
- if (!(returnState & DATA_AND_RCDATA_MASK)) {
- cstart = pos + 1;
- }
- state =
- P::transition(mViewSource.get(), returnState, reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- } else {
- if (!seenDigits) {
- if (P::reportErrors) {
- errNoDigitsInNCR();
- }
- emitOrAppendCharRefBuf(returnState);
- if (!(returnState & DATA_AND_RCDATA_MASK)) {
- cstart = pos;
- }
- reconsume = true;
- state =
- P::transition(mViewSource.get(), returnState, reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- } else {
- if (P::reportErrors) {
- errCharRefLacksSemicolon();
- }
- if (!(returnState & DATA_AND_RCDATA_MASK)) {
- cstart = pos;
- }
- reconsume = true;
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::HANDLE_NCR_VALUE,
- reconsume, pos);
- NS_HTML5_BREAK(decimalloop);
- }
- }
- }
- decimalloop_end:;
- [[fallthrough]];
- }
- case HANDLE_NCR_VALUE: {
- charRefBufLen = 0;
- handleNcrValue(returnState);
- state = P::transition(mViewSource.get(), returnState, reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case HEX_NCR_LOOP: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- MOZ_ASSERT(value >= 0, "value must not become negative.");
- if (c >= '0' && c <= '9') {
- seenDigits = true;
- if (value <= 0x10FFFF) {
- value *= 16;
- value += c - '0';
- }
- continue;
- } else if (c >= 'A' && c <= 'F') {
- seenDigits = true;
- if (value <= 0x10FFFF) {
- value *= 16;
- value += c - 'A' + 10;
- }
- continue;
- } else if (c >= 'a' && c <= 'f') {
- seenDigits = true;
- if (value <= 0x10FFFF) {
- value *= 16;
- value += c - 'a' + 10;
- }
- continue;
- } else if (c == ';') {
- if (seenDigits) {
- if (!(returnState & DATA_AND_RCDATA_MASK)) {
- cstart = pos + 1;
- }
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::HANDLE_NCR_VALUE,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- } else {
- if (P::reportErrors) {
- errNoDigitsInNCR();
- }
- appendCharRefBuf(';');
- emitOrAppendCharRefBuf(returnState);
- if (!(returnState & DATA_AND_RCDATA_MASK)) {
- cstart = pos + 1;
- }
- state =
- P::transition(mViewSource.get(), returnState, reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- } else {
- if (!seenDigits) {
- if (P::reportErrors) {
- errNoDigitsInNCR();
- }
- emitOrAppendCharRefBuf(returnState);
- if (!(returnState & DATA_AND_RCDATA_MASK)) {
- cstart = pos;
- }
- reconsume = true;
- state =
- P::transition(mViewSource.get(), returnState, reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- } else {
- if (P::reportErrors) {
- errCharRefLacksSemicolon();
- }
- if (!(returnState & DATA_AND_RCDATA_MASK)) {
- cstart = pos;
- }
- reconsume = true;
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::HANDLE_NCR_VALUE,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- }
- case PLAINTEXT: {
- for (;;) {
- if (reconsume) {
- reconsume = false;
- } else {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- }
- switch (c) {
- case '\0': {
- emitPlaintextReplacementCharacter(buf, pos);
- continue;
- }
- case '\r': {
- emitCarriageReturn<P>(buf, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- default: {
- continue;
- }
- }
- }
- }
- case CLOSE_TAG_OPEN: {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '>': {
- if (P::reportErrors) {
- errLtSlashGt();
- }
- cstart = pos + 1;
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\r': {
- P::silentCarriageReturn(this);
- if (P::reportErrors) {
- errGarbageAfterLtSlash();
- }
- clearStrBufBeforeUse();
- appendStrBuf('\n');
- state =
- P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BOGUS_COMMENT, reconsume, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- if (P::reportErrors) {
- errGarbageAfterLtSlash();
- }
- clearStrBufBeforeUse();
- appendStrBuf(c);
- state =
- P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BOGUS_COMMENT, reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\0': {
- c = 0xfffd;
- [[fallthrough]];
- }
- default: {
- if (c >= 'A' && c <= 'Z') {
- c += 0x20;
- }
- if (c >= 'a' && c <= 'z') {
- endTag = true;
- clearStrBufBeforeUse();
- appendStrBuf(c);
- containsHyphen = false;
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::TAG_NAME, reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- } else {
- if (P::reportErrors) {
- errGarbageAfterLtSlash();
- }
- clearStrBufBeforeUse();
- appendStrBuf(c);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BOGUS_COMMENT, reconsume,
- pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- }
- case RCDATA: {
- for (;;) {
- if (reconsume) {
- reconsume = false;
- } else {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- }
- switch (c) {
- case '&': {
- flushChars(buf, pos);
- MOZ_ASSERT(!charRefBufLen,
- "charRefBufLen not reset after previous use!");
- appendCharRefBuf(c);
- setAdditionalAndRememberAmpersandLocation('\0');
- returnState = state;
- state =
- P::transition(mViewSource.get(),
- nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '<': {
- flushChars(buf, pos);
- returnState = state;
- state =
- P::transition(mViewSource.get(),
- nsHtml5Tokenizer::RAWTEXT_RCDATA_LESS_THAN_SIGN,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\0': {
- emitReplacementCharacter(buf, pos);
- continue;
- }
- case '\r': {
- emitCarriageReturn<P>(buf, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- default: {
- continue;
- }
- }
- }
- }
- case RAWTEXT: {
- for (;;) {
- if (reconsume) {
- reconsume = false;
- } else {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- }
- switch (c) {
- case '<': {
- flushChars(buf, pos);
- returnState = state;
- state =
- P::transition(mViewSource.get(),
- nsHtml5Tokenizer::RAWTEXT_RCDATA_LESS_THAN_SIGN,
- reconsume, pos);
- NS_HTML5_BREAK(rawtextloop);
- }
- case '\0': {
- emitReplacementCharacter(buf, pos);
- continue;
- }
- case '\r': {
- emitCarriageReturn<P>(buf, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- default: {
- continue;
- }
- }
- }
- rawtextloop_end:;
- [[fallthrough]];
- }
- case RAWTEXT_RCDATA_LESS_THAN_SIGN: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '/': {
- index = 0;
- clearStrBufBeforeUse();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::NON_DATA_END_TAG_NAME,
- reconsume, pos);
- NS_HTML5_BREAK(rawtextrcdatalessthansignloop);
- }
- default: {
- tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 1);
- cstart = pos;
- reconsume = true;
- state =
- P::transition(mViewSource.get(), returnState, reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- rawtextrcdatalessthansignloop_end:;
- [[fallthrough]];
- }
- case NON_DATA_END_TAG_NAME: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- if (!endTagExpectationAsArray) {
- tokenHandler->characters(nsHtml5Tokenizer::LT_SOLIDUS, 0, 2);
- cstart = pos;
- reconsume = true;
- state =
- P::transition(mViewSource.get(), returnState, reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- } else if (index < endTagExpectationAsArray.length) {
- char16_t e = endTagExpectationAsArray[index];
- char16_t folded = c;
- if (c >= 'A' && c <= 'Z') {
- folded += 0x20;
- }
- if (folded != e) {
- tokenHandler->characters(nsHtml5Tokenizer::LT_SOLIDUS, 0, 2);
- emitStrBuf();
- cstart = pos;
- reconsume = true;
- state =
- P::transition(mViewSource.get(), returnState, reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- appendStrBuf(c);
- index++;
- continue;
- } else {
- endTag = true;
- tagName = endTagExpectation;
- switch (c) {
- case '\r': {
- P::silentCarriageReturn(this);
- clearStrBufAfterUse();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME,
- reconsume, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- case ' ':
- case '\t':
- case '\f': {
- clearStrBufAfterUse();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '/': {
- clearStrBufAfterUse();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::SELF_CLOSING_START_TAG,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '>': {
- clearStrBufAfterUse();
- state = P::transition(mViewSource.get(),
- emitCurrentTagToken(false, pos),
- reconsume, pos);
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- default: {
- tokenHandler->characters(nsHtml5Tokenizer::LT_SOLIDUS, 0, 2);
- emitStrBuf();
- cstart = pos;
- reconsume = true;
- state = P::transition(mViewSource.get(), returnState, reconsume,
- pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- }
- }
- case BOGUS_COMMENT: {
- for (;;) {
- if (reconsume) {
- reconsume = false;
- } else {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- }
- switch (c) {
- case '>': {
- emitComment(0, pos);
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
- reconsume, pos);
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- case '-': {
- appendStrBuf(c);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BOGUS_COMMENT_HYPHEN,
- reconsume, pos);
- NS_HTML5_BREAK(boguscommentloop);
- }
- case '\r': {
- appendStrBufCarriageReturn<P>();
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- appendStrBufLineFeed<P>();
- continue;
- }
- case '\0': {
- c = 0xfffd;
- [[fallthrough]];
- }
- default: {
- appendStrBuf(c);
- continue;
- }
- }
- }
- boguscommentloop_end:;
- [[fallthrough]];
- }
- case BOGUS_COMMENT_HYPHEN: {
- boguscommenthyphenloop:
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '>': {
- emitComment(0, pos);
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
- reconsume, pos);
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- case '-': {
- appendSecondHyphenToBogusComment();
- NS_HTML5_CONTINUE(boguscommenthyphenloop);
- }
- case '\r': {
- appendStrBufCarriageReturn<P>();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BOGUS_COMMENT, reconsume,
- pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- appendStrBufLineFeed<P>();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BOGUS_COMMENT, reconsume,
- pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\0': {
- c = 0xfffd;
- [[fallthrough]];
- }
- default: {
- appendStrBuf(c);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BOGUS_COMMENT, reconsume,
- pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- }
- case SCRIPT_DATA: {
- for (;;) {
- if (reconsume) {
- reconsume = false;
- } else {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- }
- switch (c) {
- case '<': {
- flushChars(buf, pos);
- returnState = state;
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_LESS_THAN_SIGN, reconsume, pos);
- NS_HTML5_BREAK(scriptdataloop);
- }
- case '\0': {
- emitReplacementCharacter(buf, pos);
- continue;
- }
- case '\r': {
- emitCarriageReturn<P>(buf, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- default: {
- continue;
- }
- }
- }
- scriptdataloop_end:;
- [[fallthrough]];
- }
- case SCRIPT_DATA_LESS_THAN_SIGN: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '/': {
- index = 0;
- clearStrBufBeforeUse();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::NON_DATA_END_TAG_NAME,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '!': {
- tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 1);
- cstart = pos;
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_ESCAPE_START,
- reconsume, pos);
- NS_HTML5_BREAK(scriptdatalessthansignloop);
- }
- default: {
- tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 1);
- cstart = pos;
- reconsume = true;
- state =
- P::transition(mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA, reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- scriptdatalessthansignloop_end:;
- [[fallthrough]];
- }
- case SCRIPT_DATA_ESCAPE_START: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '-': {
- state =
- P::transition(mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_ESCAPE_START_DASH,
- reconsume, pos);
- NS_HTML5_BREAK(scriptdataescapestartloop);
- }
- default: {
- reconsume = true;
- state =
- P::transition(mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA, reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- scriptdataescapestartloop_end:;
- [[fallthrough]];
- }
- case SCRIPT_DATA_ESCAPE_START_DASH: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '-': {
- state =
- P::transition(mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_DASH_DASH,
- reconsume, pos);
- NS_HTML5_BREAK(scriptdataescapestartdashloop);
- }
- default: {
- reconsume = true;
- state =
- P::transition(mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA, reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- scriptdataescapestartdashloop_end:;
- [[fallthrough]];
- }
- case SCRIPT_DATA_ESCAPED_DASH_DASH: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '-': {
- continue;
- }
- case '<': {
- flushChars(buf, pos);
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '>': {
- state =
- P::transition(mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA, reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\0': {
- emitReplacementCharacter(buf, pos);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED,
- reconsume, pos);
- NS_HTML5_BREAK(scriptdataescapeddashdashloop);
- }
- case '\r': {
- emitCarriageReturn<P>(buf, pos);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED,
- reconsume, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- default: {
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED,
- reconsume, pos);
- NS_HTML5_BREAK(scriptdataescapeddashdashloop);
- }
- }
- }
- scriptdataescapeddashdashloop_end:;
- [[fallthrough]];
- }
- case SCRIPT_DATA_ESCAPED: {
- for (;;) {
- if (reconsume) {
- reconsume = false;
- } else {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- }
- switch (c) {
- case '-': {
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_DASH,
- reconsume, pos);
- NS_HTML5_BREAK(scriptdataescapedloop);
- }
- case '<': {
- flushChars(buf, pos);
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\0': {
- emitReplacementCharacter(buf, pos);
- continue;
- }
- case '\r': {
- emitCarriageReturn<P>(buf, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- default: {
- continue;
- }
- }
- }
- scriptdataescapedloop_end:;
- [[fallthrough]];
- }
- case SCRIPT_DATA_ESCAPED_DASH: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '-': {
- state =
- P::transition(mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_DASH_DASH,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '<': {
- flushChars(buf, pos);
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN,
- reconsume, pos);
- NS_HTML5_BREAK(scriptdataescapeddashloop);
- }
- case '\0': {
- emitReplacementCharacter(buf, pos);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\r': {
- emitCarriageReturn<P>(buf, pos);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED,
- reconsume, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- default: {
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- scriptdataescapeddashloop_end:;
- [[fallthrough]];
- }
- case SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '/': {
- index = 0;
- clearStrBufBeforeUse();
- returnState = nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED;
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::NON_DATA_END_TAG_NAME,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case 'S':
- case 's': {
- tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 1);
- cstart = pos;
- index = 1;
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPE_START, reconsume,
- pos);
- NS_HTML5_BREAK(scriptdataescapedlessthanloop);
- }
- default: {
- tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 1);
- cstart = pos;
- reconsume = true;
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- scriptdataescapedlessthanloop_end:;
- [[fallthrough]];
- }
- case SCRIPT_DATA_DOUBLE_ESCAPE_START: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- MOZ_ASSERT(index > 0);
- if (index < 6) {
- char16_t folded = c;
- if (c >= 'A' && c <= 'Z') {
- folded += 0x20;
- }
- if (folded != nsHtml5Tokenizer::SCRIPT_ARR[index]) {
- reconsume = true;
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- index++;
- continue;
- }
- switch (c) {
- case '\r': {
- emitCarriageReturn<P>(buf, pos);
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- case ' ':
- case '\t':
- case '\f':
- case '/':
- case '>': {
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
- NS_HTML5_BREAK(scriptdatadoubleescapestartloop);
- }
- default: {
- reconsume = true;
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- scriptdatadoubleescapestartloop_end:;
- [[fallthrough]];
- }
- case SCRIPT_DATA_DOUBLE_ESCAPED: {
- for (;;) {
- if (reconsume) {
- reconsume = false;
- } else {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- }
- switch (c) {
- case '-': {
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED_DASH, reconsume,
- pos);
- NS_HTML5_BREAK(scriptdatadoubleescapedloop);
- }
- case '<': {
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\0': {
- emitReplacementCharacter(buf, pos);
- continue;
- }
- case '\r': {
- emitCarriageReturn<P>(buf, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- default: {
- continue;
- }
- }
- }
- scriptdatadoubleescapedloop_end:;
- [[fallthrough]];
- }
- case SCRIPT_DATA_DOUBLE_ESCAPED_DASH: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '-': {
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH,
- reconsume, pos);
- NS_HTML5_BREAK(scriptdatadoubleescapeddashloop);
- }
- case '<': {
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\0': {
- emitReplacementCharacter(buf, pos);
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\r': {
- emitCarriageReturn<P>(buf, pos);
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- default: {
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- scriptdatadoubleescapeddashloop_end:;
- [[fallthrough]];
- }
- case SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '-': {
- continue;
- }
- case '<': {
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN,
- reconsume, pos);
- NS_HTML5_BREAK(scriptdatadoubleescapeddashdashloop);
- }
- case '>': {
- state =
- P::transition(mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA, reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\0': {
- emitReplacementCharacter(buf, pos);
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\r': {
- emitCarriageReturn<P>(buf, pos);
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- default: {
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- scriptdatadoubleescapeddashdashloop_end:;
- [[fallthrough]];
- }
- case SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '/': {
- index = 0;
- state =
- P::transition(mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPE_END,
- reconsume, pos);
- NS_HTML5_BREAK(scriptdatadoubleescapedlessthanloop);
- }
- default: {
- reconsume = true;
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- scriptdatadoubleescapedlessthanloop_end:;
- [[fallthrough]];
- }
- case SCRIPT_DATA_DOUBLE_ESCAPE_END: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- if (index < 6) {
- char16_t folded = c;
- if (c >= 'A' && c <= 'Z') {
- folded += 0x20;
- }
- if (folded != nsHtml5Tokenizer::SCRIPT_ARR[index]) {
- reconsume = true;
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- index++;
- continue;
- }
- switch (c) {
- case '\r': {
- emitCarriageReturn<P>(buf, pos);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED,
- reconsume, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- case ' ':
- case '\t':
- case '\f':
- case '/':
- case '>': {
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- default: {
- reconsume = true;
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED, reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- }
- case MARKUP_DECLARATION_OCTYPE: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- if (index < 6) {
- char16_t folded = c;
- if (c >= 'A' && c <= 'Z') {
- folded += 0x20;
- }
- if (folded == nsHtml5Tokenizer::OCTYPE[index]) {
- appendStrBuf(c);
- } else {
- if (P::reportErrors) {
- errBogusComment();
- }
- reconsume = true;
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BOGUS_COMMENT, reconsume,
- pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- index++;
- continue;
- } else {
- reconsume = true;
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DOCTYPE,
- reconsume, pos);
- NS_HTML5_BREAK(markupdeclarationdoctypeloop);
- }
- }
- markupdeclarationdoctypeloop_end:;
- [[fallthrough]];
- }
- case DOCTYPE: {
- for (;;) {
- if (reconsume) {
- reconsume = false;
- } else {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- }
- initDoctypeFields();
- switch (c) {
- case '\r': {
- P::silentCarriageReturn(this);
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BEFORE_DOCTYPE_NAME,
- reconsume, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- case ' ':
- case '\t':
- case '\f': {
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BEFORE_DOCTYPE_NAME,
- reconsume, pos);
- NS_HTML5_BREAK(doctypeloop);
- }
- default: {
- if (P::reportErrors) {
- errMissingSpaceBeforeDoctypeName();
- }
- reconsume = true;
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BEFORE_DOCTYPE_NAME,
- reconsume, pos);
- NS_HTML5_BREAK(doctypeloop);
- }
- }
- }
- doctypeloop_end:;
- [[fallthrough]];
- }
- case BEFORE_DOCTYPE_NAME: {
- for (;;) {
- if (reconsume) {
- reconsume = false;
- } else {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- }
- switch (c) {
- case '\r': {
- P::silentCarriageReturn(this);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- case ' ':
- case '\t':
- case '\f': {
- continue;
- }
- case '>': {
- if (P::reportErrors) {
- errNamelessDoctype();
- }
- forceQuirks = true;
- emitDoctypeToken(pos);
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
- reconsume, pos);
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\0': {
- c = 0xfffd;
- [[fallthrough]];
- }
- default: {
- if (c >= 'A' && c <= 'Z') {
- c += 0x20;
- }
- clearStrBufBeforeUse();
- appendStrBuf(c);
- state =
- P::transition(mViewSource.get(),
- nsHtml5Tokenizer::DOCTYPE_NAME, reconsume, pos);
- NS_HTML5_BREAK(beforedoctypenameloop);
- }
- }
- }
- beforedoctypenameloop_end:;
- [[fallthrough]];
- }
- case DOCTYPE_NAME: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '\r': {
- P::silentCarriageReturn(this);
- strBufToDoctypeName();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::AFTER_DOCTYPE_NAME,
- reconsume, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- case ' ':
- case '\t':
- case '\f': {
- strBufToDoctypeName();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::AFTER_DOCTYPE_NAME,
- reconsume, pos);
- NS_HTML5_BREAK(doctypenameloop);
- }
- case '>': {
- strBufToDoctypeName();
- emitDoctypeToken(pos);
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
- reconsume, pos);
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\0': {
- c = 0xfffd;
- [[fallthrough]];
- }
- default: {
- if (c >= 'A' && c <= 'Z') {
- c += 0x0020;
- }
- appendStrBuf(c);
- continue;
- }
- }
- }
- doctypenameloop_end:;
- [[fallthrough]];
- }
- case AFTER_DOCTYPE_NAME: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '\r': {
- P::silentCarriageReturn(this);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- case ' ':
- case '\t':
- case '\f': {
- continue;
- }
- case '>': {
- emitDoctypeToken(pos);
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
- reconsume, pos);
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- case 'p':
- case 'P': {
- index = 0;
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::DOCTYPE_UBLIC, reconsume,
- pos);
- NS_HTML5_BREAK(afterdoctypenameloop);
- }
- case 's':
- case 'S': {
- index = 0;
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::DOCTYPE_YSTEM, reconsume,
- pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- default: {
- bogusDoctype();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BOGUS_DOCTYPE, reconsume,
- pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- afterdoctypenameloop_end:;
- [[fallthrough]];
- }
- case DOCTYPE_UBLIC: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- if (index < 5) {
- char16_t folded = c;
- if (c >= 'A' && c <= 'Z') {
- folded += 0x20;
- }
- if (folded != nsHtml5Tokenizer::UBLIC[index]) {
- bogusDoctype();
- reconsume = true;
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BOGUS_DOCTYPE, reconsume,
- pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- index++;
- continue;
- } else {
- reconsume = true;
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::AFTER_DOCTYPE_PUBLIC_KEYWORD, reconsume, pos);
- NS_HTML5_BREAK(doctypeublicloop);
- }
- }
- doctypeublicloop_end:;
- [[fallthrough]];
- }
- case AFTER_DOCTYPE_PUBLIC_KEYWORD: {
- for (;;) {
- if (reconsume) {
- reconsume = false;
- } else {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- }
- switch (c) {
- case '\r': {
- P::silentCarriageReturn(this);
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::BEFORE_DOCTYPE_PUBLIC_IDENTIFIER, reconsume,
- pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- case ' ':
- case '\t':
- case '\f': {
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::BEFORE_DOCTYPE_PUBLIC_IDENTIFIER, reconsume,
- pos);
- NS_HTML5_BREAK(afterdoctypepublickeywordloop);
- }
- case '\"': {
- if (P::reportErrors) {
- errNoSpaceBetweenDoctypePublicKeywordAndQuote();
- }
- clearStrBufBeforeUse();
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\'': {
- if (P::reportErrors) {
- errNoSpaceBetweenDoctypePublicKeywordAndQuote();
- }
- clearStrBufBeforeUse();
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '>': {
- if (P::reportErrors) {
- errExpectedPublicId();
- }
- forceQuirks = true;
- emitDoctypeToken(pos);
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
- reconsume, pos);
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- default: {
- bogusDoctype();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BOGUS_DOCTYPE, reconsume,
- pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- afterdoctypepublickeywordloop_end:;
- [[fallthrough]];
- }
- case BEFORE_DOCTYPE_PUBLIC_IDENTIFIER: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '\r': {
- P::silentCarriageReturn(this);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- case ' ':
- case '\t':
- case '\f': {
- continue;
- }
- case '\"': {
- clearStrBufBeforeUse();
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED,
- reconsume, pos);
- NS_HTML5_BREAK(beforedoctypepublicidentifierloop);
- }
- case '\'': {
- clearStrBufBeforeUse();
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '>': {
- if (P::reportErrors) {
- errExpectedPublicId();
- }
- forceQuirks = true;
- emitDoctypeToken(pos);
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
- reconsume, pos);
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- default: {
- bogusDoctype();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BOGUS_DOCTYPE, reconsume,
- pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- beforedoctypepublicidentifierloop_end:;
- [[fallthrough]];
- }
- case DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '\"': {
- publicIdentifier = strBufToString();
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::AFTER_DOCTYPE_PUBLIC_IDENTIFIER, reconsume,
- pos);
- NS_HTML5_BREAK(doctypepublicidentifierdoublequotedloop);
- }
- case '>': {
- if (P::reportErrors) {
- errGtInPublicId();
- }
- forceQuirks = true;
- publicIdentifier = strBufToString();
- emitDoctypeToken(pos);
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
- reconsume, pos);
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\r': {
- appendStrBufCarriageReturn<P>();
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- appendStrBufLineFeed<P>();
- continue;
- }
- case '\0': {
- c = 0xfffd;
- [[fallthrough]];
- }
- default: {
- appendStrBuf(c);
- continue;
- }
- }
- }
- doctypepublicidentifierdoublequotedloop_end:;
- [[fallthrough]];
- }
- case AFTER_DOCTYPE_PUBLIC_IDENTIFIER: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '\r': {
- P::silentCarriageReturn(this);
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::
- BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS,
- reconsume, pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- case ' ':
- case '\t':
- case '\f': {
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::
- BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS,
- reconsume, pos);
- NS_HTML5_BREAK(afterdoctypepublicidentifierloop);
- }
- case '>': {
- emitDoctypeToken(pos);
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
- reconsume, pos);
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\"': {
- if (P::reportErrors) {
- errNoSpaceBetweenPublicAndSystemIds();
- }
- clearStrBufBeforeUse();
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\'': {
- if (P::reportErrors) {
- errNoSpaceBetweenPublicAndSystemIds();
- }
- clearStrBufBeforeUse();
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- default: {
- bogusDoctype();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BOGUS_DOCTYPE, reconsume,
- pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- afterdoctypepublicidentifierloop_end:;
- [[fallthrough]];
- }
- case BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '\r': {
- P::silentCarriageReturn(this);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- case ' ':
- case '\t':
- case '\f': {
- continue;
- }
- case '>': {
- emitDoctypeToken(pos);
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
- reconsume, pos);
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\"': {
- clearStrBufBeforeUse();
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED,
- reconsume, pos);
- NS_HTML5_BREAK(betweendoctypepublicandsystemidentifiersloop);
- }
- case '\'': {
- clearStrBufBeforeUse();
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- default: {
- bogusDoctype();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BOGUS_DOCTYPE, reconsume,
- pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- betweendoctypepublicandsystemidentifiersloop_end:;
- [[fallthrough]];
- }
- case DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '\"': {
- systemIdentifier = strBufToString();
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::AFTER_DOCTYPE_SYSTEM_IDENTIFIER, reconsume,
- pos);
- NS_HTML5_BREAK(doctypesystemidentifierdoublequotedloop);
- }
- case '>': {
- if (P::reportErrors) {
- errGtInSystemId();
- }
- forceQuirks = true;
- systemIdentifier = strBufToString();
- emitDoctypeToken(pos);
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
- reconsume, pos);
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\r': {
- appendStrBufCarriageReturn<P>();
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- appendStrBufLineFeed<P>();
- continue;
- }
- case '\0': {
- c = 0xfffd;
- [[fallthrough]];
- }
- default: {
- appendStrBuf(c);
- continue;
- }
- }
- }
- doctypesystemidentifierdoublequotedloop_end:;
- [[fallthrough]];
- }
- case AFTER_DOCTYPE_SYSTEM_IDENTIFIER: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '\r': {
- P::silentCarriageReturn(this);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- case ' ':
- case '\t':
- case '\f': {
- continue;
- }
- case '>': {
- emitDoctypeToken(pos);
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
- reconsume, pos);
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- default: {
- bogusDoctypeWithoutQuirks();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BOGUS_DOCTYPE, reconsume,
- pos);
- NS_HTML5_BREAK(afterdoctypesystemidentifierloop);
- }
- }
- }
- afterdoctypesystemidentifierloop_end:;
- [[fallthrough]];
- }
- case BOGUS_DOCTYPE: {
- for (;;) {
- if (reconsume) {
- reconsume = false;
- } else {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- }
- switch (c) {
- case '>': {
- emitDoctypeToken(pos);
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
- reconsume, pos);
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\r': {
- P::silentCarriageReturn(this);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- default: {
- continue;
- }
- }
- }
- }
- case DOCTYPE_YSTEM: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- if (index < 5) {
- char16_t folded = c;
- if (c >= 'A' && c <= 'Z') {
- folded += 0x20;
- }
- if (folded != nsHtml5Tokenizer::YSTEM[index]) {
- bogusDoctype();
- reconsume = true;
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BOGUS_DOCTYPE, reconsume,
- pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- index++;
- NS_HTML5_CONTINUE(stateloop);
- } else {
- reconsume = true;
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::AFTER_DOCTYPE_SYSTEM_KEYWORD, reconsume, pos);
- NS_HTML5_BREAK(doctypeystemloop);
- }
- }
- doctypeystemloop_end:;
- [[fallthrough]];
- }
- case AFTER_DOCTYPE_SYSTEM_KEYWORD: {
- for (;;) {
- if (reconsume) {
- reconsume = false;
- } else {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- }
- switch (c) {
- case '\r': {
- P::silentCarriageReturn(this);
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::BEFORE_DOCTYPE_SYSTEM_IDENTIFIER, reconsume,
- pos);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- case ' ':
- case '\t':
- case '\f': {
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::BEFORE_DOCTYPE_SYSTEM_IDENTIFIER, reconsume,
- pos);
- NS_HTML5_BREAK(afterdoctypesystemkeywordloop);
- }
- case '\"': {
- if (P::reportErrors) {
- errNoSpaceBetweenDoctypeSystemKeywordAndQuote();
- }
- clearStrBufBeforeUse();
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\'': {
- if (P::reportErrors) {
- errNoSpaceBetweenDoctypeSystemKeywordAndQuote();
- }
- clearStrBufBeforeUse();
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '>': {
- if (P::reportErrors) {
- errExpectedPublicId();
- }
- forceQuirks = true;
- emitDoctypeToken(pos);
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
- reconsume, pos);
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- default: {
- bogusDoctype();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BOGUS_DOCTYPE, reconsume,
- pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- afterdoctypesystemkeywordloop_end:;
- [[fallthrough]];
- }
- case BEFORE_DOCTYPE_SYSTEM_IDENTIFIER: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '\r': {
- P::silentCarriageReturn(this);
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- P::silentLineFeed(this);
- [[fallthrough]];
- }
- case ' ':
- case '\t':
- case '\f': {
- continue;
- }
- case '\"': {
- clearStrBufBeforeUse();
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\'': {
- clearStrBufBeforeUse();
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED,
- reconsume, pos);
- NS_HTML5_BREAK(beforedoctypesystemidentifierloop);
- }
- case '>': {
- if (P::reportErrors) {
- errExpectedSystemId();
- }
- forceQuirks = true;
- emitDoctypeToken(pos);
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
- reconsume, pos);
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- default: {
- bogusDoctype();
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::BOGUS_DOCTYPE, reconsume,
- pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
- beforedoctypesystemidentifierloop_end:;
- [[fallthrough]];
- }
- case DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '\'': {
- systemIdentifier = strBufToString();
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::AFTER_DOCTYPE_SYSTEM_IDENTIFIER, reconsume,
- pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '>': {
- if (P::reportErrors) {
- errGtInSystemId();
- }
- forceQuirks = true;
- systemIdentifier = strBufToString();
- emitDoctypeToken(pos);
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
- reconsume, pos);
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\r': {
- appendStrBufCarriageReturn<P>();
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- appendStrBufLineFeed<P>();
- continue;
- }
- case '\0': {
- c = 0xfffd;
- [[fallthrough]];
- }
- default: {
- appendStrBuf(c);
- continue;
- }
- }
- }
- }
- case DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '\'': {
- publicIdentifier = strBufToString();
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::AFTER_DOCTYPE_PUBLIC_IDENTIFIER, reconsume,
- pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- case '>': {
- if (P::reportErrors) {
- errGtInPublicId();
- }
- forceQuirks = true;
- publicIdentifier = strBufToString();
- emitDoctypeToken(pos);
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
- reconsume, pos);
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- case '\r': {
- appendStrBufCarriageReturn<P>();
- NS_HTML5_BREAK(stateloop);
- }
- case '\n': {
- appendStrBufLineFeed<P>();
- continue;
- }
- case '\0': {
- c = 0xfffd;
- [[fallthrough]];
- }
- default: {
- appendStrBuf(c);
- continue;
- }
- }
- }
- }
- case PROCESSING_INSTRUCTION: {
- for (;;) {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '\?': {
- state = P::transition(
- mViewSource.get(),
- nsHtml5Tokenizer::PROCESSING_INSTRUCTION_QUESTION_MARK,
- reconsume, pos);
- NS_HTML5_BREAK(processinginstructionloop);
- }
- default: {
- continue;
- }
- }
- }
- processinginstructionloop_end:;
- [[fallthrough]];
- }
- case PROCESSING_INSTRUCTION_QUESTION_MARK: {
- if (++pos == endPos) {
- NS_HTML5_BREAK(stateloop);
- }
- c = P::checkChar(this, buf, pos);
- switch (c) {
- case '>': {
- state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
- reconsume, pos);
- suspendIfRequestedAfterCurrentNonTextToken();
- if (shouldSuspend) {
- NS_HTML5_BREAK(stateloop);
- }
- NS_HTML5_CONTINUE(stateloop);
- }
- default: {
- state = P::transition(mViewSource.get(),
- nsHtml5Tokenizer::PROCESSING_INSTRUCTION,
- reconsume, pos);
- NS_HTML5_CONTINUE(stateloop);
- }
- }
- }
+ MOZ_ASSERT(false, "Bad end tag expectation.");
+ return;
}
}
-stateloop_end:;
- flushChars(buf, pos);
- stateSave = state;
- returnStateSave = returnState;
- return pos;
}
-void nsHtml5Tokenizer::initDoctypeFields() {
+void nsHtml5Tokenizer::setLineNumber(int32_t line) {
+ this->attributeLine = line;
+ this->line = line;
+}
+
+void nsHtml5Tokenizer::appendCharRefBuf(char16_t c) {
+ MOZ_RELEASE_ASSERT(charRefBufLen < charRefBuf.length,
+ "Attempted to overrun charRefBuf!");
+ charRefBuf[charRefBufLen++] = c;
+}
+
+void nsHtml5Tokenizer::emitOrAppendCharRefBuf(int32_t returnState) {
+ if ((returnState & DATA_AND_RCDATA_MASK)) {
+ appendCharRefBufToStrBuf();
+ } else {
+ if (charRefBufLen > 0) {
+ tokenHandler->characters(charRefBuf, 0, charRefBufLen);
+ charRefBufLen = 0;
+ }
+ }
+}
+
+void nsHtml5Tokenizer::appendStrBuf(char16_t c) {
+ MOZ_ASSERT(strBufLen < strBuf.length, "Previous buffer length insufficient.");
+ if (MOZ_UNLIKELY(strBufLen == strBuf.length)) {
+ if (MOZ_UNLIKELY(!EnsureBufferSpace(1))) {
+ MOZ_CRASH("Unable to recover from buffer reallocation failure");
+ }
+ }
+ strBuf[strBufLen++] = c;
+}
+
+void nsHtml5Tokenizer::appendStrBuf(char16_t* buffer, int32_t offset,
+ int32_t length) {
+ int32_t newLen = nsHtml5Portability::checkedAdd(strBufLen, length);
+ MOZ_ASSERT(newLen <= strBuf.length, "Previous buffer length insufficient.");
+ if (MOZ_UNLIKELY(strBuf.length < newLen)) {
+ if (MOZ_UNLIKELY(!EnsureBufferSpace(length))) {
+ MOZ_CRASH("Unable to recover from buffer reallocation failure");
+ }
+ }
+ nsHtml5ArrayCopy::arraycopy(buffer, offset, strBuf, strBufLen, length);
+ strBufLen = newLen;
+}
+
+void nsHtml5Tokenizer::emitComment(int32_t provisionalHyphens, int32_t pos) {
+ RememberGt(pos);
+ tokenHandler->comment(strBuf, 0, strBufLen - provisionalHyphens);
clearStrBufAfterUse();
- doctypeName = nullptr;
- if (systemIdentifier) {
- systemIdentifier.Release();
- systemIdentifier = nullptr;
+ cstart = pos + 1;
+ suspendIfRequestedAfterCurrentNonTextToken();
+}
+
+void nsHtml5Tokenizer::flushChars(char16_t* buf, int32_t pos) {
+ if (pos > cstart) {
+ tokenHandler->characters(buf, cstart, pos - cstart);
}
- if (publicIdentifier) {
- publicIdentifier.Release();
- publicIdentifier = nullptr;
+ cstart = INT32_MAX;
+}
+
+void nsHtml5Tokenizer::strBufToElementNameString() {
+ if (containsHyphen) {
+ nsAtom* annotationName = nsHtml5ElementName::ELT_ANNOTATION_XML->getName();
+ if (nsHtml5Portability::localEqualsBuffer(annotationName, strBuf,
+ strBufLen)) {
+ tagName = nsHtml5ElementName::ELT_ANNOTATION_XML;
+ } else {
+ nonInternedTagName->setNameForNonInterned(
+ nsHtml5Portability::newLocalNameFromBuffer(strBuf, strBufLen,
+ interner),
+ true);
+ tagName = nonInternedTagName;
+ }
+ } else {
+ tagName = nsHtml5ElementName::elementNameByBuffer(strBuf, strBufLen);
+ if (!tagName) {
+ nonInternedTagName->setNameForNonInterned(
+ nsHtml5Portability::newLocalNameFromBuffer(strBuf, strBufLen,
+ interner),
+ false);
+ tagName = nonInternedTagName;
+ }
+ }
+ containsHyphen = false;
+ clearStrBufAfterUse();
+}
+
+int32_t nsHtml5Tokenizer::emitCurrentTagToken(bool selfClosing, int32_t pos) {
+ RememberGt(pos);
+ cstart = pos + 1;
+ maybeErrSlashInEndTag(selfClosing);
+ stateSave = nsHtml5Tokenizer::DATA;
+ nsHtml5HtmlAttributes* attrs =
+ (!attributes ? nsHtml5HtmlAttributes::EMPTY_ATTRIBUTES : attributes);
+ if (endTag) {
+ maybeErrAttributesOnEndTag(attrs);
+ if (!viewingXmlSource) {
+ tokenHandler->endTag(tagName);
+ }
+ if (newAttributesEachTime) {
+ delete attributes;
+ attributes = nullptr;
+ }
+ } else {
+ if (viewingXmlSource) {
+ MOZ_ASSERT(newAttributesEachTime);
+ delete attributes;
+ attributes = nullptr;
+ } else {
+ tokenHandler->startTag(tagName, attrs, selfClosing);
+ }
+ }
+ tagName = nullptr;
+ if (newAttributesEachTime) {
+ attributes = nullptr;
+ } else {
+ attributes->clear(0);
+ }
+ suspendIfRequestedAfterCurrentNonTextToken();
+ return stateSave;
+}
+
+void nsHtml5Tokenizer::attributeNameComplete() {
+ attributeName =
+ nsHtml5AttributeName::nameByBuffer(strBuf, strBufLen, interner);
+ if (!attributeName) {
+ nonInternedAttributeName->setNameForNonInterned(
+ nsHtml5Portability::newLocalNameFromBuffer(strBuf, strBufLen,
+ interner));
+ attributeName = nonInternedAttributeName;
+ }
+ clearStrBufAfterUse();
+ if (!attributes) {
+ attributes = new nsHtml5HtmlAttributes(0);
+ }
+ if (attributes->contains(attributeName)) {
+ errDuplicateAttribute();
+ attributeName = nullptr;
}
- forceQuirks = false;
}
-template <class P>
-void nsHtml5Tokenizer::adjustDoubleHyphenAndAppendToStrBufCarriageReturn() {
- P::silentCarriageReturn(this);
- adjustDoubleHyphenAndAppendToStrBufAndErr('\n', false);
+void nsHtml5Tokenizer::addAttributeWithoutValue() {
+ if (attributeName) {
+ attributes->addAttribute(
+ attributeName, nsHtml5Portability::newEmptyString(), attributeLine);
+ attributeName = nullptr;
+ } else {
+ clearStrBufAfterUse();
+ }
}
-template <class P>
-void nsHtml5Tokenizer::adjustDoubleHyphenAndAppendToStrBufLineFeed() {
- P::silentLineFeed(this);
- adjustDoubleHyphenAndAppendToStrBufAndErr('\n', false);
+void nsHtml5Tokenizer::addAttributeWithValue() {
+ if (attributeName) {
+ nsHtml5String val = strBufToString();
+ if (mViewSource) {
+ mViewSource->MaybeLinkifyAttributeValue(attributeName, val);
+ }
+ attributes->addAttribute(attributeName, val, attributeLine);
+ attributeName = nullptr;
+ } else {
+ clearStrBufAfterUse();
+ }
}
-template <class P>
-void nsHtml5Tokenizer::appendStrBufLineFeed() {
- P::silentLineFeed(this);
- appendStrBuf('\n');
+void nsHtml5Tokenizer::start() {
+ initializeWithoutStarting();
+ tokenHandler->startTokenization(this);
+ if (mViewSource) {
+ line = 1;
+ col = -1;
+ nextCharOnNewLine = false;
+ } else if (tokenHandler->WantsLineAndColumn()) {
+ line = 0;
+ col = 1;
+ nextCharOnNewLine = true;
+ } else {
+ line = -1;
+ col = -1;
+ nextCharOnNewLine = false;
+ }
}
-template <class P>
-void nsHtml5Tokenizer::appendStrBufCarriageReturn() {
- P::silentCarriageReturn(this);
- appendStrBuf('\n');
+bool nsHtml5Tokenizer::tokenizeBuffer(nsHtml5UTF16Buffer* buffer) {
+ int32_t state = stateSave;
+ int32_t returnState = returnStateSave;
+ char16_t c = '\0';
+ shouldSuspend = false;
+ lastCR = false;
+ int32_t start = buffer->getStart();
+ int32_t end = buffer->getEnd();
+ int32_t pos = start - 1;
+ switch (state) {
+ case DATA:
+ case RCDATA:
+ case SCRIPT_DATA:
+ case PLAINTEXT:
+ case RAWTEXT:
+ case CDATA_SECTION:
+ case SCRIPT_DATA_ESCAPED:
+ case SCRIPT_DATA_ESCAPE_START:
+ case SCRIPT_DATA_ESCAPE_START_DASH:
+ case SCRIPT_DATA_ESCAPED_DASH:
+ case SCRIPT_DATA_ESCAPED_DASH_DASH:
+ case SCRIPT_DATA_DOUBLE_ESCAPE_START:
+ case SCRIPT_DATA_DOUBLE_ESCAPED:
+ case SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN:
+ case SCRIPT_DATA_DOUBLE_ESCAPED_DASH:
+ case SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH:
+ case SCRIPT_DATA_DOUBLE_ESCAPE_END: {
+ cstart = start;
+ break;
+ }
+ default: {
+ cstart = INT32_MAX;
+ break;
+ }
+ }
+ if (mViewSource) {
+ mViewSource->SetBuffer(buffer);
+ if (mozilla::htmlaccel::htmlaccelEnabled()) {
+ pos = StateLoopViewSourceSIMD(state, c, pos, buffer->getBuffer(), false,
+ returnState, buffer->getEnd());
+ } else {
+ pos = StateLoopViewSourceALU(state, c, pos, buffer->getBuffer(), false,
+ returnState, buffer->getEnd());
+ }
+ mViewSource->DropBuffer((pos == buffer->getEnd()) ? pos : pos + 1);
+ } else if (tokenHandler->WantsLineAndColumn()) {
+ if (mozilla::htmlaccel::htmlaccelEnabled()) {
+ pos = StateLoopLineColSIMD(state, c, pos, buffer->getBuffer(), false,
+ returnState, buffer->getEnd());
+ } else {
+ pos = StateLoopLineColALU(state, c, pos, buffer->getBuffer(), false,
+ returnState, buffer->getEnd());
+ }
+ } else if (mozilla::htmlaccel::htmlaccelEnabled()) {
+ pos = StateLoopFastestSIMD(state, c, pos, buffer->getBuffer(), false,
+ returnState, buffer->getEnd());
+ } else {
+ pos = StateLoopFastestALU(state, c, pos, buffer->getBuffer(), false,
+ returnState, buffer->getEnd());
+ }
+ if (pos == end) {
+ buffer->setStart(pos);
+ } else {
+ buffer->setStart(pos + 1);
+ }
+ return lastCR;
}
-template <class P>
-void nsHtml5Tokenizer::emitCarriageReturn(char16_t* buf, int32_t pos) {
- P::silentCarriageReturn(this);
- flushChars(buf, pos);
- tokenHandler->characters(nsHtml5Tokenizer::LF, 0, 1);
- cstart = INT32_MAX;
+void nsHtml5Tokenizer::initDoctypeFields() {
+ clearStrBufAfterUse();
+ doctypeName = nullptr;
+ if (systemIdentifier) {
+ systemIdentifier.Release();
+ systemIdentifier = nullptr;
+ }
+ if (publicIdentifier) {
+ publicIdentifier.Release();
+ publicIdentifier = nullptr;
+ }
+ forceQuirks = false;
}
void nsHtml5Tokenizer::emitReplacementCharacter(char16_t* buf, int32_t pos) {
@@ -4528,10 +503,6 @@ void nsHtml5Tokenizer::emitPlaintextReplacementCharacter(char16_t* buf,
cstart = pos + 1;
}
-void nsHtml5Tokenizer::setAdditionalAndRememberAmpersandLocation(char16_t add) {
- additional = add;
-}
-
void nsHtml5Tokenizer::bogusDoctype() {
errBogusDoctype();
forceQuirks = true;
@@ -4897,13 +868,6 @@ void nsHtml5Tokenizer::emitDoctypeToken(int32_t pos) {
suspendIfRequestedAfterCurrentNonTextToken();
}
-void nsHtml5Tokenizer::suspendIfRequestedAfterCurrentNonTextToken() {
- if (suspendAfterCurrentNonTextToken) {
- suspendAfterCurrentNonTextToken = false;
- shouldSuspend = true;
- }
-}
-
void nsHtml5Tokenizer::suspendAfterCurrentTokenIfNotInText() {
switch (stateSave) {
case DATA:
@@ -5015,25 +979,6 @@ bool nsHtml5Tokenizer::internalEncodingDeclaration(
return false;
}
-void nsHtml5Tokenizer::emitOrAppendTwo(const char16_t* val,
- int32_t returnState) {
- if ((returnState & DATA_AND_RCDATA_MASK)) {
- appendStrBuf(val[0]);
- appendStrBuf(val[1]);
- } else {
- tokenHandler->characters(val, 0, 2);
- }
-}
-
-void nsHtml5Tokenizer::emitOrAppendOne(const char16_t* val,
- int32_t returnState) {
- if ((returnState & DATA_AND_RCDATA_MASK)) {
- appendStrBuf(val[0]);
- } else {
- tokenHandler->characters(val, 0, 1);
- }
-}
-
void nsHtml5Tokenizer::end() {
if (!keepBuffer) {
strBuf = nullptr;
@@ -5057,10 +1002,6 @@ void nsHtml5Tokenizer::end() {
}
}
-void nsHtml5Tokenizer::requestSuspension() { shouldSuspend = true; }
-
-bool nsHtml5Tokenizer::isInDataState() { return (stateSave == DATA); }
-
void nsHtml5Tokenizer::resetToDataState() {
clearStrBufAfterUse();
charRefBufLen = 0;
diff --git a/parser/html/nsHtml5Tokenizer.h b/parser/html/nsHtml5Tokenizer.h
@@ -43,8 +43,10 @@
#include "nsHtml5NamedCharacters.h"
#include "nsHtml5NamedCharactersAccel.h"
#include "nsHtml5String.h"
+#include "nsHtml5TreeBuilder.h"
#include "nsIContent.h"
#include "nsTraceRefcnt.h"
+#include "mozilla/htmlaccel/htmlaccelEnabled.h"
class nsHtml5StreamParser;
@@ -337,15 +339,12 @@ class nsHtml5Tokenizer {
void setLineNumber(int32_t line);
inline int32_t getLineNumber() { return line; }
- nsHtml5HtmlAttributes* emptyAttributes();
-
- private:
- inline void appendCharRefBuf(char16_t c) {
- MOZ_RELEASE_ASSERT(charRefBufLen < charRefBuf.length,
- "Attempted to overrun charRefBuf!");
- charRefBuf[charRefBufLen++] = c;
+ inline nsHtml5HtmlAttributes* emptyAttributes() {
+ return nsHtml5HtmlAttributes::EMPTY_ATTRIBUTES;
}
+ private:
+ void appendCharRefBuf(char16_t c);
void emitOrAppendCharRefBuf(int32_t returnState);
inline void clearStrBufAfterUse() { strBufLen = 0; }
@@ -360,23 +359,32 @@ class nsHtml5Tokenizer {
strBufLen = 0;
}
- inline void appendStrBuf(char16_t c) {
- MOZ_ASSERT(strBufLen < strBuf.length,
- "Previous buffer length insufficient.");
- if (MOZ_UNLIKELY(strBufLen == strBuf.length)) {
- if (MOZ_UNLIKELY(!EnsureBufferSpace(1))) {
- MOZ_CRASH("Unable to recover from buffer reallocation failure");
- }
- }
- strBuf[strBufLen++] = c;
- }
+ void appendStrBuf(char16_t c);
protected:
- nsHtml5String strBufToString();
+ inline nsHtml5String strBufToString() {
+ nsHtml5String str = nsHtml5Portability::newStringFromBuffer(
+ strBuf, 0, strBufLen, tokenHandler,
+ !newAttributesEachTime &&
+ attributeName == nsHtml5AttributeName::ATTR_CLASS);
+ clearStrBufAfterUse();
+ return str;
+ }
private:
- void strBufToDoctypeName();
- void emitStrBuf();
+ inline void strBufToDoctypeName() {
+ doctypeName =
+ nsHtml5Portability::newLocalNameFromBuffer(strBuf, strBufLen, interner);
+ clearStrBufAfterUse();
+ }
+
+ inline void emitStrBuf() {
+ if (strBufLen > 0) {
+ tokenHandler->characters(strBuf, 0, strBufLen);
+ clearStrBufAfterUse();
+ }
+ }
+
inline void appendSecondHyphenToBogusComment() { appendStrBuf('-'); }
inline void adjustDoubleHyphenAndAppendToStrBufAndErr(
@@ -408,23 +416,4167 @@ class nsHtml5Tokenizer {
private:
template <class P>
- int32_t stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* buf,
- bool reconsume, int32_t returnState, int32_t endPos);
+ inline int32_t stateLoop(int32_t state, char16_t c, int32_t pos,
+ char16_t* buf, bool reconsume, int32_t returnState,
+ int32_t endPos) {
+ bool reportedConsecutiveHyphens = false;
+ stateloop:
+ for (;;) {
+ switch (state) {
+ case DATA: {
+ for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ switch (c) {
+ case '&': {
+ flushChars(buf, pos);
+ MOZ_ASSERT(!charRefBufLen,
+ "charRefBufLen not reset after previous use!");
+ appendCharRefBuf(c);
+ setAdditionalAndRememberAmpersandLocation('\0');
+ returnState = state;
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE, reconsume,
+ pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '<': {
+ flushChars(buf, pos);
+ state =
+ P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::TAG_OPEN, reconsume, pos);
+ NS_HTML5_BREAK(dataloop);
+ }
+ case '\0': {
+ maybeEmitReplacementCharacter(buf, pos);
+ break;
+ }
+ case '\r': {
+ emitCarriageReturn<P>(buf, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ default: {
+ break;
+ }
+ }
+ }
+ datamiddle:
+ for (;;) {
+ ++pos;
+ pos += P::accelerateAdvancementData(this, buf, pos, endPos);
+ for (;;) {
+ if (pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '&': {
+ flushChars(buf, pos);
+ MOZ_ASSERT(!charRefBufLen,
+ "charRefBufLen not reset after previous use!");
+ appendCharRefBuf(c);
+ setAdditionalAndRememberAmpersandLocation('\0');
+ returnState = state;
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '<': {
+ flushChars(buf, pos);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::TAG_OPEN, reconsume,
+ pos);
+ NS_HTML5_BREAK(dataloop);
+ }
+ case '\0': {
+ maybeEmitReplacementCharacter(buf, pos);
+ NS_HTML5_CONTINUE(datamiddle);
+ }
+ case '\r': {
+ emitCarriageReturn<P>(buf, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ NS_HTML5_CONTINUE(datamiddle);
+ }
+ default: {
+ ++pos;
+ continue;
+ }
+ }
+ }
+ }
+ }
+ dataloop_end:;
+ [[fallthrough]];
+ }
+ case TAG_OPEN: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ if (c >= 'A' && c <= 'Z') {
+ endTag = false;
+ clearStrBufBeforeUse();
+ appendStrBuf((char16_t)(c + 0x20));
+ containsHyphen = false;
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::TAG_NAME, reconsume, pos);
+ NS_HTML5_BREAK(tagopenloop);
+ } else if (c >= 'a' && c <= 'z') {
+ endTag = false;
+ clearStrBufBeforeUse();
+ appendStrBuf(c);
+ containsHyphen = false;
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::TAG_NAME, reconsume, pos);
+ NS_HTML5_BREAK(tagopenloop);
+ }
+ switch (c) {
+ case '!': {
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::MARKUP_DECLARATION_OPEN,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '/': {
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::CLOSE_TAG_OPEN,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\?': {
+ if (viewingXmlSource) {
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::PROCESSING_INSTRUCTION, reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ if (P::reportErrors) {
+ errProcessingInstruction();
+ }
+ clearStrBufBeforeUse();
+ appendStrBuf(c);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BOGUS_COMMENT,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '>': {
+ if (P::reportErrors) {
+ errLtGt();
+ }
+ tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 2);
+ cstart = pos + 1;
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ default: {
+ if (P::reportErrors) {
+ errBadCharAfterLt(c);
+ }
+ tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 1);
+ cstart = pos;
+ reconsume = true;
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ tagopenloop_end:;
+ [[fallthrough]];
+ }
+ case TAG_NAME: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '\r': {
+ P::silentCarriageReturn(this);
+ strBufToElementNameString();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME,
+ reconsume, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ case ' ':
+ case '\t':
+ case '\f': {
+ strBufToElementNameString();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME,
+ reconsume, pos);
+ NS_HTML5_BREAK(tagnameloop);
+ }
+ case '/': {
+ strBufToElementNameString();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::SELF_CLOSING_START_TAG,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '>': {
+ strBufToElementNameString();
+ state = P::transition(mViewSource.get(),
+ emitCurrentTagToken(false, pos),
+ reconsume, pos);
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\0': {
+ c = 0xfffd;
+ [[fallthrough]];
+ }
+ default: {
+ if (c >= 'A' && c <= 'Z') {
+ c += 0x20;
+ } else if (c == '-') {
+ containsHyphen = true;
+ }
+ appendStrBuf(c);
+ continue;
+ }
+ }
+ }
+ tagnameloop_end:;
+ [[fallthrough]];
+ }
+ case BEFORE_ATTRIBUTE_NAME: {
+ for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ }
+ switch (c) {
+ case '\r': {
+ P::silentCarriageReturn(this);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ case ' ':
+ case '\t':
+ case '\f': {
+ continue;
+ }
+ case '/': {
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::SELF_CLOSING_START_TAG,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '>': {
+ state = P::transition(mViewSource.get(),
+ emitCurrentTagToken(false, pos),
+ reconsume, pos);
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\0': {
+ c = 0xfffd;
+ [[fallthrough]];
+ }
+ case '\"':
+ case '\'':
+ case '<':
+ case '=': {
+ if (P::reportErrors) {
+ errBadCharBeforeAttributeNameOrNull(c);
+ }
+ [[fallthrough]];
+ }
+ default: {
+ if (c >= 'A' && c <= 'Z') {
+ c += 0x20;
+ }
+ attributeLine = line;
+ clearStrBufBeforeUse();
+ appendStrBuf(c);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::ATTRIBUTE_NAME,
+ reconsume, pos);
+ NS_HTML5_BREAK(beforeattributenameloop);
+ }
+ }
+ }
+ beforeattributenameloop_end:;
+ [[fallthrough]];
+ }
+ case ATTRIBUTE_NAME: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '\r': {
+ P::silentCarriageReturn(this);
+ attributeNameComplete();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::AFTER_ATTRIBUTE_NAME,
+ reconsume, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ case ' ':
+ case '\t':
+ case '\f': {
+ attributeNameComplete();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::AFTER_ATTRIBUTE_NAME,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '/': {
+ attributeNameComplete();
+ addAttributeWithoutValue();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::SELF_CLOSING_START_TAG,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '=': {
+ attributeNameComplete();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BEFORE_ATTRIBUTE_VALUE,
+ reconsume, pos);
+ NS_HTML5_BREAK(attributenameloop);
+ }
+ case '>': {
+ attributeNameComplete();
+ addAttributeWithoutValue();
+ state = P::transition(mViewSource.get(),
+ emitCurrentTagToken(false, pos),
+ reconsume, pos);
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\0': {
+ c = 0xfffd;
+ [[fallthrough]];
+ }
+ case '\"':
+ case '\'':
+ case '<': {
+ if (P::reportErrors) {
+ errQuoteOrLtInAttributeNameOrNull(c);
+ }
+ [[fallthrough]];
+ }
+ default: {
+ if (c >= 'A' && c <= 'Z') {
+ c += 0x20;
+ }
+ appendStrBuf(c);
+ continue;
+ }
+ }
+ }
+ attributenameloop_end:;
+ [[fallthrough]];
+ }
+ case BEFORE_ATTRIBUTE_VALUE: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '\r': {
+ P::silentCarriageReturn(this);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ case ' ':
+ case '\t':
+ case '\f': {
+ continue;
+ }
+ case '\"': {
+ attributeLine = line;
+ clearStrBufBeforeUse();
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::ATTRIBUTE_VALUE_DOUBLE_QUOTED, reconsume,
+ pos);
+ NS_HTML5_BREAK(beforeattributevalueloop);
+ }
+ case '&': {
+ attributeLine = line;
+ clearStrBufBeforeUse();
+ reconsume = true;
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::ATTRIBUTE_VALUE_UNQUOTED, reconsume, pos);
+
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\'': {
+ attributeLine = line;
+ clearStrBufBeforeUse();
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::ATTRIBUTE_VALUE_SINGLE_QUOTED, reconsume,
+ pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '>': {
+ if (P::reportErrors) {
+ errAttributeValueMissing();
+ }
+ addAttributeWithoutValue();
+ state = P::transition(mViewSource.get(),
+ emitCurrentTagToken(false, pos),
+ reconsume, pos);
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\0': {
+ c = 0xfffd;
+ [[fallthrough]];
+ }
+ case '<':
+ case '=':
+ case '`': {
+ if (P::reportErrors) {
+ errLtOrEqualsOrGraveInUnquotedAttributeOrNull(c);
+ }
+ [[fallthrough]];
+ }
+ default: {
+ attributeLine = line;
+ clearStrBufBeforeUse();
+ appendStrBuf(c);
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::ATTRIBUTE_VALUE_UNQUOTED, reconsume, pos);
+
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ beforeattributevalueloop_end:;
+ [[fallthrough]];
+ }
+ case ATTRIBUTE_VALUE_DOUBLE_QUOTED: {
+ for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ }
+ switch (c) {
+ case '\"': {
+ addAttributeWithValue();
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::AFTER_ATTRIBUTE_VALUE_QUOTED, reconsume,
+ pos);
+ NS_HTML5_BREAK(attributevaluedoublequotedloop);
+ }
+ case '&': {
+ MOZ_ASSERT(!charRefBufLen,
+ "charRefBufLen not reset after previous use!");
+ appendCharRefBuf(c);
+ setAdditionalAndRememberAmpersandLocation('\"');
+ returnState = state;
+ state =
+ P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\r': {
+ appendStrBufCarriageReturn<P>();
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ appendStrBufLineFeed<P>();
+ continue;
+ }
+ case '\0': {
+ c = 0xfffd;
+ [[fallthrough]];
+ }
+ default: {
+ appendStrBuf(c);
+ continue;
+ }
+ }
+ }
+ attributevaluedoublequotedloop_end:;
+ [[fallthrough]];
+ }
+ case AFTER_ATTRIBUTE_VALUE_QUOTED: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '\r': {
+ P::silentCarriageReturn(this);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME,
+ reconsume, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ case ' ':
+ case '\t':
+ case '\f': {
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '/': {
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::SELF_CLOSING_START_TAG,
+ reconsume, pos);
+ NS_HTML5_BREAK(afterattributevaluequotedloop);
+ }
+ case '>': {
+ state = P::transition(mViewSource.get(),
+ emitCurrentTagToken(false, pos),
+ reconsume, pos);
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ default: {
+ if (P::reportErrors) {
+ errNoSpaceBetweenAttributes();
+ }
+ reconsume = true;
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ afterattributevaluequotedloop_end:;
+ [[fallthrough]];
+ }
+ case SELF_CLOSING_START_TAG: {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '>': {
+ state =
+ P::transition(mViewSource.get(),
+ emitCurrentTagToken(true, pos), reconsume, pos);
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ default: {
+ if (P::reportErrors) {
+ errSlashNotFollowedByGt();
+ }
+ reconsume = true;
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ case ATTRIBUTE_VALUE_UNQUOTED: {
+ for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ }
+ switch (c) {
+ case '\r': {
+ P::silentCarriageReturn(this);
+ addAttributeWithValue();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME,
+ reconsume, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ case ' ':
+ case '\t':
+ case '\f': {
+ addAttributeWithValue();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '&': {
+ MOZ_ASSERT(!charRefBufLen,
+ "charRefBufLen not reset after previous use!");
+ appendCharRefBuf(c);
+ setAdditionalAndRememberAmpersandLocation('>');
+ returnState = state;
+ state =
+ P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '>': {
+ addAttributeWithValue();
+ state = P::transition(mViewSource.get(),
+ emitCurrentTagToken(false, pos),
+ reconsume, pos);
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\0': {
+ c = 0xfffd;
+ [[fallthrough]];
+ }
+ case '<':
+ case '\"':
+ case '\'':
+ case '=':
+ case '`': {
+ if (P::reportErrors) {
+ errUnquotedAttributeValOrNull(c);
+ }
+ [[fallthrough]];
+ }
+ default: {
+ appendStrBuf(c);
+ continue;
+ }
+ }
+ }
+ }
+ case AFTER_ATTRIBUTE_NAME: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '\r': {
+ P::silentCarriageReturn(this);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ case ' ':
+ case '\t':
+ case '\f': {
+ continue;
+ }
+ case '/': {
+ addAttributeWithoutValue();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::SELF_CLOSING_START_TAG,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '=': {
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BEFORE_ATTRIBUTE_VALUE,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '>': {
+ addAttributeWithoutValue();
+ state = P::transition(mViewSource.get(),
+ emitCurrentTagToken(false, pos),
+ reconsume, pos);
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\0': {
+ c = 0xfffd;
+ [[fallthrough]];
+ }
+ case '\"':
+ case '\'':
+ case '<': {
+ if (P::reportErrors) {
+ errQuoteOrLtInAttributeNameOrNull(c);
+ }
+ [[fallthrough]];
+ }
+ default: {
+ addAttributeWithoutValue();
+ if (c >= 'A' && c <= 'Z') {
+ c += 0x20;
+ }
+ clearStrBufBeforeUse();
+ appendStrBuf(c);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::ATTRIBUTE_NAME,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ }
+ case MARKUP_DECLARATION_OPEN: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '-': {
+ clearStrBufBeforeUse();
+ appendStrBuf(c);
+ state =
+ P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::MARKUP_DECLARATION_HYPHEN,
+ reconsume, pos);
+ NS_HTML5_BREAK(markupdeclarationopenloop);
+ }
+ case 'd':
+ case 'D': {
+ clearStrBufBeforeUse();
+ appendStrBuf(c);
+ index = 0;
+ state =
+ P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::MARKUP_DECLARATION_OCTYPE,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '[': {
+ if (tokenHandler->cdataSectionAllowed()) {
+ clearStrBufBeforeUse();
+ appendStrBuf(c);
+ index = 0;
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::CDATA_START,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ [[fallthrough]];
+ }
+ default: {
+ if (P::reportErrors) {
+ errBogusComment();
+ }
+ clearStrBufBeforeUse();
+ reconsume = true;
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BOGUS_COMMENT,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ markupdeclarationopenloop_end:;
+ [[fallthrough]];
+ }
+ case MARKUP_DECLARATION_HYPHEN: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '-': {
+ clearStrBufAfterOneHyphen();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::COMMENT_START,
+ reconsume, pos);
+ NS_HTML5_BREAK(markupdeclarationhyphenloop);
+ }
+ default: {
+ if (P::reportErrors) {
+ errBogusComment();
+ }
+ reconsume = true;
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BOGUS_COMMENT,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ markupdeclarationhyphenloop_end:;
+ [[fallthrough]];
+ }
+ case COMMENT_START: {
+ reportedConsecutiveHyphens = false;
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '-': {
+ appendStrBuf(c);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::COMMENT_START_DASH,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '>': {
+ if (P::reportErrors) {
+ errPrematureEndOfComment();
+ }
+ emitComment(0, pos);
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
+ reconsume, pos);
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '<': {
+ appendStrBuf(c);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::COMMENT_LESSTHAN,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\r': {
+ appendStrBufCarriageReturn<P>();
+ state =
+ P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT,
+ reconsume, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ appendStrBufLineFeed<P>();
+ state =
+ P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT,
+ reconsume, pos);
+ NS_HTML5_BREAK(commentstartloop);
+ }
+ case '\0': {
+ c = 0xfffd;
+ [[fallthrough]];
+ }
+ default: {
+ appendStrBuf(c);
+ state =
+ P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT,
+ reconsume, pos);
+ NS_HTML5_BREAK(commentstartloop);
+ }
+ }
+ }
+ commentstartloop_end:;
+ [[fallthrough]];
+ }
+ case COMMENT: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '-': {
+ appendStrBuf(c);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::COMMENT_END_DASH,
+ reconsume, pos);
+ NS_HTML5_BREAK(commentloop);
+ }
+ case '<': {
+ appendStrBuf(c);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::COMMENT_LESSTHAN,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\r': {
+ appendStrBufCarriageReturn<P>();
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ appendStrBufLineFeed<P>();
+ continue;
+ }
+ case '\0': {
+ c = 0xfffd;
+ [[fallthrough]];
+ }
+ default: {
+ appendStrBuf(c);
+ continue;
+ }
+ }
+ }
+ commentloop_end:;
+ [[fallthrough]];
+ }
+ case COMMENT_END_DASH: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '-': {
+ appendStrBuf(c);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::COMMENT_END, reconsume,
+ pos);
+ NS_HTML5_BREAK(commentenddashloop);
+ }
+ case '<': {
+ appendStrBuf(c);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::COMMENT_LESSTHAN,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\r': {
+ appendStrBufCarriageReturn<P>();
+ state =
+ P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT,
+ reconsume, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ appendStrBufLineFeed<P>();
+ state =
+ P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\0': {
+ c = 0xfffd;
+ [[fallthrough]];
+ }
+ default: {
+ appendStrBuf(c);
+ state =
+ P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ commentenddashloop_end:;
+ [[fallthrough]];
+ }
+ case COMMENT_END: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '>': {
+ emitComment(2, pos);
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
+ reconsume, pos);
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '-': {
+ adjustDoubleHyphenAndAppendToStrBufAndErr(
+ c, reportedConsecutiveHyphens);
+ reportedConsecutiveHyphens = true;
+ continue;
+ }
+ case '<': {
+ appendStrBuf(c);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::COMMENT_LESSTHAN,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\r': {
+ adjustDoubleHyphenAndAppendToStrBufCarriageReturn<P>();
+ state =
+ P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT,
+ reconsume, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ adjustDoubleHyphenAndAppendToStrBufLineFeed<P>();
+ state =
+ P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '!': {
+ appendStrBuf(c);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::COMMENT_END_BANG,
+ reconsume, pos);
+ NS_HTML5_BREAK(commentendloop);
+ }
+ case '\0': {
+ c = 0xfffd;
+ [[fallthrough]];
+ }
+ default: {
+ adjustDoubleHyphenAndAppendToStrBufAndErr(
+ c, reportedConsecutiveHyphens);
+ reportedConsecutiveHyphens = true;
+ state =
+ P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ commentendloop_end:;
+ [[fallthrough]];
+ }
+ case COMMENT_END_BANG: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '>': {
+ emitComment(3, pos);
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
+ reconsume, pos);
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '-': {
+ appendStrBuf(c);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::COMMENT_END_DASH,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\r': {
+ appendStrBufCarriageReturn<P>();
+ state =
+ P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT,
+ reconsume, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ appendStrBufLineFeed<P>();
+ state =
+ P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\0': {
+ c = 0xfffd;
+ [[fallthrough]];
+ }
+ default: {
+ appendStrBuf(c);
+ state =
+ P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ }
+ case COMMENT_LESSTHAN: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '!': {
+ appendStrBuf(c);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::COMMENT_LESSTHAN_BANG,
+ reconsume, pos);
+ NS_HTML5_BREAK(commentlessthanloop);
+ }
+ case '<': {
+ appendStrBuf(c);
+ continue;
+ }
+ case '-': {
+ appendStrBuf(c);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::COMMENT_END_DASH,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\r': {
+ appendStrBufCarriageReturn<P>();
+ state =
+ P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT,
+ reconsume, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ appendStrBufLineFeed<P>();
+ state =
+ P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\0': {
+ c = 0xfffd;
+ [[fallthrough]];
+ }
+ default: {
+ appendStrBuf(c);
+ state =
+ P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ commentlessthanloop_end:;
+ [[fallthrough]];
+ }
+ case COMMENT_LESSTHAN_BANG: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '-': {
+ appendStrBuf(c);
+ state =
+ P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::COMMENT_LESSTHAN_BANG_DASH,
+ reconsume, pos);
+ NS_HTML5_BREAK(commentlessthanbangloop);
+ }
+ case '<': {
+ appendStrBuf(c);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::COMMENT_LESSTHAN,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\r': {
+ appendStrBufCarriageReturn<P>();
+ state =
+ P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT,
+ reconsume, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ appendStrBufLineFeed<P>();
+ state =
+ P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\0': {
+ c = 0xfffd;
+ [[fallthrough]];
+ }
+ default: {
+ appendStrBuf(c);
+ state =
+ P::transition(mViewSource.get(), nsHtml5Tokenizer::COMMENT,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ commentlessthanbangloop_end:;
+ [[fallthrough]];
+ }
+ case COMMENT_LESSTHAN_BANG_DASH: {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '-': {
+ appendStrBuf(c);
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::COMMENT_LESSTHAN_BANG_DASH_DASH, reconsume,
+ pos);
+ break;
+ }
+ case '<': {
+ appendStrBuf(c);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::COMMENT_LESSTHAN,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\r': {
+ appendStrBufCarriageReturn<P>();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::COMMENT, reconsume, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ appendStrBufLineFeed<P>();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::COMMENT, reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\0': {
+ c = 0xfffd;
+ [[fallthrough]];
+ }
+ default: {
+ appendStrBuf(c);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::COMMENT, reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ [[fallthrough]];
+ }
+ case COMMENT_LESSTHAN_BANG_DASH_DASH: {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '>': {
+ appendStrBuf(c);
+ emitComment(3, pos);
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
+ reconsume, pos);
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '-': {
+ if (P::reportErrors) {
+ errNestedComment();
+ }
+ adjustDoubleHyphenAndAppendToStrBufAndErr(
+ c, reportedConsecutiveHyphens);
+ reportedConsecutiveHyphens = true;
+ state =
+ P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::COMMENT_END, reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\r': {
+ c = '\n';
+ P::silentCarriageReturn(this);
+ if (P::reportErrors) {
+ errNestedComment();
+ }
+ adjustDoubleHyphenAndAppendToStrBufAndErr(
+ c, reportedConsecutiveHyphens);
+ reportedConsecutiveHyphens = true;
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::COMMENT, reconsume, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ if (P::reportErrors) {
+ errNestedComment();
+ }
+ adjustDoubleHyphenAndAppendToStrBufAndErr(
+ c, reportedConsecutiveHyphens);
+ reportedConsecutiveHyphens = true;
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::COMMENT, reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '!': {
+ if (P::reportErrors) {
+ errNestedComment();
+ }
+ adjustDoubleHyphenAndAppendToStrBufAndErr(
+ c, reportedConsecutiveHyphens);
+ reportedConsecutiveHyphens = true;
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::COMMENT_END_BANG,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\0': {
+ c = 0xfffd;
+ [[fallthrough]];
+ }
+ default: {
+ if (P::reportErrors) {
+ errNestedComment();
+ }
+ adjustDoubleHyphenAndAppendToStrBufAndErr(
+ c, reportedConsecutiveHyphens);
+ reportedConsecutiveHyphens = true;
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::COMMENT, reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ case COMMENT_START_DASH: {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '-': {
+ appendStrBuf(c);
+ state =
+ P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::COMMENT_END, reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '>': {
+ if (P::reportErrors) {
+ errPrematureEndOfComment();
+ }
+ emitComment(1, pos);
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
+ reconsume, pos);
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '<': {
+ appendStrBuf(c);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::COMMENT_LESSTHAN,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\r': {
+ appendStrBufCarriageReturn<P>();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::COMMENT, reconsume, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ appendStrBufLineFeed<P>();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::COMMENT, reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\0': {
+ c = 0xfffd;
+ [[fallthrough]];
+ }
+ default: {
+ appendStrBuf(c);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::COMMENT, reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ case CDATA_START: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ if (index < 6) {
+ if (c == nsHtml5Tokenizer::CDATA_LSQB[index]) {
+ appendStrBuf(c);
+ } else {
+ if (P::reportErrors) {
+ errBogusComment();
+ }
+ reconsume = true;
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BOGUS_COMMENT,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ index++;
+ continue;
+ } else {
+ clearStrBufAfterUse();
+ cstart = pos;
+ reconsume = true;
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::CDATA_SECTION, reconsume,
+ pos);
+ break;
+ }
+ }
+ [[fallthrough]];
+ }
+ case CDATA_SECTION: {  // Scans CDATA content; ']' flushes pending characters and moves to CDATA_RSQB.
+ for (;;) {
+ if (reconsume) {  // A reconsumed character is handled without advancing pos.
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ }
+ switch (c) {
+ case ']': {
+ flushChars(buf, pos);
+ state =
+ P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::CDATA_RSQB, reconsume, pos);
+ NS_HTML5_BREAK(cdatasectionloop);  // Presumably jumps to the cdatasectionloop_end label below.
+ }
+ case '\0': {
+ maybeEmitReplacementCharacter(buf, pos);
+ continue;
+ }
+ case '\r': {
+ emitCarriageReturn<P>(buf, pos);
+ NS_HTML5_BREAK(stateloop);  // Leaves the state loop after a carriage return; state is unchanged.
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ default: {  // Ordinary characters are only skipped here; flushChars is called when ']' is seen.
+ continue;
+ }
+ }
+ }
+ cdatasectionloop_end:;
+ [[fallthrough]];  // Continues straight into the CDATA_RSQB arm.
+ }
+ case CDATA_RSQB: {  // Entered from CDATA_SECTION on ']'; checks whether the next character is another ']'.
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case ']': {
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::CDATA_RSQB_RSQB,
+ reconsume, pos);
+ break;
+ }
+ default: {
+ tokenHandler->characters(nsHtml5Tokenizer::RSQB_RSQB, 0, 1);  // Presumably (buffer, offset, length): one character of RSQB_RSQB.
+ cstart = pos;
+ reconsume = true;  // The non-']' character is handled again by CDATA_SECTION.
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::CDATA_SECTION, reconsume,
+ pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ [[fallthrough]];  // Only the ']' case reaches this point; continues into CDATA_RSQB_RSQB.
+ }
+ case CDATA_RSQB_RSQB: {  // Reached after "]]"; '>' returns to DATA, another ']' stays here, anything else returns to CDATA_SECTION.
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case ']': {
+ tokenHandler->characters(nsHtml5Tokenizer::RSQB_RSQB, 0, 1);  // Hands one character of RSQB_RSQB to the handler and keeps waiting for '>'.
+ continue;
+ }
+ case '>': {
+ cstart = pos + 1;  // Next character data starts after the '>'.
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
+ reconsume, pos);
+ suspendIfRequestedAfterCurrentNonTextToken();
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ default: {
+ tokenHandler->characters(nsHtml5Tokenizer::RSQB_RSQB, 0, 2);  // Hands two characters of RSQB_RSQB to the handler.
+ cstart = pos;
+ reconsume = true;
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::CDATA_SECTION,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ }
+ case ATTRIBUTE_VALUE_SINGLE_QUOTED: {  // Accumulates a single-quoted attribute value in strBuf until the closing quote or an '&'.
+ for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ }
+ switch (c) {
+ case '\'': {
+ addAttributeWithValue();
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::AFTER_ATTRIBUTE_VALUE_QUOTED, reconsume,
+ pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '&': {
+ MOZ_ASSERT(!charRefBufLen,
+ "charRefBufLen not reset after previous use!");
+ appendCharRefBuf(c);
+ setAdditionalAndRememberAmpersandLocation('\'');  // The quote character becomes the "additional" character checked in CONSUME_CHARACTER_REFERENCE.
+ returnState = state;  // The character-reference states come back to this state via returnState.
+ state =
+ P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE,
+ reconsume, pos);
+ NS_HTML5_BREAK(attributevaluesinglequotedloop);  // Presumably jumps to attributevaluesinglequotedloop_end below.
+ }
+ case '\r': {
+ appendStrBufCarriageReturn<P>();
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ appendStrBufLineFeed<P>();
+ continue;
+ }
+ case '\0': {
+ c = 0xfffd;  // NUL is replaced by U+FFFD before being appended.
+ [[fallthrough]];
+ }
+ default: {
+ appendStrBuf(c);
+ continue;
+ }
+ }
+ }
+ attributevaluesinglequotedloop_end:;
+ [[fallthrough]];  // Continues straight into the CONSUME_CHARACTER_REFERENCE arm.
+ }
+ case CONSUME_CHARACTER_REFERENCE: {  // Looks at the first character after '&' and either abandons the reference, starts a numeric one, or starts a named lookup.
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case ' ':
+ case '\t':
+ case '\n':
+ case '\r':
+ case '\f':
+ case '<':
+ case '&':
+ case '\0': {
+ // ';' is not listed above: it must reach the default arm so that errNoNamedCharacterMatch() is reported for "&;" (NOTE(review): confirm intent).
+ emitOrAppendCharRefBuf(returnState);  // Not a character reference: hand back what was buffered so far.
+ if (!(returnState & DATA_AND_RCDATA_MASK)) {
+ cstart = pos;
+ }
+ reconsume = true;
+ state =
+ P::transition(mViewSource.get(), returnState, reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '#': {
+ appendCharRefBuf('#');
+ state =
+ P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::CONSUME_NCR, reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ default: {
+ if (c == additional) {  // "additional" is set by the state that saw the '&' (e.g. the attribute quote character).
+ emitOrAppendCharRefBuf(returnState);
+ reconsume = true;
+ state = P::transition(mViewSource.get(), returnState, reconsume,
+ pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ if (c >= 'a' && c <= 'z') {
+ firstCharKey = c - 'a' + 26;  // Lowercase letters map to keys 26..51.
+ } else if (c >= 'A' && c <= 'Z') {
+ firstCharKey = c - 'A';  // Uppercase letters map to keys 0..25.
+ } else {
+ if (c == ';') {  // Same reporting rule as the HILO_LOOKUP and TAIL states use when no name matches.
+ if (P::reportErrors) {
+ errNoNamedCharacterMatch();
+ }
+ }
+ emitOrAppendCharRefBuf(returnState);
+ if (!(returnState & DATA_AND_RCDATA_MASK)) {
+ cstart = pos;
+ }
+ reconsume = true;
+ state = P::transition(mViewSource.get(), returnState, reconsume,
+ pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ appendCharRefBuf(c);  // First letter of a possible named reference.
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::CHARACTER_REFERENCE_HILO_LOOKUP, reconsume,
+ pos);
+ break;
+ }
+ }
+ [[fallthrough]];  // Only the letter path reaches this point; continues into CHARACTER_REFERENCE_HILO_LOOKUP.
+ }
+ case CHARACTER_REFERENCE_HILO_LOOKUP: {  // Uses the second character of the name plus firstCharKey to fetch the initial [lo, hi] search window.
+ {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ int32_t hilo = 0;
+ if (c <= 'z') {  // Characters above 'z' are never used to index HILO_ACCEL.
+ const int32_t* row = nsHtml5NamedCharactersAccel::HILO_ACCEL[c];
+ if (row) {
+ hilo = row[firstCharKey];
+ }
+ }
+ if (!hilo) {  // Zero is treated as "no candidate names": give up on the reference.
+ if (c == ';') {
+ if (P::reportErrors) {
+ errNoNamedCharacterMatch();
+ }
+ }
+ emitOrAppendCharRefBuf(returnState);
+ if (!(returnState & DATA_AND_RCDATA_MASK)) {
+ cstart = pos;
+ }
+ reconsume = true;
+ state =
+ P::transition(mViewSource.get(), returnState, reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ appendCharRefBuf(c);
+ lo = hilo & 0xFFFF;  // Low 16 bits hold the lower bound.
+ hi = hilo >> 16;  // Remaining upper bits hold the upper bound.
+ entCol = -1;  // Incremented to 0 before the first comparison in CHARACTER_REFERENCE_TAIL.
+ candidate = -1;  // -1 means no complete name has matched yet.
+ charRefBufMark = 0;
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::CHARACTER_REFERENCE_TAIL,
+ reconsume, pos);
+ }
+ [[fallthrough]];  // Continues straight into the CHARACTER_REFERENCE_TAIL arm.
+ }
+ case CHARACTER_REFERENCE_TAIL: {  // Narrows the [lo, hi] window over NAMES one character at a time, then emits the best match or the raw buffered text.
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ entCol++;  // Column within the candidate names that c is compared against.
+ for (;;) {  // Raise lo past names that cannot match c at entCol.
+ if (hi < lo) {
+ NS_HTML5_BREAK(outer);  // Window is empty; presumably jumps to outer_end below.
+ }
+ if (entCol == nsHtml5NamedCharacters::NAMES[lo].length()) {
+ candidate = lo;  // NAMES[lo] has been matched in full; remember it as the best match so far.
+ charRefBufMark = charRefBufLen;  // Marks how much of charRefBuf the candidate accounts for.
+ lo++;
+ } else if (entCol > nsHtml5NamedCharacters::NAMES[lo].length()) {
+ NS_HTML5_BREAK(outer);
+ } else if (c > nsHtml5NamedCharacters::NAMES[lo].charAt(entCol)) {
+ lo++;
+ } else {
+ NS_HTML5_BREAK(loloop);
+ }
+ }
+ loloop_end:;
+ for (;;) {  // Lower hi past names that cannot match c at entCol.
+ if (hi < lo) {
+ NS_HTML5_BREAK(outer);
+ }
+ if (entCol == nsHtml5NamedCharacters::NAMES[hi].length()) {
+ NS_HTML5_BREAK(hiloop);
+ }
+ if (entCol > nsHtml5NamedCharacters::NAMES[hi].length()) {
+ NS_HTML5_BREAK(outer);
+ } else if (c < nsHtml5NamedCharacters::NAMES[hi].charAt(entCol)) {
+ hi--;
+ } else {
+ NS_HTML5_BREAK(hiloop);
+ }
+ }
+ hiloop_end:;
+ if (c == ';') {
+ if (entCol + 1 == nsHtml5NamedCharacters::NAMES[lo].length()) {  // ';' is the final character of NAMES[lo].
+ candidate = lo;
+ charRefBufMark = charRefBufLen;
+ }
+ NS_HTML5_BREAK(outer);
+ }
+ if (hi < lo) {
+ NS_HTML5_BREAK(outer);
+ }
+ appendCharRefBuf(c);
+ continue;
+ }
+ outer_end:;
+ if (candidate == -1) {  // No name matched: return the buffered text unchanged.
+ if (c == ';') {
+ if (P::reportErrors) {
+ errNoNamedCharacterMatch();
+ }
+ }
+ emitOrAppendCharRefBuf(returnState);
+ if (!(returnState & DATA_AND_RCDATA_MASK)) {
+ cstart = pos;
+ }
+ reconsume = true;
+ state =
+ P::transition(mViewSource.get(), returnState, reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ } else {
+ const nsHtml5CharacterName& candidateName =
+ nsHtml5NamedCharacters::NAMES[candidate];
+ if (!candidateName.length() ||
+ candidateName.charAt(candidateName.length() - 1) != ';') {  // The matched name does not end with ';'.
+ if ((returnState & DATA_AND_RCDATA_MASK)) {
+ char16_t ch;  // The character immediately following the matched name.
+ if (charRefBufMark == charRefBufLen) {
+ ch = c;
+ } else {
+ ch = charRefBuf[charRefBufMark];
+ }
+ if (ch == '=' || (ch >= '0' && ch <= '9') ||
+ (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z')) {  // Followed by '=' or an alphanumeric: keep the text literal.
+ if (c == ';') {
+ if (P::reportErrors) {
+ errNoNamedCharacterMatch();
+ }
+ }
+ appendCharRefBufToStrBuf();
+ reconsume = true;
+ state = P::transition(mViewSource.get(), returnState,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ if ((returnState & DATA_AND_RCDATA_MASK)) {
+ if (P::reportErrors) {
+ errUnescapedAmpersandInterpretedAsCharacterReference();
+ }
+ } else {
+ if (P::reportErrors) {
+ errNotSemicolonTerminated();
+ }
+ }
+ }
+ P::completedNamedCharacterReference(mViewSource.get());
+ const char16_t* val = nsHtml5NamedCharacters::VALUES[candidate];
+ if (!val[1]) {  // Second unit is zero: the value is emitted as one unit, otherwise as two.
+ emitOrAppendOne(val, returnState);
+ } else {
+ emitOrAppendTwo(val, returnState);
+ }
+ if (charRefBufMark < charRefBufLen) {  // Characters buffered beyond the matched name are passed through as text.
+ if ((returnState & DATA_AND_RCDATA_MASK)) {
+ appendStrBuf(charRefBuf, charRefBufMark,
+ charRefBufLen - charRefBufMark);
+ } else {
+ tokenHandler->characters(charRefBuf, charRefBufMark,
+ charRefBufLen - charRefBufMark);
+ }
+ }
+ bool earlyBreak = (c == ';' && charRefBufMark == charRefBufLen);  // True when the current ';' belongs to the reference and must not be reconsumed.
+ charRefBufLen = 0;
+ if (!(returnState & DATA_AND_RCDATA_MASK)) {
+ cstart = earlyBreak ? pos + 1 : pos;
+ }
+ reconsume = !earlyBreak;
+ state =
+ P::transition(mViewSource.get(), returnState, reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ case CONSUME_NCR: {  // Entered after "&#"; resets the accumulator and picks the hexadecimal or decimal loop.
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ value = 0;
+ seenDigits = false;
+ switch (c) {
+ case 'x':
+ case 'X': {
+ appendCharRefBuf(c);
+ state =
+ P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::HEX_NCR_LOOP, reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ default: {
+ reconsume = true;  // The current character is looked at again by DECIMAL_NRC_LOOP.
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::DECIMAL_NRC_LOOP,
+ reconsume, pos);
+ break;
+ }
+ }
+ [[fallthrough]];  // Continues straight into the DECIMAL_NRC_LOOP arm.
+ }
+ case DECIMAL_NRC_LOOP: {  // Accumulates decimal digits into value until ';' or a non-digit ends the reference.
+ for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ }
+ MOZ_ASSERT(value >= 0, "value must not become negative.");
+ if (c >= '0' && c <= '9') {
+ seenDigits = true;
+ if (value <= 0x10FFFF) {  // Stop accumulating once value is past 0x10FFFF, so it cannot keep growing.
+ value *= 10;
+ value += c - '0';
+ }
+ continue;
+ } else if (c == ';') {
+ if (seenDigits) {
+ if (!(returnState & DATA_AND_RCDATA_MASK)) {
+ cstart = pos + 1;  // The ';' is consumed as part of the reference.
+ }
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::HANDLE_NCR_VALUE,
+ reconsume, pos);
+ NS_HTML5_BREAK(decimalloop);  // Presumably jumps to decimalloop_end below.
+ } else {
+ if (P::reportErrors) {
+ errNoDigitsInNCR();
+ }
+ appendCharRefBuf(';');  // No digits: the ';' is returned together with the rest of the buffered text.
+ emitOrAppendCharRefBuf(returnState);
+ if (!(returnState & DATA_AND_RCDATA_MASK)) {
+ cstart = pos + 1;
+ }
+ state = P::transition(mViewSource.get(), returnState, reconsume,
+ pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ } else {
+ if (!seenDigits) {
+ if (P::reportErrors) {
+ errNoDigitsInNCR();
+ }
+ emitOrAppendCharRefBuf(returnState);
+ if (!(returnState & DATA_AND_RCDATA_MASK)) {
+ cstart = pos;
+ }
+ reconsume = true;
+ state = P::transition(mViewSource.get(), returnState, reconsume,
+ pos);
+ NS_HTML5_CONTINUE(stateloop);
+ } else {
+ if (P::reportErrors) {
+ errCharRefLacksSemicolon();
+ }
+ if (!(returnState & DATA_AND_RCDATA_MASK)) {
+ cstart = pos;
+ }
+ reconsume = true;  // The terminating character is not part of the reference.
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::HANDLE_NCR_VALUE,
+ reconsume, pos);
+ NS_HTML5_BREAK(decimalloop);
+ }
+ }
+ }
+ decimalloop_end:;
+ [[fallthrough]];  // Continues straight into the HANDLE_NCR_VALUE arm.
+ }
+ case HANDLE_NCR_VALUE: {  // Discards the buffered reference text, hands the numeric value to handleNcrValue, and resumes returnState.
+ charRefBufLen = 0;
+ handleNcrValue(returnState);
+ state = P::transition(mViewSource.get(), returnState, reconsume, pos);  // reconsume keeps whatever value the NCR loop left in it.
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case HEX_NCR_LOOP: {  // Accumulates hexadecimal digits into value until ';' or a non-hex character ends the reference.
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ MOZ_ASSERT(value >= 0, "value must not become negative.");
+ if (c >= '0' && c <= '9') {
+ seenDigits = true;
+ if (value <= 0x10FFFF) {  // Stop accumulating once value is past 0x10FFFF, so it cannot keep growing.
+ value *= 16;
+ value += c - '0';
+ }
+ continue;
+ } else if (c >= 'A' && c <= 'F') {
+ seenDigits = true;
+ if (value <= 0x10FFFF) {
+ value *= 16;
+ value += c - 'A' + 10;
+ }
+ continue;
+ } else if (c >= 'a' && c <= 'f') {
+ seenDigits = true;
+ if (value <= 0x10FFFF) {
+ value *= 16;
+ value += c - 'a' + 10;
+ }
+ continue;
+ } else if (c == ';') {
+ if (seenDigits) {
+ if (!(returnState & DATA_AND_RCDATA_MASK)) {
+ cstart = pos + 1;  // The ';' is consumed as part of the reference.
+ }
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::HANDLE_NCR_VALUE,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ } else {
+ if (P::reportErrors) {
+ errNoDigitsInNCR();
+ }
+ appendCharRefBuf(';');  // No digits: the ';' is returned together with the rest of the buffered text.
+ emitOrAppendCharRefBuf(returnState);
+ if (!(returnState & DATA_AND_RCDATA_MASK)) {
+ cstart = pos + 1;
+ }
+ state = P::transition(mViewSource.get(), returnState, reconsume,
+ pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ } else {
+ if (!seenDigits) {
+ if (P::reportErrors) {
+ errNoDigitsInNCR();
+ }
+ emitOrAppendCharRefBuf(returnState);
+ if (!(returnState & DATA_AND_RCDATA_MASK)) {
+ cstart = pos;
+ }
+ reconsume = true;
+ state = P::transition(mViewSource.get(), returnState, reconsume,
+ pos);
+ NS_HTML5_CONTINUE(stateloop);
+ } else {
+ if (P::reportErrors) {
+ errCharRefLacksSemicolon();
+ }
+ if (!(returnState & DATA_AND_RCDATA_MASK)) {
+ cstart = pos;
+ }
+ reconsume = true;  // The terminating character is not part of the reference.
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::HANDLE_NCR_VALUE,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ }
+ case PLAINTEXT: {  // Consumes everything as text; this arm never transitions to another state.
+ for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ }
+ switch (c) {
+ case '\0': {
+ emitPlaintextReplacementCharacter(buf, pos);
+ continue;
+ }
+ case '\r': {
+ emitCarriageReturn<P>(buf, pos);
+ NS_HTML5_BREAK(stateloop);  // Leaves the state loop after a carriage return; state is unchanged.
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ default: {  // Ordinary characters are only skipped over here.
+ continue;
+ }
+ }
+ }
+ }
+ case CLOSE_TAG_OPEN: {  // Decides between an end tag name (ASCII letter), an empty "</>" and a bogus comment.
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '>': {
+ if (P::reportErrors) {
+ errLtSlashGt();
+ }
+ cstart = pos + 1;  // Nothing is emitted; text resumes after the '>'.
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\r': {
+ P::silentCarriageReturn(this);
+ if (P::reportErrors) {
+ errGarbageAfterLtSlash();
+ }
+ clearStrBufBeforeUse();
+ appendStrBuf('\n');  // The carriage return is stored as a line feed in the bogus comment text.
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BOGUS_COMMENT, reconsume,
+ pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ if (P::reportErrors) {
+ errGarbageAfterLtSlash();
+ }
+ clearStrBufBeforeUse();
+ appendStrBuf(c);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BOGUS_COMMENT, reconsume,
+ pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\0': {
+ c = 0xfffd;  // NUL is replaced by U+FFFD before it is stored.
+ [[fallthrough]];
+ }
+ default: {
+ if (c >= 'A' && c <= 'Z') {
+ c += 0x20;  // Fold ASCII uppercase to lowercase.
+ }
+ if (c >= 'a' && c <= 'z') {
+ endTag = true;
+ clearStrBufBeforeUse();
+ appendStrBuf(c);  // First character of the tag name.
+ containsHyphen = false;
+ state =
+ P::transition(mViewSource.get(), nsHtml5Tokenizer::TAG_NAME,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ } else {
+ if (P::reportErrors) {
+ errGarbageAfterLtSlash();
+ }
+ clearStrBufBeforeUse();
+ appendStrBuf(c);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BOGUS_COMMENT,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ }
+ case RCDATA: {  // Scans RCDATA text; '&' starts a character reference and '<' moves to RAWTEXT_RCDATA_LESS_THAN_SIGN.
+ for (;;) {
+ if (reconsume) {  // Handle the reconsumed character once here, before the scanning loop advances pos.
+ reconsume = false;
+ switch (c) {
+ case '&': {
+ flushChars(buf, pos);
+ MOZ_ASSERT(!charRefBufLen,
+ "charRefBufLen not reset after previous use!");
+ appendCharRefBuf(c);
+ setAdditionalAndRememberAmpersandLocation('\0');
+ returnState = state;
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE, reconsume,
+ pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '<': {
+ flushChars(buf, pos);
+ returnState = state;
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::RAWTEXT_RCDATA_LESS_THAN_SIGN,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\0': {
+ maybeEmitReplacementCharacter(buf, pos);  // NOTE(review): RAWTEXT and SCRIPT_DATA call emitReplacementCharacter here - confirm the "maybe" variant is intended.
+ break;
+ }
+ case '\r': {
+ emitCarriageReturn<P>(buf, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ default: {
+ break;
+ }
+ }
+ }
+ rcdatamiddle:
+ for (;;) {
+ ++pos;  // Step past the character that was just handled.
+ pos += P::accelerateAdvancementData(this, buf, pos, endPos);  // Adds the returned count to pos; presumably the number of characters that can be skipped - confirm in its definition.
+ for (;;) {
+ if (pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '&': {
+ flushChars(buf, pos);
+ MOZ_ASSERT(!charRefBufLen,
+ "charRefBufLen not reset after previous use!");
+ appendCharRefBuf(c);
+ setAdditionalAndRememberAmpersandLocation('\0');
+ returnState = state;
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::CONSUME_CHARACTER_REFERENCE,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '<': {
+ flushChars(buf, pos);
+ returnState = state;
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::RAWTEXT_RCDATA_LESS_THAN_SIGN,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\0': {
+ maybeEmitReplacementCharacter(buf, pos);
+ NS_HTML5_CONTINUE(rcdatamiddle);  // Presumably jumps back to the rcdatamiddle label, which advances pos and re-runs the accelerated skip.
+ }
+ case '\r': {
+ emitCarriageReturn<P>(buf, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ NS_HTML5_CONTINUE(rcdatamiddle);
+ }
+ default: {
+ ++pos;  // Ordinary character: advance one position without the accelerated skip.
+ continue;
+ }
+ }
+ }
+ }
+ }
+ }
+ case RAWTEXT: {  // Scans RAWTEXT content; only '<' leaves the state, via RAWTEXT_RCDATA_LESS_THAN_SIGN.
+ for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ }
+ switch (c) {
+ case '<': {
+ flushChars(buf, pos);
+ returnState = state;  // RAWTEXT_RCDATA_LESS_THAN_SIGN comes back here through returnState.
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::RAWTEXT_RCDATA_LESS_THAN_SIGN, reconsume,
+ pos);
+ NS_HTML5_BREAK(rawtextloop);  // Presumably jumps to rawtextloop_end below.
+ }
+ case '\0': {
+ emitReplacementCharacter(buf, pos);
+ continue;
+ }
+ case '\r': {
+ emitCarriageReturn<P>(buf, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ default: {  // Ordinary characters are only skipped here; flushChars is called when '<' is seen.
+ continue;
+ }
+ }
+ }
+ rawtextloop_end:;
+ [[fallthrough]];  // Continues straight into the RAWTEXT_RCDATA_LESS_THAN_SIGN arm.
+ }
+ case RAWTEXT_RCDATA_LESS_THAN_SIGN: {  // After '<' in RAWTEXT/RCDATA: '/' may begin an end tag, anything else turns the '<' into text.
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '/': {
+ index = 0;  // Restart matching of the expected end tag name.
+ clearStrBufBeforeUse();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::NON_DATA_END_TAG_NAME,
+ reconsume, pos);
+ NS_HTML5_BREAK(rawtextrcdatalessthansignloop);
+ }
+ default: {
+ tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 1);  // Hands the first character of LT_GT to the handler.
+ cstart = pos;
+ reconsume = true;
+ state = P::transition(mViewSource.get(), returnState, reconsume,
+ pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ rawtextrcdatalessthansignloop_end:;
+ [[fallthrough]];  // Continues straight into the NON_DATA_END_TAG_NAME arm.
+ }
+ case NON_DATA_END_TAG_NAME: {  // Matches the characters after "</" against endTagExpectationAsArray, ignoring ASCII case.
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ if (!endTagExpectationAsArray) {  // No expected end tag name: the "</" is plain text.
+ tokenHandler->characters(nsHtml5Tokenizer::LT_SOLIDUS, 0, 2);
+ cstart = pos;
+ reconsume = true;
+ state =
+ P::transition(mViewSource.get(), returnState, reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ } else if (index < endTagExpectationAsArray.length) {
+ char16_t e = endTagExpectationAsArray[index];
+ char16_t folded = c;
+ if (c >= 'A' && c <= 'Z') {
+ folded += 0x20;  // Fold ASCII uppercase to lowercase for the comparison only.
+ }
+ if (folded != e) {  // Mismatch: emit "</" plus the characters matched so far as text.
+ tokenHandler->characters(nsHtml5Tokenizer::LT_SOLIDUS, 0, 2);
+ emitStrBuf();
+ cstart = pos;
+ reconsume = true;
+ state = P::transition(mViewSource.get(), returnState, reconsume,
+ pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ appendStrBuf(c);  // Stores the character as written, not the folded form.
+ index++;
+ continue;
+ } else {  // The whole expected name matched; the next character decides whether it really is an end tag.
+ endTag = true;
+ tagName = endTagExpectation;
+ switch (c) {
+ case '\r': {
+ P::silentCarriageReturn(this);
+ clearStrBufAfterUse();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME,
+ reconsume, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ case ' ':
+ case '\t':
+ case '\f': {
+ clearStrBufAfterUse();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BEFORE_ATTRIBUTE_NAME,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '/': {
+ clearStrBufAfterUse();
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::SELF_CLOSING_START_TAG, reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '>': {
+ clearStrBufAfterUse();
+ state = P::transition(mViewSource.get(),
+ emitCurrentTagToken(false, pos),
+ reconsume, pos);  // The next state is whatever emitCurrentTagToken returns.
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ default: {  // Name is followed by something else: treat everything as text.
+ tokenHandler->characters(nsHtml5Tokenizer::LT_SOLIDUS, 0, 2);
+ emitStrBuf();
+ cstart = pos;
+ reconsume = true;
+ state = P::transition(mViewSource.get(), returnState,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ }
+ }
+ case BOGUS_COMMENT: {  // Collects bogus comment text in strBuf until '>' emits the comment.
+ for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ }
+ switch (c) {
+ case '>': {
+ emitComment(0, pos);
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
+ reconsume, pos);
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '-': {
+ appendStrBuf(c);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BOGUS_COMMENT_HYPHEN,
+ reconsume, pos);
+ NS_HTML5_BREAK(boguscommentloop);  // Presumably jumps to boguscommentloop_end below.
+ }
+ case '\r': {
+ appendStrBufCarriageReturn<P>();
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ appendStrBufLineFeed<P>();
+ continue;
+ }
+ case '\0': {
+ c = 0xfffd;  // NUL is replaced by U+FFFD before being appended.
+ [[fallthrough]];
+ }
+ default: {
+ appendStrBuf(c);
+ continue;
+ }
+ }
+ }
+ boguscommentloop_end:;
+ [[fallthrough]];  // Continues straight into the BOGUS_COMMENT_HYPHEN arm.
+ }
+ case BOGUS_COMMENT_HYPHEN: {  // Handles the character after '-' in a bogus comment; all but '-' and '>' go back to BOGUS_COMMENT.
+ boguscommenthyphenloop:
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '>': {
+ emitComment(0, pos);
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
+ reconsume, pos);
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '-': {
+ appendSecondHyphenToBogusComment();
+ NS_HTML5_CONTINUE(boguscommenthyphenloop);  // Stays in this state for runs of hyphens.
+ }
+ case '\r': {
+ appendStrBufCarriageReturn<P>();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BOGUS_COMMENT,
+ reconsume, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ appendStrBufLineFeed<P>();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BOGUS_COMMENT,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\0': {
+ c = 0xfffd;  // NUL is replaced by U+FFFD before being appended.
+ [[fallthrough]];
+ }
+ default: {
+ appendStrBuf(c);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BOGUS_COMMENT,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ }
+ case SCRIPT_DATA: {  // Scans script content; only '<' leaves the state, via SCRIPT_DATA_LESS_THAN_SIGN.
+ for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ }
+ switch (c) {
+ case '<': {
+ flushChars(buf, pos);
+ returnState = state;  // A later NON_DATA_END_TAG_NAME mismatch comes back here through returnState.
+ state =
+ P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_LESS_THAN_SIGN,
+ reconsume, pos);
+ NS_HTML5_BREAK(scriptdataloop);  // Presumably jumps to scriptdataloop_end below.
+ }
+ case '\0': {
+ emitReplacementCharacter(buf, pos);
+ continue;
+ }
+ case '\r': {
+ emitCarriageReturn<P>(buf, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ default: {  // Ordinary characters are only skipped here; flushChars is called when '<' is seen.
+ continue;
+ }
+ }
+ }
+ scriptdataloop_end:;
+ [[fallthrough]];  // Continues straight into the SCRIPT_DATA_LESS_THAN_SIGN arm.
+ }
+ case SCRIPT_DATA_LESS_THAN_SIGN: {  // After '<' in script data: '/' may begin an end tag, '!' may begin an escape, else the '<' is text.
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '/': {
+ index = 0;  // Restart matching of the expected end tag name.
+ clearStrBufBeforeUse();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::NON_DATA_END_TAG_NAME,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '!': {
+ tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 1);  // Hands the first character of LT_GT to the handler.
+ cstart = pos;  // The '!' itself stays part of the pending text.
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_ESCAPE_START, reconsume, pos);
+ NS_HTML5_BREAK(scriptdatalessthansignloop);
+ }
+ default: {
+ tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 1);
+ cstart = pos;
+ reconsume = true;
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA, reconsume,
+ pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ scriptdatalessthansignloop_end:;
+ [[fallthrough]];  // Continues straight into the SCRIPT_DATA_ESCAPE_START arm.
+ }
+ case SCRIPT_DATA_ESCAPE_START: {  // Entered from SCRIPT_DATA_LESS_THAN_SIGN on '!'; expects the first '-' of an escape.
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '-': {
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_ESCAPE_START_DASH, reconsume,
+ pos);
+ NS_HTML5_BREAK(scriptdataescapestartloop);
+ }
+ default: {
+ reconsume = true;  // Not an escape: SCRIPT_DATA looks at this character again.
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA, reconsume,
+ pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ scriptdataescapestartloop_end:;
+ [[fallthrough]];  // Continues straight into the SCRIPT_DATA_ESCAPE_START_DASH arm.
+ }
+ case SCRIPT_DATA_ESCAPE_START_DASH: {  // Entered from SCRIPT_DATA_ESCAPE_START on '-'; expects the second '-' of an escape.
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '-': {
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_DASH_DASH, reconsume,
+ pos);
+ NS_HTML5_BREAK(scriptdataescapestartdashloop);
+ }
+ default: {
+ reconsume = true;  // Not an escape: SCRIPT_DATA looks at this character again.
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA, reconsume,
+ pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ scriptdataescapestartdashloop_end:;
+ [[fallthrough]];  // Continues straight into the SCRIPT_DATA_ESCAPED_DASH_DASH arm.
+ }
+ case SCRIPT_DATA_ESCAPED_DASH_DASH: {  // Inside escaped script data after "--": '>' returns to SCRIPT_DATA, further '-' stays here.
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '-': {
+ continue;
+ }
+ case '<': {
+ flushChars(buf, pos);
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '>': {
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA, reconsume,
+ pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\0': {
+ emitReplacementCharacter(buf, pos);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED,
+ reconsume, pos);
+ NS_HTML5_BREAK(scriptdataescapeddashdashloop);
+ }
+ case '\r': {
+ emitCarriageReturn<P>(buf, pos);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED,
+ reconsume, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ default: {
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED,
+ reconsume, pos);
+ NS_HTML5_BREAK(scriptdataescapeddashdashloop);  // Presumably jumps to scriptdataescapeddashdashloop_end below.
+ }
+ }
+ }
+ scriptdataescapeddashdashloop_end:;
+ [[fallthrough]];  // Continues straight into the SCRIPT_DATA_ESCAPED arm.
+ }
+ case SCRIPT_DATA_ESCAPED: {  // Scans escaped script data; '-' and '<' lead to the matching escaped sub-states.
+ for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ }
+ switch (c) {
+ case '-': {
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_DASH, reconsume, pos);
+ NS_HTML5_BREAK(scriptdataescapedloop);  // Presumably jumps to scriptdataescapedloop_end below.
+ }
+ case '<': {
+ flushChars(buf, pos);
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\0': {
+ emitReplacementCharacter(buf, pos);
+ continue;
+ }
+ case '\r': {
+ emitCarriageReturn<P>(buf, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ default: {  // Ordinary characters are only skipped over here.
+ continue;
+ }
+ }
+ }
+ scriptdataescapedloop_end:;
+ [[fallthrough]];  // Continues straight into the SCRIPT_DATA_ESCAPED_DASH arm.
+ }
+ case SCRIPT_DATA_ESCAPED_DASH: {  // Inside escaped script data after one '-'; a second '-' moves to SCRIPT_DATA_ESCAPED_DASH_DASH.
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '-': {
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_DASH_DASH, reconsume,
+ pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '<': {
+ flushChars(buf, pos);
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN,
+ reconsume, pos);
+ NS_HTML5_BREAK(scriptdataescapeddashloop);  // Presumably jumps to scriptdataescapeddashloop_end below.
+ }
+ case '\0': {
+ emitReplacementCharacter(buf, pos);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\r': {
+ emitCarriageReturn<P>(buf, pos);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED,
+ reconsume, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ default: {  // Any other character returns to plain escaped script data.
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ scriptdataescapeddashloop_end:;
+ [[fallthrough]];  // Continues straight into the SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN arm.
+ }
+ case SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN: {  // After '<' in escaped script data: '/' may begin an end tag, 's'/'S' may begin a double escape.
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '/': {
+ index = 0;
+ clearStrBufBeforeUse();
+ returnState = nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED;  // A failed end tag match resumes in SCRIPT_DATA_ESCAPED.
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::NON_DATA_END_TAG_NAME,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case 'S':
+ case 's': {
+ tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 1);  // Hands the first character of LT_GT to the handler.
+ cstart = pos;
+ index = 1;  // This letter counts as SCRIPT_ARR entry 0; matching continues at entry 1.
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPE_START,
+ reconsume, pos);
+ NS_HTML5_BREAK(scriptdataescapedlessthanloop);
+ }
+ default: {
+ tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 1);
+ cstart = pos;
+ reconsume = true;
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ scriptdataescapedlessthanloop_end:;
+ [[fallthrough]];  // Continues straight into the SCRIPT_DATA_DOUBLE_ESCAPE_START arm.
+ }
+ case SCRIPT_DATA_DOUBLE_ESCAPE_START: {  // Matches SCRIPT_ARR[index] case-insensitively while index < 6, then requires a delimiter to enter the double-escaped state.
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ MOZ_ASSERT(index > 0);  // The entering state sets index to 1.
+ if (index < 6) {
+ char16_t folded = c;
+ if (c >= 'A' && c <= 'Z') {
+ folded += 0x20;  // Fold ASCII uppercase to lowercase for the comparison.
+ }
+ if (folded != nsHtml5Tokenizer::SCRIPT_ARR[index]) {
+ reconsume = true;
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ index++;
+ continue;
+ }
+ switch (c) {  // Reached once index is 6: c is the character after the matched word.
+ case '\r': {
+ emitCarriageReturn<P>(buf, pos);
+ state =
+ P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED,
+ reconsume, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ case ' ':
+ case '\t':
+ case '\f':
+ case '/':
+ case '>': {
+ state =
+ P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED,
+ reconsume, pos);
+ NS_HTML5_BREAK(scriptdatadoubleescapestartloop);
+ }
+ default: {
+ reconsume = true;
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ scriptdatadoubleescapestartloop_end:;
+ [[fallthrough]];  // Continues straight into the SCRIPT_DATA_DOUBLE_ESCAPED arm.
+ }
+ case SCRIPT_DATA_DOUBLE_ESCAPED: {  // Scans double-escaped script data; '-' and '<' lead to the matching double-escaped sub-states.
+ for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ }
+ switch (c) {
+ case '-': {
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED_DASH,
+ reconsume, pos);
+ NS_HTML5_BREAK(scriptdatadoubleescapedloop);  // Presumably jumps to scriptdatadoubleescapedloop_end below.
+ }
+ case '<': {  // Unlike the single-escaped states, no flushChars call is made here.
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\0': {
+ emitReplacementCharacter(buf, pos);
+ continue;
+ }
+ case '\r': {
+ emitCarriageReturn<P>(buf, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ default: {
+ continue;
+ }
+ }
+ }
+ scriptdatadoubleescapedloop_end:;
+ [[fallthrough]];  // Continues straight into the SCRIPT_DATA_DOUBLE_ESCAPED_DASH arm.
+ }
+ case SCRIPT_DATA_DOUBLE_ESCAPED_DASH: {  // Double-escaped script data after one '-'; a second '-' moves to the DASH_DASH state.
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '-': {
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH,
+ reconsume, pos);
+ NS_HTML5_BREAK(scriptdatadoubleescapeddashloop);  // Presumably jumps to scriptdatadoubleescapeddashloop_end below.
+ }
+ case '<': {
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\0': {
+ emitReplacementCharacter(buf, pos);
+ state =
+ P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\r': {
+ emitCarriageReturn<P>(buf, pos);
+ state =
+ P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED,
+ reconsume, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ default: {  // Any other character returns to plain double-escaped script data.
+ state =
+ P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ scriptdatadoubleescapeddashloop_end:;
+ [[fallthrough]];  // Continues straight into the SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH arm.
+ }
+ case SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '-': {
+ continue;
+ }
+ case '<': {
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN,
+ reconsume, pos);
+ NS_HTML5_BREAK(scriptdatadoubleescapeddashdashloop);
+ }
+ case '>': {
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA, reconsume,
+ pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\0': {
+ emitReplacementCharacter(buf, pos);
+ state =
+ P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\r': {
+ emitCarriageReturn<P>(buf, pos);
+ state =
+ P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED,
+ reconsume, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ default: {
+ state =
+ P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ scriptdatadoubleescapeddashdashloop_end:;
+ [[fallthrough]];
+ }
+ case SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '/': {
+ index = 0;
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPE_END, reconsume,
+ pos);
+ NS_HTML5_BREAK(scriptdatadoubleescapedlessthanloop);
+ }
+ default: {
+ reconsume = true;
+ state =
+ P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ scriptdatadoubleescapedlessthanloop_end:;
+ [[fallthrough]];
+ }
+ case SCRIPT_DATA_DOUBLE_ESCAPE_END: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ if (index < 6) {
+ char16_t folded = c;
+ if (c >= 'A' && c <= 'Z') {
+ folded += 0x20;
+ }
+ if (folded != nsHtml5Tokenizer::SCRIPT_ARR[index]) {
+ reconsume = true;
+ state =
+ P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ index++;
+ continue;
+ }
+ switch (c) {
+ case '\r': {
+ emitCarriageReturn<P>(buf, pos);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED,
+ reconsume, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ case ' ':
+ case '\t':
+ case '\f':
+ case '/':
+ case '>': {
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_ESCAPED,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ default: {
+ reconsume = true;
+ state =
+ P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::SCRIPT_DATA_DOUBLE_ESCAPED,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ }
+ case MARKUP_DECLARATION_OCTYPE: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ if (index < 6) {
+ char16_t folded = c;
+ if (c >= 'A' && c <= 'Z') {
+ folded += 0x20;
+ }
+ if (folded == nsHtml5Tokenizer::OCTYPE[index]) {
+ appendStrBuf(c);
+ } else {
+ if (P::reportErrors) {
+ errBogusComment();
+ }
+ reconsume = true;
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BOGUS_COMMENT,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ index++;
+ continue;
+ } else {
+ reconsume = true;
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::DOCTYPE, reconsume, pos);
+ NS_HTML5_BREAK(markupdeclarationdoctypeloop);
+ }
+ }
+ markupdeclarationdoctypeloop_end:;
+ [[fallthrough]];
+ }
+ case DOCTYPE: {
+ for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ }
+ initDoctypeFields();
+ switch (c) {
+ case '\r': {
+ P::silentCarriageReturn(this);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BEFORE_DOCTYPE_NAME,
+ reconsume, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ case ' ':
+ case '\t':
+ case '\f': {
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BEFORE_DOCTYPE_NAME,
+ reconsume, pos);
+ NS_HTML5_BREAK(doctypeloop);
+ }
+ default: {
+ if (P::reportErrors) {
+ errMissingSpaceBeforeDoctypeName();
+ }
+ reconsume = true;
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BEFORE_DOCTYPE_NAME,
+ reconsume, pos);
+ NS_HTML5_BREAK(doctypeloop);
+ }
+ }
+ }
+ doctypeloop_end:;
+ [[fallthrough]];
+ }
+ case BEFORE_DOCTYPE_NAME: {
+ for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ }
+ switch (c) {
+ case '\r': {
+ P::silentCarriageReturn(this);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ case ' ':
+ case '\t':
+ case '\f': {
+ continue;
+ }
+ case '>': {
+ if (P::reportErrors) {
+ errNamelessDoctype();
+ }
+ forceQuirks = true;
+ emitDoctypeToken(pos);
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
+ reconsume, pos);
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\0': {
+ c = 0xfffd;
+ [[fallthrough]];
+ }
+ default: {
+ if (c >= 'A' && c <= 'Z') {
+ c += 0x20;
+ }
+ clearStrBufBeforeUse();
+ appendStrBuf(c);
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::DOCTYPE_NAME, reconsume,
+ pos);
+ NS_HTML5_BREAK(beforedoctypenameloop);
+ }
+ }
+ }
+ beforedoctypenameloop_end:;
+ [[fallthrough]];
+ }
+ case DOCTYPE_NAME: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '\r': {
+ P::silentCarriageReturn(this);
+ strBufToDoctypeName();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::AFTER_DOCTYPE_NAME,
+ reconsume, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ case ' ':
+ case '\t':
+ case '\f': {
+ strBufToDoctypeName();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::AFTER_DOCTYPE_NAME,
+ reconsume, pos);
+ NS_HTML5_BREAK(doctypenameloop);
+ }
+ case '>': {
+ strBufToDoctypeName();
+ emitDoctypeToken(pos);
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
+ reconsume, pos);
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\0': {
+ c = 0xfffd;
+ [[fallthrough]];
+ }
+ default: {
+ if (c >= 'A' && c <= 'Z') {
+ c += 0x0020;
+ }
+ appendStrBuf(c);
+ continue;
+ }
+ }
+ }
+ doctypenameloop_end:;
+ [[fallthrough]];
+ }
+ case AFTER_DOCTYPE_NAME: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '\r': {
+ P::silentCarriageReturn(this);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ case ' ':
+ case '\t':
+ case '\f': {
+ continue;
+ }
+ case '>': {
+ emitDoctypeToken(pos);
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
+ reconsume, pos);
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case 'p':
+ case 'P': {
+ index = 0;
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::DOCTYPE_UBLIC,
+ reconsume, pos);
+ NS_HTML5_BREAK(afterdoctypenameloop);
+ }
+ case 's':
+ case 'S': {
+ index = 0;
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::DOCTYPE_YSTEM,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ default: {
+ bogusDoctype();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BOGUS_DOCTYPE,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ afterdoctypenameloop_end:;
+ [[fallthrough]];
+ }
+ case DOCTYPE_UBLIC: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ if (index < 5) {
+ char16_t folded = c;
+ if (c >= 'A' && c <= 'Z') {
+ folded += 0x20;
+ }
+ if (folded != nsHtml5Tokenizer::UBLIC[index]) {
+ bogusDoctype();
+ reconsume = true;
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BOGUS_DOCTYPE,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ index++;
+ continue;
+ } else {
+ reconsume = true;
+ state =
+ P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::AFTER_DOCTYPE_PUBLIC_KEYWORD,
+ reconsume, pos);
+ NS_HTML5_BREAK(doctypeublicloop);
+ }
+ }
+ doctypeublicloop_end:;
+ [[fallthrough]];
+ }
+ case AFTER_DOCTYPE_PUBLIC_KEYWORD: {
+ for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ }
+ switch (c) {
+ case '\r': {
+ P::silentCarriageReturn(this);
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::BEFORE_DOCTYPE_PUBLIC_IDENTIFIER,
+ reconsume, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ case ' ':
+ case '\t':
+ case '\f': {
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::BEFORE_DOCTYPE_PUBLIC_IDENTIFIER,
+ reconsume, pos);
+ NS_HTML5_BREAK(afterdoctypepublickeywordloop);
+ }
+ case '\"': {
+ if (P::reportErrors) {
+ errNoSpaceBetweenDoctypePublicKeywordAndQuote();
+ }
+ clearStrBufBeforeUse();
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\'': {
+ if (P::reportErrors) {
+ errNoSpaceBetweenDoctypePublicKeywordAndQuote();
+ }
+ clearStrBufBeforeUse();
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '>': {
+ if (P::reportErrors) {
+ errExpectedPublicId();
+ }
+ forceQuirks = true;
+ emitDoctypeToken(pos);
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
+ reconsume, pos);
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ default: {
+ bogusDoctype();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BOGUS_DOCTYPE,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ afterdoctypepublickeywordloop_end:;
+ [[fallthrough]];
+ }
+ case BEFORE_DOCTYPE_PUBLIC_IDENTIFIER: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '\r': {
+ P::silentCarriageReturn(this);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ case ' ':
+ case '\t':
+ case '\f': {
+ continue;
+ }
+ case '\"': {
+ clearStrBufBeforeUse();
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED,
+ reconsume, pos);
+ NS_HTML5_BREAK(beforedoctypepublicidentifierloop);
+ }
+ case '\'': {
+ clearStrBufBeforeUse();
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '>': {
+ if (P::reportErrors) {
+ errExpectedPublicId();
+ }
+ forceQuirks = true;
+ emitDoctypeToken(pos);
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
+ reconsume, pos);
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ default: {
+ bogusDoctype();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BOGUS_DOCTYPE,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ beforedoctypepublicidentifierloop_end:;
+ [[fallthrough]];
+ }
+ case DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '\"': {
+ publicIdentifier = strBufToString();
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::AFTER_DOCTYPE_PUBLIC_IDENTIFIER,
+ reconsume, pos);
+ NS_HTML5_BREAK(doctypepublicidentifierdoublequotedloop);
+ }
+ case '>': {
+ if (P::reportErrors) {
+ errGtInPublicId();
+ }
+ forceQuirks = true;
+ publicIdentifier = strBufToString();
+ emitDoctypeToken(pos);
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
+ reconsume, pos);
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\r': {
+ appendStrBufCarriageReturn<P>();
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ appendStrBufLineFeed<P>();
+ continue;
+ }
+ case '\0': {
+ c = 0xfffd;
+ [[fallthrough]];
+ }
+ default: {
+ appendStrBuf(c);
+ continue;
+ }
+ }
+ }
+ doctypepublicidentifierdoublequotedloop_end:;
+ [[fallthrough]];
+ }
+ case AFTER_DOCTYPE_PUBLIC_IDENTIFIER: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '\r': {
+ P::silentCarriageReturn(this);
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::
+ BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS,
+ reconsume, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ case ' ':
+ case '\t':
+ case '\f': {
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::
+ BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS,
+ reconsume, pos);
+ NS_HTML5_BREAK(afterdoctypepublicidentifierloop);
+ }
+ case '>': {
+ emitDoctypeToken(pos);
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
+ reconsume, pos);
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\"': {
+ if (P::reportErrors) {
+ errNoSpaceBetweenPublicAndSystemIds();
+ }
+ clearStrBufBeforeUse();
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\'': {
+ if (P::reportErrors) {
+ errNoSpaceBetweenPublicAndSystemIds();
+ }
+ clearStrBufBeforeUse();
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ default: {
+ bogusDoctype();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BOGUS_DOCTYPE,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ afterdoctypepublicidentifierloop_end:;
+ [[fallthrough]];
+ }
+ case BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '\r': {
+ P::silentCarriageReturn(this);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ case ' ':
+ case '\t':
+ case '\f': {
+ continue;
+ }
+ case '>': {
+ emitDoctypeToken(pos);
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
+ reconsume, pos);
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\"': {
+ clearStrBufBeforeUse();
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED,
+ reconsume, pos);
+ NS_HTML5_BREAK(betweendoctypepublicandsystemidentifiersloop);
+ }
+ case '\'': {
+ clearStrBufBeforeUse();
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ default: {
+ bogusDoctype();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BOGUS_DOCTYPE,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ betweendoctypepublicandsystemidentifiersloop_end:;
+ [[fallthrough]];
+ }
+ case DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '\"': {
+ systemIdentifier = strBufToString();
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::AFTER_DOCTYPE_SYSTEM_IDENTIFIER,
+ reconsume, pos);
+ NS_HTML5_BREAK(doctypesystemidentifierdoublequotedloop);
+ }
+ case '>': {
+ if (P::reportErrors) {
+ errGtInSystemId();
+ }
+ forceQuirks = true;
+ systemIdentifier = strBufToString();
+ emitDoctypeToken(pos);
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
+ reconsume, pos);
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\r': {
+ appendStrBufCarriageReturn<P>();
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ appendStrBufLineFeed<P>();
+ continue;
+ }
+ case '\0': {
+ c = 0xfffd;
+ [[fallthrough]];
+ }
+ default: {
+ appendStrBuf(c);
+ continue;
+ }
+ }
+ }
+ doctypesystemidentifierdoublequotedloop_end:;
+ [[fallthrough]];
+ }
+ case AFTER_DOCTYPE_SYSTEM_IDENTIFIER: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '\r': {
+ P::silentCarriageReturn(this);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ case ' ':
+ case '\t':
+ case '\f': {
+ continue;
+ }
+ case '>': {
+ emitDoctypeToken(pos);
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
+ reconsume, pos);
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ default: {
+ bogusDoctypeWithoutQuirks();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BOGUS_DOCTYPE,
+ reconsume, pos);
+ NS_HTML5_BREAK(afterdoctypesystemidentifierloop);
+ }
+ }
+ }
+ afterdoctypesystemidentifierloop_end:;
+ [[fallthrough]];
+ }
+ case BOGUS_DOCTYPE: {
+ for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ }
+ switch (c) {
+ case '>': {
+ emitDoctypeToken(pos);
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
+ reconsume, pos);
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\r': {
+ P::silentCarriageReturn(this);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ default: {
+ continue;
+ }
+ }
+ }
+ }
+ case DOCTYPE_YSTEM: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ if (index < 5) {
+ char16_t folded = c;
+ if (c >= 'A' && c <= 'Z') {
+ folded += 0x20;
+ }
+ if (folded != nsHtml5Tokenizer::YSTEM[index]) {
+ bogusDoctype();
+ reconsume = true;
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BOGUS_DOCTYPE,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ index++;
+ NS_HTML5_CONTINUE(stateloop);
+ } else {
+ reconsume = true;
+ state =
+ P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::AFTER_DOCTYPE_SYSTEM_KEYWORD,
+ reconsume, pos);
+ NS_HTML5_BREAK(doctypeystemloop);
+ }
+ }
+ doctypeystemloop_end:;
+ [[fallthrough]];
+ }
+ case AFTER_DOCTYPE_SYSTEM_KEYWORD: {
+ for (;;) {
+ if (reconsume) {
+ reconsume = false;
+ } else {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ }
+ switch (c) {
+ case '\r': {
+ P::silentCarriageReturn(this);
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::BEFORE_DOCTYPE_SYSTEM_IDENTIFIER,
+ reconsume, pos);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ case ' ':
+ case '\t':
+ case '\f': {
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::BEFORE_DOCTYPE_SYSTEM_IDENTIFIER,
+ reconsume, pos);
+ NS_HTML5_BREAK(afterdoctypesystemkeywordloop);
+ }
+ case '\"': {
+ if (P::reportErrors) {
+ errNoSpaceBetweenDoctypeSystemKeywordAndQuote();
+ }
+ clearStrBufBeforeUse();
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\'': {
+ if (P::reportErrors) {
+ errNoSpaceBetweenDoctypeSystemKeywordAndQuote();
+ }
+ clearStrBufBeforeUse();
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '>': {
+ if (P::reportErrors) {
+ errExpectedPublicId();
+ }
+ forceQuirks = true;
+ emitDoctypeToken(pos);
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
+ reconsume, pos);
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ default: {
+ bogusDoctype();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BOGUS_DOCTYPE,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ afterdoctypesystemkeywordloop_end:;
+ [[fallthrough]];
+ }
+ case BEFORE_DOCTYPE_SYSTEM_IDENTIFIER: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '\r': {
+ P::silentCarriageReturn(this);
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ P::silentLineFeed(this);
+ [[fallthrough]];
+ }
+ case ' ':
+ case '\t':
+ case '\f': {
+ continue;
+ }
+ case '\"': {
+ clearStrBufBeforeUse();
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\'': {
+ clearStrBufBeforeUse();
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED,
+ reconsume, pos);
+ NS_HTML5_BREAK(beforedoctypesystemidentifierloop);
+ }
+ case '>': {
+ if (P::reportErrors) {
+ errExpectedSystemId();
+ }
+ forceQuirks = true;
+ emitDoctypeToken(pos);
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
+ reconsume, pos);
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ default: {
+ bogusDoctype();
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::BOGUS_DOCTYPE,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ beforedoctypesystemidentifierloop_end:;
+ [[fallthrough]];
+ }
+ case DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '\'': {
+ systemIdentifier = strBufToString();
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::AFTER_DOCTYPE_SYSTEM_IDENTIFIER,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '>': {
+ if (P::reportErrors) {
+ errGtInSystemId();
+ }
+ forceQuirks = true;
+ systemIdentifier = strBufToString();
+ emitDoctypeToken(pos);
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
+ reconsume, pos);
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\r': {
+ appendStrBufCarriageReturn<P>();
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ appendStrBufLineFeed<P>();
+ continue;
+ }
+ case '\0': {
+ c = 0xfffd;
+ [[fallthrough]];
+ }
+ default: {
+ appendStrBuf(c);
+ continue;
+ }
+ }
+ }
+ }
+ case DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '\'': {
+ publicIdentifier = strBufToString();
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::AFTER_DOCTYPE_PUBLIC_IDENTIFIER,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '>': {
+ if (P::reportErrors) {
+ errGtInPublicId();
+ }
+ forceQuirks = true;
+ publicIdentifier = strBufToString();
+ emitDoctypeToken(pos);
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
+ reconsume, pos);
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ case '\r': {
+ appendStrBufCarriageReturn<P>();
+ NS_HTML5_BREAK(stateloop);
+ }
+ case '\n': {
+ appendStrBufLineFeed<P>();
+ continue;
+ }
+ case '\0': {
+ c = 0xfffd;
+ [[fallthrough]];
+ }
+ default: {
+ appendStrBuf(c);
+ continue;
+ }
+ }
+ }
+ }
+ case PROCESSING_INSTRUCTION: {
+ for (;;) {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '\?': {
+ state = P::transition(
+ mViewSource.get(),
+ nsHtml5Tokenizer::PROCESSING_INSTRUCTION_QUESTION_MARK,
+ reconsume, pos);
+ NS_HTML5_BREAK(processinginstructionloop);
+ }
+ default: {
+ continue;
+ }
+ }
+ }
+ processinginstructionloop_end:;
+ [[fallthrough]];
+ }
+ case PROCESSING_INSTRUCTION_QUESTION_MARK: {
+ if (++pos == endPos) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ c = P::checkChar(this, buf, pos);
+ switch (c) {
+ case '>': {
+ state = P::transition(mViewSource.get(), nsHtml5Tokenizer::DATA,
+ reconsume, pos);
+ suspendIfRequestedAfterCurrentNonTextToken();
+ if (shouldSuspend) {
+ NS_HTML5_BREAK(stateloop);
+ }
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ default: {
+ state = P::transition(mViewSource.get(),
+ nsHtml5Tokenizer::PROCESSING_INSTRUCTION,
+ reconsume, pos);
+ NS_HTML5_CONTINUE(stateloop);
+ }
+ }
+ }
+ }
+ }
+ stateloop_end:;
+ flushChars(buf, pos);
+ stateSave = state;
+ returnStateSave = returnState;
+ return pos;
+ }
+
void initDoctypeFields();
template <class P>
- void adjustDoubleHyphenAndAppendToStrBufCarriageReturn();
+ inline void adjustDoubleHyphenAndAppendToStrBufCarriageReturn() {
+ P::silentCarriageReturn(this);  // Policy P's bookkeeping hook for the CR.
+ adjustDoubleHyphenAndAppendToStrBufAndErr('\n', false);  // LF replaces CR.
+ }  // NOTE(review): what the `false` argument selects is not visible here.
+
template <class P>
- void adjustDoubleHyphenAndAppendToStrBufLineFeed();
+ inline void adjustDoubleHyphenAndAppendToStrBufLineFeed() {
+ P::silentLineFeed(this);  // Policy P's bookkeeping hook for the LF.
+ adjustDoubleHyphenAndAppendToStrBufAndErr('\n', false);  // Forward the LF.
+ }  // NOTE(review): what the `false` argument selects is not visible here.
+
template <class P>
- void appendStrBufLineFeed();
+ inline void appendStrBufLineFeed() {  // LF handling for text kept in strBuf.
+ P::silentLineFeed(this);  // Policy P's bookkeeping hook for the LF.
+ appendStrBuf('\n');  // Then append the line feed itself.
+ }
+
template <class P>
- void appendStrBufCarriageReturn();
+ inline void appendStrBufCarriageReturn() {  // CR handling for strBuf text.
+ P::silentCarriageReturn(this);  // Policy P's bookkeeping hook for the CR.
+ appendStrBuf('\n');  // A line feed is appended in place of the CR.
+ }
+
template <class P>
- void emitCarriageReturn(char16_t* buf, int32_t pos);
+ inline void emitCarriageReturn(char16_t* buf, int32_t pos) {
+ P::silentCarriageReturn(this);  // Policy P's bookkeeping hook for the CR.
+ flushChars(buf, pos);  // Presumably emits the pending run ending at pos.
+ tokenHandler->characters(nsHtml5Tokenizer::LF, 0, 1);  // LF reported, not CR.
+ cstart = INT32_MAX;  // NOTE(review): looks like a "no run open" sentinel.
+ }
+
void emitReplacementCharacter(char16_t* buf, int32_t pos);
void maybeEmitReplacementCharacter(char16_t* buf, int32_t pos);
void emitPlaintextReplacementCharacter(char16_t* buf, int32_t pos);
- void setAdditionalAndRememberAmpersandLocation(char16_t add);
+ inline void setAdditionalAndRememberAmpersandLocation(char16_t add) {
+ additional = add;  // Only `additional` is written; no location is recorded here.
+ }
+
void bogusDoctype();
void bogusDoctypeWithoutQuirks();
void handleNcrValue(int32_t returnState);
@@ -434,7 +4586,13 @@ class nsHtml5Tokenizer {
private:
void emitDoctypeToken(int32_t pos);
- void suspendIfRequestedAfterCurrentNonTextToken();
+  inline void suspendIfRequestedAfterCurrentNonTextToken() {
+    if (!suspendAfterCurrentNonTextToken) {
+      return;  // No suspension request is pending.
+    }
+    suspendAfterCurrentNonTextToken = false;  // The request is consumed...
+    shouldSuspend = true;                     // ...and becomes a suspension.
+  }
+
void suspendAfterCurrentTokenIfNotInText();
bool suspensionAfterCurrentNonTextTokenPending();
@@ -442,13 +4600,29 @@ class nsHtml5Tokenizer {
bool internalEncodingDeclaration(nsHtml5String internalCharset);
private:
- void emitOrAppendTwo(const char16_t* val, int32_t returnState);
- void emitOrAppendOne(const char16_t* val, int32_t returnState);
+  inline void emitOrAppendTwo(const char16_t* val, int32_t returnState) {
+    if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
+      tokenHandler->characters(val, 0, 2);  // No mask bit set: emit directly.
+      return;
+    }
+    appendStrBuf(val[0]);  // Otherwise both code units are appended via
+    appendStrBuf(val[1]);  // appendStrBuf, in order.
+  }
+
+  inline void emitOrAppendOne(const char16_t* val, int32_t returnState) {
+    if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
+      tokenHandler->characters(val, 0, 1);  // No mask bit set: emit directly.
+      return;
+    }
+    appendStrBuf(val[0]);  // Otherwise the code unit goes via appendStrBuf.
+  }
public:
void end();
- void requestSuspension();
- bool isInDataState();
+ inline void requestSuspension() { shouldSuspend = true; }  // stateLoop polls this flag.
+
+ inline bool isInDataState() { return (stateSave == DATA); }  // stateSave is written when stateLoop exits.
+
void resetToDataState();
void loadState(nsHtml5Tokenizer* other);
void initializeWithoutStarting();
diff --git a/parser/html/nsHtml5TokenizerALU.cpp b/parser/html/nsHtml5TokenizerALU.cpp
@@ -0,0 +1,33 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsHtml5Tokenizer.h"
+#include "nsHtml5TokenizerLoopPoliciesALU.h"
+
+// Non-template entry point: runs stateLoop instantiated with
+// nsHtml5FastestPolicyALU and returns the position it reports.
+int32_t nsHtml5Tokenizer::StateLoopFastestALU(
+    int32_t state, char16_t c, int32_t pos, char16_t* buf, bool reconsume,
+    int32_t returnState, int32_t endPos) {
+  return stateLoop<nsHtml5FastestPolicyALU>(state, c, pos, buf, reconsume,
+                                            returnState, endPos);
+}
+
+// Non-template entry point: runs stateLoop instantiated with
+// nsHtml5LineColPolicyALU and returns the position it reports.
+int32_t nsHtml5Tokenizer::StateLoopLineColALU(
+    int32_t state, char16_t c, int32_t pos, char16_t* buf, bool reconsume,
+    int32_t returnState, int32_t endPos) {
+  return stateLoop<nsHtml5LineColPolicyALU>(state, c, pos, buf, reconsume,
+                                            returnState, endPos);
+}
+
+// Non-template entry point: runs stateLoop instantiated with
+// nsHtml5ViewSourcePolicyALU and returns the position it reports.
+int32_t nsHtml5Tokenizer::StateLoopViewSourceALU(
+    int32_t state, char16_t c, int32_t pos, char16_t* buf, bool reconsume,
+    int32_t returnState, int32_t endPos) {
+  return stateLoop<nsHtml5ViewSourcePolicyALU>(state, c, pos, buf, reconsume,
+                                               returnState, endPos);
+}
diff --git a/parser/html/nsHtml5TokenizerALUStubs.cpp b/parser/html/nsHtml5TokenizerALUStubs.cpp
@@ -0,0 +1,32 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsHtml5Tokenizer.h"
+
+int32_t nsHtml5Tokenizer::StateLoopFastestALU(int32_t state, char16_t c,
+ int32_t pos, char16_t* buf,
+ bool reconsume,
+ int32_t returnState,
+ int32_t endPos) {
+ MOZ_RELEASE_ASSERT(false, "Inconsistent build config");  // Stub: must never be called.
+ return 0;  // Not expected to execute; gives the function a return value.
+}
+
+int32_t nsHtml5Tokenizer::StateLoopLineColALU(int32_t state, char16_t c,
+ int32_t pos, char16_t* buf,
+ bool reconsume,
+ int32_t returnState,
+ int32_t endPos) {
+ MOZ_RELEASE_ASSERT(false, "Inconsistent build config");  // Stub: must never be called.
+ return 0;  // Not expected to execute; gives the function a return value.
+}
+
+int32_t nsHtml5Tokenizer::StateLoopViewSourceALU(int32_t state, char16_t c,
+ int32_t pos, char16_t* buf,
+ bool reconsume,
+ int32_t returnState,
+ int32_t endPos) {
+ MOZ_RELEASE_ASSERT(false, "Inconsistent build config");  // Stub: must never be called.
+ return 0;  // Not expected to execute; gives the function a return value.
+}
diff --git a/parser/html/nsHtml5TokenizerHSupplement.h b/parser/html/nsHtml5TokenizerHSupplement.h
@@ -2,14 +2,48 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-friend struct nsHtml5ViewSourcePolicy;
-friend struct nsHtml5LineColPolicy;
-friend struct nsHtml5FastestPolicy;
+friend struct nsHtml5ViewSourcePolicySIMD;
+friend struct nsHtml5ViewSourcePolicyALU;
+friend struct nsHtml5LineColPolicySIMD;
+friend struct nsHtml5LineColPolicyALU;
+friend struct nsHtml5FastestPolicySIMD;
+friend struct nsHtml5FastestPolicyALU;
private:
int32_t col;
bool nextCharOnNewLine;
+// These functions wrap the template-parametrized stateLoop and
+// stateLoopCompilerWorkaround so that each instantiation can live in its
+// own compilation unit. That serves two purposes: the units can be built
+// with different compiler flags, and each function stays small enough for
+// LLVM to perform LICM (loop-invariant code motion) on the SIMD constants
+// before it looks for inlining opportunities.
+
+int32_t StateLoopFastestSIMD(int32_t state, char16_t c, int32_t pos,
+ char16_t* buf, bool reconsume, int32_t returnState,
+ int32_t endPos);
+
+int32_t StateLoopFastestALU(int32_t state, char16_t c, int32_t pos,
+ char16_t* buf, bool reconsume, int32_t returnState,
+ int32_t endPos);
+
+int32_t StateLoopLineColSIMD(int32_t state, char16_t c, int32_t pos,
+ char16_t* buf, bool reconsume, int32_t returnState,
+ int32_t endPos);
+
+int32_t StateLoopLineColALU(int32_t state, char16_t c, int32_t pos,
+ char16_t* buf, bool reconsume, int32_t returnState,
+ int32_t endPos);
+
+int32_t StateLoopViewSourceSIMD(int32_t state, char16_t c, int32_t pos,
+ char16_t* buf, bool reconsume,
+ int32_t returnState, int32_t endPos);
+
+int32_t StateLoopViewSourceALU(int32_t state, char16_t c, int32_t pos,
+ char16_t* buf, bool reconsume,
+ int32_t returnState, int32_t endPos);
+
public:
inline int32_t getColumnNumber() { return col; }
diff --git a/parser/html/nsHtml5TokenizerLoopPolicies.h b/parser/html/nsHtml5TokenizerLoopPolicies.h
@@ -1,123 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-#ifndef nsHtml5TokenizerLoopPolicies_h
-#define nsHtml5TokenizerLoopPolicies_h
-
-/**
- * This policy does not report tokenizer transitions anywhere and does not
- * track line and column numbers. To be used for innerHTML.
- */
-struct nsHtml5FastestPolicy {
- static const bool reportErrors = false;
- static int32_t transition(nsHtml5Highlighter* aHighlighter, int32_t aState,
- bool aReconsume, int32_t aPos) {
- return aState;
- }
- static void completedNamedCharacterReference(
- nsHtml5Highlighter* aHighlighter) {}
-
- static char16_t checkChar(nsHtml5Tokenizer* aTokenizer, char16_t* buf,
- int32_t pos) {
- return buf[pos];
- }
-
- static void silentCarriageReturn(nsHtml5Tokenizer* aTokenizer) {
- aTokenizer->lastCR = true;
- }
-
- static void silentLineFeed(nsHtml5Tokenizer* aTokenizer) {}
-};
-
-/**
- * This policy does not report tokenizer transitions anywhere. To be used
- * when _not_ viewing source and when not parsing innerHTML (or other
- * script execution-preventing fragment).
- */
-struct nsHtml5LineColPolicy {
- static const bool reportErrors = false;
- static int32_t transition(nsHtml5Highlighter* aHighlighter, int32_t aState,
- bool aReconsume, int32_t aPos) {
- return aState;
- }
- static void completedNamedCharacterReference(
- nsHtml5Highlighter* aHighlighter) {}
-
- static char16_t checkChar(nsHtml5Tokenizer* aTokenizer, char16_t* buf,
- int32_t pos) {
- // The name of this method comes from the validator.
- // We aren't checking a char here. We read the next
- // UTF-16 code unit and, before returning it, adjust
- // the line and column numbers.
- char16_t c = buf[pos];
- if (MOZ_UNLIKELY(aTokenizer->nextCharOnNewLine)) {
- // By changing the line and column here instead
- // of doing so eagerly when seeing the line break
- // causes the line break itself to be considered
- // column-wise at the end of a line.
- aTokenizer->line++;
- aTokenizer->col = 1;
- aTokenizer->nextCharOnNewLine = false;
- } else if (MOZ_LIKELY(!NS_IS_LOW_SURROGATE(c))) {
- // SpiderMonkey wants to count scalar values
- // instead of UTF-16 code units. We omit low
- // surrogates from the count so that only the
- // high surrogate increments the count for
- // two-code-unit scalar values.
- //
- // It's somewhat questionable from the performance
- // perspective to make the human-perceivable column
- // count correct for non-BMP characters in the case
- // where there is a single scalar value per extended
- // grapheme cluster when even on the BMP there are
- // various cases where the scalar count doesn't make
- // much sense as a human-perceived "column count" due
- // to extended grapheme clusters consisting of more
- // than one scalar value.
- aTokenizer->col++;
- }
- return c;
- }
-
- static void silentCarriageReturn(nsHtml5Tokenizer* aTokenizer) {
- aTokenizer->nextCharOnNewLine = true;
- aTokenizer->lastCR = true;
- }
-
- static void silentLineFeed(nsHtml5Tokenizer* aTokenizer) {
- aTokenizer->nextCharOnNewLine = true;
- }
-};
-
-/**
- * This policy reports the tokenizer transitions to a highlighter. To be used
- * when viewing source.
- */
-struct nsHtml5ViewSourcePolicy {
- static const bool reportErrors = true;
- static int32_t transition(nsHtml5Highlighter* aHighlighter, int32_t aState,
- bool aReconsume, int32_t aPos) {
- return aHighlighter->Transition(aState, aReconsume, aPos);
- }
- static void completedNamedCharacterReference(
- nsHtml5Highlighter* aHighlighter) {
- aHighlighter->CompletedNamedCharacterReference();
- }
-
- static char16_t checkChar(nsHtml5Tokenizer* aTokenizer, char16_t* buf,
- int32_t pos) {
- return buf[pos];
- }
-
- static void silentCarriageReturn(nsHtml5Tokenizer* aTokenizer) {
- aTokenizer->line++;
- aTokenizer->lastCR = true;
- }
-
- static void silentLineFeed(nsHtml5Tokenizer* aTokenizer) {
- aTokenizer->line++;
- }
-};
-
-#endif // nsHtml5TokenizerLoopPolicies_h
diff --git a/parser/html/nsHtml5TokenizerLoopPoliciesALU.h b/parser/html/nsHtml5TokenizerLoopPoliciesALU.h
@@ -0,0 +1,150 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef nsHtml5TokenizerLoopPoliciesALU_h
+#define nsHtml5TokenizerLoopPoliciesALU_h
+
+/**
+ * This policy does not report tokenizer transitions anywhere and does not
+ * track line and column numbers. To be used for innerHTML. Non-SIMD version.
+ */
+struct nsHtml5FastestPolicyALU {
+ static const bool reportErrors = false;
+ MOZ_ALWAYS_INLINE_EVEN_DEBUG static int32_t transition(
+ nsHtml5Highlighter* aHighlighter, int32_t aState, bool aReconsume,
+ int32_t aPos) {
+ return aState;
+ }
+ MOZ_ALWAYS_INLINE_EVEN_DEBUG static void completedNamedCharacterReference(
+ nsHtml5Highlighter* aHighlighter) {}
+
+ MOZ_ALWAYS_INLINE_EVEN_DEBUG static int32_t accelerateAdvancementData(
+ nsHtml5Tokenizer* aTokenizer, char16_t* buf, int32_t pos,
+ int32_t endPos) {
+ return 0;
+ }
+
+ MOZ_ALWAYS_INLINE_EVEN_DEBUG static char16_t checkChar(
+ nsHtml5Tokenizer* aTokenizer, char16_t* buf, int32_t pos) {
+ return buf[pos];
+ }
+
+ MOZ_ALWAYS_INLINE_EVEN_DEBUG static void silentCarriageReturn(
+ nsHtml5Tokenizer* aTokenizer) {
+ aTokenizer->lastCR = true;
+ }
+
+ MOZ_ALWAYS_INLINE_EVEN_DEBUG static void silentLineFeed(
+ nsHtml5Tokenizer* aTokenizer) {}
+};
+
+/**
+ * This policy does not report tokenizer transitions anywhere. To be used
+ * when _not_ viewing source and when not parsing innerHTML (or other
+ * script execution-preventing fragment).
+ */
+struct nsHtml5LineColPolicyALU {
+ static const bool reportErrors = false;
+ MOZ_ALWAYS_INLINE_EVEN_DEBUG static int32_t transition(
+ nsHtml5Highlighter* aHighlighter, int32_t aState, bool aReconsume,
+ int32_t aPos) {
+ return aState;
+ }
+ MOZ_ALWAYS_INLINE_EVEN_DEBUG static void completedNamedCharacterReference(
+ nsHtml5Highlighter* aHighlighter) {}
+
+ MOZ_ALWAYS_INLINE_EVEN_DEBUG static int32_t accelerateAdvancementData(
+ nsHtml5Tokenizer* aTokenizer, char16_t* buf, int32_t pos,
+ int32_t endPos) {
+ return 0;
+ }
+
+ MOZ_ALWAYS_INLINE_EVEN_DEBUG static char16_t checkChar(
+ nsHtml5Tokenizer* aTokenizer, char16_t* buf, int32_t pos) {
+ // The name of this method comes from the validator.
+ // We aren't checking a char here. We read the next
+ // UTF-16 code unit and, before returning it, adjust
+ // the line and column numbers.
+ char16_t c = buf[pos];
+ if (MOZ_UNLIKELY(aTokenizer->nextCharOnNewLine)) {
+ // By changing the line and column here instead
+ // of doing so eagerly when seeing the line break
+ // causes the line break itself to be considered
+ // column-wise at the end of a line.
+ aTokenizer->line++;
+ aTokenizer->col = 1;
+ aTokenizer->nextCharOnNewLine = false;
+ } else if (MOZ_LIKELY(!NS_IS_LOW_SURROGATE(c))) {
+ // SpiderMonkey wants to count scalar values
+ // instead of UTF-16 code units. We omit low
+ // surrogates from the count so that only the
+ // high surrogate increments the count for
+ // two-code-unit scalar values.
+ //
+ // It's somewhat questionable from the performance
+ // perspective to make the human-perceivable column
+ // count correct for non-BMP characters in the case
+ // where there is a single scalar value per extended
+ // grapheme cluster when even on the BMP there are
+ // various cases where the scalar count doesn't make
+ // much sense as a human-perceived "column count" due
+ // to extended grapheme clusters consisting of more
+ // than one scalar value.
+ aTokenizer->col++;
+ }
+ return c;
+ }
+
+ MOZ_ALWAYS_INLINE_EVEN_DEBUG static void silentCarriageReturn(
+ nsHtml5Tokenizer* aTokenizer) {
+ aTokenizer->nextCharOnNewLine = true;
+ aTokenizer->lastCR = true;
+ }
+
+ MOZ_ALWAYS_INLINE_EVEN_DEBUG static void silentLineFeed(
+ nsHtml5Tokenizer* aTokenizer) {
+ aTokenizer->nextCharOnNewLine = true;
+ }
+};
+
+/**
+ * This policy reports the tokenizer transitions to a highlighter. To be used
+ * when viewing source.
+ */
+struct nsHtml5ViewSourcePolicyALU {
+ static const bool reportErrors = true;
+ MOZ_ALWAYS_INLINE_EVEN_DEBUG static int32_t transition(
+ nsHtml5Highlighter* aHighlighter, int32_t aState, bool aReconsume,
+ int32_t aPos) {
+ return aHighlighter->Transition(aState, aReconsume, aPos);
+ }
+ MOZ_ALWAYS_INLINE_EVEN_DEBUG static void completedNamedCharacterReference(
+ nsHtml5Highlighter* aHighlighter) {
+ aHighlighter->CompletedNamedCharacterReference();
+ }
+
+ MOZ_ALWAYS_INLINE_EVEN_DEBUG static int32_t accelerateAdvancementData(
+ nsHtml5Tokenizer* aTokenizer, char16_t* buf, int32_t pos,
+ int32_t endPos) {
+ return 0;
+ }
+
+ MOZ_ALWAYS_INLINE_EVEN_DEBUG static char16_t checkChar(
+ nsHtml5Tokenizer* aTokenizer, char16_t* buf, int32_t pos) {
+ return buf[pos];
+ }
+
+ MOZ_ALWAYS_INLINE_EVEN_DEBUG static void silentCarriageReturn(
+ nsHtml5Tokenizer* aTokenizer) {
+ aTokenizer->line++;
+ aTokenizer->lastCR = true;
+ }
+
+ MOZ_ALWAYS_INLINE_EVEN_DEBUG static void silentLineFeed(
+ nsHtml5Tokenizer* aTokenizer) {
+ aTokenizer->line++;
+ }
+};
+
+#endif // nsHtml5TokenizerLoopPoliciesALU_h
diff --git a/parser/html/nsHtml5TokenizerLoopPoliciesSIMD.h b/parser/html/nsHtml5TokenizerLoopPoliciesSIMD.h
@@ -0,0 +1,211 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef nsHtml5TokenizerLoopPoliciesSIMD_h
+#define nsHtml5TokenizerLoopPoliciesSIMD_h
+
+#include "mozilla/Attributes.h"
+#include "mozilla/htmlaccel/htmlaccelNotInline.h"
+
+/**
+ * This policy does not report tokenizer transitions anywhere and does not
+ * track line and column numbers. To be used for innerHTML.
+ *
+ * This is the SIMD version for aarch64 and SSSE3-enabled x86/x86_64.
+ */
+struct nsHtml5FastestPolicySIMD {
+ static const bool reportErrors = false;
+ MOZ_ALWAYS_INLINE_EVEN_DEBUG static int32_t transition(
+ nsHtml5Highlighter* aHighlighter, int32_t aState, bool aReconsume,
+ int32_t aPos) {
+ return aState;
+ }
+ MOZ_ALWAYS_INLINE_EVEN_DEBUG static void completedNamedCharacterReference(
+ nsHtml5Highlighter* aHighlighter) {}
+
+ MOZ_ALWAYS_INLINE_EVEN_DEBUG static int32_t accelerateAdvancementData(
+ nsHtml5Tokenizer* aTokenizer, char16_t* buf, int32_t pos,
+ int32_t endPos) {
+ // We need to check bounds for the `buf[pos]` access below to be OK.
+ // Instead of just checking that `pos` isn't equal to `endPos`, let's
+    // check that we have at least one SIMD stride of data in the same branch,
+ // since if we don't have at least one SIMD stride of data, we don't
+ // need to proceed.
+ if (endPos - pos < 16) {
+ return 0;
+ }
+ if (buf[pos] == '<') {
+ // Quickly handle the case where there is one tag immediately
+ // after another and the very first thing in the data state is a
+ // less-than sign.
+ return 0;
+ }
+ return mozilla::htmlaccel::AccelerateDataFastest(buf + pos, buf + endPos);
+ }
+
+ MOZ_ALWAYS_INLINE_EVEN_DEBUG static char16_t checkChar(
+ nsHtml5Tokenizer* aTokenizer, char16_t* buf, int32_t pos) {
+ return buf[pos];
+ }
+
+ MOZ_ALWAYS_INLINE_EVEN_DEBUG static void silentCarriageReturn(
+ nsHtml5Tokenizer* aTokenizer) {
+ aTokenizer->lastCR = true;
+ }
+
+ MOZ_ALWAYS_INLINE_EVEN_DEBUG static void silentLineFeed(
+ nsHtml5Tokenizer* aTokenizer) {}
+};
+
+/**
+ * This policy does not report tokenizer transitions anywhere. To be used
+ * when _not_ viewing source and when not parsing innerHTML (or other
+ * script execution-preventing fragment).
+ */
+struct nsHtml5LineColPolicySIMD {
+ static const bool reportErrors = false;
+ MOZ_ALWAYS_INLINE_EVEN_DEBUG static int32_t transition(
+ nsHtml5Highlighter* aHighlighter, int32_t aState, bool aReconsume,
+ int32_t aPos) {
+ return aState;
+ }
+ MOZ_ALWAYS_INLINE_EVEN_DEBUG static void completedNamedCharacterReference(
+ nsHtml5Highlighter* aHighlighter) {}
+
+ MOZ_ALWAYS_INLINE_EVEN_DEBUG static int32_t accelerateAdvancementData(
+ nsHtml5Tokenizer* aTokenizer, char16_t* buf, int32_t pos,
+ int32_t endPos) {
+ // We need to check bounds for the `buf[pos]` access below to be OK.
+ // Instead of just checking that `pos` isn't equal to `endPos`, let's
+    // check that we have at least one SIMD stride of data in the same branch,
+ // since if we don't have at least one SIMD stride of data, we don't
+ // need to proceed.
+ if (endPos - pos < 16) {
+ return 0;
+ }
+ char16_t c = buf[pos];
+ if (c == '<' || c == '\n') {
+ // Quickly handle the case where there is one tag immediately
+ // after another and the very first thing in the data state is a
+ // less-than sign and the case where a tag is immediately followed
+ // by a line feed.
+ return 0;
+ }
+ if (MOZ_UNLIKELY(aTokenizer->nextCharOnNewLine)) {
+ // By changing the line and column here instead
+ // of doing so eagerly when seeing the line break
+ // causes the line break itself to be considered
+ // column-wise at the end of a line.
+ aTokenizer->line++;
+ aTokenizer->col = 1;
+ aTokenizer->nextCharOnNewLine = false;
+ }
+ return mozilla::htmlaccel::AccelerateDataLineCol(buf + pos, buf + endPos);
+ }
+
+ MOZ_ALWAYS_INLINE_EVEN_DEBUG static char16_t checkChar(
+ nsHtml5Tokenizer* aTokenizer, char16_t* buf, int32_t pos) {
+ // The name of this method comes from the validator.
+ // We aren't checking a char here. We read the next
+ // UTF-16 code unit and, before returning it, adjust
+ // the line and column numbers.
+ char16_t c = buf[pos];
+ if (MOZ_UNLIKELY(aTokenizer->nextCharOnNewLine)) {
+ // By changing the line and column here instead
+ // of doing so eagerly when seeing the line break
+ // causes the line break itself to be considered
+ // column-wise at the end of a line.
+ aTokenizer->line++;
+ aTokenizer->col = 1;
+ aTokenizer->nextCharOnNewLine = false;
+ } else if (MOZ_LIKELY(!NS_IS_LOW_SURROGATE(c))) {
+ // SpiderMonkey wants to count scalar values
+ // instead of UTF-16 code units. We omit low
+ // surrogates from the count so that only the
+ // high surrogate increments the count for
+ // two-code-unit scalar values.
+ //
+ // It's somewhat questionable from the performance
+ // perspective to make the human-perceivable column
+ // count correct for non-BMP characters in the case
+ // where there is a single scalar value per extended
+ // grapheme cluster when even on the BMP there are
+ // various cases where the scalar count doesn't make
+ // much sense as a human-perceived "column count" due
+ // to extended grapheme clusters consisting of more
+ // than one scalar value.
+ aTokenizer->col++;
+ }
+ return c;
+ }
+
+ MOZ_ALWAYS_INLINE_EVEN_DEBUG static void silentCarriageReturn(
+ nsHtml5Tokenizer* aTokenizer) {
+ aTokenizer->nextCharOnNewLine = true;
+ aTokenizer->lastCR = true;
+ }
+
+ MOZ_ALWAYS_INLINE_EVEN_DEBUG static void silentLineFeed(
+ nsHtml5Tokenizer* aTokenizer) {
+ aTokenizer->nextCharOnNewLine = true;
+ }
+};
+
+/**
+ * This policy reports the tokenizer transitions to a highlighter. To be used
+ * when viewing source.
+ */
+struct nsHtml5ViewSourcePolicySIMD {
+ static const bool reportErrors = true;
+ MOZ_ALWAYS_INLINE_EVEN_DEBUG static int32_t transition(
+ nsHtml5Highlighter* aHighlighter, int32_t aState, bool aReconsume,
+ int32_t aPos) {
+ return aHighlighter->Transition(aState, aReconsume, aPos);
+ }
+ MOZ_ALWAYS_INLINE_EVEN_DEBUG static void completedNamedCharacterReference(
+ nsHtml5Highlighter* aHighlighter) {
+ aHighlighter->CompletedNamedCharacterReference();
+ }
+
+ MOZ_ALWAYS_INLINE_EVEN_DEBUG static int32_t accelerateAdvancementData(
+ nsHtml5Tokenizer* aTokenizer, char16_t* buf, int32_t pos,
+ int32_t endPos) {
+ // We need to check bounds for the `buf[pos]` access below to be OK.
+ // Instead of just checking that `pos` isn't equal to `endPos`, let's
+    // check that we have at least one SIMD stride of data in the same branch,
+ // since if we don't have at least one SIMD stride of data, we don't
+ // need to proceed.
+ if (endPos - pos < 16) {
+ return 0;
+ }
+ char16_t c = buf[pos];
+ if (c == '<' || c == '\n') {
+ // Quickly handle the case where there is one tag immediately
+ // after another and the very first thing in the data state is a
+ // less-than sign and the case where a tag is immediately followed
+ // by a line feed.
+ return 0;
+ }
+ return mozilla::htmlaccel::AccelerateDataViewSource(buf + pos,
+ buf + endPos);
+ }
+
+ MOZ_ALWAYS_INLINE_EVEN_DEBUG static char16_t checkChar(
+ nsHtml5Tokenizer* aTokenizer, char16_t* buf, int32_t pos) {
+ return buf[pos];
+ }
+
+ MOZ_ALWAYS_INLINE_EVEN_DEBUG static void silentCarriageReturn(
+ nsHtml5Tokenizer* aTokenizer) {
+ aTokenizer->line++;
+ aTokenizer->lastCR = true;
+ }
+
+ MOZ_ALWAYS_INLINE_EVEN_DEBUG static void silentLineFeed(
+ nsHtml5Tokenizer* aTokenizer) {
+ aTokenizer->line++;
+ }
+};
+
+#endif // nsHtml5TokenizerLoopPoliciesSIMD_h
diff --git a/parser/html/nsHtml5TokenizerSIMD.cpp b/parser/html/nsHtml5TokenizerSIMD.cpp
@@ -0,0 +1,33 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsHtml5Tokenizer.h"
+#include "nsHtml5TokenizerLoopPoliciesSIMD.h"
+
+int32_t nsHtml5Tokenizer::StateLoopFastestSIMD(int32_t state, char16_t c,
+ int32_t pos, char16_t* buf,
+ bool reconsume,
+ int32_t returnState,
+ int32_t endPos) {
+ return stateLoop<nsHtml5FastestPolicySIMD>(state, c, pos, buf, reconsume,
+ returnState, endPos);
+}
+
+int32_t nsHtml5Tokenizer::StateLoopLineColSIMD(int32_t state, char16_t c,
+ int32_t pos, char16_t* buf,
+ bool reconsume,
+ int32_t returnState,
+ int32_t endPos) {
+ return stateLoop<nsHtml5LineColPolicySIMD>(state, c, pos, buf, reconsume,
+ returnState, endPos);
+}
+
+int32_t nsHtml5Tokenizer::StateLoopViewSourceSIMD(int32_t state, char16_t c,
+ int32_t pos, char16_t* buf,
+ bool reconsume,
+ int32_t returnState,
+ int32_t endPos) {
+ return stateLoop<nsHtml5ViewSourcePolicySIMD>(state, c, pos, buf, reconsume,
+ returnState, endPos);
+}
diff --git a/parser/html/nsHtml5TokenizerSIMDStubs.cpp b/parser/html/nsHtml5TokenizerSIMDStubs.cpp
@@ -0,0 +1,32 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsHtml5Tokenizer.h"
+
+int32_t nsHtml5Tokenizer::StateLoopFastestSIMD(int32_t state, char16_t c,
+ int32_t pos, char16_t* buf,
+ bool reconsume,
+ int32_t returnState,
+ int32_t endPos) {
+ MOZ_RELEASE_ASSERT(false, "Inconsistent build config");
+ return 0;
+}
+
+int32_t nsHtml5Tokenizer::StateLoopLineColSIMD(int32_t state, char16_t c,
+ int32_t pos, char16_t* buf,
+ bool reconsume,
+ int32_t returnState,
+ int32_t endPos) {
+ MOZ_RELEASE_ASSERT(false, "Inconsistent build config");
+ return 0;
+}
+
+int32_t nsHtml5Tokenizer::StateLoopViewSourceSIMD(int32_t state, char16_t c,
+ int32_t pos, char16_t* buf,
+ bool reconsume,
+ int32_t returnState,
+ int32_t endPos) {
+ MOZ_RELEASE_ASSERT(false, "Inconsistent build config");
+ return 0;
+}
diff --git a/parser/htmlaccel/gtest/TestHtmlSimd.cpp b/parser/htmlaccel/gtest/TestHtmlSimd.cpp
@@ -0,0 +1,62 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "gtest/gtest.h"
+#include "mozilla/htmlaccel/htmlaccelNotInline.h"
+
+// Match in the first half
+const char16_t HTML_SIMD_TEST_INPUT_LOW[16] = {
+ 'a',
+ 0xD834, // Surrogate pair
+ 0xDD65, '\n', '<', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p',
+};
+
+// Match in the second half
+const char16_t HTML_SIMD_TEST_INPUT_HIGH[16] = {
+ 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'a',
+ 0xD834, // Surrogate pair
+ 0xDD65, '\n', '<', 'f', 'g', 'h',
+};
+
+TEST(HtmlSimd, TestTextNodeAllowSurrogatesAndLf)
+{
+ int32_t index = mozilla::htmlaccel::AccelerateDataFastest(
+ HTML_SIMD_TEST_INPUT_LOW, HTML_SIMD_TEST_INPUT_LOW + 16);
+ ASSERT_EQ(index, 4);
+}
+
+TEST(HtmlSimd, TestTextNodeAllowSurrogatesDisallowLf)
+{
+ int32_t index = mozilla::htmlaccel::AccelerateDataViewSource(
+ HTML_SIMD_TEST_INPUT_LOW, HTML_SIMD_TEST_INPUT_LOW + 16);
+ ASSERT_EQ(index, 3);
+}
+
+TEST(HtmlSimd, TestTextNodeDisallowSurrogatesAndLf)
+{
+ int32_t index = mozilla::htmlaccel::AccelerateDataLineCol(
+ HTML_SIMD_TEST_INPUT_LOW, HTML_SIMD_TEST_INPUT_LOW + 16);
+ ASSERT_EQ(index, 1);
+}
+
+TEST(HtmlSimd, TestTextNodeAllowSurrogatesAndLfHigh)
+{
+ int32_t index = mozilla::htmlaccel::AccelerateDataFastest(
+ HTML_SIMD_TEST_INPUT_HIGH, HTML_SIMD_TEST_INPUT_HIGH + 16);
+ ASSERT_EQ(index, 4 + 8);
+}
+
+TEST(HtmlSimd, TestTextNodeAllowSurrogatesDisallowLfHigh)
+{
+ int32_t index = mozilla::htmlaccel::AccelerateDataViewSource(
+ HTML_SIMD_TEST_INPUT_HIGH, HTML_SIMD_TEST_INPUT_HIGH + 16);
+ ASSERT_EQ(index, 3 + 8);
+}
+
+TEST(HtmlSimd, TestTextNodeDisallowSurrogatesAndLfHigh)
+{
+ int32_t index = mozilla::htmlaccel::AccelerateDataLineCol(
+ HTML_SIMD_TEST_INPUT_HIGH, HTML_SIMD_TEST_INPUT_HIGH + 16);
+ ASSERT_EQ(index, 1 + 8);
+}
diff --git a/parser/htmlaccel/gtest/moz.build b/parser/htmlaccel/gtest/moz.build
@@ -0,0 +1,15 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+if CONFIG["TARGET_CPU"] == "x86_64" or (
+ CONFIG["TARGET_CPU"] == "aarch64" and CONFIG["TARGET_ENDIANNESS"] == "little"
+):
+ SOURCES += {
+ "TestHtmlSimd.cpp",
+ }
+ SOURCES["TestHtmlSimd.cpp"].flags += CONFIG["HTML_ACCEL_FLAGS"]
+
+FINAL_LIBRARY = "xul-gtest"
diff --git a/parser/htmlaccel/htmlaccel.h b/parser/htmlaccel/htmlaccel.h
@@ -0,0 +1,322 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef mozilla_htmlaccel_htmlaccel_h
+#define mozilla_htmlaccel_htmlaccel_h
+
+#include <string.h>
+#include <stdint.h>
+
+// Avoid adding more Gecko-specific headers to keep it easy enough to
+// copy and paste the contents of this file to Compiler Explorer.
+#include "mozilla/Attributes.h"
+
+// This file provides SIMD code for skipping over characters that
+// the caller doesn't need to act upon. For example, this code can
+// skip over characters that the HTML tokenizer doesn't need to handle
+// specially in a given state or this code could be used to skip over
+// characters that don't need to be escaped in an HTML serializer.
+
+// ISA SUPPORT: Do not include this file unless the compilation unit is
+// being compiled either for little-endian aarch64 or for x86/x86_64 with
+// at least SSSE3 enabled.
+//
+// It's probably feasible to extend this to support little-endian POWER
+// by defining
+// MOZ_ALWAYS_INLINE_EVEN_DEBUG uint8x16_t TableLookup(uint8x16_t table,
+// uint8x16_t nibbles) {
+// return vec_perm(table, table, nibbles);
+// }
+// but since I don't have a little-endian POWER system to test with,
+// this is left as an exercise to the reader. (The x86/x86_64 reduction
+// code should be portable to POWER10 using vec_extractm and the aarch64
+// reduction code should be portable to older POWER using vec_max.)
+//
+// ARMv7 is deliberately not supported due to vqtbl1q_u8 being a newer
+// addition to NEON.
+#if !defined(__LITTLE_ENDIAN__) && __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
+#  error "A little-endian target is required."
+#endif
+#if !(defined(__aarch64__) || defined(__SSSE3__))
+# error "Must be targeting aarch64 or SSSE3."
+#endif
+
+// NOTE: This file uses GCC/clang built-ins that provide SIMD portability.
+// Compared to pretending unawareness of what arm_neon.h and tmmintrin.h
+// map to in GCC and clang, this has the benefit that the code is not stuck
+// at an SSSE3 local maximum but adapts maximally to upgrades to SSE 4.2,
+// AVX2, and BMI. (Yes, enabling BMI seems to affect more than just
+// __builtin_ctz!)
+// (We need to check for __clang__, because clang-cl does not define __GNUC__.)
+#if !(defined(__GNUC__) || defined(__clang__))
+# error "A compiler that supports GCC-style portable SIMD is required."
+#endif
+
+// # General
+//
+// There is an entry point per combination of what characters terminate
+// the acceleration loop (i.e. characters that the HTML tokenizer would not
+// simply skip over). The shared implementation code is inlined into these
+// FFI entry point functions, so the parametrization made inside the FFI
+// functions constant-propagates through the implementation internals.
+//
+// The code examines 16 UTF-16 code units at a time as two 128-bit SIMD
+// vectors. First, the bytes are regrouped so that one SIMD vector
+// contains the high halves of the UTF-16 code units (zeros for ASCII/Basic
+// Latin) and another one contains the low halves.
+//
+// In the case of the low half, we mask the vector to take the low 4 bits of
+// each 8-bit value and do a lookup from a lookup table contained in a SIMD
+// vector. The 4 bits index into 16 lanes of the other SIMD vector such that
+// we get a vector where the positions corresponding to positions of the
+// original code units contain the 8-bit value looked up by the 4-bit
+// index.
+//
+// The lookup operation is available unconditionally on aarch64. On
+// x86/x86_64, it is part of the SSSE3 instruction set extension, which is
+// why on x86/x86_64 we must not call into this code unless SSSE3 is
+// available. (Each additional level of compiling this code with SSE4.2,
+// AVX, or AVX + BMI makes this code shorter, which presumably means more
+// efficient, so instead of compiling this just with SSSE3, we compile this
+// with AVX+BMI on x86_64, considering that CPUs with such capabilities
+// have been available for 12 years at the time of landing this code.)
+//
+// The lookup table contains the loop-terminating ASCII characters in the
+// positions given by their low 4 bits. For example, the less-than sign is
+// U+003C, so the value 0x3C is at index 0xC (decimal 12). Positions that
+// don’t correspond to a character of interest have the value 1, except lane
+// 1 has the placeholder value 2. This way, characters that we don’t want to
+// match anything in the lookup table get a non-matching placeholder: U+0001
+// gets compared with 2 (semantically U+0002) and everything else not of
+// interest gets compared with 1 (semantically U+0001) to produce a
+// non-matching lane.
+//
+// This means that instead of comparing the vector of the low halves of the
+// UTF-16 code units against multiple constant vectors each filled in all
+// lanes with a given ASCII character of interest, the table lookup gives us
+// one vector to compare against where each lane can have a different ASCII
+// character of interest to compare with.
+//
+// This requires the ASCII characters of interest to have mutually distinct
+// low 4 bits. This is true for U+0000, &, <, LF, CR, ", and ', but,
+// unfortunately, CR, ] and - share the low 4 bits, so cases where we need
+// to include a check for ] or - need to do a separate check, since CR is
+// always in the lookup table. (Checks for ", ', ], and - are not here at
+// this time but will come in follow-up patches.)
+//
+// From these operations, we get a vector of 16 8-bit mask lanes where a
+// lane is 0xFF if the low 8 bits of the UTF-16 code unit matched an ASCII
+// character that terminates the loop and 0x00 otherwise. We lane-wise
+// compare the high halves with zero and AND the resulting mask vector
+// together with the mask vector that resulted from processing the low 8
+// bits to confirm which low 8 bits had 0 as the high 8 bits, i.e. the
+// UTF-16 code unit really was Basic Latin.
+//
+// If we have a configuration that requires terminating the loop on
+// surrogates, we check the vector containing the high halves of the UTF-16
+// code units for surrogates (by masking certain high bits to compare them
+// with a constant) and OR the resulting mask vector together with the
+// vector computed above.
+//
+// Now we have a vector of 16 8-bit mask lanes that corresponds to the input
+// of 16 UTF-16 code units to indicate which code units in the run of 16
+// UTF-16 code units require terminating the loop (i.e. must not be skipped
+// over). At this point, the handling diverges for x86/x86_64 and aarch64.
+//
+// ## x86/x86_64
+//
+// We convert the SIMD mask into bits in an ALU register. The operation
+// returns a 32-bit type, but only the low 16 bits can be non-zero. If the
+// integer is non-zero, the loop terminates, since some lane in the mask was
+// non-zero. In this case, we return the number of trailing zeros in the
+// integer. (We already know we must have a non-zero bit somewhere in the low
+// 16 bits, so we can’t end up counting to the high half of the 32-bit type.)
+// Due to the little-endian semantics, the first UTF-16 code unit in the
+// input corresponds to the least-significant bit in the integer, so when the
+// first UTF-16 code unit in the input is unskippable, the least-significant
+// bit in the integer is 1, so there are 0 trailing zeros, i.e. 0 skippable
+// UTF-16 code units.
+//
+// ## aarch64
+//
+// We want to know if any lane in the mask is non-zero to decide whether to
+// terminate the loop. If there is a non-zero lane, we want to know the
+// position of the first (in the content order of the input UTF-16 text)
+// non-zero lane. To accomplish these goals, we bitwise AND the mask vector
+// with a vector of 16 constants. Since ANDing with a mask lane set to zero
+// results in zero, we need all 16 constants to be non-zero. Yet, we need to
+// be able to accommodate the possibility of first lane in content order
+// being set, which means we need to compute 0 as the result. To be able to
+// compute 0 but have the constants be non-zero, the constants are numbers
+// that need to be subtracted from 16. That is, the constant vector has lanes
+// set to numbers from 16 to 1 (inclusive). We do the reduction of the
+// resulting SIMD vector to an ALU integer by taking the value of the lane
+// with the largest value.
+//
+// If no mask lane was set, the max operation results in 0, so if the
+// integer is zero, the loop continues. Otherwise, we get the number of
+// skippable UTF-16 code units by subtracting the integer from 16. That is,
+// if the first UTF-16 unit is unskippable, we get 16 as the max lane value
+// and 16-16=0.
+//
+// # Alignment
+//
+// These functions use unaligned SIMD loads, because alignment
+// doesn't matter on aarch64 CPUs or on x86_64 CPUs from the most
+// recent decade or so. It's not worthwhile to add complexity for
+// old CPUs.
+//
+// # Inlining
+//
+// The public functions here are expected to be called from a loop. To give
+// LICM the opportunity to hoist the SIMD constants out of the loop, make
+// sure that every function on the path from the loop to here is declared
+// MOZ_ALWAYS_INLINE_EVEN_DEBUG and that all these and the loop itself are
+// compiled with the same instruction set extension flags (if applicable).
+//
+// # Acknowledgments
+//
+// https://lemire.me/blog/2024/06/08/scan-html-faster-with-simd-instructions-chrome-edition/
+
+#if defined(__aarch64__)
+
+# include <arm_neon.h>
+
+#else // x86/x86_64
+
+# include <tmmintrin.h>
+// Using syntax that clang-tidy doesn't like to match GCC guidance.
+typedef uint8_t uint8x16_t __attribute__((vector_size(16)));
+
+#endif
+
+namespace mozilla::htmlaccel {
+
+namespace detail {
+
+#if defined(__aarch64__)
+// The idea is that when this is ANDed with the mask, we get 0 in the
+// non-match positions and the leftmost match ends up with the highest number.
+// This way, the max value of the result is zero if all positions
+// are non-match, and otherwise we get a value that when subtracted from
+// 16 indicates the index of the leftmost match.
+const uint8x16_t INVERTED_ADVANCES = {16, 15, 14, 13, 12, 11, 10, 9,
+ 8, 7, 6, 5, 4, 3, 2, 1};
+
+MOZ_ALWAYS_INLINE_EVEN_DEBUG uint8x16_t TableLookup(uint8x16_t aTable,
+ uint8x16_t aNibbles) {
+ return vqtbl1q_u8(aTable, aNibbles);
+}
+
+#else // x86/x86_64
+
+MOZ_ALWAYS_INLINE_EVEN_DEBUG uint8x16_t TableLookup(uint8x16_t aTable,
+ uint8x16_t aNibbles) {
+ // GCC wants reinterpret_cast to turn the __m128i result into uint8x16_t.
+ return reinterpret_cast<uint8x16_t>(_mm_shuffle_epi8(aTable, aNibbles));
+}
+
+#endif
+
+// These formulations optimize nicely, so no point in trying something fancier
+// to fill all lanes with the same byte.
+const uint8x16_t ALL_ZEROS = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+const uint8x16_t NIBBLE_MASK = {0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF,
+ 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF};
+const uint8x16_t SURROGATE_MASK = {0xF8, 0xF8, 0xF8, 0xF8, 0xF8, 0xF8,
+ 0xF8, 0xF8, 0xF8, 0xF8, 0xF8, 0xF8,
+ 0xF8, 0xF8, 0xF8, 0xF8};
+const uint8x16_t SURROGATE_MATCH = {0xD8, 0xD8, 0xD8, 0xD8, 0xD8, 0xD8,
+ 0xD8, 0xD8, 0xD8, 0xD8, 0xD8, 0xD8,
+ 0xD8, 0xD8, 0xD8, 0xD8};
+
+// The approach here supports disallowing up to 16 different
+// characters that 1) are in the Latin1 range, i.e. U+00FF or
+// below, and 2) do not have the lowest 4 bits in common with
+// each other.
+//
+// The code point value of each disallowed character needs
+// to be placed in the vector at the position indexed by the
+// low 4 bits of the character (low four bits 0 is the leftmost
+// position and low four bits 15 is the rightmost position).
+//
+// U+0001 neither occurs in typical HTML nor is one of the
+// code points we care about, so use 1 as the non-matching
+// value. We do care about U+0000, unfortunately.
+// We use U+0002 at position 1 to make sure it doesn't
+// match, either. That is, we put 1 in the positions we
+// don't care about except we put 2 at position 1.
+
+/// Disallow U+0000, less-than, ampersand, and carriage return.
+const uint8x16_t ZERO_LT_AMP_CR = {0, 2, 1, 1, 1, 1, '&', 1,
+ 1, 1, 1, 1, '<', '\r', 1, 1};
+/// Disallow U+0000, less-than, ampersand, carriage return, and line feed.
+const uint8x16_t ZERO_LT_AMP_CR_LF = {0, 2, 1, 1, 1, 1, '&', 1,
+ 1, 1, '\n', 1, '<', '\r', 1, 1};
+
+/// Compute a 16-lane mask for 16 UTF-16 code units, where a lane
+/// is 0x00 if OK to skip and 0xFF if not OK to skip.
+MOZ_ALWAYS_INLINE_EVEN_DEBUG uint8x16_t
+StrideToMask(const char16_t* aArr /* len = 16 */, uint8x16_t aTable,
+ bool aAllowSurrogates) {
+ uint8x16_t first;
+ uint8x16_t second;
+ // memcpy generates a single unaligned load instruction with both ISAs.
+ memcpy(&first, aArr, 16);  // Bytes of code units 0..7.
+ memcpy(&second, aArr + 8, 16);  // Bytes of code units 8..15.
+ // Each shuffle maps to a single instruction on aarch64.
+ // On x86/x86_64, how efficiently these shuffles map to instructions
+ // depends on the level of instruction set extensions chosen, which
+ // is the main reason that we compile this file at a higher extension
+ // level than the minimum SSSE3 (and the main reason why this file
+ // uses GNU C portable SIMD instead of sticking to what's in the
+ // Intel-defined headers).
+ uint8x16_t low_halves = __builtin_shufflevector(
+ first, second, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30);
+ uint8x16_t high_halves = __builtin_shufflevector(
+ first, second, 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31);
+ uint8x16_t high_half_matches = high_halves == ALL_ZEROS;
+ uint8x16_t low_half_matches =
+ low_halves == TableLookup(aTable, low_halves & NIBBLE_MASK);
+ uint8x16_t ret = low_half_matches & high_half_matches;
+ if (!aAllowSurrogates) { // Assumed to be constant-propagated
+ ret |= (high_halves & SURROGATE_MASK) == SURROGATE_MATCH;
+ }
+ return ret;
+}
+
+MOZ_ALWAYS_INLINE_EVEN_DEBUG int32_t AccelerateTextNode(const char16_t* aInput,
+ const char16_t* aEnd,
+ uint8x16_t aTable,
+ bool aAllowSurrogates) {
+ const char16_t* current = aInput;
+ while (aEnd - current >= 16) {  // Only full 16-code-unit strides.
+ uint8x16_t mask = StrideToMask(current, aTable, aAllowSurrogates);
+#if defined(__aarch64__)
+ uint8_t max = vmaxvq_u8(mask & INVERTED_ADVANCES);
+ if (max != 0) {
+ return int32_t((current - aInput) + 16 - max);  // Offset of first match.
+ }
+#else // x86/x86_64
+ int int_mask = _mm_movemask_epi8(mask);
+ if (int_mask != 0) {
+ // The least-significant bit in the integer corresponds to
+ // the first SIMD lane in text order. Hence, we need to count
+ // trailing zeros. We already checked that the bits are not
+ // all zeros, so __builtin_ctz isn't UB.
+ return int32_t((current - aInput) + __builtin_ctz(int_mask));
+ }
+#endif
+ current += 16;
+ }
+ return int32_t(current - aInput);  // A tail of < 16 units is not examined.
+}
+
+} // namespace detail
+
+// Public entry points are in htmlaccelNotInline.h for now.
+
+} // namespace mozilla::htmlaccel
+
+#endif // mozilla_htmlaccel_htmlaccel_h
diff --git a/parser/htmlaccel/htmlaccelEnabled.h b/parser/htmlaccel/htmlaccelEnabled.h
@@ -0,0 +1,30 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef mozilla_htmlaccel_htmlaccelEnabled_h
+#define mozilla_htmlaccel_htmlaccelEnabled_h
+
+#if defined(__x86_64__)
+# include "mozilla/SSE.h"
+#endif
+
+namespace mozilla::htmlaccel {
+
+/// This function is appropriate to call when the SIMD path is compiled
+/// with `HTML_ACCEL_FLAGS`.
+///
+/// Keep this in sync with `HTML_ACCEL_FLAGS` in `toolchain.configure`.
+inline bool htmlaccelEnabled() {
+#if defined(__aarch64__) && defined(__LITTLE_ENDIAN__)
+ return true;
+#elif defined(__x86_64__)
+ return mozilla::supports_bmi() && mozilla::supports_avx();
+#else
+ return false;
+#endif
+}
+
+} // namespace mozilla::htmlaccel
+
+#endif // mozilla_htmlaccel_htmlaccelEnabled_h
diff --git a/parser/htmlaccel/htmlaccelNotInline.cpp b/parser/htmlaccel/htmlaccelNotInline.cpp
@@ -0,0 +1,30 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/htmlaccel/htmlaccel.h"
+#include "mozilla/htmlaccel/htmlaccelNotInline.h"
+
+namespace mozilla::htmlaccel {
+
+/// The innerHTML / DOMParser case for the data state in the HTML parser
+MOZ_NEVER_INLINE int32_t AccelerateDataFastest(const char16_t* aPtr,
+ const char16_t* aEnd) {
+ return detail::AccelerateTextNode(aPtr, aEnd, detail::ZERO_LT_AMP_CR, true);
+}
+
+/// View Source case for the data state in the HTML parser
+MOZ_NEVER_INLINE int32_t AccelerateDataViewSource(const char16_t* aPtr,
+ const char16_t* aEnd) {
+ return detail::AccelerateTextNode(aPtr, aEnd, detail::ZERO_LT_AMP_CR_LF,
+ true);
+}
+
+/// Normal network case for the data state in the HTML parser
+MOZ_NEVER_INLINE int32_t AccelerateDataLineCol(const char16_t* aPtr,
+ const char16_t* aEnd) {
+ return detail::AccelerateTextNode(aPtr, aEnd, detail::ZERO_LT_AMP_CR_LF,
+ false);
+}
+
+} // namespace mozilla::htmlaccel
diff --git a/parser/htmlaccel/htmlaccelNotInline.h b/parser/htmlaccel/htmlaccelNotInline.h
@@ -0,0 +1,34 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef mozilla_htmlaccel_htmlaccelNotInline_h
+#define mozilla_htmlaccel_htmlaccelNotInline_h
+
+#include "mozilla/Attributes.h"
+
+namespace mozilla::htmlaccel {
+// Logically these should be MOZ_ALWAYS_INLINE_EVEN_DEBUG if LLVM was working
+// as expected. However, these are MOZ_NEVER_INLINE to work around
+// https://github.com/llvm/llvm-project/issues/160886 . This way, we get
+// a little bit of LICM for the SIMD constants that need to be loaded
+// from the constant pool instead of getting materialized by splatting
+// an immediate. Once the LLVM bug is fixed, these should be changed
+// to MOZ_ALWAYS_INLINE_EVEN_DEBUG to allow the constants to move further
+// up to the top of nsHtml5Tokenizer::stateLoop.
+
+/// The innerHTML / DOMParser case for the data state in the HTML parser
+MOZ_NEVER_INLINE int32_t AccelerateDataFastest(const char16_t* aPtr,
+ const char16_t* aEnd);
+
+/// View Source case for the data state in the HTML parser
+MOZ_NEVER_INLINE int32_t AccelerateDataViewSource(const char16_t* aPtr,
+ const char16_t* aEnd);
+
+/// Normal network case for the data state in the HTML parser
+MOZ_NEVER_INLINE int32_t AccelerateDataLineCol(const char16_t* aPtr,
+ const char16_t* aEnd);
+
+} // namespace mozilla::htmlaccel
+
+#endif // mozilla_htmlaccel_htmlaccelNotInline_h
diff --git a/parser/htmlaccel/moz.build b/parser/htmlaccel/moz.build
@@ -0,0 +1,29 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+EXPORTS.mozilla.htmlaccel += [
+ "htmlaccel.h",
+ "htmlaccelEnabled.h",
+ "htmlaccelNotInline.h",
+]
+
+# Make sure the result is consistent with mozilla::htmlaccel::htmlaccelEnabled().
+#
+# Due to https://github.com/llvm/llvm-project/issues/160886, the entry points
+# need to be _not_ inline and, therefore, need a compilation unit. This should
+# go away once the LLVM bug is fixed.
+
+if (CONFIG["TARGET_CPU"] == "x86_64") or (
+ CONFIG["TARGET_CPU"] == "aarch64" and CONFIG["TARGET_ENDIANNESS"] == "little"
+):
+ SOURCES += [
+ "htmlaccelNotInline.cpp",
+ ]
+ SOURCES["htmlaccelNotInline.cpp"].flags += CONFIG["HTML_ACCEL_FLAGS"]
+
+TEST_DIRS += ["gtest"]
+
+FINAL_LIBRARY = "xul"
diff --git a/parser/moz.build b/parser/moz.build
@@ -7,7 +7,7 @@
with Files("**"):
BUG_COMPONENT = ("Core", "DOM: HTML Parser")
-DIRS += ["expat", "prototype", "xml", "htmlparser", "html"]
+DIRS += ["expat", "prototype", "xml", "htmlaccel", "htmlparser", "html"]
EXPORTS += [
"nsCharsetSource.h",