html5lib-testcase-support.js (9658B)
// This library supports HTML5lib-style test cases.
//
// The HTML5lib test case format describes an actual DOM tree. For testing, and
// in particular for testing of DOM parsers and DOM parser-related
// functionality, this has the advantage of being able to represent edge cases.
//
// Example: If `.replaceWithChildren` is called on the `<span>` element as a
// result of parsing `"<p>Hello<span>World</span></p>"`, then this results in
// a tree with two adjacent text nodes. This behaviour will affect subsequent
// DOM operations and should thus be tested. The HTML5lib format makes it easy
// to describe the expected result unambiguously.
//
// References:
// - HTML5lib: https://github.com/html5lib
// - HTML5lib testcases: https://github.com/html5lib/html5lib-tests/tree/master/tree-construction
// - test case format description:
//   https://github.com/html5lib/html5lib-tests/blob/master/tree-construction/README.md
//
// The main "API" is:
//
// - parse_html5lib_testcases(string)
//   This returns an array of dictionaries, where each dictionary contains
//   the text of the test file, keyed by the lines starting with a hashtag.
//
//   E.g. #data\nbla results in [{data: "bla"}].
//
// - html5lib_testcases_from_script()
//   Wrapper for parse_html5lib_testcases that gets the test data from a script
//   element with type "html5lib-tests". This allows specifying the test data
//   in the test file, but requires working around closing script tags.
//
// - html5lib_testcases_from_response(response_promise)
//   Wrapper for parse_html5lib_testcases that gets the data from a Response
//   Promise, as is returned from `fetch()`, and returns a Promise for the array
//   of testcases. This allows getting the test data from a text resource.
//
// - build_node_tree(node, documentstr)
//   This builds a node tree from the "#document" string from a testcase, and
//   appends it to the node argument. Returns node.
//
// - assert_subtree_equals(node1, node2)
//   Asserts that the child trees of node1 and node2 are equal. This
//   recursively descends the trees.
//
// - assert_testcase(node, testcase)
//   Wrapper for build_node_tree and assert_subtree_equals, for use with a
//   result of parse_html5lib_testcases.
//

// Parses the test cases found in the text content of the first
// <script type="html5lib-tests"> element of the current document.
// Returns the array produced by parse_html5lib_testcases.
function html5lib_testcases_from_script() {
  return parse_html5lib_testcases(
      document.querySelector("script[type='html5lib-tests']").textContent);
}

// Takes a Promise for a Response (as returned by `fetch()`), reads its body as
// text and returns a Promise for the parsed array of test cases.
function html5lib_testcases_from_response(response_promise) {
  return response_promise
      .then(response => response.text())
      .then(parse_html5lib_testcases);
}

// Finalizes one test case: every section of `current` (an array of lines) is
// joined into a single newline-separated string, in place. The test case is
// appended to `testcases` only if it has at least one section, so that runs
// of blank lines don't produce empty test cases.
function add_html5lib_testcase(testcases, current) {
  for (const section of Object.keys(current)) {
    current[section] = current[section].join("\n");
  }
  if (Object.keys(current).length > 0) {
    testcases.push(current);
  }
}

// Splits the text of an HTML5lib test file into test cases.
//
// A line starting with "#" opens a new section named after the rest of that
// line; subsequent lines belong to that section. An empty line terminates the
// current test case. Returns an array of dictionaries that map section names
// to section text, e.g. "#data\nbla" results in [{data: "bla"}].
function parse_html5lib_testcases(content) {
  const testcases = [];
  let state = undefined;
  let current = {};
  for (const line of content.split("\n")) {
    if (!line) {
      add_html5lib_testcase(testcases, current);
      state = undefined;
      current = {};
    } else if (line[0] === "#") {
      state = line.substring(1);
      current[state] = [];
    } else if (state) {
      current[state].push(line);
    } else {
      // Error handling is for another day.
    }
  }
  // Flush the final test case. Without this, input that doesn't end in an
  // empty line (i.e. no trailing newline) would silently lose its last test
  // case. This is a no-op if the last test case was already flushed.
  add_html5lib_testcase(testcases, current);
  return testcases;
}

// Descends `level` steps from `node`, following the last child at every step,
// and returns the node found there. This is the node that the line currently
// being processed by build_node_tree should attach to.
function get_child_at(node, level) {
  let current = node;
  for (let i = 0; i < level; i++) {
    // For <template>, continue with the content fragment.
    current = is_html_template(current) ? current.content : current.lastChild;
  }
  return current;
}

// Appends `child` to the node found by get_child_at(node, level).
function append_child_at(node, level, child) {
  get_child_at(node, level).appendChild(child);
}

// Returns a truthy value if `node` has both a tag name and a namespace URI,
// which is how this library recognizes element nodes.
function is_element(node) {
  return node.tagName && node.namespaceURI;
}

// Returns a truthy value if `node` is a <template> element in the HTML
// namespace.
function is_html_template(node) {
  return is_element(node) && node.tagName === "TEMPLATE" &&
      node.namespaceURI === "http://www.w3.org/1999/xhtml";
}

// Creates an element called `name`. `maybe_namespace` is the namespace
// designator from the test case line, including its trailing space ("svg " or
// "math "), or empty/undefined for HTML elements. Any other designator is
// reported via assert_unreached.
function create_element(name, maybe_namespace) {
  // `For the HTML namespace, the namespace designator is the empty string,
  // i.e. there's no prefix. For the SVG namespace, the namespace designator is
  // "svg ". For the MathML namespace, the namespace designator is "math ".`
  if (maybe_namespace === "svg ") {
    return document.createElementNS("http://www.w3.org/2000/svg", name);
  } else if (maybe_namespace === "math ") {
    return document.createElementNS("http://www.w3.org/1998/Math/MathML", name);
  } else if (!maybe_namespace) {
    return document.createElement(name);
  } else {
    assert_unreached(`Invalid element name: "${maybe_namespace}${name}"`);
  }
}

// Sets the attribute `name` to `value` on `node`. `maybe_namespace` is the
// namespace designator from the test case line, including its trailing space
// ("xlink ", "xml " or "xmlns "), or empty/undefined for attributes without a
// namespace. Any other designator is reported via assert_unreached.
function set_attribute(node, name, maybe_namespace, value) {
  // `The attribute name string is the local name prefixed by a namespace
  // designator. For no namespace, the namespace designator is the empty string,
  // i.e. there's no prefix. For the XLink namespace, the namespace designator
  // is "xlink "` [Likewise, "xml " and "xmlns ".]
  if (maybe_namespace === "xlink ") {
    node.setAttributeNS("http://www.w3.org/1999/xlink", name, value);
  } else if (maybe_namespace === "xml ") {
    node.setAttributeNS("http://www.w3.org/XML/1998/namespace", name, value);
  } else if (maybe_namespace === "xmlns ") {
    node.setAttributeNS("http://www.w3.org/2000/xmlns/", name, value);
  } else if (!maybe_namespace) {
    node.setAttribute(name, value);
  } else {
    assert_unreached(`Invalid attribute name: "${maybe_namespace}${name}"`);
  }
}

// Builds the node tree described by `docstr` (the "#document" section of a
// test case) underneath `root`, and returns `root`.
//
// Every line has the form "| " + indentation + node description, where two
// spaces of indentation correspond to one level of nesting. Lines that can't
// be interpreted are reported via assert_unreached.
function build_node_tree(root, docstr) {
  // Format described here:
  // https://github.com/html5lib/html5lib-tests/blob/master/tree-construction/README.md

  // Special-case empty string: Don't build anything.
  // (Happens for test docs that cause parse errors, but also for genuinely
  // empty expectation documents.)
  if (!docstr) return root;

  for (const line of docstr.split("\n")) {
    const parsed = line.match(/^\| ( *)(.*)/);
    if (!parsed) {
      // A line without the "| " prefix (e.g. the continuation of a multi-line
      // text node) is not supported. Report it instead of crashing on the
      // failed match.
      assert_unreached(
          `Unknown line type. Maybe test data is malformed. ("${line}")`);
      continue;
    }
    const [, indent, remainder] = parsed;
    const level = indent.length / 2;
    let match;
    if ((match = remainder.match(/^<([a-z]* )?([a-zA-Z0-9_-]*)>$/))) {
      // `Element nodes must be represented by a "<, the tag name string, ">".`
      append_child_at(root, level, create_element(match[2], match[1]));
    } else if ((match = remainder.match(/^"([^"]*)"$/))) {
      // `Text nodes must be the string, in double quotes.`
      append_child_at(root, level, document.createTextNode(match[1]));
    } else if ((match = remainder.match(/^([a-z]* )?(.*)="(.*)"$/))) {
      // `Attribute nodes must have the attribute name string, then an "=" sign,
      // then the attribute value in double quotes (").`
      set_attribute(get_child_at(root, level), match[2], match[1], match[3]);
    } else if ((match = remainder.match(/^<!--(.*)-->$/))) {
      // `Comments must be "<" then "!-- " then the data then " -->".`
      // NOTE(review): the captured data includes the spaces next to the
      // "<!--" and "-->" delimiters. Confirm whether the test data relies on
      // this before tightening the pattern.
      append_child_at(root, level, document.createComment(match[1]));
    } else if ((match = remainder.match(
        /^<!DOCTYPE ([^ ]*)( "([^"]*)"( "([^"]*)")?)?>$/))) {
      // `DOCTYPEs must be "<!DOCTYPE " then [... bla bla ...]`
      // The public and system ids are optional. Absent ids must be passed as
      // empty strings; passing undefined would be converted to the string
      // "undefined".
      append_child_at(root, level,
          document.implementation.createDocumentType(
              match[1], match[3] ?? "", match[5] ?? ""));
    } else if ((match = remainder.match(/^<\?([a-z]*)( (.*))>$/))) {
      // `Processing instructions must be "<?", then the target, then [...]`
      // The "?" must be escaped; otherwise it would merely make the "<"
      // optional and the pattern would never match an actual "<?target data>".
      append_child_at(root, level, document.createProcessingInstruction(
          match[1], match[3]));
    } else if (remainder === "content") {
      // Template contents are represented by the string "content" with the
      // children below it.
      // Nothing to do here; so let's just check we're actually in a template.
      assert_true(is_html_template(get_child_at(root, level)),
          "\"content\" only expected as child of a <template>.");
    } else {
      assert_unreached(
          `Unknown line type. Maybe test data is malformed. ("${line}")`);
    }
  }
  return root;
}

// Asserts that the trees below `node1` and `node2` are equal. The two root
// nodes themselves are not compared. Contents of HTML <template> elements are
// compared recursively.
function assert_subtree_equals(node1, node2) {
  // Iterate in parallel over both trees.
  const tree1 = document.createNodeIterator(node1);
  const tree2 = document.createNodeIterator(node2);
  // Skip the root/context node, so that we can re-use the test with different
  // context types.
  let current1 = tree1.nextNode();
  let current2 = tree2.nextNode();
  do {
    current1 = tree1.nextNode();
    current2 = tree2.nextNode();

    if (!current1) break;

    // Conceptually, we only want to check whether a.isEqualNode(b). But that
    // yields terrible error messages ("expected true but got false"). With
    // this being a test suite and all, let's invest a bit of effort into nice
    // error messages.
    if (!current1.isEqualNode(current2)) {
      let breadcrumbs = "";
      let current = current1;
      while (current) {
        const here = is_element(current) ? `<${current.tagName}>` : `${current}`;
        breadcrumbs = `${here} / ${breadcrumbs}`;
        current = current.parentNode;
      }
      // Strip the trailing " / " separator.
      breadcrumbs = breadcrumbs.substring(0, breadcrumbs.length - 3);
      assert_true(current1.isEqualNode(current2),
          `${current1}.isEqual(${current2}) fails. Path: ${breadcrumbs}.`);
    }

    // NodeIterator does not recurse into template contents. So we need to do
    // this manually.
    if (is_html_template(current1) && is_html_template(current2)) {
      assert_subtree_equals(current1.content, current2.content);
    }
  } while (current1);

  // Ensure that both iterators have come to an end.
  assert_false(!!current2, "Additional nodes at the end of node2.\n");
}

// Asserts that the children of `node` match the tree described by the
// "document" section of `testcase`. The expected tree is built inside a fresh
// context element, named by the "document-fragment" section if present and
// "div" otherwise.
function assert_testcase(node, testcase) {
  const context = document.createElement(testcase["document-fragment"] ?? "div");
  const tree = build_node_tree(context, testcase.document);
  assert_subtree_equals(node, tree);
}