test_serializers_entities_in_attr.js (3788B)
/**
 * Creates a document encoder for `mimeType` and initializes it on `doc`.
 *
 * @param {Document} doc - Document to serialize.
 * @param {string} mimeType - Passed both to Cu.createDocumentEncoder() and
 *   to the encoder's init().
 * @param {number} flags - Bitwise OR of Ci.nsIDocumentEncoder output flags.
 * @returns {object} The initialized encoder; callers use encodeToString().
 */
function createEncoder(doc, mimeType, flags) {
  const enc = Cu.createDocumentEncoder(mimeType);
  enc.init(doc, mimeType, flags);
  return enc;
}

// One encoder factory per serializer flavour under test. Each takes the
// document to serialize and returns an initialized encoder.
const encoders = {
  xml: doc =>
    createEncoder(doc, "text/xml", Ci.nsIDocumentEncoder.OutputLFLineBreak),
  html: doc =>
    createEncoder(doc, "text/html", Ci.nsIDocumentEncoder.OutputLFLineBreak),
  htmlBasic: doc =>
    createEncoder(
      doc,
      "text/html",
      Ci.nsIDocumentEncoder.OutputEncodeBasicEntities |
        Ci.nsIDocumentEncoder.OutputLFLineBreak
    ),
  xhtml: doc =>
    createEncoder(
      doc,
      "application/xhtml+xml",
      Ci.nsIDocumentEncoder.OutputLFLineBreak
    ),
};

// Which characters should we encode as entities?  It depends on the serializer.
const encodeAll = { html: true, htmlBasic: true, xhtml: true, xml: true };
const encodeHTMLBasic = {
  html: false,
  htmlBasic: true,
  xhtml: false,
  xml: false,
};
const encodeXML = { html: false, htmlBasic: false, xhtml: true, xml: true };
const encodeNone = { html: false, htmlBasic: false, xhtml: false, xml: false };
const encodingInfoMap = new Map([
  // Basic sanity chars '<', '>', '"', '&' get encoded in all cases.
  ["<", encodeAll],
  [">", encodeAll],
  ['"', encodeAll],
  ["&", encodeAll],
  // nbsp is only encoded with the HTML encoder when encoding basic entities.
  ["\xA0", encodeHTMLBasic],
  // Whitespace bits are only encoded in XML.
  ["\n", encodeXML],
  ["\r", encodeXML],
  ["\t", encodeXML],
]);

// Expected serialized form of each character when it is encoded. The values
// must stay as literal entity text: a decoded character here would either
// break the string literal (as with '"') or make the expectation vacuous.
// NOTE(review): the numeric forms for \n, \r and \t are assumed to match the
// serializer's attribute escaping; confirm against its output.
const encodingMap = new Map([
  ["<", "&lt;"],
  [">", "&gt;"],
  ['"', "&quot;"],
  ["&", "&amp;"],
  ["\xA0", "&nbsp;"],
  ["\n", "&#xA;"],
  ["\r", "&#xD;"],
  ["\t", "&#9;"],
]);

/**
 * Looks up which serializers are expected to encode `c` as an entity.
 *
 * @param {string} c - A single character.
 * @returns {object} Map from serializer type to boolean; `encodeNone` for
 *   characters that have no entry in `encodingInfoMap`.
 */
function encodingInfoForChar(c) {
  // Every stored value is an object (always truthy), so `||` only falls
  // through when the character has no entry.
  return encodingInfoMap.get(c) || encodeNone;
}

/**
 * Returns the text expected in the serialized attribute value for `c`.
 *
 * @param {string} c - A single character.
 * @param {string} type - Serializer type; one of the keys of `encoders`.
 * @returns {string} The entity text from `encodingMap` when `type` encodes
 *   `c`, otherwise `c` itself.
 */
function encodingForChar(c, type) {
  const info = encodingInfoForChar(c);
  if (!info[type]) {
    return c;
  }
  return encodingMap.get(c);
}

// Number of leading code units (0 .. CHAR_COUNT - 1) exercised by the test.
const CHAR_COUNT = 255;

// Build <root> with one <span x="<char>"> </span> child per tested character.
const doc = new DOMParser().parseFromString("<root></root>", "text/xml");
const root = doc.documentElement;
for (let i = 0; i < CHAR_COUNT; ++i) {
  const el = doc.createElement("span");
  el.setAttribute("x", String.fromCharCode(i));
  el.textContent = " ";
  root.appendChild(el);
}
for (const type of ["xml", "xhtml", "htmlBasic", "html"]) {
  let str = encoders[type](doc).encodeToString();
  const prefix = '<root><span x="';
  const suffix = '"> </span></root>';
  Assert.ok(str.startsWith(prefix), `${type} serialization starts correctly`);
  Assert.ok(str.endsWith(suffix), `${type} serialization ends correctly`);
  // Strip the outer markup, then split on the markup between two consecutive
  // attribute values so that encodings[i] is the serialized value for char i.
  str = str.substring(prefix.length, str.length - suffix.length);
  const encodings = str.split('"> </span><span x="');
  // The per-index checks below cannot notice surplus pieces, so check the
  // count explicitly.
  Assert.equal(
    encodings.length,
    CHAR_COUNT,
    `${type} serialization has one attribute value per char`
  );
  for (let i = 0; i < CHAR_COUNT; ++i) {
    const c = String.fromCharCode(i);
    Assert.equal(
      encodingForChar(c, type),
      encodings[i],
      `${type} encoding of char ${i} is correct`
    );
  }
}

// Verify that we don't escape entities in attributes with JS code.
const docWithJS = new DOMParser().parseFromString("<root></root>", "text/xml");
const rootWithJS = docWithJS.documentElement;
const htmlNS = "http://www.w3.org/1999/xhtml";
const jsCode = "x < 2 || y > 3";
let div = docWithJS.createElementNS(htmlNS, "div");
// Attribute name/value pairs set on the <div>: href and src carry the script
// behind a "javascript:" prefix, onclick and onload hold it directly.
const jsAttributes = [
  ["href", `javascript:${jsCode}`],
  ["src", `javascript:${jsCode}`],
  ["onclick", jsCode],
  ["onload", jsCode],
];
for (const [attrName, attrValue] of jsAttributes) {
  div.setAttribute(attrName, attrValue);
}
rootWithJS.appendChild(div);
["xml", "xhtml", "htmlBasic", "html"].forEach(type => {
  const serialized = encoders[type](docWithJS).encodeToString();
  // search() converts the pattern string to a RegExp and returns -1 when the
  // output contains no lowercase named entity reference (e.g. "&lt;").
  const entityIndex = serialized.search("&[a-z]+;");
  Assert.equal(
    entityIndex,
    -1,
    `${type} encoding does not escape entities in attributes with JS code`
  );
});