test_utf8_illegals.js (3514B)
// Tests illegal UTF-8 sequences
//
// Every input below is a percent-encoded byte sequence that is not
// well-formed UTF-8. Each one is embedded between "ABC" and "XYZ" in a
// data: URI, read back through a UTF-8 converter input stream, and the
// decoded text is compared against the group's `expected` string, in which
// the bad bytes show up as one or more U+FFFD characters.

// Kept as `var` so this declaration behaves exactly as before if the test
// harness already exposes a global named `Cc`.
// NOTE(review): confirm harness globals before converting this to `const`.
var Cc = Components.Constructor;

const { NetUtil } = ChromeUtils.importESModule(
  "resource://gre/modules/NetUtil.sys.mjs"
);

/**
 * Table of test groups.
 *
 * inStrings - percent-encoded byte sequences to inject into the data: URI.
 * expected  - the exact text every sequence in the group must decode to.
 */
const tests = [
  {
    // Illegal or incomplete sequences
    inStrings: [
      "%80",
      "%8f",
      "%90",
      "%9f",
      "%a0",
      "%bf",
      "%c0",
      "%c1",
      "%c2",
      "%df",
      "%e0",
      "%e0%a0",
      "%e0%bf",
      "%ed%80",
      "%ed%9f",
      "%ef",
      "%ef%bf",
      "%f0",
      "%f0%90",
      "%f0%90%80",
      "%f0%90%bf",
      "%f0%bf",
      "%f0%bf%80",
      "%f0%bf%bf",
      "%f4",
      "%f4%80",
      "%f4%80%80",
      "%f4%80%bf",
      "%f4%8f",
      "%f4%8f%80",
      "%f4%8f%bf",
      "%f5",
      "%f7",
      "%f8",
      "%fb",
      "%fc",
      "%fd",
    ],
    expected: "ABC\ufffdXYZ",
  },

  {
    // Illegal bytes in 2-octet sequences
    inStrings: ["%c0%af", "%c1%af"],
    expected: "ABC\ufffd\ufffdXYZ",
  },

  {
    inStrings: [
      // Illegal bytes in 3-octet sequences
      "%e0%80%80",
      "%e0%80%af",
      "%e0%9f%bf",
      // long surrogates
      "%ed%a0%80", // D800
      "%ed%ad%bf", // DB7F
      "%ed%ae%80", // DB80
      "%ed%af%bf", // DBFF
      "%ed%b0%80", // DC00
      "%ed%be%80", // DF80
      "%ed%bf%bf", // DFFF
    ],
    expected: "ABC\ufffd\ufffd\ufffdXYZ",
  },

  {
    // Illegal bytes in 4-octet sequences
    inStrings: [
      "%f0%80%80%80",
      "%f0%80%80%af",
      "%f0%8f%bf%bf",
      "%f4%90%80%80",
      "%f4%bf%bf%bf",
      "%f5%80%80%80",
      "%f7%bf%bf%bf",
    ],
    expected: "ABC\ufffd\ufffd\ufffd\ufffdXYZ",
  },

  {
    // Illegal bytes in 5-octet sequences
    inStrings: ["%f8%80%80%80%80", "%f8%80%80%80%af", "%fb%bf%bf%bf%bf"],
    expected: "ABC\ufffd\ufffd\ufffd\ufffd\ufffdXYZ",
  },

  {
    inStrings: [
      // Surrogate pairs
      "%ed%a0%80%ed%b0%80", // D800 DC00
      "%ed%a0%80%ed%bf%bf", // D800 DFFF
      "%ed%ad%bf%ed%b0%80", // DB7F DC00
      "%ed%ad%bf%ed%bf%bf", // DB7F DFFF
      "%ed%ae%80%ed%b0%80", // DB80 DC00
      "%ed%ae%80%ed%bf%bf", // DB80 DFFF
      "%ed%af%bf%ed%b0%80", // DBFF DC00
      // Previously "%ed%ad%bf%ed%bf%bf" (DB7F DFFF), a duplicate of the entry
      // above that left the DBFF DFFF pair untested.
      "%ed%af%bf%ed%bf%bf", // DBFF DFFF
      // Illegal bytes in 6-octet sequences
      "%fc%80%80%80%80%80",
      "%fc%80%80%80%80%af",
      "%fd%bf%bf%bf%bf%bf",
    ],
    expected: "ABC\ufffd\ufffd\ufffd\ufffd\ufffd\ufffdXYZ",
  },
];

/**
 * Decodes "ABC" + inStr + "XYZ" from a data: URI through a UTF-8 converter
 * input stream and asserts that the decoded text equals `expected`.
 *
 * @param {string} inStr - Percent-encoded bytes placed between "ABC" and
 *   "XYZ" in the data: URI payload.
 * @param {string} expected - The text the converter is required to produce.
 * @throws {Error} If the converter stream does not expose
 *   nsIUnicharLineInputStream.
 */
function testCaseInputStream(inStr, expected) {
  const dataURI = `data:text/plain; charset=UTF-8,ABC${inStr}XYZ`;
  dump(`${inStr}==>`);

  const ConverterInputStream = Cc(
    "@mozilla.org/intl/converter-input-stream;1",
    "nsIConverterInputStream",
    "init"
  );
  const channel = NetUtil.newChannel({
    uri: dataURI,
    loadUsingSystemPrincipal: true,
  });
  const testInputStream = channel.open();
  // init(stream, charset, bufferSize, replacementChar): invalid input is
  // replaced with U+FFFD, which is what the `expected` strings are built from.
  const testConverter = new ConverterInputStream(
    testInputStream,
    "UTF-8",
    16,
    0xfffd
  );

  let outStr = "";
  try {
    if (!(testConverter instanceof Ci.nsIUnicharLineInputStream)) {
      throw new Error("not line input stream");
    }

    // readLine() returns false once the final line has been delivered, so the
    // line value is appended before the loop condition is evaluated.
    let more;
    do {
      const line = {};
      more = testConverter.readLine(line);
      outStr += line.value;
    } while (more);
  } finally {
    // Release the converter (and the channel stream it wraps) even when
    // reading fails, so streams do not pile up across test cases.
    testConverter.close();
  }

  dump(`${outStr}; expected=${expected}\n`);
  Assert.equal(outStr, expected);
  Assert.equal(outStr.length, expected.length);
}

/**
 * xpcshell entry point: runs every input string of every group in `tests`
 * through testCaseInputStream.
 */
function run_test() {
  for (const group of tests) {
    for (const inStr of group.inStrings) {
      testCaseInputStream(inStr, group.expected);
    }
  }
}