test_BOMEncoding.js (13471B)
/*
 * test_TextDecoderBOMEncoding.js
 * bug 764234 tests
 */

/* eslint-env mozilla/testharness */

/**
 * Registers every BOM-related TextDecoder test with the harness.
 *
 * NOTE: the name is misspelled ("Enoding") but it is kept as-is because the
 * callers live outside this file.
 */
function runTextDecoderBOMEnoding() {
  test(testDecodeValidBOMUTF16, "testDecodeValidBOMUTF16");
  test(testBOMEncodingUTF8, "testBOMEncodingUTF8");
  test(testMoreBOMEncoding, "testMoreBOMEncoding");
}

/**
 * Decodes UTF-16BE input that starts with the matching byte order mark
 * (0xFE 0xFF) and expects the text back without any BOM character.
 */
function testDecodeValidBOMUTF16() {
  const expectedString =
    '"\u0412\u0441\u0435 \u0441\u0447\u0430\u0441\u0442\u043B\u0438\u0432\u044B\u0435 \u0441\u0435\u043C\u044C\u0438 \u043F\u043E\u0445\u043E\u0436\u0438 \u0434\u0440\u0443\u0433 \u043D\u0430 \u0434\u0440\u0443\u0433\u0430, \u043A\u0430\u0436\u0434\u0430\u044F \u043D\u0435\u0441\u0447\u0430\u0441\u0442\u043B\u0438\u0432\u0430\u044F \u0441\u0435\u043C\u044C\u044F \u043D\u0435\u0441\u0447\u0430\u0441\u0442\u043B\u0438\u0432\u0430 \u043F\u043E-\u0441\u0432\u043E\u0435\u043C\u0443."';

  // Testing UTF-16BE
  const data = [
    0xfe, 0xff, 0x00, 0x22, 0x04, 0x12, 0x04, 0x41, 0x04, 0x35, 0x00, 0x20,
    0x04, 0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42, 0x04, 0x3b,
    0x04, 0x38, 0x04, 0x32, 0x04, 0x4b, 0x04, 0x35, 0x00, 0x20, 0x04, 0x41,
    0x04, 0x35, 0x04, 0x3c, 0x04, 0x4c, 0x04, 0x38, 0x00, 0x20, 0x04, 0x3f,
    0x04, 0x3e, 0x04, 0x45, 0x04, 0x3e, 0x04, 0x36, 0x04, 0x38, 0x00, 0x20,
    0x04, 0x34, 0x04, 0x40, 0x04, 0x43, 0x04, 0x33, 0x00, 0x20, 0x04, 0x3d,
    0x04, 0x30, 0x00, 0x20, 0x04, 0x34, 0x04, 0x40, 0x04, 0x43, 0x04, 0x33,
    0x04, 0x30, 0x00, 0x2c, 0x00, 0x20, 0x04, 0x3a, 0x04, 0x30, 0x04, 0x36,
    0x04, 0x34, 0x04, 0x30, 0x04, 0x4f, 0x00, 0x20, 0x04, 0x3d, 0x04, 0x35,
    0x04, 0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42, 0x04, 0x3b,
    0x04, 0x38, 0x04, 0x32, 0x04, 0x30, 0x04, 0x4f, 0x00, 0x20, 0x04, 0x41,
    0x04, 0x35, 0x04, 0x3c, 0x04, 0x4c, 0x04, 0x4f, 0x00, 0x20, 0x04, 0x3d,
    0x04, 0x35, 0x04, 0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42,
    0x04, 0x3b, 0x04, 0x38, 0x04, 0x32, 0x04, 0x30, 0x00, 0x20, 0x04, 0x3f,
    0x04, 0x3e, 0x00, 0x2d, 0x04, 0x41, 0x04, 0x32, 0x04, 0x3e, 0x04, 0x35,
    0x04, 0x3c, 0x04, 0x43, 0x00, 0x2e, 0x00, 0x22,
  ];
  testBOMCharset({
    encoding: "utf-16be",
    data,
    expected: expectedString,
    msg: "decoder valid UTF-16BE test.",
  });
}

/**
 * Exercises the "utf-8" label (and the empty label) against input with no
 * byte order mark, with a valid UTF-8 byte order mark, and with a leading
 * 0xFF 0xFE pair that is not valid UTF-8.
 */
function testBOMEncodingUTF8() {
  const expectedString = " !\"#$%&'";

  // basic utf-8 test with valid encoding and byte stream. no byte order mark
  // provided.
  const plainData = [0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27];
  testBOMCharset({
    encoding: "utf-8",
    data: plainData,
    expected: expectedString,
    msg: "utf-8 encoding.",
  });

  // test valid encoding provided with valid byte order mark also provided.
  const validBomData = [
    0xef, 0xbb, 0xbf, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
  ];
  testBOMCharset({
    encoding: "utf-8",
    data: validBomData,
    expected: expectedString,
    msg: "valid utf-8 encoding provided with VALID utf-8 BOM test.",
  });

  // The remaining cases all feed the same bytes: 0xFF 0xFE followed by the
  // ASCII payload. testBOMCharset copies the array into a fresh Uint8Array
  // on every call, so sharing it between cases is safe.
  const invalidBomData = [
    0xff, 0xfe, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
  ];

  // test valid encoding provided with invalid byte order mark also provided
  // (fatal mode: decoding is expected to throw).
  testBOMCharset({
    encoding: "utf-8",
    fatal: true,
    data: invalidBomData,
    error: "TypeError",
    msg: "valid utf-8 encoding provided with invalid utf-8 fatal BOM test.",
  });

  // test valid encoding provided with invalid byte order mark also provided
  // (non-fatal mode: each bad byte is expected to become U+FFFD).
  testBOMCharset({
    encoding: "utf-8",
    data: invalidBomData,
    expected: "\ufffd\ufffd !\"#$%&'",
    msg: "valid utf-8 encoding provided with invalid utf-8 BOM test.",
  });

  // test empty encoding provided with invalid byte order mark also provided.
  testBOMCharset({
    encoding: "",
    data: invalidBomData,
    error: "RangeError",
    msg: "empty encoding provided with invalid utf-8 BOM test.",
  });
}

/**
 * Exercises inputs whose leading bytes look like a byte order mark for an
 * encoding other than the one requested through the label. In every such
 * case the expectations below require the requested label to win: the
 * foreign BOM bytes are decoded as ordinary data rather than switching the
 * encoding.
 */
function testMoreBOMEncoding() {
  const expectedString =
    '"\u0412\u0441\u0435 \u0441\u0447\u0430\u0441\u0442\u043B\u0438\u0432\u044B\u0435 \u0441\u0435\u043C\u044C\u0438 \u043F\u043E\u0445\u043E\u0436\u0438 \u0434\u0440\u0443\u0433 \u043D\u0430 \u0434\u0440\u0443\u0433\u0430, \u043A\u0430\u0436\u0434\u0430\u044F \u043D\u0435\u0441\u0447\u0430\u0441\u0442\u043B\u0438\u0432\u0430\u044F \u0441\u0435\u043C\u044C\u044F \u043D\u0435\u0441\u0447\u0430\u0441\u0442\u043B\u0438\u0432\u0430 \u043F\u043E-\u0441\u0432\u043E\u0435\u043C\u0443."';

  // Testing user provided encoding is UTF-16BE & bom encoding is utf-16le
  const utf16beData = [
    0xff, 0xfe, 0x00, 0x22, 0x04, 0x12, 0x04, 0x41, 0x04, 0x35, 0x00, 0x20,
    0x04, 0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42, 0x04, 0x3b,
    0x04, 0x38, 0x04, 0x32, 0x04, 0x4b, 0x04, 0x35, 0x00, 0x20, 0x04, 0x41,
    0x04, 0x35, 0x04, 0x3c, 0x04, 0x4c, 0x04, 0x38, 0x00, 0x20, 0x04, 0x3f,
    0x04, 0x3e, 0x04, 0x45, 0x04, 0x3e, 0x04, 0x36, 0x04, 0x38, 0x00, 0x20,
    0x04, 0x34, 0x04, 0x40, 0x04, 0x43, 0x04, 0x33, 0x00, 0x20, 0x04, 0x3d,
    0x04, 0x30, 0x00, 0x20, 0x04, 0x34, 0x04, 0x40, 0x04, 0x43, 0x04, 0x33,
    0x04, 0x30, 0x00, 0x2c, 0x00, 0x20, 0x04, 0x3a, 0x04, 0x30, 0x04, 0x36,
    0x04, 0x34, 0x04, 0x30, 0x04, 0x4f, 0x00, 0x20, 0x04, 0x3d, 0x04, 0x35,
    0x04, 0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42, 0x04, 0x3b,
    0x04, 0x38, 0x04, 0x32, 0x04, 0x30, 0x04, 0x4f, 0x00, 0x20, 0x04, 0x41,
    0x04, 0x35, 0x04, 0x3c, 0x04, 0x4c, 0x04, 0x4f, 0x00, 0x20, 0x04, 0x3d,
    0x04, 0x35, 0x04, 0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42,
    0x04, 0x3b, 0x04, 0x38, 0x04, 0x32, 0x04, 0x30, 0x00, 0x20, 0x04, 0x3f,
    0x04, 0x3e, 0x00, 0x2d, 0x04, 0x41, 0x04, 0x32, 0x04, 0x3e, 0x04, 0x35,
    0x04, 0x3c, 0x04, 0x43, 0x00, 0x2e, 0x00, 0x22,
  ];

  testBOMCharset({
    encoding: "utf-16be",
    fatal: true,
    data: utf16beData,
    expected: "\ufffe" + expectedString,
    msg: "test decoder invalid BOM encoding for utf-16be fatal.",
  });

  testBOMCharset({
    encoding: "utf-16be",
    data: utf16beData,
    expected: "\ufffe" + expectedString,
    msg: "test decoder invalid BOM encoding for utf-16be.",
  });

  // The quoted sentence in UTF-16LE, without any byte order mark. The
  // little-endian cases below differ only in the two BOM bytes placed in
  // front of this payload, so it is defined once and shared.
  const utf16lePayload = [
    0x22, 0x00, 0x12, 0x04, 0x41, 0x04, 0x35, 0x04, 0x20, 0x00,
    0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42, 0x04, 0x3b, 0x04,
    0x38, 0x04, 0x32, 0x04, 0x4b, 0x04, 0x35, 0x04, 0x20, 0x00, 0x41, 0x04,
    0x35, 0x04, 0x3c, 0x04, 0x4c, 0x04, 0x38, 0x04, 0x20, 0x00, 0x3f, 0x04,
    0x3e, 0x04, 0x45, 0x04, 0x3e, 0x04, 0x36, 0x04, 0x38, 0x04, 0x20, 0x00,
    0x34, 0x04, 0x40, 0x04, 0x43, 0x04, 0x33, 0x04, 0x20, 0x00, 0x3d, 0x04,
    0x30, 0x04, 0x20, 0x00, 0x34, 0x04, 0x40, 0x04, 0x43, 0x04, 0x33, 0x04,
    0x30, 0x04, 0x2c, 0x00, 0x20, 0x00, 0x3a, 0x04, 0x30, 0x04, 0x36, 0x04,
    0x34, 0x04, 0x30, 0x04, 0x4f, 0x04, 0x20, 0x00, 0x3d, 0x04, 0x35, 0x04,
    0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42, 0x04, 0x3b, 0x04,
    0x38, 0x04, 0x32, 0x04, 0x30, 0x04, 0x4f, 0x04, 0x20, 0x00, 0x41, 0x04,
    0x35, 0x04, 0x3c, 0x04, 0x4c, 0x04, 0x4f, 0x04, 0x20, 0x00, 0x3d, 0x04,
    0x35, 0x04, 0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42, 0x04,
    0x3b, 0x04, 0x38, 0x04, 0x32, 0x04, 0x30, 0x04, 0x20, 0x00, 0x3f, 0x04,
    0x3e, 0x04, 0x2d, 0x00, 0x41, 0x04, 0x32, 0x04, 0x3e, 0x04, 0x35, 0x04,
    0x3c, 0x04, 0x43, 0x04, 0x2e, 0x00, 0x22, 0x00,
  ];

  // Testing user provided encoding is UTF-16LE & bom encoding is utf-16be
  const utf16leWithBeBom = [0xfe, 0xff, ...utf16lePayload];
  testBOMCharset({
    encoding: "utf-16le",
    fatal: true,
    data: utf16leWithBeBom,
    expected: "\ufffe" + expectedString,
    msg: "test decoder invalid BOM encoding for utf-16le fatal.",
  });

  testBOMCharset({
    encoding: "utf-16le",
    data: utf16leWithBeBom,
    expected: "\ufffe" + expectedString,
    msg: "test decoder invalid BOM encoding for utf-16le.",
  });

  // Testing user provided encoding is UTF-16 & bom encoding is utf-16be
  testBOMCharset({
    encoding: "utf-16",
    fatal: true,
    data: utf16leWithBeBom,
    expected: "\ufffe" + expectedString,
    msg: "test decoder invalid BOM encoding for utf-16 fatal.",
  });

  testBOMCharset({
    encoding: "utf-16",
    data: utf16leWithBeBom,
    expected: "\ufffe" + expectedString,
    msg: "test decoder invalid BOM encoding for utf-16.",
  });

  // Testing user provided encoding is UTF-16 & bom encoding is utf-16le
  const utf16leWithLeBom = [0xff, 0xfe, ...utf16lePayload];
  testBOMCharset({
    encoding: "utf-16",
    fatal: true,
    data: utf16leWithLeBom,
    expected: expectedString,
    msg: "test decoder BOM encoding for utf-16 fatal.",
  });

  testBOMCharset({
    encoding: "utf-16",
    data: utf16leWithLeBom,
    expected: expectedString,
    msg: "test decoder BOM encoding for utf-16.",
  });

  // Testing user provided encoding is UTF-8 & bom encoding is utf-16be
  const utf8Data = [
    0xfe, 0xff, 0x22, 0xd0, 0x92, 0xd1, 0x81, 0xd0, 0xb5, 0x20, 0xd1, 0x81,
    0xd1, 0x87, 0xd0, 0xb0, 0xd1, 0x81, 0xd1, 0x82, 0xd0, 0xbb, 0xd0, 0xb8,
    0xd0, 0xb2, 0xd1, 0x8b, 0xd0, 0xb5, 0x20, 0xd1, 0x81, 0xd0, 0xb5, 0xd0,
    0xbc, 0xd1, 0x8c, 0xd0, 0xb8, 0x20, 0xd0, 0xbf, 0xd0, 0xbe, 0xd1, 0x85,
    0xd0, 0xbe, 0xd0, 0xb6, 0xd0, 0xb8, 0x20, 0xd0, 0xb4, 0xd1, 0x80, 0xd1,
    0x83, 0xd0, 0xb3, 0x20, 0xd0, 0xbd, 0xd0, 0xb0, 0x20, 0xd0, 0xb4, 0xd1,
    0x80, 0xd1, 0x83, 0xd0, 0xb3, 0xd0, 0xb0, 0x2c, 0x20, 0xd0, 0xba, 0xd0,
    0xb0, 0xd0, 0xb6, 0xd0, 0xb4, 0xd0, 0xb0, 0xd1, 0x8f, 0x20, 0xd0, 0xbd,
    0xd0, 0xb5, 0xd1, 0x81, 0xd1, 0x87, 0xd0, 0xb0, 0xd1, 0x81, 0xd1, 0x82,
    0xd0, 0xbb, 0xd0, 0xb8, 0xd0, 0xb2, 0xd0, 0xb0, 0xd1, 0x8f, 0x20, 0xd1,
    0x81, 0xd0, 0xb5, 0xd0, 0xbc, 0xd1, 0x8c, 0xd1, 0x8f, 0x20, 0xd0, 0xbd,
    0xd0, 0xb5, 0xd1, 0x81, 0xd1, 0x87, 0xd0, 0xb0, 0xd1, 0x81, 0xd1, 0x82,
    0xd0, 0xbb, 0xd0, 0xb8, 0xd0, 0xb2, 0xd0, 0xb0, 0x20, 0xd0, 0xbf, 0xd0,
    0xbe, 0x2d, 0xd1, 0x81, 0xd0, 0xb2, 0xd0, 0xbe, 0xd0, 0xb5, 0xd0, 0xbc,
    0xd1, 0x83, 0x2e, 0x22,
  ];

  testBOMCharset({
    encoding: "utf-8",
    fatal: true,
    data: utf8Data,
    error: "TypeError",
    msg: "test decoder invalid BOM encoding for valid utf-8 fatal provided label.",
  });

  testBOMCharset({
    encoding: "utf-8",
    data: utf8Data,
    expected: "\ufffd\ufffd" + expectedString,
    msg: "test decoder invalid BOM encoding for valid utf-8 provided label.",
  });

  // Testing user provided encoding is non-UTF & bom encoding is utf-16be
  const greekData = [
    0xfe, 0xff, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab,
    0xac, 0xad, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8,
    0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4,
    0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
    0xd1, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd,
    0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
    0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5,
    0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe,
  ];

  const expectedGreek =
    "\u03CE\uFFFD\u2019\xA3\u20AC\u20AF\xA6\xA7\xA8\xA9\u037A\xAB\xAC\xAD\u2015" +
    "\xB0\xB1\xB2\xB3\u0384\u0385\u0386\xB7\u0388\u0389\u038A\xBB\u038C\xBD\u038E\u038F" +
    "\u0390\u0391\u0392\u0393\u0394\u0395\u0396\u0397\u0398\u0399\u039A\u039B\u039C\u039D\u039E\u039F" +
    "\u03A0\u03A1\u03A3\u03A4\u03A5\u03A6\u03A7\u03A8\u03A9\u03AA\u03AB\u03AC\u03AD\u03AE\u03AF" +
    "\u03B0\u03B1\u03B2\u03B3\u03B4\u03B5\u03B6\u03B7\u03B8\u03B9\u03BA\u03BB\u03BC\u03BD\u03BE\u03BF" +
    "\u03C0\u03C1\u03C2\u03C3\u03C4\u03C5\u03C6\u03C7\u03C8\u03C9\u03CA\u03CB\u03CC\u03CD\u03CE";

  // The fatal and non-fatal greek cases carry distinct messages so that a
  // failure report identifies which of the two broke.
  testBOMCharset({
    encoding: "greek",
    fatal: true,
    data: greekData,
    error: "TypeError",
    msg: "test decoder encoding provided with invalid BOM encoding for greek fatal.",
  });

  testBOMCharset({
    encoding: "greek",
    data: greekData,
    expected: expectedGreek,
    msg: "test decoder encoding provided with invalid BOM encoding for greek.",
  });
}

/**
 * Runs a single decode case and asserts on its outcome.
 *
 * @param {Object} test - Description of the case.
 * @param {string} test.encoding - Label passed to the TextDecoder constructor.
 * @param {boolean} [test.fatal] - When the key is present, its value is
 *   forwarded as the `fatal` constructor option; when absent, the decoder is
 *   built without an options object.
 * @param {number[]} test.data - Bytes to decode (copied into a Uint8Array).
 * @param {string} [test.expected] - Expected decoded text for cases that are
 *   not supposed to throw.
 * @param {string} [test.error] - Expected `name` of the exception for cases
 *   that are supposed to throw.
 * @param {string} test.msg - Description attached to every assertion.
 */
function testBOMCharset(test) {
  let outText;
  try {
    const decoder =
      "fatal" in test
        ? new TextDecoder(test.encoding, { fatal: test.fatal })
        : new TextDecoder(test.encoding);
    outText = decoder.decode(new Uint8Array(test.data));
  } catch (e) {
    // Constructing or decoding threw: the case passes only if it declared
    // an expected error with this exact name.
    assert_equals(e.name, test.error, test.msg);
    return;
  }
  // Nothing was thrown, so the case must not have been expecting an error.
  assert_true(!test.error, test.msg);

  if (outText !== test.expected) {
    // Report the mismatch on the escape()d forms; presumably so that
    // non-ASCII differences are readable in the harness output.
    assert_equals(
      escape(outText),
      escape(test.expected),
      test.msg + " Code points do not match expected code points."
    );
  }
}