// test_misc.js (10906B)
// NOTE: Requires testharness.js
// http://www.w3.org/2008/webapps/wiki/Harness

// Miscellaneous TextEncoder / TextDecoder tests. Everything lives inside an
// IIFE so the shared fixture below does not add a global binding.
(function () {
  // Encoding names used by two tests: "Supersets of ASCII decode ASCII
  // correctly" uses the list as-is, and "Non-UTF-8 encodings supported only
  // for decode, not encode" uses it plus the two UTF-16 variants.
  var asciiCompatibleEncodings = [
    "utf-8",
    "ibm866",
    "iso-8859-2",
    "iso-8859-3",
    "iso-8859-4",
    "iso-8859-5",
    "iso-8859-6",
    "iso-8859-7",
    "iso-8859-8",
    "iso-8859-8-i",
    "iso-8859-10",
    "iso-8859-13",
    "iso-8859-14",
    "iso-8859-15",
    "iso-8859-16",
    "koi8-r",
    "koi8-u",
    "macintosh",
    "windows-874",
    "windows-1250",
    "windows-1251",
    "windows-1252",
    "windows-1253",
    "windows-1254",
    "windows-1255",
    "windows-1256",
    "windows-1257",
    "windows-1258",
    "x-mac-cyrillic",
    "gbk",
    "gb18030",
    "big5",
    "euc-jp",
    "iso-2022-jp",
    "shift_jis",
    "euc-kr",
    "x-user-defined",
  ];

  // Strings containing unpaired surrogates are round-tripped through
  // encode + UTF-8 decode and compared against the listed expected output.
  test(function () {
    var badStrings = [
      { input: "\ud800", expected: "\ufffd" }, // Surrogate half
      { input: "\udc00", expected: "\ufffd" }, // Surrogate half
      { input: "abc\ud800def", expected: "abc\ufffddef" }, // Surrogate half
      { input: "abc\udc00def", expected: "abc\ufffddef" }, // Surrogate half
      { input: "\udc00\ud800", expected: "\ufffd\ufffd" }, // Wrong order
    ];

    badStrings.forEach(function (t) {
      var encoded = new TextEncoder().encode(t.input);
      var decoded = new TextDecoder("utf-8").decode(encoded);
      // Actual value first, expected second, like every other assertion in
      // this file; the reversed order mislabels values in failure output.
      assert_equals(decoded, t.expected);
    });
  }, "bad data");

  // Each byte sequence below must make a decoder created with
  // { fatal: true } throw a TypeError.
  test(function () {
    var bad = [
      { encoding: "utf-8", input: [0xc0] }, // ends early
      { encoding: "utf-8", input: [0xc0, 0x00] }, // invalid trail
      { encoding: "utf-8", input: [0xc0, 0xc0] }, // invalid trail
      { encoding: "utf-8", input: [0xe0] }, // ends early
      { encoding: "utf-8", input: [0xe0, 0x00] }, // invalid trail
      { encoding: "utf-8", input: [0xe0, 0xc0] }, // invalid trail
      { encoding: "utf-8", input: [0xe0, 0x80, 0x00] }, // invalid trail
      { encoding: "utf-8", input: [0xe0, 0x80, 0xc0] }, // invalid trail
      { encoding: "utf-8", input: [0xfc, 0x80, 0x80, 0x80, 0x80, 0x80] }, // > 0x10FFFF
      { encoding: "utf-16le", input: [0x00] }, // truncated code unit
      { encoding: "utf-16le", input: [0x00, 0xd8] }, // surrogate half
      { encoding: "utf-16le", input: [0x00, 0xd8, 0x00, 0x00] }, // surrogate half
      { encoding: "utf-16le", input: [0x00, 0xdc, 0x00, 0x00] }, // trail surrogate
      { encoding: "utf-16le", input: [0x00, 0xdc, 0x00, 0xd8] }, // swapped surrogates
      // TODO: Single byte encoding cases
    ];

    bad.forEach(function (t) {
      assert_throws(
        { name: "TypeError" },
        function () {
          new TextDecoder(t.encoding, { fatal: true }).decode(
            new Uint8Array(t.input)
          );
        },
        // Identify the failing case; the loop is otherwise anonymous.
        t.encoding + " " + JSON.stringify(t.input)
      );
    });
  }, "fatal flag");

  // A label must resolve to the same encoding name in lower and upper case.
  test(function () {
    var encodings = [
      { label: "utf-8", encoding: "utf-8" },
      { label: "utf-16", encoding: "utf-16le" },
      { label: "utf-16le", encoding: "utf-16le" },
      { label: "utf-16be", encoding: "utf-16be" },
      { label: "ascii", encoding: "windows-1252" },
      { label: "iso-8859-1", encoding: "windows-1252" },
    ];

    // The parameter is deliberately not called `test`, which would shadow
    // the harness function of the same name inside this callback.
    encodings.forEach(function (t) {
      assert_equals(
        new TextDecoder(t.label.toLowerCase()).encoding,
        t.encoding
      );
      assert_equals(
        new TextDecoder(t.label.toUpperCase()).encoding,
        t.encoding
      );
    });
  }, "Encoding names are case insensitive");

  // Decodes the same text with no BOM, a matching BOM, a matching BOM split
  // across two decode() calls, and a BOM belonging to a different encoding.
  test(function () {
    var utf8_bom = [0xef, 0xbb, 0xbf];
    var utf8 = [
      0x7a, 0xc2, 0xa2, 0xe6, 0xb0, 0xb4, 0xf0, 0x9d, 0x84, 0x9e, 0xf4, 0x8f,
      0xbf, 0xbd,
    ];

    var utf16le_bom = [0xff, 0xfe];
    var utf16le = [
      0x7a, 0x00, 0xa2, 0x00, 0x34, 0x6c, 0x34, 0xd8, 0x1e, 0xdd, 0xff, 0xdb,
      0xfd, 0xdf,
    ];

    var utf16be_bom = [0xfe, 0xff];
    var utf16be = [
      0x00, 0x7a, 0x00, 0xa2, 0x6c, 0x34, 0xd8, 0x34, 0xdd, 0x1e, 0xdb, 0xff,
      0xdf, 0xfd,
    ];

    var string = "z\xA2\u6C34\uD834\uDD1E\uDBFF\uDFFD"; // z, cent, CJK water, G-Clef, Private-use character

    // missing BOMs
    assert_equals(
      new TextDecoder("utf-8").decode(new Uint8Array(utf8)),
      string
    );
    assert_equals(
      new TextDecoder("utf-16le").decode(new Uint8Array(utf16le)),
      string
    );
    assert_equals(
      new TextDecoder("utf-16be").decode(new Uint8Array(utf16be)),
      string
    );

    // matching BOMs
    assert_equals(
      new TextDecoder("utf-8").decode(new Uint8Array(utf8_bom.concat(utf8))),
      string
    );
    assert_equals(
      new TextDecoder("utf-16le").decode(
        new Uint8Array(utf16le_bom.concat(utf16le))
      ),
      string
    );
    assert_equals(
      new TextDecoder("utf-16be").decode(
        new Uint8Array(utf16be_bom.concat(utf16be))
      ),
      string
    );

    // matching BOMs split
    // Each decoder is fed the first part of the BOM with { stream: true } and
    // the remainder in a second, non-streaming call.
    var decoder8 = new TextDecoder("utf-8");
    assert_equals(
      decoder8.decode(new Uint8Array(utf8_bom.slice(0, 1)), { stream: true }),
      ""
    );
    assert_equals(
      decoder8.decode(new Uint8Array(utf8_bom.slice(1).concat(utf8))),
      string
    );
    assert_equals(
      decoder8.decode(new Uint8Array(utf8_bom.slice(0, 2)), { stream: true }),
      ""
    );
    assert_equals(
      decoder8.decode(new Uint8Array(utf8_bom.slice(2).concat(utf8))),
      string
    );
    var decoder16le = new TextDecoder("utf-16le");
    assert_equals(
      decoder16le.decode(new Uint8Array(utf16le_bom.slice(0, 1)), {
        stream: true,
      }),
      ""
    );
    assert_equals(
      decoder16le.decode(new Uint8Array(utf16le_bom.slice(1).concat(utf16le))),
      string
    );
    var decoder16be = new TextDecoder("utf-16be");
    assert_equals(
      decoder16be.decode(new Uint8Array(utf16be_bom.slice(0, 1)), {
        stream: true,
      }),
      ""
    );
    assert_equals(
      decoder16be.decode(new Uint8Array(utf16be_bom.slice(1).concat(utf16be))),
      string
    );

    // mismatching BOMs
    assert_not_equals(
      new TextDecoder("utf-8").decode(new Uint8Array(utf16le_bom.concat(utf8))),
      string
    );
    assert_not_equals(
      new TextDecoder("utf-8").decode(new Uint8Array(utf16be_bom.concat(utf8))),
      string
    );
    assert_not_equals(
      new TextDecoder("utf-16le").decode(
        new Uint8Array(utf8_bom.concat(utf16le))
      ),
      string
    );
    assert_not_equals(
      new TextDecoder("utf-16le").decode(
        new Uint8Array(utf16be_bom.concat(utf16le))
      ),
      string
    );
    assert_not_equals(
      new TextDecoder("utf-16be").decode(
        new Uint8Array(utf8_bom.concat(utf16be))
      ),
      string
    );
    assert_not_equals(
      new TextDecoder("utf-16be").decode(
        new Uint8Array(utf16le_bom.concat(utf16be))
      ),
      string
    );
  }, "Byte-order marks");

  // The `encoding` attribute reports a lower-case name, which may differ from
  // the label passed to the constructor.
  test(function () {
    assert_equals(new TextDecoder("utf-8").encoding, "utf-8"); // canonical case
    assert_equals(new TextDecoder("UTF-16").encoding, "utf-16le"); // canonical case and name
    assert_equals(new TextDecoder("UTF-16BE").encoding, "utf-16be"); // canonical case and name
    assert_equals(new TextDecoder("iso8859-1").encoding, "windows-1252"); // canonical case and name
    assert_equals(new TextDecoder("iso-8859-1").encoding, "windows-1252"); // canonical case and name
  }, "Encoding names");

  // Feeds the encoded text to a streaming decoder in chunks of 1 to 5 bytes
  // and checks that the concatenated output equals the original string.
  test(function () {
    // Fixtures are identical for every encoding, so build them once.
    var string =
      "\x00123ABCabc\x80\xFF\u0100\u1000\uFFFD\uD800\uDC00\uDBFF\uDFFF";
    var octets = {
      "utf-16le": [
        0x00, 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x41, 0x00, 0x42, 0x00,
        0x43, 0x00, 0x61, 0x00, 0x62, 0x00, 0x63, 0x00, 0x80, 0x00, 0xff, 0x00,
        0x00, 0x01, 0x00, 0x10, 0xfd, 0xff, 0x00, 0xd8, 0x00, 0xdc, 0xff, 0xdb,
        0xff, 0xdf,
      ],
      "utf-16be": [
        0x00, 0x00, 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x41, 0x00, 0x42,
        0x00, 0x43, 0x00, 0x61, 0x00, 0x62, 0x00, 0x63, 0x00, 0x80, 0x00, 0xff,
        0x01, 0x00, 0x10, 0x00, 0xff, 0xfd, 0xd8, 0x00, 0xdc, 0x00, 0xdb, 0xff,
        0xdf, 0xff,
      ],
    };

    ["utf-8", "utf-16le", "utf-16be"].forEach(function (encoding) {
      // UTF-16 input comes from the tables above; for "utf-8" there is no
      // table entry, so the bytes are produced by TextEncoder.
      var encoded = octets[encoding] || new TextEncoder().encode(string);

      for (var len = 1; len <= 5; ++len) {
        var out = "";
        var decoder = new TextDecoder(encoding);
        for (var i = 0; i < encoded.length; i += len) {
          // Copy the next chunk of at most `len` bytes.
          var sub = [];
          for (var j = i; j < encoded.length && j < i + len; ++j) {
            sub.push(encoded[j]);
          }
          out += decoder.decode(new Uint8Array(sub), { stream: true });
        }
        // Final call without input flushes the decoder.
        out += decoder.decode();
        assert_equals(out, string, "streaming decode " + encoding);
      }
    });
  }, "Streaming Decode");

  test(function () {
    var jis = [0x82, 0xc9, 0x82, 0xd9, 0x82, 0xf1];
    var expected = "\u306B\u307B\u3093"; // Nihon
    assert_equals(
      new TextDecoder("shift_jis").decode(new Uint8Array(jis)),
      expected
    );
  }, "Shift_JIS Decode");

  // Every listed encoding must decode the bytes for code points 0x00-0x7F
  // back to the same characters.
  test(function () {
    asciiCompatibleEncodings.forEach(function (encoding) {
      var string = "";
      for (var i = 0; i < 128; ++i) {
        // Encodings that have escape codes in 0x00-0x7F
        if (
          encoding === "iso-2022-jp" &&
          (i === 0x1b || i === 0xe || i === 0xf)
        ) {
          continue;
        }

        string += String.fromCharCode(i);
      }
      var ascii_encoded = new TextEncoder().encode(string);
      assert_equals(
        new TextDecoder(encoding).decode(ascii_encoded),
        string,
        encoding
      );
    });
  }, "Supersets of ASCII decode ASCII correctly");

  // For each encoding: a fatal decoder must throw on a truncated sequence,
  // and the non-fatal decoder must return rather than loop forever.
  test(function () {
    assert_throws({ name: "TypeError" }, function () {
      new TextDecoder("utf-8", { fatal: true }).decode(new Uint8Array([0xff]));
    });
    // This should not hang:
    new TextDecoder("utf-8").decode(new Uint8Array([0xff]));

    assert_throws({ name: "TypeError" }, function () {
      new TextDecoder("utf-16", { fatal: true }).decode(new Uint8Array([0x00]));
    });
    // This should not hang:
    new TextDecoder("utf-16").decode(new Uint8Array([0x00]));

    assert_throws({ name: "TypeError" }, function () {
      new TextDecoder("utf-16be", { fatal: true }).decode(
        new Uint8Array([0x00])
      );
    });
    // This should not hang:
    new TextDecoder("utf-16be").decode(new Uint8Array([0x00]));
  }, "Non-fatal errors at EOF");

  // TextDecoder must report the requested encoding, while TextEncoder must
  // report "utf-8" regardless of the argument it is constructed with.
  test(function () {
    var encodings = asciiCompatibleEncodings.concat(["utf-16le", "utf-16be"]);

    encodings.forEach(function (encoding) {
      assert_equals(new TextDecoder(encoding).encoding, encoding);
      assert_equals(new TextEncoder(encoding).encoding, "utf-8");
    });
  }, "Non-UTF-8 encodings supported only for decode, not encode");
})();