[ tor-browser ].git.dasho

test_misc.js (10906B)
      1 // NOTE: Requires testharness.js
      2 // http://www.w3.org/2008/webapps/wiki/Harness
      3 
      4 test(function () {
      5  var badStrings = [
      6    { input: "\ud800", expected: "\ufffd" }, // Surrogate half
      7    { input: "\udc00", expected: "\ufffd" }, // Surrogate half
      8    { input: "abc\ud800def", expected: "abc\ufffddef" }, // Surrogate half
      9    { input: "abc\udc00def", expected: "abc\ufffddef" }, // Surrogate half
     10    { input: "\udc00\ud800", expected: "\ufffd\ufffd" }, // Wrong order
     11  ];
     12 
     13  badStrings.forEach(function (t) {
     14    var encoded = new TextEncoder().encode(t.input);
     15    var decoded = new TextDecoder("utf-8").decode(encoded);
     16    assert_equals(t.expected, decoded);
     17  });
     18 }, "bad data");
     19 
     20 test(function () {
     21  var bad = [
     22    { encoding: "utf-8", input: [0xc0] }, // ends early
     23    { encoding: "utf-8", input: [0xc0, 0x00] }, // invalid trail
     24    { encoding: "utf-8", input: [0xc0, 0xc0] }, // invalid trail
     25    { encoding: "utf-8", input: [0xe0] }, // ends early
     26    { encoding: "utf-8", input: [0xe0, 0x00] }, // invalid trail
     27    { encoding: "utf-8", input: [0xe0, 0xc0] }, // invalid trail
     28    { encoding: "utf-8", input: [0xe0, 0x80, 0x00] }, // invalid trail
     29    { encoding: "utf-8", input: [0xe0, 0x80, 0xc0] }, // invalid trail
     30    { encoding: "utf-8", input: [0xfc, 0x80, 0x80, 0x80, 0x80, 0x80] }, // > 0x10FFFF
     31    { encoding: "utf-16le", input: [0x00] }, // truncated code unit
     32    { encoding: "utf-16le", input: [0x00, 0xd8] }, // surrogate half
     33    { encoding: "utf-16le", input: [0x00, 0xd8, 0x00, 0x00] }, // surrogate half
     34    { encoding: "utf-16le", input: [0x00, 0xdc, 0x00, 0x00] }, // trail surrogate
     35    { encoding: "utf-16le", input: [0x00, 0xdc, 0x00, 0xd8] }, // swapped surrogates
     36    // TODO: Single byte encoding cases
     37  ];
     38 
     39  bad.forEach(function (t) {
     40    assert_throws({ name: "TypeError" }, function () {
     41      new TextDecoder(t.encoding, { fatal: true }).decode(
     42        new Uint8Array(t.input)
     43      );
     44    });
     45  });
     46 }, "fatal flag");
     47 
     48 test(function () {
     49  var encodings = [
     50    { label: "utf-8", encoding: "utf-8" },
     51    { label: "utf-16", encoding: "utf-16le" },
     52    { label: "utf-16le", encoding: "utf-16le" },
     53    { label: "utf-16be", encoding: "utf-16be" },
     54    { label: "ascii", encoding: "windows-1252" },
     55    { label: "iso-8859-1", encoding: "windows-1252" },
     56  ];
     57 
     58  encodings.forEach(function (test) {
     59    assert_equals(
     60      new TextDecoder(test.label.toLowerCase()).encoding,
     61      test.encoding
     62    );
     63    assert_equals(
     64      new TextDecoder(test.label.toUpperCase()).encoding,
     65      test.encoding
     66    );
     67  });
     68 }, "Encoding names are case insensitive");
     69 
     70 test(function () {
     71  var utf8_bom = [0xef, 0xbb, 0xbf];
     72  var utf8 = [
     73    0x7a, 0xc2, 0xa2, 0xe6, 0xb0, 0xb4, 0xf0, 0x9d, 0x84, 0x9e, 0xf4, 0x8f,
     74    0xbf, 0xbd,
     75  ];
     76 
     77  var utf16le_bom = [0xff, 0xfe];
     78  var utf16le = [
     79    0x7a, 0x00, 0xa2, 0x00, 0x34, 0x6c, 0x34, 0xd8, 0x1e, 0xdd, 0xff, 0xdb,
     80    0xfd, 0xdf,
     81  ];
     82 
     83  var utf16be_bom = [0xfe, 0xff];
     84  var utf16be = [
     85    0x00, 0x7a, 0x00, 0xa2, 0x6c, 0x34, 0xd8, 0x34, 0xdd, 0x1e, 0xdb, 0xff,
     86    0xdf, 0xfd,
     87  ];
     88 
     89  var string = "z\xA2\u6C34\uD834\uDD1E\uDBFF\uDFFD"; // z, cent, CJK water, G-Clef, Private-use character
     90 
     91  // missing BOMs
     92  assert_equals(new TextDecoder("utf-8").decode(new Uint8Array(utf8)), string);
     93  assert_equals(
     94    new TextDecoder("utf-16le").decode(new Uint8Array(utf16le)),
     95    string
     96  );
     97  assert_equals(
     98    new TextDecoder("utf-16be").decode(new Uint8Array(utf16be)),
     99    string
    100  );
    101 
    102  // matching BOMs
    103  assert_equals(
    104    new TextDecoder("utf-8").decode(new Uint8Array(utf8_bom.concat(utf8))),
    105    string
    106  );
    107  assert_equals(
    108    new TextDecoder("utf-16le").decode(
    109      new Uint8Array(utf16le_bom.concat(utf16le))
    110    ),
    111    string
    112  );
    113  assert_equals(
    114    new TextDecoder("utf-16be").decode(
    115      new Uint8Array(utf16be_bom.concat(utf16be))
    116    ),
    117    string
    118  );
    119 
    120  // matching BOMs split
    121  var decoder8 = new TextDecoder("utf-8");
    122  assert_equals(
    123    decoder8.decode(new Uint8Array(utf8_bom.slice(0, 1)), { stream: true }),
    124    ""
    125  );
    126  assert_equals(
    127    decoder8.decode(new Uint8Array(utf8_bom.slice(1).concat(utf8))),
    128    string
    129  );
    130  assert_equals(
    131    decoder8.decode(new Uint8Array(utf8_bom.slice(0, 2)), { stream: true }),
    132    ""
    133  );
    134  assert_equals(
    135    decoder8.decode(new Uint8Array(utf8_bom.slice(2).concat(utf8))),
    136    string
    137  );
    138  var decoder16le = new TextDecoder("utf-16le");
    139  assert_equals(
    140    decoder16le.decode(new Uint8Array(utf16le_bom.slice(0, 1)), {
    141      stream: true,
    142    }),
    143    ""
    144  );
    145  assert_equals(
    146    decoder16le.decode(new Uint8Array(utf16le_bom.slice(1).concat(utf16le))),
    147    string
    148  );
    149  var decoder16be = new TextDecoder("utf-16be");
    150  assert_equals(
    151    decoder16be.decode(new Uint8Array(utf16be_bom.slice(0, 1)), {
    152      stream: true,
    153    }),
    154    ""
    155  );
    156  assert_equals(
    157    decoder16be.decode(new Uint8Array(utf16be_bom.slice(1).concat(utf16be))),
    158    string
    159  );
    160 
    161  // mismatching BOMs
    162  assert_not_equals(
    163    new TextDecoder("utf-8").decode(new Uint8Array(utf16le_bom.concat(utf8))),
    164    string
    165  );
    166  assert_not_equals(
    167    new TextDecoder("utf-8").decode(new Uint8Array(utf16be_bom.concat(utf8))),
    168    string
    169  );
    170  assert_not_equals(
    171    new TextDecoder("utf-16le").decode(
    172      new Uint8Array(utf8_bom.concat(utf16le))
    173    ),
    174    string
    175  );
    176  assert_not_equals(
    177    new TextDecoder("utf-16le").decode(
    178      new Uint8Array(utf16be_bom.concat(utf16le))
    179    ),
    180    string
    181  );
    182  assert_not_equals(
    183    new TextDecoder("utf-16be").decode(
    184      new Uint8Array(utf8_bom.concat(utf16be))
    185    ),
    186    string
    187  );
    188  assert_not_equals(
    189    new TextDecoder("utf-16be").decode(
    190      new Uint8Array(utf16le_bom.concat(utf16be))
    191    ),
    192    string
    193  );
    194 }, "Byte-order marks");
    195 
    196 test(function () {
    197  assert_equals(new TextDecoder("utf-8").encoding, "utf-8"); // canonical case
    198  assert_equals(new TextDecoder("UTF-16").encoding, "utf-16le"); // canonical case and name
    199  assert_equals(new TextDecoder("UTF-16BE").encoding, "utf-16be"); // canonical case and name
    200  assert_equals(new TextDecoder("iso8859-1").encoding, "windows-1252"); // canonical case and name
    201  assert_equals(new TextDecoder("iso-8859-1").encoding, "windows-1252"); // canonical case and name
    202 }, "Encoding names");
    203 
    204 test(function () {
    205  ["utf-8", "utf-16le", "utf-16be"].forEach(function (encoding) {
    206    var string =
    207      "\x00123ABCabc\x80\xFF\u0100\u1000\uFFFD\uD800\uDC00\uDBFF\uDFFF";
    208    var octets = {
    209      "utf-16le": [
    210        0x00, 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x41, 0x00, 0x42, 0x00,
    211        0x43, 0x00, 0x61, 0x00, 0x62, 0x00, 0x63, 0x00, 0x80, 0x00, 0xff, 0x00,
    212        0x00, 0x01, 0x00, 0x10, 0xfd, 0xff, 0x00, 0xd8, 0x00, 0xdc, 0xff, 0xdb,
    213        0xff, 0xdf,
    214      ],
    215      "utf-16be": [
    216        0x00, 0x00, 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x41, 0x00, 0x42,
    217        0x00, 0x43, 0x00, 0x61, 0x00, 0x62, 0x00, 0x63, 0x00, 0x80, 0x00, 0xff,
    218        0x01, 0x00, 0x10, 0x00, 0xff, 0xfd, 0xd8, 0x00, 0xdc, 0x00, 0xdb, 0xff,
    219        0xdf, 0xff,
    220      ],
    221    };
    222    var encoded = octets[encoding] || new TextEncoder().encode(string);
    223 
    224    for (var len = 1; len <= 5; ++len) {
    225      var out = "",
    226        decoder = new TextDecoder(encoding);
    227      for (var i = 0; i < encoded.length; i += len) {
    228        var sub = [];
    229        for (var j = i; j < encoded.length && j < i + len; ++j) {
    230          sub.push(encoded[j]);
    231        }
    232        out += decoder.decode(new Uint8Array(sub), { stream: true });
    233      }
    234      out += decoder.decode();
    235      assert_equals(out, string, "streaming decode " + encoding);
    236    }
    237  });
    238 }, "Streaming Decode");
    239 
    240 test(function () {
    241  var jis = [0x82, 0xc9, 0x82, 0xd9, 0x82, 0xf1];
    242  var expected = "\u306B\u307B\u3093"; // Nihon
    243  assert_equals(
    244    new TextDecoder("shift_jis").decode(new Uint8Array(jis)),
    245    expected
    246  );
    247 }, "Shift_JIS Decode");
    248 
    249 test(function () {
    250  var encodings = [
    251    "utf-8",
    252    "ibm866",
    253    "iso-8859-2",
    254    "iso-8859-3",
    255    "iso-8859-4",
    256    "iso-8859-5",
    257    "iso-8859-6",
    258    "iso-8859-7",
    259    "iso-8859-8",
    260    "iso-8859-8-i",
    261    "iso-8859-10",
    262    "iso-8859-13",
    263    "iso-8859-14",
    264    "iso-8859-15",
    265    "iso-8859-16",
    266    "koi8-r",
    267    "koi8-u",
    268    "macintosh",
    269    "windows-874",
    270    "windows-1250",
    271    "windows-1251",
    272    "windows-1252",
    273    "windows-1253",
    274    "windows-1254",
    275    "windows-1255",
    276    "windows-1256",
    277    "windows-1257",
    278    "windows-1258",
    279    "x-mac-cyrillic",
    280    "gbk",
    281    "gb18030",
    282    "big5",
    283    "euc-jp",
    284    "iso-2022-jp",
    285    "shift_jis",
    286    "euc-kr",
    287    "x-user-defined",
    288  ];
    289 
    290  encodings.forEach(function (encoding) {
    291    var string = "",
    292      bytes = [];
    293    for (var i = 0; i < 128; ++i) {
    294      // Encodings that have escape codes in 0x00-0x7F
    295      if (
    296        encoding === "iso-2022-jp" &&
    297        (i === 0x1b || i === 0xe || i === 0xf)
    298      ) {
    299        continue;
    300      }
    301 
    302      string += String.fromCharCode(i);
    303      bytes.push(i);
    304    }
    305    var ascii_encoded = new TextEncoder().encode(string);
    306    assert_equals(
    307      new TextDecoder(encoding).decode(ascii_encoded),
    308      string,
    309      encoding
    310    );
    311    //assert_array_equals(new TextEncoder().encode(string), bytes, encoding);
    312  });
    313 }, "Supersets of ASCII decode ASCII correctly");
    314 
    315 test(function () {
    316  assert_throws({ name: "TypeError" }, function () {
    317    new TextDecoder("utf-8", { fatal: true }).decode(new Uint8Array([0xff]));
    318  });
    319  // This should not hang:
    320  new TextDecoder("utf-8").decode(new Uint8Array([0xff]));
    321 
    322  assert_throws({ name: "TypeError" }, function () {
    323    new TextDecoder("utf-16", { fatal: true }).decode(new Uint8Array([0x00]));
    324  });
    325  // This should not hang:
    326  new TextDecoder("utf-16").decode(new Uint8Array([0x00]));
    327 
    328  assert_throws({ name: "TypeError" }, function () {
    329    new TextDecoder("utf-16be", { fatal: true }).decode(new Uint8Array([0x00]));
    330  });
    331  // This should not hang:
    332  new TextDecoder("utf-16be").decode(new Uint8Array([0x00]));
    333 }, "Non-fatal errors at EOF");
    334 
    335 test(function () {
    336  var encodings = [
    337    "utf-8",
    338    "ibm866",
    339    "iso-8859-2",
    340    "iso-8859-3",
    341    "iso-8859-4",
    342    "iso-8859-5",
    343    "iso-8859-6",
    344    "iso-8859-7",
    345    "iso-8859-8",
    346    "iso-8859-8-i",
    347    "iso-8859-10",
    348    "iso-8859-13",
    349    "iso-8859-14",
    350    "iso-8859-15",
    351    "iso-8859-16",
    352    "koi8-r",
    353    "koi8-u",
    354    "macintosh",
    355    "windows-874",
    356    "windows-1250",
    357    "windows-1251",
    358    "windows-1252",
    359    "windows-1253",
    360    "windows-1254",
    361    "windows-1255",
    362    "windows-1256",
    363    "windows-1257",
    364    "windows-1258",
    365    "x-mac-cyrillic",
    366    "gbk",
    367    "gb18030",
    368    "big5",
    369    "euc-jp",
    370    "iso-2022-jp",
    371    "shift_jis",
    372    "euc-kr",
    373    "x-user-defined",
    374    "utf-16le",
    375    "utf-16be",
    376  ];
    377 
    378  encodings.forEach(function (encoding) {
    379    assert_equals(new TextDecoder(encoding).encoding, encoding);
    380    assert_equals(new TextEncoder(encoding).encoding, "utf-8");
    381  });
    382 }, "Non-UTF-8 encodings supported only for decode, not encode");
	tor-browser The Tor Browser
	git clone https://git.dasho.dev/tor-browser.git
	Log \| Files \| Refs \| README \| LICENSE