tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

encode-utf8.any.js (5298B)


      1 // META: global=window,worker
      2 // META: script=resources/readable-stream-from-array.js
      3 // META: script=resources/readable-stream-to-array.js
      4 
      5 'use strict';
      // BLUE HEART (U+1F499). It lies outside the Basic Multilingual Plane, i.e. a
      // JavaScript string has to represent it with two code units.
      const astralCharacter = '\u{1F499}';
      // The UTF-8 bytes of the astral character.
      const astralCharacterEncoded = [0xf0, 0x9f, 0x92, 0x99];

      // The two halves of the pair, taken by code-unit index. (Iterating the string
      // would walk it by code point and yield the whole character instead.)
      const leading = astralCharacter.charAt(0);
      const trailing = astralCharacter.charAt(1);

      // The UTF-8 bytes of U+FFFD REPLACEMENT CHARACTER.
      const replacementEncoded = [0xef, 0xbf, 0xbd];

      // A sample string with the astral character in the middle, and the bytes of
      // its UTF-8 encoding.
      const inputString = `I ${astralCharacter} streams`;
      const expectedOutputBytes = [
        0x49, 0x20,                                      // "I "
        ...astralCharacterEncoded,
        0x20, 0x73, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x73,  // " streams"
      ];
     16 
     17 // These tests assume that the implementation correctly classifies leading and
     18 // trailing surrogates and treats all the code units in each set equivalently.
     19 
     // Table of test cases. For each entry, `input` lists the string chunks written
     // to the encoder stream, `output` lists the byte chunks expected to come out
     // (one inner array per chunk), and `description` is the name of the test.
     const testCases = [
       // --- Well-formed input, including pairs split across chunk boundaries ---
       {
         description: 'encoding one string of UTF-8 should give one complete chunk',
         input: [inputString],
         output: [expectedOutputBytes],
       },
       {
         description: 'a character split between chunks should be correctly encoded',
         input: [leading, trailing],
         output: [astralCharacterEncoded],
       },
       {
         description:
             'a character following one split between chunks should be correctly ' +
             'encoded',
         input: [leading, trailing + astralCharacter],
         output: [[...astralCharacterEncoded, ...astralCharacterEncoded]],
       },
       {
         description:
             'two consecutive astral characters each split down the middle should ' +
             'be correctly reassembled',
         input: [leading, trailing + leading, trailing],
         output: [astralCharacterEncoded, astralCharacterEncoded],
       },

       // --- Unpaired surrogates ---
       {
         description:
             'two consecutive astral characters each split down the middle with ' +
             'an invalid surrogate in the middle should be correctly encoded',
         input: [leading, trailing + leading + leading, trailing],
         output: [
           [...astralCharacterEncoded, ...replacementEncoded],
           astralCharacterEncoded,
         ],
       },
       {
         description:
             'a stream ending in a leading surrogate should emit a replacement ' +
             'character as a final chunk',
         input: [leading],
         output: [replacementEncoded],
       },
       {
         description:
             'an unmatched surrogate at the end of a chunk followed by an astral ' +
             'character in the next chunk should be replaced with the replacement ' +
             'character at the start of the next output chunk',
         input: [leading, astralCharacter],
         output: [[...replacementEncoded, ...astralCharacterEncoded]],
       },
       {
         description:
             'an unmatched surrogate at the end of a chunk followed by an ascii ' +
             'character in the next chunk should be replaced with the replacement ' +
             'character at the start of the next output chunk',
         input: [leading, 'A'],
         output: [[...replacementEncoded, 0x41]],
       },
       {
         description:
             'an unmatched surrogate at the end of a chunk followed by a plane 1 ' +
             'character split into two chunks should result in the encoded plane 1 ' +
             'character appearing in the last output chunk',
         input: [leading, leading, trailing],
         output: [replacementEncoded, astralCharacterEncoded],
       },
       {
         description:
             'two leading chunks should result in two replacement characters',
         input: [leading, leading],
         output: [replacementEncoded, replacementEncoded],
       },
       {
         description:
             'a non-terminal unpaired leading surrogate should immediately be ' +
             'replaced',
         input: [leading + leading, trailing],
         output: [replacementEncoded, astralCharacterEncoded],
       },
       {
         description:
             'a terminal unpaired trailing surrogate should immediately be replaced',
         input: [trailing, astralCharacter],
         output: [replacementEncoded, astralCharacterEncoded],
       },

       // --- Empty chunks ---
       {
         description: 'a leading surrogate chunk should be carried past empty chunks',
         input: [leading, '', trailing],
         output: [astralCharacterEncoded],
       },
       {
         // The expected result here is a replacement character in the output, as
         // the `output` value shows.
         description:
             "a leading surrogate chunk should error when it is clear it didn't " +
             'form a pair',
         input: [leading, ''],
         output: [replacementEncoded],
       },
       {
         description: 'an empty string should result in no output chunk',
         input: [''],
         output: [],
       },
       {
         description: 'a leading empty chunk should be ignored',
         input: ['', inputString],
         output: [expectedOutputBytes],
       },
       {
         description: 'a trailing empty chunk should be ignored',
         input: [inputString, ''],
         output: [expectedOutputBytes],
       },

       // --- Single characters below U+0100 ---
       {
         description: 'a plain ASCII chunk should be converted',
         input: ['A'],
         output: [[0x41]],
       },
       {
         description:
             'characters in the ISO-8859-1 range should be encoded correctly',
         input: ['\xff'],
         output: [[0xc3, 0xbf]],
       },
     ];
    132 
    // Register one promise_test per entry of testCases: pipe the entry's input
    // chunks through a TextEncoderStream, collect everything it emits, and compare
    // the result chunk by chunk against the entry's expected output.
    testCases.forEach(({input, output, description}) => {
      promise_test(async () => {
        const encodedStream =
            readableStreamFromArray(input).pipeThrough(new TextEncoderStream());
        const actualChunks = await readableStreamToArray(encodedStream);

        // The chunk count is compared before the chunk contents.
        assert_equals(actualChunks.length, output.length,
                      'number of chunks should match');
        output.forEach((expectedChunk, i) => {
          assert_array_equals(actualChunks[i], expectedChunk,
                              `chunk ${i} should match`);
        });
      }, description);
    });