tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

send-file-form-helper.js (12001B)


      1 'use strict';
      2 
      3 // See /FileAPI/file/resources/echo-content-escaped.py
      4 function escapeString(string) {
      5  return string.replace(/\\/g, "\\\\").replace(
      6    /[^\x20-\x7E]/g,
      7    (x) => {
      8      let hex = x.charCodeAt(0).toString(16);
      9      if (hex.length < 2) hex = "0" + hex;
     10      return `\\x${hex}`;
     11    },
     12  ).replace(/\\x0d\\x0a/g, "\r\n");
     13 }
     14 
     15 // Rationale for this particular test character sequence, which is
     16 // used in filenames and also in file contents:
     17 //
     18 // - ABC~ ensures the string starts with something we can read to
     19 //   ensure it is from the correct source; ~ is used because even
     20 //   some 1-byte otherwise-ASCII-like parts of ISO-2022-JP
     21 //   interpret it differently.
     22 // - ‾¥ are inside a single-byte range of ISO-2022-JP and help
     23 //   diagnose problems due to filesystem encoding or locale
     24 // - ≈ is inside IBM437 and helps diagnose problems due to filesystem
     25 //   encoding or locale
     26 // - ¤ is inside Latin-1 and helps diagnose problems due to
     27 //   filesystem encoding or locale; it is also the "simplest" case
     28 //   needing substitution in ISO-2022-JP
     29 // - ･ (U+FF65) is inside a single-byte range of ISO-2022-JP in some variants
     30 //   and helps diagnose problems due to filesystem encoding or locale;
     31 //   on the web it is distinct when decoding but unified when encoding
     32 // - ・ is inside a double-byte range of ISO-2022-JP and helps
     33 //   diagnose problems due to filesystem encoding or locale
     34 // - • is inside Windows-1252 and helps diagnose problems due to
     35 //   filesystem encoding or locale and also ensures these aren't
     36 //   accidentally turned into e.g. control codes
     37 // - ∙ is inside IBM437 and helps diagnose problems due to filesystem
     38 //   encoding or locale
     39 // - · is inside Latin-1 and helps diagnose problems due to
     40 //   filesystem encoding or locale and also ensures HTML named
     41 //   character references (e.g. &middot;) are not used
     42 // - ☼ is inside IBM437 shadowing C0 and helps diagnose problems due to
     43 //   filesystem encoding or locale and also ensures these aren't
     44 //   accidentally turned into e.g. control codes
     45 // - ★ is inside ISO-2022-JP on a non-Kanji page and makes correct
     46 //   output easier to spot
     47 // - 星 is inside ISO-2022-JP on a Kanji page and makes correct
     48 //   output easier to spot
     49 // - 🌟 is outside the BMP and makes incorrect surrogate pair
     50 //   substitution detectable and ensures substitutions work
     51 //   correctly immediately after Kanji 2-byte ISO-2022-JP
     52 // - 星 repeated here ensures the correct codec state is used
     53 //   after a non-BMP substitution
     54 // - ★ repeated here also makes correct output easier to spot
     55 // - ☼ is inside IBM437 shadowing C0 and helps diagnose problems due to
     56 //   filesystem encoding or locale and also ensures these aren't
     57 //   accidentally turned into e.g. control codes and also ensures
     58 //   substitutions work correctly immediately after non-Kanji
     59 //   2-byte ISO-2022-JP
     60 // - · is inside Latin-1 and helps diagnose problems due to
     61 //   filesystem encoding or locale and also ensures HTML named
     62 //   character references (e.g. &middot;) are not used
     63 // - ∙ is inside IBM437 and helps diagnose problems due to filesystem
     64 //   encoding or locale
     65 // - • is inside Windows-1252 and again helps diagnose problems
     66 //   due to filesystem encoding or locale
     67 // - ・ is inside a double-byte range of ISO-2022-JP and helps
     68 //   diagnose problems due to filesystem encoding or locale
     69 // - ･ (U+FF65) is inside a single-byte range of ISO-2022-JP in some variants
     70 //   and helps diagnose problems due to filesystem encoding or locale;
     71 //   on the web it is distinct when decoding but unified when encoding
     72 // - ¤ is inside Latin-1 and helps diagnose problems due to
     73 //   filesystem encoding or locale; again it is a "simple"
     74 //   substitution case
     75 // - ≈ is inside IBM437 and helps diagnose problems due to filesystem
     76 //   encoding or locale
     77 // - ¥‾ are inside a single-byte range of ISO-2022-JP and help
     78 //   diagnose problems due to filesystem encoding or locale
     79 // - ~XYZ ensures earlier errors don't lead to misencoding of
     80 //   simple ASCII
     81 //
     82 // Overall the near-symmetry makes common I18N mistakes like
     83 // off-by-1-after-non-BMP easier to spot. All the characters
     84 // are also allowed in Windows Unicode filenames.
     85 const kTestChars = 'ABC~‾¥≈¤・・•∙·☼★星🌟星★☼·∙•・・¤≈¥‾~XYZ';
     86 
     87 // The kTestFallback* strings represent the expected byte sequence from
     88 // encoding kTestChars with the given encoding with "html" replacement
     89 // mode, isomorphic-decoded. That means, characters that can't be
     90 // encoded in that encoding get HTML-escaped, but no further
     91 // `escapeString`-like escapes are needed.
     92 const kTestFallbackUtf8 = (
     93  "ABC~\xE2\x80\xBE\xC2\xA5\xE2\x89\x88\xC2\xA4\xEF\xBD\xA5\xE3\x83\xBB\xE2" +
     94    "\x80\xA2\xE2\x88\x99\xC2\xB7\xE2\x98\xBC\xE2\x98\x85\xE6\x98\x9F\xF0\x9F" +
     95    "\x8C\x9F\xE6\x98\x9F\xE2\x98\x85\xE2\x98\xBC\xC2\xB7\xE2\x88\x99\xE2\x80" +
     96    "\xA2\xE3\x83\xBB\xEF\xBD\xA5\xC2\xA4\xE2\x89\x88\xC2\xA5\xE2\x80\xBE~XYZ"
     97 );
     98 
     99 const kTestFallbackIso2022jp = (
    100  ("ABC~\x1B(J~\\≈¤\x1B$B!&!&\x1B(B•∙·☼\x1B$B!z@1\x1B(B🌟" +
    101    "\x1B$B@1!z\x1B(B☼·∙•\x1B$B!&!&\x1B(B¤≈\x1B(J\\~\x1B(B~XYZ")
    102    .replace(/[^\0-\x7F]/gu, (x) => `&#${x.codePointAt(0)};`)
    103 );
    104 
    105 const kTestFallbackWindows1252 = (
    106  "ABC~‾\xA5≈\xA4・・\x95∙\xB7☼★星🌟星★☼\xB7∙\x95・・\xA4≈\xA5‾~XYZ".replace(
    107    /[^\0-\xFF]/gu,
    108    (x) => `&#${x.codePointAt(0)};`,
    109  )
    110 );
    111 
    112 const kTestFallbackXUserDefined = kTestChars.replace(
    113  /[^\0-\x7F]/gu,
    114  (x) => `&#${x.codePointAt(0)};`,
    115 );
    116 
    117 // formPostFileUploadTest - verifies multipart upload structure and
    118 // numeric character reference replacement for filenames, field names,
    119 // and field values using form submission.
    120 //
    121 // Uses /FileAPI/file/resources/echo-content-escaped.py to echo the
    122 // upload POST with controls and non-ASCII bytes escaped. This is done
    123 // because navigations whose response body contains [\0\b\v] may get
    124 // treated as a download, which is not what we want. Use the
    125 // `escapeString` function to replicate that kind of escape (note that
    126 // it takes an isomorphic-decoded string, not a byte sequence).
    127 //
    128 // Fields in the parameter object:
    129 //
    130 // - fileNameSource: purely explanatory and gives a clue about which
    131 //   character encoding is the source for the non-7-bit-ASCII parts of
    132 //   the fileBaseName, or Unicode if no smaller-than-Unicode source
    133 //   contains all the characters. Used in the test name.
    134 // - fileBaseName: the not-necessarily-just-7-bit-ASCII file basename
    135 //   used for the constructed test file. Used in the test name.
    136 // - formEncoding: the acceptCharset of the form used to submit the
    137 //   test file. Used in the test name.
    138 // - expectedEncodedBaseName: the expected formEncoding-encoded
    139 //   version of fileBaseName, isomorphic-decoded. That means, characters
    140 //   that can't be encoded in that encoding get HTML-escaped, but no
    141 //   further `escapeString`-like escapes are needed.
    142 const formPostFileUploadTest = ({
    143  fileNameSource,
    144  fileBaseName,
    145  formEncoding,
    146  expectedEncodedBaseName,
    147 }) => {
    148  promise_test(async testCase => {
    149 
    150    if (document.readyState !== 'complete') {
    151      await new Promise(resolve => addEventListener('load', resolve));
    152    }
    153 
    154    const formTargetFrame = Object.assign(document.createElement('iframe'), {
    155      name: 'formtargetframe',
    156    });
    157    document.body.append(formTargetFrame);
    158    testCase.add_cleanup(() => {
    159      document.body.removeChild(formTargetFrame);
    160    });
    161 
    162    const form = Object.assign(document.createElement('form'), {
    163      acceptCharset: formEncoding,
    164      action: '/FileAPI/file/resources/echo-content-escaped.py',
    165      method: 'POST',
    166      enctype: 'multipart/form-data',
    167      target: formTargetFrame.name,
    168    });
    169    document.body.append(form);
    170    testCase.add_cleanup(() => {
    171      document.body.removeChild(form);
    172    });
    173 
    174    // Used to verify that the browser agrees with the test about
    175    // which form charset is used.
    176    form.append(Object.assign(document.createElement('input'), {
    177      type: 'hidden',
    178      name: '_charset_',
    179    }));
    180 
    181    // Used to verify that the browser agrees with the test about
    182    // field value replacement and encoding independently of file system
    183    // idiosyncracies.
    184    form.append(Object.assign(document.createElement('input'), {
    185      type: 'hidden',
    186      name: 'filename',
    187      value: fileBaseName,
    188    }));
    189 
    190    // Same, but with name and value reversed to ensure field names
    191    // get the same treatment.
    192    form.append(Object.assign(document.createElement('input'), {
    193      type: 'hidden',
    194      name: fileBaseName,
    195      value: 'filename',
    196    }));
    197 
    198    const fileInput = Object.assign(document.createElement('input'), {
    199      type: 'file',
    200      name: 'file',
    201    });
    202    form.append(fileInput);
    203 
    204    // Removes c:\fakepath\ or other pseudofolder and returns just the
    205    // final component of filePath; allows both / and \ as segment
    206    // delimiters.
    207    const baseNameOfFilePath = filePath => filePath.split(/[\/\\]/).pop();
    208    await new Promise(resolve => {
    209      const dataTransfer = new DataTransfer;
    210      dataTransfer.items.add(
    211          new File([kTestChars], fileBaseName, {type: 'text/plain'}));
    212      fileInput.files = dataTransfer.files;
    213      // For historical reasons .value will be prefixed with
    214      // c:\fakepath\, but the basename should match the file name
    215      // exposed through the newer .files[0].name API. This check
    216      // verifies that assumption.
    217      assert_equals(
    218          baseNameOfFilePath(fileInput.files[0].name),
    219          baseNameOfFilePath(fileInput.value),
    220          `The basename of the field's value should match its files[0].name`);
    221      form.submit();
    222      formTargetFrame.onload = resolve;
    223    });
    224 
    225    const formDataText = formTargetFrame.contentDocument.body.textContent;
    226    const formDataLines = formDataText.split('\n');
    227    if (formDataLines.length && !formDataLines[formDataLines.length - 1]) {
    228      --formDataLines.length;
    229    }
    230    assert_greater_than(
    231        formDataLines.length,
    232        2,
    233        `${fileBaseName}: multipart form data must have at least 3 lines: ${
    234             JSON.stringify(formDataText)
    235           }`);
    236    const boundary = formDataLines[0];
    237    assert_equals(
    238        formDataLines[formDataLines.length - 1],
    239        boundary + '--',
    240        `${fileBaseName}: multipart form data must end with ${boundary}--: ${
    241             JSON.stringify(formDataText)
    242           }`);
    243 
    244    const asValue = expectedEncodedBaseName.replace(/\r\n?|\n/g, "\r\n");
    245    const asName = asValue.replace(/[\r\n"]/g, encodeURIComponent);
    246    const asFilename = expectedEncodedBaseName.replace(/[\r\n"]/g, encodeURIComponent);
    247 
    248    // The response body from echo-content-escaped.py has controls and non-ASCII
    249    // bytes escaped, so any caller-provided field that might contain such bytes
    250    // must be passed to `escapeString`, after any other expected
    251    // transformations.
    252    const expectedText = [
    253      boundary,
    254      'Content-Disposition: form-data; name="_charset_"',
    255      '',
    256      formEncoding,
    257      boundary,
    258      'Content-Disposition: form-data; name="filename"',
    259      '',
    260      // Unlike for names and filenames, multipart/form-data values don't escape
    261      // \r\n linebreaks, and when they're read from an iframe they become \n.
    262      escapeString(asValue).replace(/\r\n/g, "\n"),
    263      boundary,
    264      `Content-Disposition: form-data; name="${escapeString(asName)}"`,
    265      '',
    266      'filename',
    267      boundary,
    268      `Content-Disposition: form-data; name="file"; ` +
    269          `filename="${escapeString(asFilename)}"`,
    270      'Content-Type: text/plain',
    271      '',
    272      escapeString(kTestFallbackUtf8),
    273      boundary + '--',
    274    ].join('\n');
    275 
    276    assert_true(
    277        formDataText.startsWith(expectedText),
    278        `Unexpected multipart-shaped form data received:\n${
    279             formDataText
    280           }\nExpected:\n${expectedText}`);
    281  }, `Upload ${fileBaseName} (${fileNameSource}) in ${formEncoding} form`);
    282 };