send-file-form-helper.js (12001B)
'use strict';

// See /FileAPI/file/resources/echo-content-escaped.py
/**
 * Escapes an isomorphic-decoded string (one character per byte) so that it
 * can be compared with the echoed response body.
 *
 * - Every backslash is doubled.
 * - Every character outside printable ASCII (0x20-0x7E) becomes `\xNN`,
 *   using lowercase hex padded to at least two digits.
 * - An escaped CR immediately followed by an escaped LF is turned back into
 *   a literal "\r\n", so CRLF line breaks survive unescaped.
 *
 * @param {string} string - isomorphic-decoded input, not a byte sequence.
 * @returns {string} the escaped string.
 */
function escapeString(string) {
  return string
    // Backslashes go first so the escapes added below are not re-escaped.
    .replace(/\\/g, "\\\\")
    .replace(
      /[^\x20-\x7E]/g,
      (x) => `\\x${x.charCodeAt(0).toString(16).padStart(2, "0")}`,
    )
    .replace(/\\x0d\\x0a/g, "\r\n");
}

// Rationale for this particular test character sequence, which is
// used in filenames and also in file contents:
//
// - ABC~ ensures the string starts with something we can read to
//   ensure it is from the correct source; ~ is used because even
//   some 1-byte otherwise-ASCII-like parts of ISO-2022-JP
//   interpret it differently.
// - ‾¥ are inside a single-byte range of ISO-2022-JP and help
//   diagnose problems due to filesystem encoding or locale
// - ≈ is inside IBM437 and helps diagnose problems due to filesystem
//   encoding or locale
// - ¤ is inside Latin-1 and helps diagnose problems due to
//   filesystem encoding or locale; it is also the "simplest" case
//   needing substitution in ISO-2022-JP
// - ･ (U+FF65) is inside a single-byte range of ISO-2022-JP in some
//   variants and helps diagnose problems due to filesystem encoding or
//   locale; on the web it is distinct when decoding but unified when
//   encoding
// - ・ (U+30FB) is inside a double-byte range of ISO-2022-JP and helps
//   diagnose problems due to filesystem encoding or locale
// - • is inside Windows-1252 and helps diagnose problems due to
//   filesystem encoding or locale and also ensures these aren't
//   accidentally turned into e.g. control codes
// - ∙ is inside IBM437 and helps diagnose problems due to filesystem
//   encoding or locale
// - · is inside Latin-1 and helps diagnose problems due to
//   filesystem encoding or locale and also ensures HTML named
//   character references (e.g. &middot;) are not used
// - ☼ is inside IBM437 shadowing C0 and helps diagnose problems due to
//   filesystem encoding or locale and also ensures these aren't
//   accidentally turned into e.g. control codes
// - ★ is inside ISO-2022-JP on a non-Kanji page and makes correct
//   output easier to spot
// - 星 is inside ISO-2022-JP on a Kanji page and makes correct
//   output easier to spot
// - 🌟 is outside the BMP and makes incorrect surrogate pair
//   substitution detectable and ensures substitutions work
//   correctly immediately after Kanji 2-byte ISO-2022-JP
// - 星 repeated here ensures the correct codec state is used
//   after a non-BMP substitution
// - ★ repeated here also makes correct output easier to spot
// - ☼ is inside IBM437 shadowing C0 and helps diagnose problems due to
//   filesystem encoding or locale and also ensures these aren't
//   accidentally turned into e.g. control codes and also ensures
//   substitutions work correctly immediately after non-Kanji
//   2-byte ISO-2022-JP
// - · is inside Latin-1 and helps diagnose problems due to
//   filesystem encoding or locale and also ensures HTML named
//   character references (e.g. &middot;) are not used
// - ∙ is inside IBM437 and helps diagnose problems due to filesystem
//   encoding or locale
// - • is inside Windows-1252 and again helps diagnose problems
//   due to filesystem encoding or locale
// - ・ (U+30FB) is inside a double-byte range of ISO-2022-JP and helps
//   diagnose problems due to filesystem encoding or locale
// - ･ (U+FF65) is inside a single-byte range of ISO-2022-JP in some
//   variants and helps diagnose problems due to filesystem encoding or
//   locale; on the web it is distinct when decoding but unified when
//   encoding
// - ¤ is inside Latin-1 and helps diagnose problems due to
//   filesystem encoding or locale; again it is a "simple"
//   substitution case
// - ≈ is inside IBM437 and helps diagnose problems due to filesystem
//   encoding or locale
// - ¥‾ are inside a single-byte range of ISO-2022-JP and help
//   diagnose problems due to filesystem encoding or locale
// - ~XYZ ensures earlier errors don't lead to misencoding of
//   simple ASCII
//
// Overall the near-symmetry makes common I18N mistakes like
// off-by-1-after-non-BMP easier to spot. All the characters
// are also allowed in Windows Unicode filenames.
//
// The halfwidth middle dot is spelled \uFF65 so that it cannot be
// confused with, or normalized into, the fullwidth U+30FB next to it.
const kTestChars = 'ABC~‾¥≈¤\uFF65・•∙·☼★星🌟星★☼·∙•・\uFF65¤≈¥‾~XYZ';

// The kTestFallback* strings represent the expected byte sequence from
// encoding kTestChars with the given encoding with "html" replacement
// mode, isomorphic-decoded. That means, characters that can't be
// encoded in that encoding get HTML-escaped, but no further
// `escapeString`-like escapes are needed.
// kTestChars as UTF-8 bytes; UTF-8 can represent every character, so no
// numeric character references are involved.
const kTestFallbackUtf8 = (
  "ABC~\xE2\x80\xBE\xC2\xA5\xE2\x89\x88\xC2\xA4\xEF\xBD\xA5\xE3\x83\xBB\xE2" +
  "\x80\xA2\xE2\x88\x99\xC2\xB7\xE2\x98\xBC\xE2\x98\x85\xE6\x98\x9F\xF0\x9F" +
  "\x8C\x9F\xE6\x98\x9F\xE2\x98\x85\xE2\x98\xBC\xC2\xB7\xE2\x88\x99\xE2\x80" +
  "\xA2\xE3\x83\xBB\xEF\xBD\xA5\xC2\xA4\xE2\x89\x88\xC2\xA5\xE2\x80\xBE~XYZ"
);

// kTestChars as ISO-2022-JP: escape sequences are spelled out, and every
// remaining non-ASCII character is turned into a numeric character
// reference.
const kTestFallbackIso2022jp = (
  ("ABC~\x1B(J~\\≈¤\x1B$B!&!&\x1B(B•∙·☼\x1B$B!z@1\x1B(B🌟" +
   "\x1B$B@1!z\x1B(B☼·∙•\x1B$B!&!&\x1B(B¤≈\x1B(J\\~\x1B(B~XYZ")
    .replace(/[^\0-\x7F]/gu, (x) => `&#${x.codePointAt(0)};`)
);

// kTestChars as Windows-1252: encodable characters are written as their
// byte values, everything above U+00FF becomes a numeric character
// reference. The halfwidth middle dot is spelled \uFF65 so that it cannot
// be confused with, or normalized into, the fullwidth U+30FB next to it.
const kTestFallbackWindows1252 = (
  "ABC~‾\xA5≈\xA4\uFF65・\x95∙\xB7☼★星🌟星★☼\xB7∙\x95・\uFF65\xA4≈\xA5‾~XYZ".replace(
    /[^\0-\xFF]/gu,
    (x) => `&#${x.codePointAt(0)};`,
  )
);

// kTestChars with every non-ASCII character replaced by a numeric
// character reference.
const kTestFallbackXUserDefined = kTestChars.replace(
  /[^\0-\x7F]/gu,
  (x) => `&#${x.codePointAt(0)};`,
);

// formPostFileUploadTest - verifies multipart upload structure and
// numeric character reference replacement for filenames, field names,
// and field values using form submission.
//
// Uses /FileAPI/file/resources/echo-content-escaped.py to echo the
// upload POST with controls and non-ASCII bytes escaped. This is done
// because navigations whose response body contains [\0\b\v] may get
// treated as a download, which is not what we want. Use the
// `escapeString` function to replicate that kind of escape (note that
// it takes an isomorphic-decoded string, not a byte sequence).
//
// Fields in the parameter object:
//
// - fileNameSource: purely explanatory and gives a clue about which
//   character encoding is the source for the non-7-bit-ASCII parts of
//   the fileBaseName, or Unicode if no smaller-than-Unicode source
//   contains all the characters. Used in the test name.
// - fileBaseName: the not-necessarily-just-7-bit-ASCII file basename
//   used for the constructed test file. Used in the test name.
// - formEncoding: the acceptCharset of the form used to submit the
//   test file. Used in the test name.
// - expectedEncodedBaseName: the expected formEncoding-encoded
//   version of fileBaseName, isomorphic-decoded. That means, characters
//   that can't be encoded in that encoding get HTML-escaped, but no
//   further `escapeString`-like escapes are needed.
const formPostFileUploadTest = ({
  fileNameSource,
  fileBaseName,
  formEncoding,
  expectedEncodedBaseName,
}) => {
  promise_test(async testCase => {

    // document.body is needed below, so wait for the page to finish loading.
    if (document.readyState !== 'complete') {
      await new Promise(
          resolve => addEventListener('load', resolve, {once: true}));
    }

    // The form is submitted into this frame so the echoed body can be read
    // back without navigating the test page itself.
    const formTargetFrame = Object.assign(document.createElement('iframe'), {
      name: 'formtargetframe',
    });
    document.body.append(formTargetFrame);
    testCase.add_cleanup(() => {
      formTargetFrame.remove();
    });

    const form = Object.assign(document.createElement('form'), {
      acceptCharset: formEncoding,
      action: '/FileAPI/file/resources/echo-content-escaped.py',
      method: 'POST',
      enctype: 'multipart/form-data',
      target: formTargetFrame.name,
    });
    document.body.append(form);
    testCase.add_cleanup(() => {
      form.remove();
    });

    // Used to verify that the browser agrees with the test about
    // which form charset is used.
    form.append(Object.assign(document.createElement('input'), {
      type: 'hidden',
      name: '_charset_',
    }));

    // Used to verify that the browser agrees with the test about
    // field value replacement and encoding independently of file system
    // idiosyncrasies.
    form.append(Object.assign(document.createElement('input'), {
      type: 'hidden',
      name: 'filename',
      value: fileBaseName,
    }));

    // Same, but with name and value reversed to ensure field names
    // get the same treatment.
    form.append(Object.assign(document.createElement('input'), {
      type: 'hidden',
      name: fileBaseName,
      value: 'filename',
    }));

    const fileInput = Object.assign(document.createElement('input'), {
      type: 'file',
      name: 'file',
    });
    form.append(fileInput);

    // Removes c:\fakepath\ or other pseudofolder and returns just the
    // final component of filePath; allows both / and \ as segment
    // delimiters.
    const baseNameOfFilePath = filePath => filePath.split(/[\/\\]/).pop();
    await new Promise(resolve => {
      // A DataTransfer is used to build the FileList assigned to the input.
      const dataTransfer = new DataTransfer();
      dataTransfer.items.add(
          new File([kTestChars], fileBaseName, {type: 'text/plain'}));
      fileInput.files = dataTransfer.files;
      // For historical reasons .value will be prefixed with
      // c:\fakepath\, but the basename should match the file name
      // exposed through the newer .files[0].name API. This check
      // verifies that assumption.
      assert_equals(
          baseNameOfFilePath(fileInput.files[0].name),
          baseNameOfFilePath(fileInput.value),
          `The basename of the field's value should match its files[0].name`);
      form.submit();
      formTargetFrame.onload = resolve;
    });

    const formDataText = formTargetFrame.contentDocument.body.textContent;
    const formDataLines = formDataText.split('\n');
    // Drop the empty string that split() yields after a trailing newline.
    if (formDataLines.length > 0 &&
        formDataLines[formDataLines.length - 1] === '') {
      formDataLines.pop();
    }
    assert_greater_than(
        formDataLines.length,
        2,
        `${fileBaseName}: multipart form data must have at least 3 lines: ${
             JSON.stringify(formDataText)
           }`);
    const boundary = formDataLines[0];
    assert_equals(
        formDataLines[formDataLines.length - 1],
        boundary + '--',
        `${fileBaseName}: multipart form data must end with ${boundary}--: ${
             JSON.stringify(formDataText)
           }`);

    // Values get their line breaks normalized to CRLF; names additionally,
    // and filenames on their own, get CR, LF and double quotes
    // percent-encoded.
    const asValue = expectedEncodedBaseName.replace(/\r\n?|\n/g, "\r\n");
    const asName = asValue.replace(/[\r\n"]/g, encodeURIComponent);
    const asFilename =
        expectedEncodedBaseName.replace(/[\r\n"]/g, encodeURIComponent);

    // The response body from echo-content-escaped.py has controls and non-ASCII
    // bytes escaped, so any caller-provided field that might contain such bytes
    // must be passed to `escapeString`, after any other expected
    // transformations.
    const expectedText = [
      boundary,
      'Content-Disposition: form-data; name="_charset_"',
      '',
      formEncoding,
      boundary,
      'Content-Disposition: form-data; name="filename"',
      '',
      // Unlike for names and filenames, multipart/form-data values don't escape
      // \r\n linebreaks, and when they're read from an iframe they become \n.
      escapeString(asValue).replace(/\r\n/g, "\n"),
      boundary,
      `Content-Disposition: form-data; name="${escapeString(asName)}"`,
      '',
      'filename',
      boundary,
      `Content-Disposition: form-data; name="file"; ` +
          `filename="${escapeString(asFilename)}"`,
      'Content-Type: text/plain',
      '',
      // The file body is uploaded as UTF-8 whatever the form encoding is.
      escapeString(kTestFallbackUtf8),
      boundary + '--',
    ].join('\n');

    assert_true(
        formDataText.startsWith(expectedText),
        `Unexpected multipart-shaped form data received:\n${
             formDataText
           }\nExpected:\n${expectedText}`);
  }, `Upload ${fileBaseName} (${fileNameSource}) in ${formEncoding} form`);
};