column-numbers-in-long-lines.js (16106B)
// |reftest| skip-if(!this.hasOwnProperty('Reflect')||!Reflect.parse) -- uses Reflect.parse(..., { loc: true}) to trigger the column-computing API
/*
 * Any copyright is dedicated to the Public Domain.
 * http://creativecommons.org/licenses/publicdomain/
 */

//-----------------------------------------------------------------------------
// Bug identifier and one-line description printed as the test banner.
var BUGNUMBER = 1551916;
var summary = [
  "Optimize computing a column number as count of code points by caching ",
  "column numbers (and whether each chunk might contain anything multi-unit) ",
  "and counting forward from them",
].join("");

print(`${BUGNUMBER}: ${summary}`);

/**************
 * BEGIN TEST *
 **************/

// Various testing of column-number computations, with respect to counting as
// code points or units, for very long lines.
//
// This test should pass when column numbers are counts of code points (current
// behavior) or code units (past behavior). It also *should* pass for any valid
// |TokenStreamAnyChars::ColumnChunkLength| value (it must be at least 4 so that
// the maximum-length code point in UTF-8/16 will fit in a single chunk),
// because the value of that constant should be externally invisible save for
// perf effects. (As a result, recompiling and running this test with a variety
// of different values assigned to that constant is a good smoke-test of column
// number computation, that doesn't require having to closely inspect any
// column-related code.)
//
// However, this test is structured on the assumption that that constant has the
// value 128, in order to exercise in targeted fashion various column number
// computation edge cases.
//
// All this testing *could* be written to not be done with |Reflect.parse| --
// backwards column computations happen even when compiling normal code, in some
// cases. But it's much more the exception than the rule.
// And |Reflect.parse| has *very* predictable column-computation operations
// (basically start/end coordinates are computed immediately when the end of an
// AST node is reached) that make it easier to recognize what the exact pattern
// of computations for which offsets will look like.

// Terse helper asserting a node's source location.
//
// |node| must carry a |loc| object with |start| and |end| positions, each
// having |line| and |column|. |expectedStart| and |expectedEnd| are
// [line, column] pairs compared (via |assertEq|) against those positions, start
// first, then end.
function checkLoc(node, expectedStart, expectedEnd)
{
  const expectations = [
    ["start", expectedStart],
    ["end", expectedEnd],
  ];

  for (const [edge, expected] of expectations)
  {
    const position = node.loc[edge];

    assertEq(position.line, expected[0],
             `${edge} line number must be as expected`);
    assertEq(position.column, expected[1],
             `${edge} column number must be as expected`);
  }
}

// Number of code points in |str| (string iteration walks code points, so a
// surrogate pair counts once).
function lengthInCodePoints(str)
{
  let count = 0;
  for (const _codePoint of str)
    count++;
  return count;
}

// True if column numbers are counts of code points, false otherwise. This
// constant can be used to short-circuit testing that isn't point/unit-agnostic.
const columnsAreCodePoints = (function()
{
  // One entry per probed end column: "p" if it matched the code-point count,
  // "u" if it matched the code-unit count, "x" if it matched neither.
  var columnTypes = [];

  function checkColumn(actual, expectedPoints, expectedUnits)
  {
    if (actual === expectedPoints)
      columnTypes.push("p");
    else if (actual === expectedUnits)
      columnTypes.push("u");
    else
      columnTypes.push("x");
  }

  // The probe script is 7 code points / 11 UTF-16 code units long. Columns
  // reported by |Reflect.parse| are asserted here to be 1-origin: a node
  // starting at offset 0 has start column 1.
  var script = Reflect.parse('"😱😱😱😱";', { loc: true });
  assertEq(script.type, "Program");
  assertEq(script.loc.start.line, 1);
  assertEq(script.loc.end.line, 1);
  assertEq(script.loc.start.column, 1);
  checkColumn(script.loc.end.column, 8, 12);

  var body = script.body;
  assertEq(body.length, 1);

  var stmt = body[0];
  assertEq(stmt.type, "ExpressionStatement");
  assertEq(stmt.loc.start.line, 1);
  assertEq(stmt.loc.end.line, 1);
  assertEq(stmt.loc.start.column, 1);
  checkColumn(stmt.loc.end.column, 8, 12);

  var expr = stmt.expression;
  assertEq(expr.type, "Literal");
  assertEq(expr.value, "😱😱😱😱");
  assertEq(expr.loc.start.line, 1);
  assertEq(expr.loc.end.line, 1);
  assertEq(expr.loc.start.column, 1);
  checkColumn(expr.loc.end.column, 7, 11);

  var checkResult = columnTypes.join(",");

  assertEq(checkResult === "p,p,p" || checkResult === "u,u,u", true,
           "columns must be wholly code points or units: " + checkResult);

  return checkResult === "p,p,p";
})();

// Start with some basic sanity-testing, without regard to exactly when, how, or
// in what order (offset => column) computations are performed.
//
// All expected columns below are 1-origin, consistent with what the
// |columnsAreCodePoints| probe above asserts (offset 0 is column 1) and with
// the "column N, offset M" annotations on |simpleCodeArray|.
function testSimple()
{
  if (!columnsAreCodePoints)
    return;

  // Array elements within the full |simpleCode| string constructed below are
  // one-element arrays containing the string "😱😱#x" where "#" is the
  // character that, in C++, could be written as |'(' + i| where |i| is the
  // index of the array within the outer array.
  let simpleCodeArray =
    [
      'var Q = [[', // column 1, offset 0
      // REPEAT
      '"😱😱(x"],["', // column 11, offset 10
      '😱😱)x"],["😱', // column 21, offset 22
      '😱*x"],["😱😱', // column 31, offset 35
      '+x"],["😱😱,', // column 41, offset 48
      'x"],["😱😱-x', // column 51, offset 60
      '"],["😱😱.x"', // column 61, offset 72
      '],["😱😱/x"]', // column 71, offset 84
      ',["😱😱0x"],', // column 81, offset 96
      '["😱😱1x"],[', // column 91, offset 108
      // REPEAT
      '"😱😱2x"],["', // column 101, offset 120 -- chunk limit between "]
      '😱😱3x"],["😱', // column 111, offset 132
      '😱4x"],["😱😱', // column 121, offset 145
      '5x"],["😱😱6', // column 131, offset 158
      'x"],["😱😱7x', // column 141, offset 170
      '"],["😱😱8x"', // column 151, offset 182
      '],["😱😱9x"]', // column 161, offset 194
      ',["😱😱:x"],', // column 171, offset 206
      '["😱😱;x"],[', // column 181, offset 218
      // REPEAT
      '"😱😱<x"],["', // column 191, offset 230
      '😱😱=x"],["😱', // column 201, offset 242
      '😱>x"],["😱😱', // column 211, offset 255 -- chunk limit splits first 😱
      '?x"],["😱😱@', // column 221, offset 268
      'x"],["😱😱Ax', // column 231, offset 280
      '"],["😱😱Bx"', // column 241, offset 292
      '],["😱😱Cx"]', // column 251, offset 304
      ',["😱😱Dx"],', // column 261, offset 316
      '["😱😱Ex"],[', // column 271, offset 328
      // REPEAT
      '"😱😱Fx"],["', // column 281, offset 340
      '😱😱Gx"],["😱', // column 291, offset 352
      '😱Hx"],["😱😱', // column 301, offset 365
      'Ix"],["😱😱J', // column 311, offset 378 -- chunk limit between ["
      'x"],["😱😱Kx', // column 321, offset 390
      '"],["😱😱Lx"', // column 331, offset 402
      '],["😱😱Mx"]', // column 341, offset 414
      ',["😱😱Nx"],', // column 351, offset 426
      '["😱😱Ox"]];', // column 361 (10 long), offset 438 (+12 to end)
    ];
  let simpleCode = simpleCodeArray.join("");

  // |simpleCode| overall contains this many code points. (This number is
  // chosen to be several |TokenStreamAnyChars::ColumnChunkLength = 128| chunks
  // long so that long-line handling is exercised, and the relevant vector
  // increased in length, for more than one chunk [which would be too short to
  // trigger chunking] and for more than two chunks [so that vector extension
  // will eventually occur].)
  const CodePointLength = 370;

  assertEq(lengthInCodePoints(simpleCode), CodePointLength,
           "code point count should be correct");

  // |simpleCodeArray| contains this many REPEAT-delimited cycles.
  const RepetitionNumber = 4;

  // Each cycle consists of this many elements.
  const ElementsPerCycle = 9;

  // Each element in a cycle has at least this many 😱.
  const MinFaceScreamingPerElementInCycle = 2;

  // Each cycle consists of this many elements with three 😱.
  const ElementsInCycleWithThreeFaceScreaming = 2;

  // Compute the overall number of UTF-16 code units. (UTF-16 because this is a
  // JS string as input.)
  const OverallCodeUnitCount =
    CodePointLength +
    RepetitionNumber * (ElementsPerCycle * MinFaceScreamingPerElementInCycle +
                        ElementsInCycleWithThreeFaceScreaming);

  // Code units != code points.
  assertEq(OverallCodeUnitCount > CodePointLength, true,
           "string contains code points outside BMP, so length in units " +
           "exceeds length in points");

  // The overall computed number of code units has this exact numeric value.
  assertEq(OverallCodeUnitCount, 450,
           "code unit count computation produces this value");

  // The overall computed number of code units matches the string length.
  assertEq(simpleCode.length, OverallCodeUnitCount, "string length must match");

  // Evaluate the string. (Direct eval so the |var Q| inside |simpleCode|
  // assigns the local |Q| declared here.)
  var Q;
  eval(simpleCode);

  // Verify characteristics of the resulting execution.
  assertEq(Array.isArray(Q), true);

  const NumArrayElements = 40;
  assertEq(Q.length, NumArrayElements);
  Q.forEach((v, i) => {
    assertEq(Array.isArray(v), true);
    assertEq(v.length, 1);
    assertEq(v[0], "😱😱" + String.fromCharCode('('.charCodeAt(0) + i) + "x");
  });

  let parseTree = Reflect.parse(simpleCode, { loc: true });

  // Check the overall script: it starts at column 1 and its end column is one
  // past the last of the |CodePointLength| code points.
  assertEq(parseTree.type, "Program");
  checkLoc(parseTree, [1, 1], [1, CodePointLength + 1]);
  assertEq(parseTree.body.length, 1);

  // Check the coordinates of the declaration (which excludes the trailing
  // semicolon).
  let varDecl = parseTree.body[0];
  assertEq(varDecl.type, "VariableDeclaration");
  checkLoc(varDecl, [1, 1], [1, 370]);

  // ...and its initializing expression (the outer "[" is at offset 8, i.e.
  // column 9).
  let varInit = varDecl.declarations[0].init;
  assertEq(varInit.type, "ArrayExpression");
  checkLoc(varInit, [1, 9], [1, 370]);

  // ...and then every literal inside it.
  assertEq(varInit.elements.length, NumArrayElements, "array literal length");

  const ItemLength = lengthInCodePoints('["😱😱#x"],');
  assertEq(ItemLength, 9, "item length check");

  for (let i = 0; i < NumArrayElements; i++)
  {
    let elem = varInit.elements[i];
    assertEq(elem.type, "ArrayExpression");

    // The first inner "[" is at offset 9, i.e. column 10; each item (with its
    // trailing comma) advances |ItemLength| columns.
    let startCol = 10 + i * ItemLength;
    let endCol = startCol + ItemLength - 1;
    checkLoc(elem, [1, startCol], [1, endCol]);

    let arrayElems = elem.elements;
    assertEq(arrayElems.length, 1);

    let str = arrayElems[0];
    assertEq(str.type, "Literal");
    assertEq(str.value,
             "😱😱" + String.fromCharCode('('.charCodeAt(0) + i) + "x");
    checkLoc(str, [1, startCol + 1], [1, endCol - 1]);
  }
}
testSimple();

// Test |ChunkInfo::unitsType() == UnitsType::GuaranteedSingleUnit| -- not that
// it should be observable, precisely, but effects of mis-applying or
// miscomputing it would in principle be observable if such were happening.
// This test also is intended to be useful for (manually, in a debugger)
// verifying that the optimization is computed and kicks in correctly.
//
// All expected columns below are 1-origin (offset 0 on a line is column 1),
// consistent with what the |columnsAreCodePoints| probe asserts and with the
// "column N, offset M" annotations on the code-building statements.
function testGuaranteedSingleUnit()
{
  if (!columnsAreCodePoints)
    return;

  // Begin a few array literals in a first chunk to test column computation in
  // that first chunk.
  //
  // End some of them in the first chunk to test columns *before* we know we
  // have a long line.
  //
  // End one array *outside* the first chunk to test a computation inside a
  // first chunk *after* we know we have a long line and have computed a first
  // chunk.
  let mixedChunksCode = "var Z = [ [ [],"; // column 1, offset 0
  assertEq(mixedChunksCode.length, 15);
  assertEq(lengthInCodePoints(mixedChunksCode), 15);

  mixedChunksCode +=
    " ".repeat(128 - mixedChunksCode.length); // column 16, offset 15
  assertEq(mixedChunksCode.length, 128);
  assertEq(lengthInCodePoints(mixedChunksCode), 128);

  // Fill out a second chunk as also single-unit, with an outer array literal
  // that begins in this chunk but finishes in the next (to test column
  // computation in a prior, guaranteed-single-unit chunk).
  mixedChunksCode += "[" + "[],".repeat(42) + " "; // column 129, offset 128
  assertEq(mixedChunksCode.length, 256);
  assertEq(lengthInCodePoints(mixedChunksCode), 256);

  // Add a third chunk with one last empty nested array literal (so that we
  // tack on another chunk, and conclude the second chunk is single-unit, before
  // closing the enclosing array literal). Then close the enclosing array
  // literal. Finally start a new string literal element containing
  // multi-unit code points. For good measure, make the chunk *end* in the
  // middle of such a code point, so that the relevant chunk limit must be
  // retracted one code unit.
  mixedChunksCode += "[] ], '" + "😱".repeat(61); // column 257, offset 256
  assertEq(mixedChunksCode.length, 384 + 1);
  assertEq(lengthInCodePoints(mixedChunksCode), 324);

  // Wrap things up. Terminate the string, then terminate the nested array
  // literal to trigger a column computation within the first chunk that can
  // benefit from knowing the first chunk is all single-unit. Next add a *new*
  // element to the outermost array, a string literal that contains a line
  // terminator. The terminator invalidates the column computation cache, so
  // when the outermost array is closed, location info for it will not hit the
  // cache. Finally, tack on the terminating semicolon for good measure.
  mixedChunksCode += "' ], '\u2028' ];"; // column 325, offset 385
  assertEq(mixedChunksCode.length, 396);
  assertEq(lengthInCodePoints(mixedChunksCode), 335);

  let parseTree = Reflect.parse(mixedChunksCode, { loc: true });

  // Check the overall script. It ends on line 2 (after the U+2028), where the
  // four trailing code points "' ];" occupy columns 1-4, so the end is column 5.
  assertEq(parseTree.type, "Program");
  checkLoc(parseTree, [1, 1], [2, 5]);
  assertEq(parseTree.body.length, 1);

  // Check the coordinates of the declaration (ends before the semicolon).
  let varDecl = parseTree.body[0];
  assertEq(varDecl.type, "VariableDeclaration");
  checkLoc(varDecl, [1, 1], [2, 4]);

  // ...and its initializing expression (outer "[" at offset 8, column 9).
  let varInit = varDecl.declarations[0].init;
  assertEq(varInit.type, "ArrayExpression");
  checkLoc(varInit, [1, 9], [2, 4]);

  let outerArrayElements = varInit.elements;
  assertEq(outerArrayElements.length, 2);

  {
    // Next the first element, the array inside the initializing expression
    // ("[" at offset 10, closing "]" at code-point offset 326).
    let nestedArray = varInit.elements[0];
    assertEq(nestedArray.type, "ArrayExpression");
    checkLoc(nestedArray, [1, 11], [1, 328]);

    // Now inside that nested array.
    let nestedArrayElements = nestedArray.elements;
    assertEq(nestedArrayElements.length, 3);

    // First the [] in chunk #0
    let emptyArray = nestedArrayElements[0];
    assertEq(emptyArray.type, "ArrayExpression");
    assertEq(emptyArray.elements.length, 0);
    checkLoc(emptyArray, [1, 13], [1, 15]);

    // Then the big array of empty arrays starting in chunk #1 and ending just
    // barely in chunk #2.
    let bigArrayOfEmpties = nestedArrayElements[1];
    assertEq(bigArrayOfEmpties.type, "ArrayExpression");
    assertEq(bigArrayOfEmpties.elements.length, 42 + 1);
    bigArrayOfEmpties.elements.forEach((elem, i) => {
      assertEq(elem.type, "ArrayExpression");
      assertEq(elem.elements.length, 0);
      if (i !== 42)
        checkLoc(elem, [1, 130 + i * 3], [1, 132 + i * 3]);
      else
        checkLoc(elem, [1, 257], [1, 259]); // last element was hand-placed
    });

    // Then the string literal of multi-unit code points beginning in chunk #2
    // and ending just into chunk #3.
    let multiUnitStringLiteral = nestedArrayElements[2];
    assertEq(multiUnitStringLiteral.type, "Literal");
    assertEq(multiUnitStringLiteral.value, "😱".repeat(61));
    checkLoc(multiUnitStringLiteral, [1, 263], [1, 326]);
  }

  {
    // Finally, the string literal containing a line terminator as element in
    // the outermost array. It opens on line 1 and its closing quote is the
    // first code point of line 2.
    let stringLiteralWithEmbeddedTerminator = outerArrayElements[1];
    assertEq(stringLiteralWithEmbeddedTerminator.type, "Literal");
    assertEq(stringLiteralWithEmbeddedTerminator.value, "\u2028");
    checkLoc(stringLiteralWithEmbeddedTerminator, [1, 330], [2, 2]);
  }
}
testGuaranteedSingleUnit();

if (typeof reportCompare === "function")
  reportCompare(true, true);

print("Testing completed");