tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

column-numbers-in-long-lines.js (16106B)


      1 // |reftest| skip-if(!this.hasOwnProperty('Reflect')||!Reflect.parse) -- uses Reflect.parse(..., { loc: true}) to trigger the column-computing API
      2 /*
      3 * Any copyright is dedicated to the Public Domain.
      4 * http://creativecommons.org/licenses/publicdomain/
      5 */
      6 
      7 //-----------------------------------------------------------------------------
      8 var BUGNUMBER = 1551916;
      9 var summary =
     10  "Optimize computing a column number as count of code points by caching " +
     11  "column numbers (and whether each chunk might contain anything multi-unit) " +
     12  "and counting forward from them";
     13 
     14 print(BUGNUMBER + ": " + summary);
     15 
     16 /**************
     17 * BEGIN TEST *
     18 **************/
     19 
     20 // Various testing of column-number computations, with respect to counting as
     21 // code points or units, for very long lines.
     22 //
     23 // This test should pass when column numbers are counts of code points (current
     24 // behavior) or code units (past behavior).  It also *should* pass for any valid
     25 // |TokenStreamAnyChars::ColumnChunkLength| value (it must be at least 4 so that
     26 // the maximum-length code point in UTF-8/16 will fit in a single chunk),
     27 // because the value of that constant should be externally invisible save for
     28 // perf effects. (As a result, recompiling and running this test with a variety
     29 // of different values assigned to that constant is a good smoke-test of column
     30 // number computation, that doesn't require having to closely inspect any
     31 // column-related code.)
     32 //
     33 // However, this test is structured on the assumption that that constant has the
     34 // value 128, in order to exercise in targeted fashion various column number
     35 // computation edge cases.
     36 //
     37 // All this testing *could* be written to not be done with |Reflect.parse| --
     38 // backwards column computations happen even when compiling normal code, in some
     39 // cases.  But it's much more the exception than the rule.  And |Reflect.parse|
     40 // has *very* predictable column-computation operations (basically start/end
     41 // coordinates are computed immediately when the end of an AST node is reached)
     42 // that make it easier to recognize what the exact pattern of computations for
     43 // which offsets will look like.
     44 
     45 // Helper function for checking node location tersely.
     46 function checkLoc(node, expectedStart, expectedEnd)
     47 {
     48  let start = node.loc.start;
     49 
     50  assertEq(start.line, expectedStart[0],
     51           "start line number must be as expected");
     52  assertEq(start.column, expectedStart[1],
     53           "start column number must be as expected");
     54 
     55  let end = node.loc.end;
     56 
     57  assertEq(end.line, expectedEnd[0], "end line number must be as expected");
     58  assertEq(end.column, expectedEnd[1],
     59           "end column number must be as expected");
     60 }
     61 
     62 function lengthInCodePoints(str)
     63 {
     64  return [...str].length;
     65 }
     66 
     67 // True if column numbers are counts of code points, false otherwise.  This
     68 // constant can be used to short-circuit testing that isn't point/unit-agnostic.
     69 const columnsAreCodePoints = (function()
     70 {
     71  var columnTypes = [];
     72 
     73  function checkColumn(actual, expectedPoints, expectedUnits)
     74  {
     75    if (actual === expectedPoints)
     76      columnTypes.push("p");
     77    else if (actual === expectedUnits)
     78      columnTypes.push("u");
     79    else
     80      columnTypes.push("x");
     81  }
     82 
     83  var script = Reflect.parse('"😱😱😱😱";', { loc: true });
     84  assertEq(script.type, "Program");
     85  assertEq(script.loc.start.line, 1);
     86  assertEq(script.loc.end.line, 1);
     87  assertEq(script.loc.start.column, 1);
     88  checkColumn(script.loc.end.column, 8, 12);
     89 
     90  var body = script.body;
     91  assertEq(body.length, 1);
     92 
     93  var stmt = body[0];
     94  assertEq(stmt.type, "ExpressionStatement");
     95  assertEq(stmt.loc.start.line, 1);
     96  assertEq(stmt.loc.end.line, 1);
     97  assertEq(stmt.loc.start.column, 1);
     98  checkColumn(stmt.loc.end.column, 8, 12);
     99 
    100  var expr = stmt.expression;
    101  assertEq(expr.type, "Literal");
    102  assertEq(expr.value, "😱😱😱😱");
    103  assertEq(expr.loc.start.line, 1);
    104  assertEq(expr.loc.end.line, 1);
    105  assertEq(expr.loc.start.column, 1);
    106  checkColumn(expr.loc.end.column, 7, 11);
    107 
    108  var checkResult = columnTypes.join(",");
    109 
    110  assertEq(checkResult === "p,p,p" || checkResult === "u,u,u", true,
    111           "columns must be wholly code points or units: " + checkResult);
    112 
    113  return checkResult === "p,p,p";
    114 })();
    115 
    116 // Start with some basic sanity-testing, without regard to exactly when, how, or
    117 // in what order (offset => column) computations are performed.
    118 function testSimple()
    119 {
    120  if (!columnsAreCodePoints)
    121    return;
    122 
    123  // Array elements within the full |simpleCode| string constructed below are
    124  // one-element arrays containing the string "😱😱#x" where "#" is the
    125  // character that, in C++, could be written as |'(' + i| where |i| is the
    126  // index of the array within the outer array.
    127  let simpleCodeArray =
    128    [
    129      'var Q = [[',  // column 1, offset 0
    130      // REPEAT
    131      '"😱😱(x"],["',  // column 11, offset 10
    132      '😱😱)x"],["😱',  // column 21, offset 22
    133      '😱*x"],["😱😱',  // column 31, offset 35
    134      '+x"],["😱😱,',  // column 41, offset 48
    135      'x"],["😱😱-x',  // column 51, offset 60
    136      '"],["😱😱.x"',  // column 61, offset 72
    137      '],["😱😱/x"]',  // column 71, offset 84
    138      ',["😱😱0x"],',  // column 81, offset 96
    139      '["😱😱1x"],[',  // column 91, offset 108
    140      // REPEAT
    141      '"😱😱2x"],["',  // column 101, offset 120 -- chunk limit between "]
    142      '😱😱3x"],["😱',  // column 111, offset 132
    143      '😱4x"],["😱😱',  // column 121, offset 145
    144      '5x"],["😱😱6',  // column 131, offset 158
    145      'x"],["😱😱7x',  // column 141, offset 170
    146      '"],["😱😱8x"',  // column 151, offset 182
    147      '],["😱😱9x"]',  // column 161, offset 194
    148      ',["😱😱:x"],',  // column 171, offset 206
    149      '["😱😱;x"],[',  // column 181, offset 218
    150      // REPEAT
    151      '"😱😱<x"],["',  // column 191, offset 230
    152      '😱😱=x"],["😱',  // column 201, offset 242
    153      '😱>x"],["😱😱',  // column 211, offset 255 -- chunk limit splits first 😱
    154      '?x"],["😱😱@',  // column 221, offset 268
    155      'x"],["😱😱Ax',  // column 231, offset 280
    156      '"],["😱😱Bx"',  // column 241, offset 292
    157      '],["😱😱Cx"]',  // column 251, offset 304
    158      ',["😱😱Dx"],',  // column 261, offset 316
    159      '["😱😱Ex"],[',  // column 271, offset 328
    160      // REPEAT
    161      '"😱😱Fx"],["',  // column 281, offset 340
    162      '😱😱Gx"],["😱',  // column 291, offset 352
    163      '😱Hx"],["😱😱',  // column 301, offset 365
    164      'Ix"],["😱😱J',  // column 311, offset 378 -- chunk limit between ["
    165      'x"],["😱😱Kx',  // column 321, offset 390
    166      '"],["😱😱Lx"',  // column 331, offset 402
    167      '],["😱😱Mx"]',  // column 341, offset 414
    168      ',["😱😱Nx"],',  // column 351, offset 426
    169      '["😱😱Ox"]];',  // column 361 (10 long), offset 438 (+12 to end)
    170    ];
    171  let simpleCode = simpleCodeArray.join("");
    172 
    173  // |simpleCode| overall contains this many code points.  (This number is
    174  // chosen to be several |TokenStreamAnyChars::ColumnChunkLength = 128| chunks
    175  // long so that long-line handling is exercised, and the relevant vector
    176  // increased in length, for more than one chunk [which would be too short to
    177  // trigger chunking] and for more than two chunks [so that vector extension
    178  // will eventually occur].)
    179  const CodePointLength = 370;
    180 
    181  assertEq(lengthInCodePoints(simpleCode), CodePointLength,
    182           "code point count should be correct");
    183 
    184  // |simpleCodeArray| contains this many REPEAT-delimited cycles.
    185  const RepetitionNumber = 4;
    186 
    187  // Each cycle consists of this many elements.
    188  const ElementsPerCycle = 9;
    189 
    190  // Each element in a cycle has at least this many 😱.
    191  const MinFaceScreamingPerElementInCycle = 2;
    192 
    193  // Each cycle consists of many elements with three 😱.
    194  const ElementsInCycleWithThreeFaceScreaming = 2;
    195 
    196  // Compute the overall number of UTF-16 code units.  (UTF-16 because this is a
    197  // JS string as input.)
    198  const OverallCodeUnitCount =
    199    CodePointLength +
    200    RepetitionNumber * (ElementsPerCycle * MinFaceScreamingPerElementInCycle +
    201                        ElementsInCycleWithThreeFaceScreaming);
    202 
    203  // Code units != code points.
    204  assertEq(OverallCodeUnitCount > CodePointLength, true,
    205           "string contains code points outside BMP, so length in units " +
    206           "exceeds length in points");
    207 
    208  // The overall computed number of code units has this exact numeric value.
    209  assertEq(OverallCodeUnitCount, 450,
    210           "code unit count computation produces this value");
    211 
    212  // The overall computed number of code units matches the string length.
    213  assertEq(simpleCode.length, OverallCodeUnitCount, "string length must match");
    214 
    215  // Evaluate the string.
    216  var Q;
    217  eval(simpleCode);
    218 
    219  // Verify characteristics of the resulting execution.
    220  assertEq(Array.isArray(Q), true);
    221 
    222  const NumArrayElements = 40;
    223  assertEq(Q.length, NumArrayElements);
    224  Q.forEach((v, i) => {
    225    assertEq(Array.isArray(v), true);
    226    assertEq(v.length, 1);
    227    assertEq(v[0], "😱😱" + String.fromCharCode('('.charCodeAt(0) + i) + "x");
    228  });
    229 
    230  let parseTree = Reflect.parse(simpleCode, { loc: true });
    231 
    232  // Check the overall script.
    233  assertEq(parseTree.type, "Program");
    234  checkLoc(parseTree, [1, 0], [1, 370]);
    235  assertEq(parseTree.body.length, 1);
    236 
    237  // Check the coordinates of the declaration.
    238  let varDecl = parseTree.body[0];
    239  assertEq(varDecl.type, "VariableDeclaration");
    240  checkLoc(varDecl, [1, 0], [1, 369]);
    241 
    242  // ...and its initializing expression.
    243  let varInit = varDecl.declarations[0].init;
    244  assertEq(varInit.type, "ArrayExpression");
    245  checkLoc(varInit, [1, 8], [1, 369]);
    246 
    247  // ...and then every literal inside it.
    248  assertEq(varInit.elements.length, NumArrayElements, "array literal length");
    249 
    250  const ItemLength = lengthInCodePoints('["😱😱#x"],');
    251  assertEq(ItemLength, 9, "item length check");
    252 
    253  for (let i = 0; i < NumArrayElements; i++)
    254  {
    255    let elem = varInit.elements[i];
    256    assertEq(elem.type, "ArrayExpression");
    257 
    258    let startCol = 9 + i * ItemLength;
    259    let endCol = startCol + ItemLength - 1;
    260    checkLoc(elem, [1, startCol], [1, endCol]);
    261 
    262    let arrayElems = elem.elements;
    263    assertEq(arrayElems.length, 1);
    264 
    265    let str = arrayElems[0];
    266    assertEq(str.type, "Literal");
    267    assertEq(str.value,
    268             "😱😱" + String.fromCharCode('('.charCodeAt(0) + i) + "x");
    269    checkLoc(str, [1, startCol + 1], [1, endCol - 1]);
    270  }
    271 }
    272 testSimple();
    273 
    274 // Test |ChunkInfo::unitsType() == UnitsType::GuaranteedSingleUnit| -- not that
    275 // it should be observable, precisely, but effects of mis-applying or
    276 // miscomputing it would in principle be observable if such were happening.
    277 // This test also is intended to be useful for (manually, in a debugger)
    278 // verifying that the optimization is computed and kicks in correctly.
    279 function testGuaranteedSingleUnit()
    280 {
    281  if (!columnsAreCodePoints)
    282    return;
    283 
    284  // Begin a few array literals in a first chunk to test column computation in
    285  // that first chunk.
    286  //
    287  // End some of them in the first chunk to test columns *before* we know we
    288  // have a long line.
    289  //
    290  // End one array *outside* the first chunk to test a computation inside a
    291  // first chunk *after* we know we have a long line and have computed a first
    292  // chunk.
    293  let mixedChunksCode = "var Z = [ [ [],"; // column 1, offset 0
    294  assertEq(mixedChunksCode.length, 15);
    295  assertEq(lengthInCodePoints(mixedChunksCode), 15);
    296 
    297  mixedChunksCode +=
    298    " ".repeat(128 - mixedChunksCode.length); // column 16, offset 15
    299  assertEq(mixedChunksCode.length, 128);
    300  assertEq(lengthInCodePoints(mixedChunksCode), 128);
    301 
    302  // Fill out a second chunk as also single-unit, with an outer array literal
    303  // that begins in this chunk but finishes in the next (to test column
    304  // computation in a prior, guaranteed-single-unit chunk).
    305  mixedChunksCode += "[" + "[],".repeat(42) + " "; // column 129, offset 128
    306  assertEq(mixedChunksCode.length, 256);
    307  assertEq(lengthInCodePoints(mixedChunksCode), 256);
    308 
    309  // Add a third chunk with one last empty nested array literal (so that we
    310  // tack on another chunk, and conclude the second chunk is single-unit, before
    311  // closing the enclosing array literal).  Then close the enclosing array
    312  // literal.  Finally start a new string literal element containing
    313  // multi-unit code points.  For good measure, make the chunk *end* in the
    314  // middle of such a code point, so that the relevant chunk limit must be
    315  // retracted one code unit.
    316  mixedChunksCode += "[] ], '" + "😱".repeat(61); // column 257, offset 256
    317  assertEq(mixedChunksCode.length, 384 + 1);
    318  assertEq(lengthInCodePoints(mixedChunksCode), 324);
    319 
    320  // Wrap things up.  Terminate the string, then terminate the nested array
    321  // literal to trigger a column computation within the first chunk that can
    322  // benefit from knowing the first chunk is all single-unit.  Next add a *new*
    323  // element to the outermost array, a string literal that contains a line
    324  // terminator.  The terminator invalidates the column computation cache, so
    325  // when the outermost array is closed, location info for it will not hit the
    326  // cache.  Finally, tack on the terminating semicolon for good measure.
    327  mixedChunksCode += "' ], '\u2028' ];"; // column 325, offset 385
    328  assertEq(mixedChunksCode.length, 396);
    329  assertEq(lengthInCodePoints(mixedChunksCode), 335);
    330 
    331  let parseTree = Reflect.parse(mixedChunksCode, { loc: true });
    332 
    333  // Check the overall script.
    334  assertEq(parseTree.type, "Program");
    335  checkLoc(parseTree, [1, 0], [2, 4]);
    336  assertEq(parseTree.body.length, 1);
    337 
    338  // Check the coordinates of the declaration.
    339  let varDecl = parseTree.body[0];
    340  assertEq(varDecl.type, "VariableDeclaration");
    341  checkLoc(varDecl, [1, 0], [2, 3]);
    342 
    343  // ...and its initializing expression.
    344  let varInit = varDecl.declarations[0].init;
    345  assertEq(varInit.type, "ArrayExpression");
    346  checkLoc(varInit, [1, 8], [2, 3]);
    347 
    348  let outerArrayElements = varInit.elements;
    349  assertEq(outerArrayElements.length, 2);
    350 
    351  {
    352    // Next the first element, the array inside the initializing expression.
    353    let nestedArray = varInit.elements[0];
    354    assertEq(nestedArray.type, "ArrayExpression");
    355    checkLoc(nestedArray, [1, 10], [1, 327]);
    356 
    357    // Now inside that nested array.
    358    let nestedArrayElements = nestedArray.elements;
    359    assertEq(nestedArrayElements.length, 3);
    360 
    361    // First the [] in chunk #0
    362    let emptyArray = nestedArrayElements[0];
    363    assertEq(emptyArray.type, "ArrayExpression");
    364    assertEq(emptyArray.elements.length, 0);
    365    checkLoc(emptyArray, [1, 12], [1, 14]);
    366 
    367    // Then the big array of empty arrays starting in chunk #1 and ending just
    368    // barely in chunk #2.
    369    let bigArrayOfEmpties = nestedArrayElements[1];
    370    assertEq(bigArrayOfEmpties.type, "ArrayExpression");
    371    assertEq(bigArrayOfEmpties.elements.length, 42 + 1);
    372    bigArrayOfEmpties.elements.forEach((elem, i) => {
    373      assertEq(elem.type, "ArrayExpression");
    374      assertEq(elem.elements.length, 0);
    375      if (i !== 42)
    376        checkLoc(elem, [1, 129 + i * 3], [1, 131 + i * 3]);
    377      else
    378        checkLoc(elem, [1, 256], [1, 258]); // last element was hand-placed
    379    });
    380 
    381    // Then the string literal of multi-unit code points beginning in chunk #2
    382    // and ending just into chunk #3 on a second line.
    383    let multiUnitStringLiteral = nestedArrayElements[2];
    384    assertEq(multiUnitStringLiteral.type, "Literal");
    385    assertEq(multiUnitStringLiteral.value, "😱".repeat(61));
    386    checkLoc(multiUnitStringLiteral, [1, 262], [1, 325]);
    387  }
    388 
    389  {
    390    // Finally, the string literal containing a line terminator as element in
    391    // the outermost array.
    392    let stringLiteralWithEmbeddedTerminator = outerArrayElements[1];
    393    assertEq(stringLiteralWithEmbeddedTerminator.type, "Literal");
    394    assertEq(stringLiteralWithEmbeddedTerminator.value, "\u2028");
    395    checkLoc(stringLiteralWithEmbeddedTerminator, [1, 329], [2, 1]);
    396  }
    397 }
    398 testGuaranteedSingleUnit();
    399 
    400 if (typeof reportCompare === "function")
    401  reportCompare(true, true);
    402 
    403 print("Testing completed");