tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

CFG.js (43244B)


      1 /* This Source Code Form is subject to the terms of the Mozilla Public
      2 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
      3 * You can obtain one at http://mozilla.org/MPL/2.0/. */
      4 
      5 /* -*- indent-tabs-mode: nil; js-indent-level: 4 -*- */
      6 
      7 // Utility code for traversing the JSON data structures produced by sixgill.
      8 
      9 "use strict";
     10 
     11 var TRACING = false;
     12 
     13 // When edge.Kind == "Pointer", these are the meanings of the edge.Reference field.
     14 var PTR_POINTER = 0;
     15 var PTR_REFERENCE = 1;
     16 var PTR_RVALUE_REF = 2;
     17 
     18 // Find all points (positions within the code) of the body given by the list of
     19 // bodies and the blockId to match (which will specify an outer function or a
     20 // loop within it), recursing into loops if needed.
     21 function findAllPoints(bodies, blockId, bits)
     22 {
     23    var points = [];
     24    var body;
     25 
     26    for (var xbody of bodies) {
     27        if (sameBlockId(xbody.BlockId, blockId)) {
     28            assert(!body);
     29            body = xbody;
     30        }
     31    }
     32    assert(body);
     33 
     34    if (!("PEdge" in body))
     35        return;
     36    for (var edge of body.PEdge) {
     37        points.push([body, edge.Index[0], bits]);
     38        if (edge.Kind == "Loop")
     39            points.push(...findAllPoints(bodies, edge.BlockId, bits));
     40    }
     41 
     42    return points;
     43 }
     44 
     45 // Visitor of a graph of <body, ppoint> vertexes and sixgill-generated edges,
     46 // where the edges represent the actual computation happening.
     47 //
     48 // Uses the syntax `var Visitor = class { ... }` rather than `class Visitor`
     49 // to allow reloading this file with the JS debugger.
     50 var Visitor = class {
     51    constructor(bodies) {
     52        this.visited_bodies = new Map();
     53        for (const body of bodies) {
     54            this.visited_bodies.set(body, new Map());
     55        }
     56    }
     57 
     58    // Prepend `edge` to the info stored at the successor node, returning
     59    // the updated info value. This should be overridden by pretty much any
     60    // subclass, as a traversal's semantics are largely determined by this method.
     61    extend_path(edge, body, ppoint, successor_value) { return true; }
     62 
     63    // Default implementation does a basic "only visit nodes once" search.
     64    // (Whether this is BFS/DFS/other is determined by the caller.)
     65 
     66    // Override if you need to revisit nodes. Valid actions are "continue",
     67    // "prune", and "done". "continue" means continue with the search. "prune"
     68    // means do not continue to predecessors of this node, only continue with
     69    // the remaining entries in the work queue. "done" means the
     70    // whole search is complete even if unvisited nodes remain.
     71    next_action(prev, current) { return prev ? "prune" : "continue"; }
     72 
     73    // Update the info at a node. If this is the first time the node has been
     74    // seen, `prev` will be undefined. `current` will be the info computed by
     75    // `extend_path`. The node will be updated with the return value.
     76    merge_info(prev, current) { return true; }
     77 
     78    // Default visit() implementation. Subclasses will usually leave this alone
     79    // and use the other methods as extension points.
     80    //
     81    // Take a body, a point within that body, and the info computed by
     82    // extend_path() for that point when traversing an edge. Return whether the
     83    // search should continue ("continue"), the search should be pruned and
     84    // other paths followed ("prune"), or that the whole search is complete and
     85    // it is time to return a value ("done", and the value returned by
     86    // merge_info() will be returned by the overall search).
     87    //
     88    // Persistently record the value computed so far at each point, and call
     89    // (overridable) next_action() and merge_info() methods with the previous
     90    // and freshly-computed value for each point.
     91    //
     92    // Often, extend_path() will decide how/whether to continue the search and
     93    // will return the search action to take, and next_action() will blindly
     94    // return it if the point has not yet been visited. (And if it has, it will
     95    // prune this branch of the search so that no point is visited multiple
     96    // times.)
     97    visit(body, ppoint, info) {
     98        const visited_value_table = this.visited_bodies.get(body);
     99        const existing_value_if_visited = visited_value_table.get(ppoint);
    100        const action = this.next_action(existing_value_if_visited, info);
    101        const merged = this.merge_info(existing_value_if_visited, info);
    102        visited_value_table.set(ppoint, merged);
    103        return [action, merged];
    104    }
    105 };
    106 
    107 function findMatchingBlock(bodies, blockId) {
    108    for (const body of bodies) {
    109        if (sameBlockId(body.BlockId, blockId)) {
    110            return body;
    111        }
    112    }
    113    assert(false);
    114 }
    115 
    116 // For a given function containing a set of bodies, each containing a set of
    117 // ppoints, perform a mostly breadth-first traversal through the complete graph
    118 // of all <body, ppoint> nodes throughout all the bodies of the function.
    119 //
    120 // When traversing, every <body, ppoint> node is associated with a value that
    121 // is assigned or updated whenever it is visited. The overall traversal
    122 // terminates when a given condition is reached, and an arbitrary custom value
    123 // is returned. If the search completes without the termination condition
    124 // being reached, it will return the value associated with the entrypoint
    125 // node, which is initialized to `entrypoint_fallback_value` (and thus serves as
    126 // the fallback return value if all search paths are pruned before reaching
    127 // the entrypoint.)
    128 //
    129 // The traversal is only *mostly* breadth-first because the visitor decides
    130 // whether to stop searching when it sees a node. If a node is visited for a
    131 // second time, the visitor can choose to continue (and thus revisit the node)
    132 // in order to find "better" paths that may include a node more than once.
    133 // The search is done in the "upwards" direction -- as in, it starts at the
    134 // exit point and searches through predecessors.
    135 //
    136 // Override visitor.visit() to return an action and a value. The action
    137 // determines whether the overall search should terminate ('done'), or
    138 // continues looking through the predecessors of the current node ('continue'),
    139 // or whether it should just continue processing the work queue without
    140 // looking at predecessors ('prune').
    141 //
    142 // This allows this function to be used in different ways. If the visitor
    143 // associates a value with each node that chains onto its forward-flow successors
    144 // (predecessors in the "upwards" search order), then a complete path through
    145 // the graph will be returned.
    146 //
    147 // Alternatively, BFS_upwards() can be used to test whether a condition holds
    148 // (eg "the exit point is reachable only after calling SomethingImportant()"),
    149 // in which case no path is needed and the visitor can compute a simple boolean
    150 // every time it encounters a point. Note that `entrypoint_fallback_value` will
    151 // still be returned if the search terminates without ever reaching the
    152 // entrypoint, which is useful for dominator analyses.
    153 //
    154 // See the Visitor base class's implementation of visit(), above, for the
    155 // most commonly used visit logic.
    156 function BFS_upwards(start_body, start_ppoint, bodies, visitor,
    157                     initial_successor_value = {},
    158                     entrypoint_fallback_value=null)
    159 {
    160    let entrypoint_value = entrypoint_fallback_value;
    161 
    162    const work = [[start_body, start_ppoint, null, initial_successor_value]];
    163    if (TRACING) {
    164        printErr(`BFS start at ${blockIdentifier(start_body)}:${start_ppoint}`);
    165    }
    166 
    167    while (work.length > 0) {
    168        const [body, ppoint, edgeToAdd, successor_value] = work.shift();
    169        if (TRACING) {
    170            const s = edgeToAdd ? " : " + str(edgeToAdd) : "";
    171            printErr(`prepending edge from ${ppoint} to state '${successor_value}'${s}`);
    172        }
    173        let value = visitor.extend_path(edgeToAdd, body, ppoint, successor_value);
    174 
    175        const [action,  merged_value] = visitor.visit(body, ppoint, value);
    176        if (action === "done") {
    177            return merged_value;
    178        }
    179        if (action === "prune") {
    180            // Do not push anything else to the work queue, but continue processing
    181            // other branches.
    182            continue;
    183        }
    184        assert(action == "continue");
    185        value = merged_value;
    186 
    187        const predecessors = getPredecessors(body);
    188        for (const edge of (predecessors[ppoint] || [])) {
    189            if (edge.Kind == "Loop") {
    190                // Propagate the search into the exit point of the loop body.
    191                const loopBody = findMatchingBlock(bodies, edge.BlockId);
    192                const loopEnd = loopBody.Index[1];
    193                work.push([loopBody, loopEnd, null, value]);
    194                // Don't continue to predecessors here without going through
    195                // the loop. (The points in this body that enter the loop will
    196                // be traversed when we reach the entry point of the loop.)
    197            }
    198            work.push([body, edge.Index[0], edge, value]);
    199        }
    200 
    201        // Check for hitting the entry point of a loop body.
    202        if (ppoint == body.Index[0] && body.BlockId.Kind == "Loop") {
    203            // Propagate to outer body parents that enter the loop body.
    204            for (const parent of (body.BlockPPoint || [])) {
    205                const parentBody = findMatchingBlock(bodies, parent.BlockId);
    206                work.push([parentBody, parent.Index, null, value]);
    207            }
    208 
    209            // This point is also preceded by the *end* of this loop, for the
    210            // previous iteration.
    211            work.push([body, body.Index[1], null, value]);
    212        }
    213 
    214        // Check for reaching the entrypoint of the function.
    215        if (body === start_body && ppoint == body.Index[0]) {
    216            entrypoint_value = value;
    217        }
    218    }
    219 
    220    // The search space was exhausted without finding a 'done' state. That
    221    // might be because all search paths were pruned before reaching the entry
    222    // point of the function, in which case entrypoint_value will still be its initial
    223    // value. (If entrypoint_value has been set, then we may still not have visited the
    224    // entire graph, if some paths were pruned but at least one made it to the entrypoint.)
    225    return entrypoint_value;
    226 }
    227 
    228 // Given the CFG for the constructor call of some RAII, return whether the
    229 // given edge is the matching destructor call.
    230 function isMatchingDestructor(constructor, edge)
    231 {
    232    if (edge.Kind != "Call")
    233        return false;
    234    var callee = edge.Exp[0];
    235    if (callee.Kind != "Var")
    236        return false;
    237    var variable = callee.Variable;
    238    assert(variable.Kind == "Func");
    239    if (variable.Name[1].charAt(0) != '~')
    240        return false;
    241 
    242    // Note that in some situations, a regular function can begin with '~', so
    243    // we don't necessarily have a destructor in hand. This is probably a
    244    // sixgill artifact, but in js::wasm::ModuleGenerator::~ModuleGenerator, a
    245    // templatized static inline EraseIf is invoked, and it gets named ~EraseIf
    246    // for some reason.
    247    if (!("PEdgeCallInstance" in edge))
    248        return false;
    249 
    250    var constructExp = constructor.PEdgeCallInstance.Exp;
    251    assert(constructExp.Kind == "Var");
    252 
    253    var destructExp = edge.PEdgeCallInstance.Exp;
    254    if (destructExp.Kind != "Var")
    255        return false;
    256 
    257    return sameVariable(constructExp.Variable, destructExp.Variable);
    258 }
    259 
    260 // Return all calls within the RAII scope of any constructor matched by
    261 // isConstructor(). (Note that this would be insufficient if you needed to
    262 // treat each instance separately, such as when different regions of a function
    263 // body were guarded by these constructors and you needed to do something
    264 // different with each.)
    265 function allRAIIGuardedCallPoints(typeInfo, bodies, body, isConstructor)
    266 {
    267    if (!("PEdge" in body))
    268        return [];
    269 
    270    var points = [];
    271 
    272    for (var edge of body.PEdge) {
    273        if (edge.Kind != "Call")
    274            continue;
    275        var callee = edge.Exp[0];
    276        if (callee.Kind != "Var")
    277            continue;
    278        var variable = callee.Variable;
    279        assert(variable.Kind == "Func");
    280        const bits = isConstructor(typeInfo, edge.Type, variable.Name);
    281        if (!bits)
    282            continue;
    283        if (!("PEdgeCallInstance" in edge))
    284            continue;
    285        if (edge.PEdgeCallInstance.Exp.Kind != "Var")
    286            continue;
    287 
    288        points.push(...pointsInRAIIScope(bodies, body, edge, bits));
    289    }
    290 
    291    return points;
    292 }
    293 
    294 // Test whether the given edge is the constructor corresponding to the given
    295 // destructor edge.
    296 function isMatchingConstructor(destructor, edge)
    297 {
    298    if (edge.Kind != "Call")
    299        return false;
    300    var callee = edge.Exp[0];
    301    if (callee.Kind != "Var")
    302        return false;
    303    var variable = callee.Variable;
    304    if (variable.Kind != "Func")
    305        return false;
    306    var name = readable(variable.Name[0]);
    307    var destructorName = readable(destructor.Exp[0].Variable.Name[0]);
    308    var match = destructorName.match(/^(.*?::)~(\w+)\(/);
    309    if (!match) {
    310        printErr("Unhandled destructor syntax: " + destructorName);
    311        return false;
    312    }
    313    var constructorSubstring = match[1] + match[2];
    314    if (name.indexOf(constructorSubstring) == -1)
    315        return false;
    316 
    317    var destructExp = destructor.PEdgeCallInstance.Exp;
    318    if (destructExp.Kind != "Var")
    319        return false;
    320 
    321    var constructExp = edge.PEdgeCallInstance.Exp;
    322    if (constructExp.Kind != "Var")
    323        return false;
    324 
    325    return sameVariable(constructExp.Variable, destructExp.Variable);
    326 }
    327 
    328 function findMatchingConstructor(destructorEdge, body, warnIfNotFound=true)
    329 {
    330    var worklist = [destructorEdge];
    331    var predecessors = getPredecessors(body);
    332    while(worklist.length > 0) {
    333        var edge = worklist.pop();
    334        if (isMatchingConstructor(destructorEdge, edge))
    335            return edge;
    336        if (edge.Index[0] in predecessors) {
    337            for (var e of predecessors[edge.Index[0]])
    338                worklist.push(e);
    339        }
    340    }
    341    if (warnIfNotFound)
    342        printErr("Could not find matching constructor!");
    343    return undefined;
    344 }
    345 
    346 function pointsInRAIIScope(bodies, body, constructorEdge, bits) {
    347    var seen = {};
    348    var worklist = [constructorEdge.Index[1]];
    349    var points = [];
    350    while (worklist.length) {
    351        var point = worklist.pop();
    352        if (point in seen)
    353            continue;
    354        seen[point] = true;
    355        points.push([body, point, bits]);
    356        var successors = getSuccessors(body);
    357        if (!(point in successors))
    358            continue;
    359        for (var nedge of successors[point]) {
    360            if (isMatchingDestructor(constructorEdge, nedge))
    361                continue;
    362            if (nedge.Kind == "Loop")
    363                points.push(...findAllPoints(bodies, nedge.BlockId, bits));
    364            worklist.push(nedge.Index[1]);
    365        }
    366    }
    367 
    368    return points;
    369 }
    370 
    371 function isImmobileValue(exp) {
    372    if (exp.Kind == "Int" && exp.String == "0") {
    373        return true;
    374    }
    375    return false;
    376 }
    377 
    378 // Returns whether decl is a body.DefineVariable[] entry for a non-temporary reference.
    379 function isReferenceDecl(decl) {
    380    return decl.Type.Kind == "Pointer" && decl.Type.Reference != PTR_POINTER && decl.Variable.Kind != "Temp";
    381 }
    382 
    383 function expressionIsVariableAddress(exp, variable)
    384 {
    385    while (exp.Kind == "Fld")
    386        exp = exp.Exp[0];
    387    return exp.Kind == "Var" && sameVariable(exp.Variable, variable);
    388 }
    389 
    390 function edgeTakesVariableAddress(edge, variable, body)
    391 {
    392    if (ignoreEdgeUse(edge, variable, body))
    393        return false;
    394    if (ignoreEdgeAddressTaken(edge))
    395        return false;
    396    switch (edge.Kind) {
    397    case "Assign":
    398        return expressionIsVariableAddress(edge.Exp[1], variable);
    399    case "Call":
    400        if ("PEdgeCallArguments" in edge) {
    401            for (var exp of edge.PEdgeCallArguments.Exp) {
    402                if (expressionIsVariableAddress(exp, variable))
    403                    return true;
    404            }
    405        }
    406        return false;
    407    default:
    408        return false;
    409    }
    410 }
    411 
    412 // Look at an invocation of a virtual method or function pointer contained in a
    413 // field, and return the static type of the invocant (or the containing struct,
    414 // for a function pointer field.)
    415 function getFieldCallInstanceCSU(edge, field)
    416 {
    417    if ("FieldInstanceFunction" in field) {
    418        // We have a 'this'.
    419        const instanceExp = edge.PEdgeCallInstance.Exp;
    420        if (instanceExp.Kind == 'Drf') {
    421            // somevar->foo()
    422            return edge.Type.TypeFunctionCSU.Type.Name;
    423        } else if (instanceExp.Kind == 'Fld') {
    424            // somevar.foo()
    425            return instanceExp.Field.FieldCSU.Type.Name;
    426        } else if (instanceExp.Kind == 'Index') {
    427            // A strange construct.
    428            // C++ code: static_cast<JS::CustomAutoRooter*>(this)->trace(trc);
    429            // CFG: Call(21,30, this*[-1]{JS::CustomAutoRooter}.trace*(trc*))
    430            return instanceExp.Type.Name;
    431        } else if (instanceExp.Kind == 'Var') {
    432            // C++: reinterpret_cast<SimpleTimeZone*>(gRawGMT)->~SimpleTimeZone();
    433            // CFG:
    434            //   # icu_64::SimpleTimeZone::icu_64::SimpleTimeZone.__comp_dtor
    435            //   [6,7] Call gRawGMT.icu_64::SimpleTimeZone.__comp_dtor ()
    436            return field.FieldCSU.Type.Name;
    437        } else {
    438            printErr("------------------ edge -------------------");
    439            printErr(JSON.stringify(edge, null, 4));
    440            printErr("------------------ field -------------------");
    441            printErr(JSON.stringify(field, null, 4));
    442            assert(false, `unrecognized FieldInstanceFunction Kind ${instanceExp.Kind}`);
    443        }
    444    } else {
    445        // somefar.foo() where somevar is a field of some CSU.
    446        return field.FieldCSU.Type.Name;
    447    }
    448 }
    449 
    450 function expressionUsesVariable(exp, variable)
    451 {
    452    if (exp.Kind == "Var" && sameVariable(exp.Variable, variable))
    453        return true;
    454    if (!("Exp" in exp))
    455        return false;
    456    for (var childExp of exp.Exp) {
    457        if (expressionUsesVariable(childExp, variable))
    458            return true;
    459    }
    460    return false;
    461 }
    462 
    463 function expressionUsesVariableContents(exp, variable)
    464 {
    465    if (!("Exp" in exp))
    466        return false;
    467    for (var childExp of exp.Exp) {
    468        if (childExp.Kind == 'Drf') {
    469            if (expressionUsesVariable(childExp, variable))
    470                return true;
    471        } else if (expressionUsesVariableContents(childExp, variable)) {
    472            return true;
    473        }
    474    }
    475    return false;
    476 }
    477 
    478 // Detect simple |return nullptr;| statements.
    479 function isReturningImmobileValue(edge, variable)
    480 {
    481    if (variable.Kind == "Return") {
    482        if (edge.Exp[0].Kind == "Var" && sameVariable(edge.Exp[0].Variable, variable)) {
    483            if (isImmobileValue(edge.Exp[1]))
    484                return true;
    485        }
    486    }
    487    return false;
    488 }
    489 
    490 // If the edge uses the given variable's value, return the earliest point at
    491 // which the use is definite. Usually, that means the source of the edge
    492 // (anything that reaches that source point will end up using the variable, but
    493 // there may be other ways to reach the destination of the edge.)
    494 //
    495 // Return values are implicitly used at the very last point in the function.
    496 // This makes a difference: if an RAII class GCs in its destructor, we need to
    497 // start looking at the final point in the function, not one point back from
    498 // that, since that would skip over the GCing call.
    499 //
    500 // Certain references may be annotated to be live to the end of the function
    501 // as well (eg AutoCheckCannotGC&& parameters).
    502 //
    503 // Note that this returns a nonzero value only if the variable's incoming value is used.
    504 // So this would return 0 for 'obj':
    505 //
    506 //     obj = someFunction();
    507 //
    508 // but these would return a positive value:
    509 //
    510 //     obj = someFunction(obj);
    511 //     obj->foo = someFunction();
    512 //
    513 function edgeUsesVariable(edge, variable, body, liveToEnd=false)
    514 {
    515    if (ignoreEdgeUse(edge, variable, body))
    516        return 0;
    517 
    518    if (variable.Kind == "Return") {
    519        liveToEnd = true;
    520    }
    521 
    522    if (liveToEnd && body.Index[1] == edge.Index[1] && body.BlockId.Kind == "Function") {
    523        // The last point in the function body is treated as using the return
    524        // value. This is the only time the destination point is returned
    525        // rather than the source point.
    526        return edge.Index[1];
    527    }
    528 
    529    var src = edge.Index[0];
    530 
    531    switch (edge.Kind) {
    532 
    533    case "Assign": {
    534        // Detect `Return := nullptr`.
    535        if (isReturningImmobileValue(edge, variable))
    536            return 0;
    537        const [lhs, rhs] = edge.Exp;
    538        // Detect `lhs := ...variable...`
    539        if (expressionUsesVariable(rhs, variable))
    540            return src;
    541        // Detect `...variable... := rhs` but not `variable := rhs`. The latter
    542        // overwrites the previous value of `variable` without using it.
    543        if (expressionUsesVariable(lhs, variable) && !expressionIsVariable(lhs, variable))
    544            return src;
    545        return 0;
    546    }
    547 
    548    case "Assume":
    549        return expressionUsesVariableContents(edge.Exp[0], variable) ? src : 0;
    550 
    551    case "Call": {
    552        const callee = edge.Exp[0];
    553        if (expressionUsesVariable(callee, variable))
    554            return src;
    555        if ("PEdgeCallInstance" in edge) {
    556            if (expressionUsesVariable(edge.PEdgeCallInstance.Exp, variable)) {
    557                if (edgeStartsValueLiveRange(edge, variable)) {
    558                    // If the variable is being constructed, then the incoming
    559                    // value is not used here; it didn't exist before
    560                    // construction. (The analysis doesn't get told where
    561                    // variables are defined, so must infer it from
    562                    // construction. If the variable does not have a
    563                    // constructor, its live range may be larger than it really
    564                    // ought to be if it is defined within a loop body, but
    565                    // that is conservative.)
    566                } else {
    567                    return src;
    568                }
    569            }
    570        }
    571        if ("PEdgeCallArguments" in edge) {
    572            for (var exp of edge.PEdgeCallArguments.Exp) {
    573                if (expressionUsesVariable(exp, variable))
    574                    return src;
    575            }
    576        }
    577        if (edge.Exp.length == 1)
    578            return 0;
    579 
    580        // Assigning call result to a variable.
    581        const lhs = edge.Exp[1];
    582        if (expressionUsesVariable(lhs, variable) && !expressionIsVariable(lhs, variable))
    583            return src;
    584        return 0;
    585    }
    586 
    587    case "Loop":
    588        return 0;
    589 
    590    case "Assembly":
    591        return 0;
    592 
    593    default:
    594        assert(false);
    595    }
    596 }
    597 
    598 // If `decl` is the body.DefineVariable[] declaration of a reference type, then
    599 // return the expression without the outer dereference. Otherwise, return the
    600 // original expression.
    601 function maybeDereference(exp, decl) {
    602    if (exp.Kind == "Drf" && exp.Exp[0].Kind == "Var") {
    603        if (isReferenceDecl(decl)) {
    604            return exp.Exp[0];
    605        }
    606    }
    607    return exp;
    608 }
    609 
    610 function expressionIsVariable(exp, variable)
    611 {
    612    return exp.Kind == "Var" && sameVariable(exp.Variable, variable);
    613 }
    614 
    615 // Similar to the above, except treat uses of a reference as if they were uses
    616 // of the dereferenced contents. This requires knowing the type of the
    617 // variable, and so takes its declaration rather than the variable itself.
    618 function expressionIsDeclaredVariable(exp, decl)
    619 {
    620    exp = maybeDereference(exp, decl);
    621    return expressionIsVariable(exp, decl.Variable);
    622 }
    623 
    624 function expressionIsMethodOnVariableDecl(exp, decl)
    625 {
    626    // This might be calling a method on a base class, in which case exp will
    627    // be an unnamed field of the variable instead of the variable itself.
    628    while (exp.Kind == "Fld" && exp.Field.Name[0].startsWith("field:"))
    629        exp = exp.Exp[0];
    630    return expressionIsDeclaredVariable(exp, decl);
    631 }
    632 
    633 // Return whether the edge starts the live range of a variable's value, by setting
    634 // it to some new value. Examples of starting obj's live range:
    635 //
    636 //     obj = foo;
    637 //     obj = foo();
    638 //     obj = foo(obj);         // uses previous value but then sets to new value
    639 //     SomeClass obj(true, 1); // constructor
    640 //
    641 function edgeStartsValueLiveRange(edge, variable)
    642 {
    643    // Direct assignments start live range of lhs: var = value
    644    if (edge.Kind == "Assign") {
    645        const [lhs, rhs] = edge.Exp;
    646        return (expressionIsVariable(lhs, variable) &&
    647                !isReturningImmobileValue(edge, variable));
    648    }
    649 
    650    if (edge.Kind != "Call")
    651        return false;
    652 
    653    // Assignments of call results start live range: var = foo()
    654    if (1 in edge.Exp) {
    655        var lhs = edge.Exp[1];
    656        if (expressionIsVariable(lhs, variable))
    657            return true;
    658    }
    659 
    660    // Constructor calls start live range of instance: SomeClass var(...)
    661    if ("PEdgeCallInstance" in edge) {
    662        var instance = edge.PEdgeCallInstance.Exp;
    663 
    664        // Kludge around incorrect dereference on some constructor calls.
    665        if (instance.Kind == "Drf")
    666            instance = instance.Exp[0];
    667 
    668        if (!expressionIsVariable(instance, variable))
    669            return false;
    670 
    671        var callee = edge.Exp[0];
    672        if (callee.Kind != "Var")
    673            return false;
    674 
    675        assert(callee.Variable.Kind == "Func");
    676        var calleeName = readable(callee.Variable.Name[0]);
    677 
    678        // Constructor calls include the text 'Name::Name(' or 'Name<...>::Name('.
    679        var openParen = calleeName.indexOf('(');
    680        if (openParen < 0)
    681            return false;
    682        calleeName = calleeName.substring(0, openParen);
    683 
    684        var lastColon = calleeName.lastIndexOf('::');
    685        if (lastColon < 0)
    686            return false;
    687        var constructorName = calleeName.substr(lastColon + 2);
    688        calleeName = calleeName.substr(0, lastColon);
    689 
    690        var lastTemplateOpen = calleeName.lastIndexOf('<');
    691        if (lastTemplateOpen >= 0)
    692            calleeName = calleeName.substr(0, lastTemplateOpen);
    693 
    694        if (calleeName.endsWith(constructorName))
    695            return true;
    696    }
    697 
    698    return false;
    699 }
    700 
    701 // Return the result of a `matcher` callback on the call found in the given
    702 // `edge`, if the edge is a direct call to a named function (if not, return false).
    703 // `matcher` is given the name of the callee (actually, a tuple
    704 // [fully qualified name, base name]), an array of expressions containing the
    705 // arguments, and if the result of the call is assigned to a variable,
    706 // the expression representing that variable(the lhs).
    707 //
    708 // https://firefox-source-docs.mozilla.org/js/HazardAnalysis/CFG.html for
    709 // documentation of the data structure used here.
    710 function matchEdgeCall(edge, matcher) {
    711    if (edge.Kind != "Call") {
    712        return false;
    713    }
    714 
    715    const callee = edge.Exp[0];
    716 
    717    if (edge.Type.Kind == 'Function' &&
    718        edge.Exp[0].Kind == 'Var' &&
    719        edge.Exp[0].Variable.Kind == 'Func') {
    720        const calleeName = edge.Exp[0].Variable.Name;
    721        const args = edge.PEdgeCallArguments;
    722        const argExprs = args ? args.Exp : [];
    723        const lhs = edge.Exp[1]; // May be undefined
    724        return matcher(calleeName, argExprs, lhs);
    725    }
    726 
    727    return false;
    728 }
    729 
    730 function edgeMarksVariableGCSafe(edge, variable) {
    731    return matchEdgeCall(edge, (calleeName, argExprs, _lhs) => {
    732        // explicit JS_HAZ_VARIABLE_IS_GC_SAFE annotation
    733        return (calleeName[1] == 'MarkVariableAsGCSafe' &&
    734            calleeName[0].includes("JS::detail::MarkVariableAsGCSafe") &&
    735            argExprs.length == 1 &&
    736            expressionIsVariable(argExprs[0], variable));
    737    });
    738 }
    739 
    740 // Match an optional <namespace>:: followed by the class name,
    741 // and then an optional template parameter marker.
    742 //
    743 // Example: mozilla::dom::UniquePtr<...
    744 //
    745 function parseTypeName(typeName) {
    746    const m = typeName.match(/^(((?:\w|::)+::)?(\w+))\b(\<)?/);
    747    if (!m) {
    748        return undefined;
    749    }
    750    const [, type, raw_namespace, classname, is_specialized] = m;
    751    const namespace = raw_namespace === null ? "" : raw_namespace;
    752    return { type, namespace, classname, is_specialized }
    753 }
    754 
    755 // Return whether an edge "clears out" a variable's value. A simple example
    756 // would be
    757 //
    758 //     var = nullptr;
    759 //
    760 // for analyses for which nullptr is a "safe" value (eg GC rooting hazards; you
    761 // can't get in trouble by holding a nullptr live across a GC.) A more complex
    762 // example is a Maybe<T> that gets reset:
    763 //
    764 //     Maybe<AutoCheckCannotGC> nogc;
    765 //     nogc.emplace(cx);
    766 //     nogc.reset();
    767 //     gc();             // <-- not a problem; nogc is invalidated by prev line
    768 //     nogc.emplace(cx);
    769 //     foo(nogc);
    770 //
    771 // Yet another example is a UniquePtr being passed by value, which means the
    772 // receiver takes ownership:
    773 //
    774 //     UniquePtr<JSObject*> uobj(obj);
    775 //     foo(uobj);
    776 //     gc();
    777 //
    778 function edgeEndsValueLiveRange(edge, variable, body)
    779 {
    780    // var = nullptr;
    781    if (edge.Kind == "Assign") {
    782        const [lhs, rhs] = edge.Exp;
    783        return expressionIsVariable(lhs, variable) && isImmobileValue(rhs);
    784    }
    785 
    786    if (edge.Kind != "Call")
    787        return false;
    788 
    789    if (edgeMarksVariableGCSafe(edge, variable)) {
    790        // explicit JS_HAZ_VARIABLE_IS_GC_SAFE annotation
    791        return true;
    792    }
    793 
    794    const decl = lookupVariable(body, variable);
    795 
    796    if (matchEdgeCall(edge, (calleeName, argExprs, lhs) => {
    797        return calleeName[1] == 'move' && calleeName[0].includes('std::move(') &&
    798            expressionIsDeclaredVariable(argExprs[0], decl) &&
    799            lhs &&
    800            lhs.Kind == 'Var' &&
    801            lhs.Variable.Kind == 'Temp';
    802    })) {
    803        // temp = std::move(var)
    804        //
    805        // If var is a UniquePtr, and we pass it into something that takes
    806        // ownership, then it should be considered to be invalid. Example:
    807        //
    808        //     consume(std::move(var));
    809        //
    810        // where consume takes a UniquePtr. This will compile to something like
    811        //
    812        //     UniquePtr* __temp_1 = &std::move(var);
    813        //     UniquePtr&& __temp_2(*temp_1); // move constructor
    814        //     consume(__temp_2);
    815        //     ~UniquePtr(__temp_2);
    816        //
    817        // The line commented with "// move constructor" is a result of passing
    818        // a UniquePtr as a parameter. If consume() took a UniquePtr&&
    819        // directly, this would just be:
    820        //
    821        //     UniquePtr* __temp_1 = &std::move(var);
    822        //     consume(__temp_1);
    823        //
    824        // which is not guaranteed to move from the reference. It might just
    825        // ignore the parameter. We can't predict what consume(UniquePtr&&)
    826        // will do. We do know that UniquePtr(UniquePtr&& other) moves out of
    827        // `other`.
    828        //
    829        // The std::move() technically is irrelevant, but because we only care
    830        // about bare variables, it has to be used, which is fortunate because
    831        // the UniquePtr&& constructor operates on a temporary, not the
    832        // variable we care about.
    833 
    834        const lhs = edge.Exp[1].Variable;
    835        if (basicBlockEatsVariable(lhs, body, edge.Index[1]))
    836          return true;
    837    }
    838 
    839    const callee = edge.Exp[0];
    840 
    841    if (edge.Type.Kind == 'Function' &&
    842        edge.Type.TypeFunctionCSU &&
    843        edge.PEdgeCallInstance &&
    844        expressionIsMethodOnVariableDecl(edge.PEdgeCallInstance.Exp, decl))
    845    {
    846        const typeName = edge.Type.TypeFunctionCSU.Type.Name;
    847 
    848        // Synthesize a zero-arg constructor name like
    849        // mozilla::dom::UniquePtr<T>::UniquePtr(). Note that the `<T>` is
    850        // literal -- the pretty name from sixgill will render the actual
    851        // constructor name as something like
    852        //
    853        //   UniquePtr<T>::UniquePtr() [where T = int]
    854        //
    855        const parsed = parseTypeName(typeName);
    856        if (parsed) {
    857            const { type, namespace, classname, is_specialized } = parsed;
    858 
    859            // special-case: the initial constructor that doesn't provide a value.
    860            // Useful for things like Maybe<T>.
    861            const template = is_specialized ? '<T>' : '';
    862            const ctorName = `${namespace}${classname}${template}::${classname}()`;
    863            if (callee.Kind == 'Var' &&
    864                typesWithSafeConstructors.has(type) &&
    865                callee.Variable.Name[0].includes(ctorName))
    866            {
    867                return true;
    868            }
    869 
    870            // special-case: UniquePtr::reset() and similar.
    871            if (callee.Kind == 'Var' &&
    872                type in resetterMethods &&
    873                resetterMethods[type].has(callee.Variable.Name[1]))
    874            {
    875                return true;
    876            }
    877        }
    878    }
    879 
    880    // special-case: passing UniquePtr<T> by value.
    881    if (edge.Type.Kind == 'Function' &&
    882        edge.Type.TypeFunctionArgument &&
    883        edge.PEdgeCallArguments)
    884    {
    885        for (const i in edge.Type.TypeFunctionArgument) {
    886            const param = edge.Type.TypeFunctionArgument[i];
    887            if (param.Type.Kind != 'CSU')
    888                continue;
    889            if (!param.Type.Name.startsWith("mozilla::UniquePtr<"))
    890                continue;
    891            const arg = edge.PEdgeCallArguments.Exp[i];
    892            if (expressionIsVariable(arg, variable)) {
    893                return true;
    894            }
    895        }
    896    }
    897 
    898    return false;
    899 }
    900 
    901 // Look up a variable in the list of declarations for this body.
    902 function lookupVariable(body, variable) {
    903    for (const decl of (body.DefineVariable || [])) {
    904        if (sameVariable(decl.Variable, variable)) {
    905            return decl;
    906        }
    907    }
    908    return undefined;
    909 }
    910 
    911 function edgeMovesVariable(edge, variable, body)
    912 {
    913    if (edge.Kind != 'Call')
    914        return false;
    915    const callee = edge.Exp[0];
    916    if (callee.Kind == 'Var' &&
    917        callee.Variable.Kind == 'Func')
    918    {
    919        const { Variable: { Name: [ fullname, shortname ] } } = callee;
    920 
    921        // Match an rvalue parameter.
    922 
    923        if (!edge || !edge.PEdgeCallArguments || !edge.PEdgeCallArguments.Exp) {
    924            return false;
    925        }
    926 
    927        for (const arg of edge.PEdgeCallArguments.Exp) {
    928            if (arg.Kind != 'Drf') continue;
    929            const val = arg.Exp[0];
    930            if (val.Kind == 'Var' && sameVariable(val.Variable, variable)) {
    931                // This argument is the variable we're looking for. Return true
    932                // if it is passed as an rvalue reference.
    933                const type = lookupVariable(body, variable).Type;
    934                if (type.Kind == "Pointer" && type.Reference == PTR_RVALUE_REF) {
    935                    return true;
    936                }
    937            }
    938        }
    939    }
    940 
    941    return false;
    942 }
    943 
    944 // Scan forward through the basic block in 'body' starting at 'startpoint',
    945 // looking for a call that passes 'variable' to a move constructor that
    946 // "consumes" it (eg UniquePtr::UniquePtr(UniquePtr&&)).
    947 function basicBlockEatsVariable(variable, body, startpoint)
    948 {
    949    const successors = getSuccessors(body);
    950    let point = startpoint;
    951    while (point in successors) {
    952        // Only handle a single basic block. If it forks, stop looking.
    953        const edges = successors[point];
    954        if (edges.length != 1) {
    955            return false;
    956        }
    957        const edge = edges[0];
    958 
    959        if (edgeMovesVariable(edge, variable, body)) {
    960            return true;
    961        }
    962 
    963        // edgeStartsValueLiveRange will find places where 'variable' is given
    964        // a new value. Never observed in practice, since this function is only
    965        // called with a temporary resulting from std::move(), which is used
    966        // immediately for a call. But just to be robust to future uses:
    967        if (edgeStartsValueLiveRange(edge, variable)) {
    968            return false;
    969        }
    970 
    971        point = edge.Index[1];
    972    }
    973 
    974    return false;
    975 }
    976 
    977 var PROP_REFCNT          = 1 << 0;
    978 var PROP_SHARED_PTR_DTOR = 1 << 1;
    979 
    980 function getCalleeProperties(calleeName) {
    981    let props = 0;
    982 
    983    if (isRefcountedDtor(calleeName)) {
    984        props |= PROP_REFCNT;
    985    }
    986    if (calleeName.includes("~shared_ptr()")) {
    987        props |= PROP_SHARED_PTR_DTOR;
    988    }
    989    return props;
    990 }
    991 
    992 // Basic C++ ABI mangling: prefix an identifier with its length, in decimal.
    993 function mangle(name) {
    994    return name.length + name;
    995 }
    996 
    997 var TriviallyDestructibleTypes = new Set([
    998    // Single-token types from
    999    // https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling-builtin
   1000    "void", "wchar_t", "bool", "char", "short", "int", "long", "float", "double",
   1001    "__int64", "__int128", "__float128", "char32_t", "char16_t", "char8_t",
   1002    // Remaining observed cases. These are types T in shared_ptr<T> that have
   1003    // been observed, where the types themselves have trivial destructors, and
   1004    // the custom deleter doesn't do anything nontrivial that we might care about.
   1005    "_IO_FILE"
   1006 ]);
   1007 function synthesizeDestructorName(className) {
   1008    if (className.includes("<") || className.includes(" ") || className.includes("{")) {
   1009        return;
   1010    }
   1011    if (TriviallyDestructibleTypes.has(className)) {
   1012        return;
   1013    }
   1014    const parts = className.split("::");
   1015    const mangled_dtor = "_ZN" + parts.map(p => mangle(p)).join("") + "D2Ev";
   1016    const pretty_dtor = `void ${className}::~${parts.at(-1)}()`;
   1017    // Note that there will be a later check to verify that the function name
   1018    // synthesized here is an actual function, and assert if not (see
   1019    // assertFunctionExists() in computeCallgraph.js.)
   1020    return mangled_dtor + "$" + pretty_dtor;
   1021 }
   1022 
   1023 function getCallEdgeProperties(body, edge, calleeName, functionBodies) {
   1024    let attrs = 0;
   1025    let extraCalls = [];
   1026 
   1027    if (edge.Kind !== "Call") {
   1028        return { attrs, extraCalls };
   1029    }
   1030 
   1031    const props = getCalleeProperties(calleeName);
   1032    if (props & PROP_REFCNT) {
   1033        // std::swap of two refcounted values thinks it can drop the
   1034        // ref count to zero. Or rather, it just calls operator=() in a context
   1035        // where the refcount will never drop to zero.
   1036        const blockId = blockIdentifier(body);
   1037        if (blockId.includes("std::swap") || blockId.includes("mozilla::Swap")) {
   1038            // Replace the refcnt release call with nothing. It's not going to happen.
   1039            attrs |= ATTR_REPLACED;
   1040        }
   1041    }
   1042 
   1043    if (props & PROP_SHARED_PTR_DTOR) {
   1044        // Replace shared_ptr<T>::~shared_ptr() calls to T::~T() calls.
   1045        // Note that this will only apply to simple cases.
   1046        // Any templatized type, in particular, will be ignored and the original
   1047        // call tree will be left alone. If this triggers a hazard, then we can
   1048        // consider extending the mangling support.
   1049        //
   1050        // If the call to ~shared_ptr is not replaced, then it might end up calling
   1051        // an unknown function pointer. This does not always happen-- in some cases,
   1052        // the call tree below ~shared_ptr will invoke the correct destructor without
   1053        // going through function pointers.
   1054        const m = calleeName.match(/shared_ptr<(.*?)>::~shared_ptr\(\)(?: \[with T = ([\w:]+))?/);
   1055        assert(m);
   1056        let className = m[1] == "T" ? m[2] : m[1];
   1057        assert(className != "");
   1058        // cv qualification does not apply to destructors.
   1059        className = className.replace("const ", "");
   1060        className = className.replace("volatile ", "");
   1061        const dtor = synthesizeDestructorName(className);
   1062        if (dtor) {
   1063            attrs |= ATTR_REPLACED;
   1064            extraCalls.push({
   1065                attrs: ATTR_SYNTHETIC,
   1066                name: dtor,
   1067            });
   1068        }
   1069    }
   1070 
   1071    if ((props & PROP_REFCNT) == 0) {
   1072        return { attrs, extraCalls };
   1073    }
   1074 
   1075    let callee = edge.Exp[0];
   1076    while (callee.Kind === "Drf") {
   1077        callee = callee.Exp[0];
   1078    }
   1079 
   1080    const instance = edge.PEdgeCallInstance.Exp;
   1081    if (instance.Kind !== "Var") {
   1082        // TODO: handle field destructors
   1083        return { attrs, extraCalls };
   1084    }
   1085 
   1086    // Test whether the dtor call is dominated by operations on the variable
   1087    // that mean it will not go to a zero refcount in the dtor: either because
   1088    // it's already dead (eg r.forget() was called) or because it can be proven
   1089    // to have a ref count of greater than 1. This is implemented by looking
   1090    // for the reverse: find a path scanning backwards from the dtor call where
   1091    // the variable is used in any way that does *not* ensure that it is
   1092    // trivially destructible.
   1093 
   1094    const variable = instance.Variable;
   1095 
   1096    const visitor = new class DominatorVisitor extends Visitor {
   1097        // Do not revisit nodes. For new nodes, relay the decision made by
   1098        // extend_path.
   1099        next_action(seen, current) { return seen ? "prune" : current; }
   1100 
   1101        // We don't revisit, so always use the new.
   1102        merge_info(seen, current) { return current; }
   1103 
   1104        // Return the action to take from this node.
   1105        extend_path(edge, body, ppoint, successor_value) {
   1106            if (!edge) {
   1107                // Dummy edge to join two points.
   1108                return "continue";
   1109            }
   1110 
   1111            if (!edgeUsesVariable(edge, variable, body)) {
   1112                // Nothing of interest on this edge, keep searching.
   1113                return "continue";
   1114            }
   1115 
   1116            if (edgeEndsValueLiveRange(edge, variable, body)) {
   1117                // This path is safe!
   1118                return "prune";
   1119            }
   1120 
   1121            // Unsafe. Found a use that might set the variable to a
   1122            // nonzero refcount.
   1123            return "done";
   1124        }
   1125    }(functionBodies);
   1126 
   1127    // Searching upwards from a destructor call, return the opposite of: is
   1128    // there a path to a use or the start of the function that does NOT hit a
   1129    // safe assignment like refptr.forget() first?
   1130    //
   1131    // In graph terms: return whether the destructor call is dominated by forget() calls (or similar).
   1132    const edgeIsNonReleasingDtor = !BFS_upwards(
   1133        body, edge.Index[0], functionBodies, visitor, "start",
   1134        false // Return value if we do not reach the root without finding a non-forget() use.
   1135    );
   1136    if (edgeIsNonReleasingDtor) {
   1137        attrs |= ATTR_GC_SUPPRESSED | ATTR_NONRELEASING;
   1138    }
   1139    return { attrs, extraCalls };
   1140 }
   1141 
   1142 // gcc uses something like "__dt_del " for virtual destructors that it
   1143 // generates.
   1144 function isSyntheticVirtualDestructor(funcName) {
   1145    return funcName.endsWith(" ");
   1146 }
   1147 
   1148 function typedField(field)
   1149 {
   1150    if ("FieldInstanceFunction" in field) {
   1151        // Virtual call
   1152        //
   1153        // This makes a minimal attempt at dealing with overloading, by
   1154        // incorporating the number of parameters. So far, that is all that has
   1155        // been needed. If more is needed, sixgill will need to produce a full
   1156        // mangled type.
   1157        const {Type, Name: [name]} = field;
   1158 
   1159        // Virtual destructors don't need a type or argument count,
   1160        // and synthetic ones don't have them filled in.
   1161        if (isSyntheticVirtualDestructor(name)) {
   1162            return name;
   1163        }
   1164 
   1165        var nargs = 0;
   1166        if (Type.Kind == "Function" && "TypeFunctionArguments" in Type)
   1167            nargs = Type.TypeFunctionArguments.Type.length;
   1168        return name + ":" + nargs;
   1169    } else {
   1170        // Function pointer field
   1171        return field.Name[0];
   1172    }
   1173 }
   1174 
   1175 function fieldKey(csuName, field)
   1176 {
   1177    return csuName + "." + typedField(field);
   1178 }