tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

computeCallgraph.js (16188B)


      1 /* This Source Code Form is subject to the terms of the Mozilla Public
      2 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
      3 * You can obtain one at http://mozilla.org/MPL/2.0/. */
      4 
      5 /* -*- indent-tabs-mode: nil; js-indent-level: 4 -*- */
      6 
      7 "use strict";
      8 
      9 loadRelativeToScript('callgraph.js');
     10 
     11 var options = parse_options([
     12    {
     13        name: '--verbose',
     14        type: 'bool'
     15    },
     16    {
     17        name: '--function',
     18        type: 'string'
     19    },
     20    {
     21        name: 'typeInfo_filename',
     22        type: 'string',
     23        default: "typeInfo.txt"
     24    },
     25    {
     26        name: 'callgraphOut_filename',
     27        type: 'string',
     28        default: "rawcalls.txt"
     29    },
     30    {
     31        name: 'batch',
     32        default: 1,
     33        type: 'number'
     34    },
     35    {
     36        name: 'numBatches',
     37        default: 1,
     38        type: 'number'
     39    },
     40 ]);
     41 
     42 var origOut = os.file.redirect(options.callgraphOut_filename);
     43 
     44 var memoized = new Map();
     45 
     46 var unmangled2id = new Set();
     47 
     48 // Insert a string into the name table and return the ID. Do not use for
     49 // functions, which must be handled specially.
     50 function getId(name)
     51 {
     52    let id = memoized.get(name);
     53    if (id !== undefined)
     54        return id;
     55 
     56    id = memoized.size + 1;
     57    memoized.set(name, id);
     58    print(`#${id} ${name}`);
     59 
     60    return id;
     61 }
     62 
     63 // Split a function into mangled and unmangled parts and return the ID for the
     64 // function.
     65 function functionId(name)
     66 {
     67    const [mangled, unmangled] = splitFunction(name);
     68    const id = getId(mangled);
     69 
     70    // Only produce a mangled -> unmangled mapping once, unless there are
     71    // multiple unmangled names for the same mangled name.
     72    if (unmangled2id.has(unmangled))
     73        return id;
     74 
     75    print(`= ${id} ${unmangled}`);
     76    unmangled2id.add(unmangled);
     77    return id;
     78 }
     79 
     80 var lastline;
     81 function printOnce(line)
     82 {
     83    if (line != lastline) {
     84        print(line);
     85        lastline = line;
     86    }
     87 }
     88 
     89 // Returns a table mapping function name to lists of
     90 // [annotation-name, annotation-value] pairs:
     91 //   { function-name => [ [annotation-name, annotation-value] ] }
     92 //
     93 // Note that sixgill will only store certain attributes (annotation-names), so
     94 // this won't be *all* the attributes in the source, just the ones that sixgill
     95 // watches for.
     96 function getAllAttributes(body)
     97 {
     98    var all_annotations = {};
     99    for (var v of (body.DefineVariable || [])) {
    100        if (v.Variable.Kind != 'Func')
    101            continue;
    102        var name = v.Variable.Name[0];
    103        var annotations = all_annotations[name] = [];
    104 
    105        for (var ann of (v.Type.Annotation || [])) {
    106            annotations.push(ann.Name);
    107        }
    108    }
    109 
    110    return all_annotations;
    111 }
    112 
    113 // Get just the annotations understood by the hazard analysis.
    114 function getAnnotations(functionName, body) {
    115    var tags = new Set();
    116    var attributes = getAllAttributes(body);
    117    if (functionName in attributes) {
    118        for (var [ annName, annValue ] of attributes[functionName]) {
    119            if (annName == 'annotate')
    120                tags.add(annValue);
    121        }
    122    }
    123    return tags;
    124 }
    125 
    126 // Scan through a function body, pulling out all annotations and calls and
    127 // recording them in callgraph.txt.
    128 function processBody(functionName, body, functionBodies)
    129 {
    130    if (!('PEdge' in body))
    131        return;
    132 
    133    for (var tag of getAnnotations(functionName, body).values()) {
    134        const id = functionId(functionName);
    135        print(`T ${id} ${tag}`);
    136        if (tag == "Calls JSNatives")
    137            printOnce(`D ${id} ${functionId("(js-code)")}`);
    138    }
    139 
    140    // Set of all callees that have been output so far, in order to suppress
    141    // repeated callgraph edges from being recorded. This uses a Map from
    142    // callees to limit sets, because we don't want a limited edge to prevent
    143    // an unlimited edge from being recorded later. (So an edge will be skipped
    144    // if it exists and is at least as limited as the previously seen edge.)
    145    //
    146    // Limit sets are implemented as integers interpreted as bitfields.
    147    //
    148    var seen = new Map();
    149 
    150    lastline = null;
    151    for (var edge of body.PEdge) {
    152        if (edge.Kind != "Call")
    153            continue;
    154 
    155        // The attrs (eg ATTR_GC_SUPPRESSED) are determined by whatever RAII
    156        // scopes might be active, which have been computed previously for all
    157        // points in the body.
    158        const scopeAttrs = body.attrs[edge.Index[0]] | 0;
    159 
    160        for (const { callee, attrs } of getCallees(body, edge, scopeAttrs, functionBodies)) {
    161            // Some function names will be synthesized by manually constructing
    162            // their names. Verify that we managed to synthesize an existing function.
    163            // This cannot be done later with either the callees or callers tables,
    164            // because the function may be an otherwise uncalled leaf.
    165            if (attrs & ATTR_SYNTHETIC) {
    166                assertFunctionExists(callee.name);
    167            }
    168 
    169            // Individual callees may have additional attrs. The only such
    170            // bit currently is that nsISupports.{AddRef,Release} are assumed
    171            // to never GC.
    172            let prologue = attrs ? `/${attrs} ` : "";
    173            prologue += functionId(functionName) + " ";
    174            if (callee.kind == 'direct') {
    175                const prev_attrs = seen.has(callee.name) ? seen.get(callee.name) : ATTRS_UNVISITED;
    176                if (prev_attrs & ~attrs) {
    177                    // Only output an edge if it loosens a limit.
    178                    seen.set(callee.name, prev_attrs & attrs);
    179                    printOnce("D " + prologue + functionId(callee.name));
    180                }
    181            } else if (callee.kind == 'field') {
    182                var { csu, field, isVirtual } = callee;
    183                const tag = isVirtual ? 'V' : 'F';
    184                const fullfield = `${csu}.${field}`;
    185                printOnce(`${tag} ${prologue}${getId(fullfield)} CLASS ${csu} FIELD ${field}`);
    186            } else if (callee.kind == 'resolved-field') {
    187                // Fully-resolved field (virtual method) call. Record the
    188                // callgraph edges. Do not consider attrs, since they are local
    189                // to this callsite and we are writing out a global record
    190                // here.
    191                //
    192                // Any field call that does *not* have an R entry must be
    193                // assumed to call anything.
    194                var { csu, field, callees } = callee;
    195                var fullFieldName = csu + "." + field;
    196                if (!virtualResolutionsSeen.has(fullFieldName)) {
    197                    virtualResolutionsSeen.add(fullFieldName);
    198                    for (var target of callees)
    199                        printOnce("R " + getId(fullFieldName) + " " + functionId(target.name));
    200                }
    201            } else if (callee.kind == 'indirect') {
    202                printOnce("I " + prologue + "VARIABLE " + callee.variable);
    203            } else if (callee.kind == 'unknown') {
    204                printOnce("I " + prologue + "VARIABLE UNKNOWN");
    205            } else {
    206                printErr("invalid " + callee.kind + " callee");
    207                debugger;
    208            }
    209        }
    210    }
    211 }
    212 
    213 // Reserve IDs for special function names.
    214 
    215 // represents anything that can run JS
    216 assert(ID.jscode == functionId("(js-code)"));
    217 
    218 // function pointers will get an edge to this in loadCallgraph.js; only the ID
    219 // reservation is present in callgraph.txt
    220 assert(ID.anyfunc == functionId("(any-function)"));
    221 
    222 // same as above, but for fields annotated to never GC
    223 assert(ID.nogcfunc == functionId("(nogc-function)"));
    224 
    225 // garbage collection
    226 assert(ID.gc == functionId("(GC)"));
    227 
    228 var typeInfo = loadTypeInfo(options.typeInfo_filename);
    229 
    230 loadTypes("src_comp.xdb");
    231 
    232 // Arbitrary JS code must always be assumed to GC. In real code, there would
    233 // always be a path anyway through some arbitrary JSNative, but this route will be shorter.
    234 print(`D ${ID.jscode} ${ID.gc}`);
    235 
    236 // An unknown function is assumed to GC.
    237 print(`D ${ID.anyfunc} ${ID.gc}`);
    238 
    239 // Output call edges for all virtual methods defined anywhere, from
    240 // Class.methodname to what a (dynamic) instance of Class would run when
    241 // methodname was called (either Class::methodname() if defined, or some
    242 // Base::methodname() for inherited method definitions).
    243 for (const [fieldkey, methods] of virtualDefinitions) {
    244    const caller = getId(fieldkey);
    245    for (const name of methods) {
    246        const callee = functionId(name);
    247        printOnce(`D ${caller} ${callee}`);
    248    }
    249 }
    250 
    251 // Output call edges from C.methodname -> S.methodname for all subclasses S of
    252 // class C. This is for when you are calling methodname on a pointer/ref of
    253 // dynamic type C, so that the callgraph contains calls to all descendant
    254 // subclasses' implementations.
    255 for (const [csu, methods] of virtualDeclarations) {
    256    for (const {field, dtor} of methods) {
    257        const caller = getId(fieldKey(csu, field));
    258        if (virtualCanRunJS(csu, field.Name[0]))
    259            printOnce(`D ${caller} ${functionId("(js-code)")}`);
    260        if (dtor)
    261            printOnce(`D ${caller} ${functionId(dtor)}`);
    262        if (!subclasses.has(csu))
    263            continue;
    264        for (const sub of subclasses.get(csu)) {
    265            printOnce(`D ${caller} ${getId(fieldKey(sub, field))}`);
    266        }
    267    }
    268 }
    269 
    270 var xdb = xdbLibrary();
    271 xdb.open("src_body.xdb");
    272 
    273 if (options.verbose) {
    274    printErr("Finished loading data structures");
    275 }
    276 
    277 var minStream = xdb.min_data_stream();
    278 var maxStream = xdb.max_data_stream();
    279 
    280 if (options.function) {
    281    var index = xdb.lookup_key(options.function);
    282    if (!index) {
    283        printErr("Function not found");
    284        quit(1);
    285    }
    286    minStream = maxStream = index;
    287 }
    288 
    289 function assertFunctionExists(name) {
    290    var data = xdb.read_entry(name);
    291    assert(data.contents != 0, `synthetic function '${name}' not found!`);
    292 }
    293 
    294 function process(functionName, functionBodies)
    295 {
    296    for (var body of functionBodies)
    297        body.attrs = [];
    298 
    299    for (var body of functionBodies) {
    300        for (var [pbody, id, attrs] of allRAIIGuardedCallPoints(typeInfo, functionBodies, body, isLimitConstructor)) {
    301            pbody.attrs[id] = attrs;
    302        }
    303    }
    304 
    305    if (options.function) {
    306        debugger;
    307    }
    308    for (var body of functionBodies) {
    309        processBody(functionName, body, functionBodies);
    310    }
    311 
    312    // Not strictly necessary, but add an edge from the synthetic "(js-code)"
    313    // to RunScript to allow better stacks than just randomly selecting a
    314    // JSNative to blame things on.
    315    if (functionName.includes("js::RunScript"))
    316        print(`D ${functionId("(js-code)")} ${functionId(functionName)}`);
    317 
    318    // GCC generates multiple constructors and destructors ("in-charge" and
    319    // "not-in-charge") to handle virtual base classes. They are normally
    320    // identical, and it appears that GCC does some magic to alias them to the
    321    // same thing. But this aliasing is not visible to the analysis. So we'll
    322    // add a dummy call edge from "foo" -> "foo *INTERNAL* ", since only "foo"
    323    // will show up as called but only "foo *INTERNAL* " will be emitted in the
    324    // case where the constructors are identical.
    325    //
    326    // This is slightly conservative in the case where they are *not*
    327    // identical, but that should be rare enough that we don't care.
    328    var markerPos = functionName.indexOf(internalMarker);
    329    if (markerPos > 0) {
    330        var inChargeXTor = functionName.replace(internalMarker, "");
    331        printOnce("D " + functionId(inChargeXTor) + " " + functionId(functionName));
    332    }
    333 
    334    const [ mangled, unmangled ] = splitFunction(functionName);
    335 
    336    // Further note: from https://itanium-cxx-abi.github.io/cxx-abi/abi.html the
    337    // different kinds of constructors/destructors are:
    338    // C1	# complete object constructor
    339    // C2	# base object constructor
    340    // C3	# complete object allocating constructor
    341    // D0	# deleting destructor
    342    // D1	# complete object destructor
    343    // D2	# base object destructor
    344    //
    345    // In actual practice, I have observed C4 and D4 xtors generated by gcc
    346    // 4.9.3 (but not 4.7.3). The gcc source code says:
    347    //
    348    //   /* This is the old-style "[unified]" constructor.
    349    //      In some cases, we may emit this function and call
    350    //      it from the clones in order to share code and save space.  */
    351    //
    352    // Unfortunately, that "call... from the clones" does not seem to appear in
    353    // the CFG we get from GCC. So if we see a C4 constructor or D4 destructor,
    354    // inject an edge to it from C1, C2, and C3 (or D1, D2, and D3). (Note that
    355    // C3 isn't even used in current GCC, but add the edge anyway just in
    356    // case.)
    357    //
    358    // from gcc/cp/mangle.c:
    359    //
    360    // <special-name> ::= D0 # deleting (in-charge) destructor
    361    //                ::= D1 # complete object (in-charge) destructor
    362    //                ::= D2 # base object (not-in-charge) destructor
    363    // <special-name> ::= C1   # complete object constructor
    364    //                ::= C2   # base object constructor
    365    //                ::= C3   # complete object allocating constructor
    366    //
    367    // Currently, allocating constructors are never used.
    368    //
    369    if (functionName.indexOf("C4") != -1) {
    370        // E terminates the method name (and precedes the method parameters).
    371        // If eg "C4E" shows up in the mangled name for another reason, this
    372        // will create bogus edges in the callgraph. But it will affect little
    373        // and is somewhat difficult to avoid, so we will live with it.
    374        //
    375        // Another possibility! A templatized constructor will contain C4I...E
    376        // for template arguments.
    377        //
    378        for (let [synthetic, variant, desc] of [
    379            ['C4E', 'C1E', 'complete_ctor'],
    380            ['C4E', 'C2E', 'base_ctor'],
    381            ['C4E', 'C3E', 'complete_alloc_ctor'],
    382            ['C4I', 'C1I', 'complete_ctor'],
    383            ['C4I', 'C2I', 'base_ctor'],
    384            ['C4I', 'C3I', 'complete_alloc_ctor']])
    385        {
    386            if (mangled.indexOf(synthetic) == -1)
    387                continue;
    388 
    389            let variant_mangled = mangled.replace(synthetic, variant);
    390            let variant_full = `${variant_mangled}$${unmangled} [[${desc}]]`;
    391            printOnce("D " + functionId(variant_full) + " " + functionId(functionName));
    392        }
    393    }
    394 
    395    // For destructors:
    396    //
    397    // I've never seen D4Ev() + D4Ev(int32), only one or the other. So
    398    // for a D4Ev of any sort, create:
    399    //
    400    //   D0() -> D1()  # deleting destructor calls complete destructor, then deletes
    401    //   D1() -> D2()  # complete destructor calls base destructor, then destroys virtual bases
    402    //   D2() -> D4(?) # base destructor might be aliased to unified destructor
    403    //                 # use whichever one is defined, in-charge or not.
    404    //                 # ('?') means either () or (int32).
    405    //
    406    // Note that this doesn't actually make sense -- D0 and D1 should be
    407    // in-charge, but gcc doesn't seem to give them the in-charge parameter?!
    408    //
    409    if (functionName.indexOf("D4Ev") != -1 && functionName.indexOf("::~") != -1) {
    410        const not_in_charge_dtor = functionName.replace("(int32)", "()");
    411        const D0 = not_in_charge_dtor.replace("D4Ev", "D0Ev") + " [[deleting_dtor]]";
    412        const D1 = not_in_charge_dtor.replace("D4Ev", "D1Ev") + " [[complete_dtor]]";
    413        const D2 = not_in_charge_dtor.replace("D4Ev", "D2Ev") + " [[base_dtor]]";
    414        printOnce("D " + functionId(D0) + " " + functionId(D1));
    415        printOnce("D " + functionId(D1) + " " + functionId(D2));
    416        printOnce("D " + functionId(D2) + " " + functionId(functionName));
    417    }
    418 
    419    if (isJSNative(mangled))
    420        printOnce(`D ${functionId("(js-code)")} ${functionId(functionName)}`);
    421 }
    422 
    423 var start = batchStart(options.batch, options.numBatches, minStream, maxStream);
    424 var end = batchLast(options.batch, options.numBatches, minStream, maxStream);
    425 
    426 for (var nameIndex = start; nameIndex <= end; nameIndex++) {
    427    var name = xdb.read_key(nameIndex);
    428    var data = xdb.read_entry(name);
    429    process(name.readString(), JSON.parse(data.readString()));
    430    xdb.free_string(name);
    431    xdb.free_string(data);
    432 }
    433 
    434 os.file.close(os.file.redirect(origOut));