computeCallgraph.js (16188B)
1 /* This Source Code Form is subject to the terms of the Mozilla Public 2 * License, v. 2.0. If a copy of the MPL was not distributed with this file, 3 * You can obtain one at http://mozilla.org/MPL/2.0/. */ 4 5 /* -*- indent-tabs-mode: nil; js-indent-level: 4 -*- */ 6 7 "use strict"; 8 9 loadRelativeToScript('callgraph.js'); 10 11 var options = parse_options([ 12 { 13 name: '--verbose', 14 type: 'bool' 15 }, 16 { 17 name: '--function', 18 type: 'string' 19 }, 20 { 21 name: 'typeInfo_filename', 22 type: 'string', 23 default: "typeInfo.txt" 24 }, 25 { 26 name: 'callgraphOut_filename', 27 type: 'string', 28 default: "rawcalls.txt" 29 }, 30 { 31 name: 'batch', 32 default: 1, 33 type: 'number' 34 }, 35 { 36 name: 'numBatches', 37 default: 1, 38 type: 'number' 39 }, 40 ]); 41 42 var origOut = os.file.redirect(options.callgraphOut_filename); 43 44 var memoized = new Map(); 45 46 var unmangled2id = new Set(); 47 48 // Insert a string into the name table and return the ID. Do not use for 49 // functions, which must be handled specially. 50 function getId(name) 51 { 52 let id = memoized.get(name); 53 if (id !== undefined) 54 return id; 55 56 id = memoized.size + 1; 57 memoized.set(name, id); 58 print(`#${id} ${name}`); 59 60 return id; 61 } 62 63 // Split a function into mangled and unmangled parts and return the ID for the 64 // function. 65 function functionId(name) 66 { 67 const [mangled, unmangled] = splitFunction(name); 68 const id = getId(mangled); 69 70 // Only produce a mangled -> unmangled mapping once, unless there are 71 // multiple unmangled names for the same mangled name. 72 if (unmangled2id.has(unmangled)) 73 return id; 74 75 print(`= ${id} ${unmangled}`); 76 unmangled2id.add(unmangled); 77 return id; 78 } 79 80 var lastline; 81 function printOnce(line) 82 { 83 if (line != lastline) { 84 print(line); 85 lastline = line; 86 } 87 } 88 89 // Returns a table mapping function name to lists of 90 // [annotation-name, annotation-value] pairs: 91 // { function-name => [ [annotation-name, annotation-value] ] } 92 // 93 // Note that sixgill will only store certain attributes (annotation-names), so 94 // this won't be *all* the attributes in the source, just the ones that sixgill 95 // watches for. 96 function getAllAttributes(body) 97 { 98 var all_annotations = {}; 99 for (var v of (body.DefineVariable || [])) { 100 if (v.Variable.Kind != 'Func') 101 continue; 102 var name = v.Variable.Name[0]; 103 var annotations = all_annotations[name] = []; 104 105 for (var ann of (v.Type.Annotation || [])) { 106 annotations.push(ann.Name); 107 } 108 } 109 110 return all_annotations; 111 } 112 113 // Get just the annotations understood by the hazard analysis. 114 function getAnnotations(functionName, body) { 115 var tags = new Set(); 116 var attributes = getAllAttributes(body); 117 if (functionName in attributes) { 118 for (var [ annName, annValue ] of attributes[functionName]) { 119 if (annName == 'annotate') 120 tags.add(annValue); 121 } 122 } 123 return tags; 124 } 125 126 // Scan through a function body, pulling out all annotations and calls and 127 // recording them in callgraph.txt. 128 function processBody(functionName, body, functionBodies) 129 { 130 if (!('PEdge' in body)) 131 return; 132 133 for (var tag of getAnnotations(functionName, body).values()) { 134 const id = functionId(functionName); 135 print(`T ${id} ${tag}`); 136 if (tag == "Calls JSNatives") 137 printOnce(`D ${id} ${functionId("(js-code)")}`); 138 } 139 140 // Set of all callees that have been output so far, in order to suppress 141 // repeated callgraph edges from being recorded. This uses a Map from 142 // callees to limit sets, because we don't want a limited edge to prevent 143 // an unlimited edge from being recorded later. (So an edge will be skipped 144 // if it exists and is at least as limited as the previously seen edge.) 145 // 146 // Limit sets are implemented as integers interpreted as bitfields. 147 // 148 var seen = new Map(); 149 150 lastline = null; 151 for (var edge of body.PEdge) { 152 if (edge.Kind != "Call") 153 continue; 154 155 // The attrs (eg ATTR_GC_SUPPRESSED) are determined by whatever RAII 156 // scopes might be active, which have been computed previously for all 157 // points in the body. 158 const scopeAttrs = body.attrs[edge.Index[0]] | 0; 159 160 for (const { callee, attrs } of getCallees(body, edge, scopeAttrs, functionBodies)) { 161 // Some function names will be synthesized by manually constructing 162 // their names. Verify that we managed to synthesize an existing function. 163 // This cannot be done later with either the callees or callers tables, 164 // because the function may be an otherwise uncalled leaf. 165 if (attrs & ATTR_SYNTHETIC) { 166 assertFunctionExists(callee.name); 167 } 168 169 // Individual callees may have additional attrs. The only such 170 // bit currently is that nsISupports.{AddRef,Release} are assumed 171 // to never GC. 172 let prologue = attrs ? `/${attrs} ` : ""; 173 prologue += functionId(functionName) + " "; 174 if (callee.kind == 'direct') { 175 const prev_attrs = seen.has(callee.name) ? seen.get(callee.name) : ATTRS_UNVISITED; 176 if (prev_attrs & ~attrs) { 177 // Only output an edge if it loosens a limit. 178 seen.set(callee.name, prev_attrs & attrs); 179 printOnce("D " + prologue + functionId(callee.name)); 180 } 181 } else if (callee.kind == 'field') { 182 var { csu, field, isVirtual } = callee; 183 const tag = isVirtual ? 'V' : 'F'; 184 const fullfield = `${csu}.${field}`; 185 printOnce(`${tag} ${prologue}${getId(fullfield)} CLASS ${csu} FIELD ${field}`); 186 } else if (callee.kind == 'resolved-field') { 187 // Fully-resolved field (virtual method) call. Record the 188 // callgraph edges. Do not consider attrs, since they are local 189 // to this callsite and we are writing out a global record 190 // here. 191 // 192 // Any field call that does *not* have an R entry must be 193 // assumed to call anything. 194 var { csu, field, callees } = callee; 195 var fullFieldName = csu + "." + field; 196 if (!virtualResolutionsSeen.has(fullFieldName)) { 197 virtualResolutionsSeen.add(fullFieldName); 198 for (var target of callees) 199 printOnce("R " + getId(fullFieldName) + " " + functionId(target.name)); 200 } 201 } else if (callee.kind == 'indirect') { 202 printOnce("I " + prologue + "VARIABLE " + callee.variable); 203 } else if (callee.kind == 'unknown') { 204 printOnce("I " + prologue + "VARIABLE UNKNOWN"); 205 } else { 206 printErr("invalid " + callee.kind + " callee"); 207 debugger; 208 } 209 } 210 } 211 } 212 213 // Reserve IDs for special function names. 214 215 // represents anything that can run JS 216 assert(ID.jscode == functionId("(js-code)")); 217 218 // function pointers will get an edge to this in loadCallgraph.js; only the ID 219 // reservation is present in callgraph.txt 220 assert(ID.anyfunc == functionId("(any-function)")); 221 222 // same as above, but for fields annotated to never GC 223 assert(ID.nogcfunc == functionId("(nogc-function)")); 224 225 // garbage collection 226 assert(ID.gc == functionId("(GC)")); 227 228 var typeInfo = loadTypeInfo(options.typeInfo_filename); 229 230 loadTypes("src_comp.xdb"); 231 232 // Arbitrary JS code must always be assumed to GC. In real code, there would 233 // always be a path anyway through some arbitrary JSNative, but this route will be shorter. 234 print(`D ${ID.jscode} ${ID.gc}`); 235 236 // An unknown function is assumed to GC. 237 print(`D ${ID.anyfunc} ${ID.gc}`); 238 239 // Output call edges for all virtual methods defined anywhere, from 240 // Class.methodname to what a (dynamic) instance of Class would run when 241 // methodname was called (either Class::methodname() if defined, or some 242 // Base::methodname() for inherited method definitions). 243 for (const [fieldkey, methods] of virtualDefinitions) { 244 const caller = getId(fieldkey); 245 for (const name of methods) { 246 const callee = functionId(name); 247 printOnce(`D ${caller} ${callee}`); 248 } 249 } 250 251 // Output call edges from C.methodname -> S.methodname for all subclasses S of 252 // class C. This is for when you are calling methodname on a pointer/ref of 253 // dynamic type C, so that the callgraph contains calls to all descendant 254 // subclasses' implementations. 255 for (const [csu, methods] of virtualDeclarations) { 256 for (const {field, dtor} of methods) { 257 const caller = getId(fieldKey(csu, field)); 258 if (virtualCanRunJS(csu, field.Name[0])) 259 printOnce(`D ${caller} ${functionId("(js-code)")}`); 260 if (dtor) 261 printOnce(`D ${caller} ${functionId(dtor)}`); 262 if (!subclasses.has(csu)) 263 continue; 264 for (const sub of subclasses.get(csu)) { 265 printOnce(`D ${caller} ${getId(fieldKey(sub, field))}`); 266 } 267 } 268 } 269 270 var xdb = xdbLibrary(); 271 xdb.open("src_body.xdb"); 272 273 if (options.verbose) { 274 printErr("Finished loading data structures"); 275 } 276 277 var minStream = xdb.min_data_stream(); 278 var maxStream = xdb.max_data_stream(); 279 280 if (options.function) { 281 var index = xdb.lookup_key(options.function); 282 if (!index) { 283 printErr("Function not found"); 284 quit(1); 285 } 286 minStream = maxStream = index; 287 } 288 289 function assertFunctionExists(name) { 290 var data = xdb.read_entry(name); 291 assert(data.contents != 0, `synthetic function '${name}' not found!`); 292 } 293 294 function process(functionName, functionBodies) 295 { 296 for (var body of functionBodies) 297 body.attrs = []; 298 299 for (var body of functionBodies) { 300 for (var [pbody, id, attrs] of allRAIIGuardedCallPoints(typeInfo, functionBodies, body, isLimitConstructor)) { 301 pbody.attrs[id] = attrs; 302 } 303 } 304 305 if (options.function) { 306 debugger; 307 } 308 for (var body of functionBodies) { 309 processBody(functionName, body, functionBodies); 310 } 311 312 // Not strictly necessary, but add an edge from the synthetic "(js-code)" 313 // to RunScript to allow better stacks than just randomly selecting a 314 // JSNative to blame things on. 315 if (functionName.includes("js::RunScript")) 316 print(`D ${functionId("(js-code)")} ${functionId(functionName)}`); 317 318 // GCC generates multiple constructors and destructors ("in-charge" and 319 // "not-in-charge") to handle virtual base classes. They are normally 320 // identical, and it appears that GCC does some magic to alias them to the 321 // same thing. But this aliasing is not visible to the analysis. So we'll 322 // add a dummy call edge from "foo" -> "foo *INTERNAL* ", since only "foo" 323 // will show up as called but only "foo *INTERNAL* " will be emitted in the 324 // case where the constructors are identical. 325 // 326 // This is slightly conservative in the case where they are *not* 327 // identical, but that should be rare enough that we don't care. 328 var markerPos = functionName.indexOf(internalMarker); 329 if (markerPos > 0) { 330 var inChargeXTor = functionName.replace(internalMarker, ""); 331 printOnce("D " + functionId(inChargeXTor) + " " + functionId(functionName)); 332 } 333 334 const [ mangled, unmangled ] = splitFunction(functionName); 335 336 // Further note: from https://itanium-cxx-abi.github.io/cxx-abi/abi.html the 337 // different kinds of constructors/destructors are: 338 // C1 # complete object constructor 339 // C2 # base object constructor 340 // C3 # complete object allocating constructor 341 // D0 # deleting destructor 342 // D1 # complete object destructor 343 // D2 # base object destructor 344 // 345 // In actual practice, I have observed C4 and D4 xtors generated by gcc 346 // 4.9.3 (but not 4.7.3). The gcc source code says: 347 // 348 // /* This is the old-style "[unified]" constructor. 349 // In some cases, we may emit this function and call 350 // it from the clones in order to share code and save space. */ 351 // 352 // Unfortunately, that "call... from the clones" does not seem to appear in 353 // the CFG we get from GCC. So if we see a C4 constructor or D4 destructor, 354 // inject an edge to it from C1, C2, and C3 (or D1, D2, and D3). (Note that 355 // C3 isn't even used in current GCC, but add the edge anyway just in 356 // case.) 357 // 358 // from gcc/cp/mangle.c: 359 // 360 // <special-name> ::= D0 # deleting (in-charge) destructor 361 // ::= D1 # complete object (in-charge) destructor 362 // ::= D2 # base object (not-in-charge) destructor 363 // <special-name> ::= C1 # complete object constructor 364 // ::= C2 # base object constructor 365 // ::= C3 # complete object allocating constructor 366 // 367 // Currently, allocating constructors are never used. 368 // 369 if (functionName.indexOf("C4") != -1) { 370 // E terminates the method name (and precedes the method parameters). 371 // If eg "C4E" shows up in the mangled name for another reason, this 372 // will create bogus edges in the callgraph. But it will affect little 373 // and is somewhat difficult to avoid, so we will live with it. 374 // 375 // Another possibility! A templatized constructor will contain C4I...E 376 // for template arguments. 377 // 378 for (let [synthetic, variant, desc] of [ 379 ['C4E', 'C1E', 'complete_ctor'], 380 ['C4E', 'C2E', 'base_ctor'], 381 ['C4E', 'C3E', 'complete_alloc_ctor'], 382 ['C4I', 'C1I', 'complete_ctor'], 383 ['C4I', 'C2I', 'base_ctor'], 384 ['C4I', 'C3I', 'complete_alloc_ctor']]) 385 { 386 if (mangled.indexOf(synthetic) == -1) 387 continue; 388 389 let variant_mangled = mangled.replace(synthetic, variant); 390 let variant_full = `${variant_mangled}$${unmangled} [[${desc}]]`; 391 printOnce("D " + functionId(variant_full) + " " + functionId(functionName)); 392 } 393 } 394 395 // For destructors: 396 // 397 // I've never seen D4Ev() + D4Ev(int32), only one or the other. So 398 // for a D4Ev of any sort, create: 399 // 400 // D0() -> D1() # deleting destructor calls complete destructor, then deletes 401 // D1() -> D2() # complete destructor calls base destructor, then destroys virtual bases 402 // D2() -> D4(?) # base destructor might be aliased to unified destructor 403 // # use whichever one is defined, in-charge or not. 404 // # ('?') means either () or (int32). 405 // 406 // Note that this doesn't actually make sense -- D0 and D1 should be 407 // in-charge, but gcc doesn't seem to give them the in-charge parameter?! 408 // 409 if (functionName.indexOf("D4Ev") != -1 && functionName.indexOf("::~") != -1) { 410 const not_in_charge_dtor = functionName.replace("(int32)", "()"); 411 const D0 = not_in_charge_dtor.replace("D4Ev", "D0Ev") + " [[deleting_dtor]]"; 412 const D1 = not_in_charge_dtor.replace("D4Ev", "D1Ev") + " [[complete_dtor]]"; 413 const D2 = not_in_charge_dtor.replace("D4Ev", "D2Ev") + " [[base_dtor]]"; 414 printOnce("D " + functionId(D0) + " " + functionId(D1)); 415 printOnce("D " + functionId(D1) + " " + functionId(D2)); 416 printOnce("D " + functionId(D2) + " " + functionId(functionName)); 417 } 418 419 if (isJSNative(mangled)) 420 printOnce(`D ${functionId("(js-code)")} ${functionId(functionName)}`); 421 } 422 423 var start = batchStart(options.batch, options.numBatches, minStream, maxStream); 424 var end = batchLast(options.batch, options.numBatches, minStream, maxStream); 425 426 for (var nameIndex = start; nameIndex <= end; nameIndex++) { 427 var name = xdb.read_key(nameIndex); 428 var data = xdb.read_entry(name); 429 process(name.readString(), JSON.parse(data.readString())); 430 xdb.free_string(name); 431 xdb.free_string(data); 432 } 433 434 os.file.close(os.file.redirect(origOut));