loadCallgraph.js (23601B)
1 /* This Source Code Form is subject to the terms of the Mozilla Public 2 * License, v. 2.0. If a copy of the MPL was not distributed with this file, 3 * You can obtain one at http://mozilla.org/MPL/2.0/. */ 4 5 /* -*- indent-tabs-mode: nil; js-indent-level: 4 -*- */ 6 7 "use strict"; 8 9 loadRelativeToScript('utility.js'); 10 loadRelativeToScript('callgraph.js'); 11 12 // Functions come out of sixgill in the form "mangled$readable". The mangled 13 // name is Truth. One mangled name might correspond to multiple readable names, 14 // for multiple reasons, including (1) sixgill/gcc doesn't always qualify types 15 // the same way or de-typedef the same amount; (2) sixgill's output treats 16 // references and pointers the same, and so doesn't distinguish them, but C++ 17 // treats them as separate for overloading and linking; (3) (identical) 18 // destructors sometimes have an int32 parameter, sometimes not. 19 // 20 // The readable names are useful because they're far more meaningful to the 21 // user, and are what should show up in reports and questions to mrgiggles. At 22 // least in most cases, it's fine to have the extra mangled name tacked onto 23 // the beginning for these. 24 // 25 // The strategy used is to separate out the pieces whenever they are read in, 26 // create a table mapping mangled names to all readable names, and use the 27 // mangled names in all computation -- except for limited circumstances when 28 // the readable name is used in annotations. 29 // 30 // Note that callgraph.txt uses a compressed representation -- each name is 31 // mapped to an integer, and those integers are what is recorded in the edges. 32 // But the integers depend on the full name, whereas the true edge should only 33 // consider the mangled name. And some of the names encoded in callgraph.txt 34 // are FieldCalls, not just function names. 35 36 var gcEdges = {}; 37 38 // Returns whether the function was added. (It will be refused if it was 39 // already there, or if attrs or annotations say it shouldn't be added.) 40 function addGCFunction(caller, reason, gcFunctions, functionAttrs, functions) 41 { 42 if (functionAttrs[caller] && functionAttrs[caller][1] & ATTR_GC_SUPPRESSED) 43 return false; 44 45 if (ignoreGCFunction(functions.name[caller], functions.readableName)) 46 return false; 47 48 if (!(caller in gcFunctions)) { 49 gcFunctions[caller] = reason; 50 return true; 51 } 52 53 return false; 54 } 55 56 // Every caller->callee callsite is associated with attrs saying what is 57 // allowed at that callsite (eg if it's in a GC suppression zone, it would have 58 // ATTR_GC_SUPPRESSED set.) A given caller might call the same callee multiple 59 // times, with different attributes. Associate the <caller,callee> edge with 60 // the intersection (AND) and disjunction (OR) of all of the callsites' attrs. 61 // The AND ('all') says what attributes are present for all callers; the OR 62 // ('any') says what attributes are present on any caller. Preserve the 63 // original order. 64 // 65 // During the same scan, build callersOf from calleesOf. 66 function generate_callgraph(rawCallees) { 67 const callersOf = new Map(); 68 const calleesOf = new Map(); 69 70 for (const [caller, callee_attrs] of rawCallees) { 71 const ordered_callees = []; 72 73 // callee_attrs is a list of {callee,any,all} objects. 74 const callee2any = new Map(); 75 const callee2all = new Map(); 76 for (const {callee, any, all} of callee_attrs) { 77 const prev_any = callee2any.get(callee); 78 if (prev_any === undefined) { 79 assert(!callee2all.has(callee)); 80 callee2any.set(callee, any); 81 callee2all.set(callee, all); 82 ordered_callees.push(callee); 83 } else { 84 const prev_all = callee2all.get(callee); 85 callee2any.set(callee, prev_any | any); 86 callee2all.set(callee, prev_all & all); 87 } 88 } 89 90 // Update the contents of callee_attrs to contain a single entry for 91 // each callee, with its attrs set to the AND of the attrs observed at 92 // all callsites within this caller function. 93 callee_attrs.length = 0; 94 for (const callee of ordered_callees) { 95 const any = callee2any.get(callee); 96 const all = callee2all.get(callee); 97 if (!calleesOf.has(caller)) 98 calleesOf.set(caller, new Map()); 99 calleesOf.get(caller).set(callee, {any, all}); 100 if (!callersOf.has(callee)) 101 callersOf.set(callee, new Map()); 102 callersOf.get(callee).set(caller, {any, all}); 103 } 104 } 105 106 return {callersOf, calleesOf}; 107 } 108 109 // Returns object mapping mangled => reason for GCing 110 function loadRawCallgraphFile(file, verbose) 111 { 112 const functions = { 113 // "Map" from identifier to mangled name, or sometimes to a Class.Field name. 114 name: [""], 115 116 // map from mangled name => list of readable names 117 readableName: {}, 118 119 mangledToId: {} 120 }; 121 122 const fieldCallAttrs = {}; 123 const fieldCallCSU = new Map(); // map from full field name id => csu name 124 125 // set of mangled names (map from mangled name => [any,all]) 126 var functionAttrs = {}; 127 128 const gcCalls = []; 129 const indirectCalls = []; 130 131 // map from mangled => list of tuples of {'callee':mangled, 'any':intset, 'all':intset} 132 const rawCallees = new Map(); 133 134 for (let line of readFileLines_gen(file)) { 135 line = line.replace(/\n/, ""); 136 137 let match; 138 if (match = line.charAt(0) == "#" && /^\#(\d+) (.*)/.exec(line)) { 139 const [ _, id, mangled ] = match; 140 assert(functions.name.length == id); 141 functions.name.push(mangled); 142 functions.mangledToId[mangled] = id|0; 143 continue; 144 } 145 if (match = line.charAt(0) == "=" && /^= (\d+) (.*)/.exec(line)) { 146 const [ _, id, readable ] = match; 147 const mangled = functions.name[id]; 148 if (mangled in functions.readableName) 149 functions.readableName[mangled].push(readable); 150 else 151 functions.readableName[mangled] = [ readable ]; 152 continue; 153 } 154 155 let attrs = 0; 156 // Example line: D /17 6 7 157 // 158 // This means a direct call from 6 -> 7, but within a scope that 159 // applies attrs 0x1 and 0x10 to the callee. 160 // 161 // Look for a bit specifier and remove it from the line if found. 162 if (line.indexOf("/") != -1) { 163 match = /^(..)\/(\d+) (.*)/.exec(line); 164 line = match[1] + match[3]; 165 attrs = match[2]|0; 166 } 167 const tag = line.charAt(0); 168 if (match = tag == 'I' && /^I (\d+) VARIABLE ([^\,]*)/.exec(line)) { 169 const caller = match[1]|0; 170 const name = match[2]; 171 if (indirectCallCannotGC(functions.name[caller], name)) 172 attrs |= ATTR_GC_SUPPRESSED; 173 indirectCalls.push([caller, "IndirectCall: " + name, attrs]); 174 } else if (match = tag == 'F' && /^F (\d+) (\d+) CLASS (.*?) FIELD (.*)/.exec(line)) { 175 const caller = match[1]|0; 176 const fullfield = match[2]|0; 177 const csu = match[3]; 178 const fullfield_str = csu + "." + match[4]; 179 assert(functions.name[fullfield] == fullfield_str); 180 if (attrs) 181 fieldCallAttrs[fullfield] = attrs; 182 addToMappedList(rawCallees, caller, {callee:fullfield, any:attrs, all:attrs}); 183 fieldCallCSU.set(fullfield, csu); 184 185 if (fieldCallCannotGC(csu, fullfield_str)) 186 addToMappedList(rawCallees, fullfield, {callee:ID.nogcfunc, any:0, all:0}); 187 else 188 addToMappedList(rawCallees, fullfield, {callee:ID.anyfunc, any:0, all:0}); 189 } else if (match = tag == 'V' && /^V (\d+) (\d+) CLASS (.*?) FIELD (.*)/.exec(line)) { 190 // V tag is no longer used, but we are still emitting it becasue it 191 // can be helpful to understand what's going on. 192 } else if (match = tag == 'D' && /^D (\d+) (\d+)/.exec(line)) { 193 const caller = match[1]|0; 194 const callee = match[2]|0; 195 addToMappedList(rawCallees, caller, {callee, any:attrs, all:attrs}); 196 } else if (match = tag == 'R' && /^R (\d+) (\d+)/.exec(line)) { 197 assert(false, "R tag is no longer used"); 198 } else if (match = tag == 'T' && /^T (\d+) (.*)/.exec(line)) { 199 const id = match[1]|0; 200 let tag = match[2]; 201 if (tag == 'GC Call') 202 gcCalls.push(id); 203 } else { 204 assert(false, "Invalid format in callgraph line: " + line); 205 } 206 } 207 208 if (verbose) { 209 printErr("Loaded[verbose=" + verbose + "] " + file); 210 } 211 212 return { 213 fieldCallAttrs, 214 fieldCallCSU, 215 gcCalls, 216 indirectCalls, 217 rawCallees, 218 functions 219 }; 220 } 221 222 // Take a set of rawcalls filenames (as in, the raw callgraph data output by 223 // computeCallgraph.js) and combine them into a global callgraph, renumbering 224 // the IDs as needed. 225 function mergeRawCallgraphs(filenames, verbose) { 226 let d; 227 for (const filename of filenames) { 228 const raw = loadRawCallgraphFile(filename, verbose); 229 if (!d) { 230 d = raw; 231 continue; 232 } 233 234 const { 235 fieldCallAttrs, 236 fieldCallCSU, 237 gcCalls, 238 indirectCalls, 239 rawCallees, 240 functions 241 } = raw; 242 243 // Compute the ID mapping. Incoming functions that already have an ID 244 // will be mapped to that ID; new ones will allocate a fresh ID. 245 const remap = new Array(functions.name.length); 246 for (let i = 1; i < functions.name.length; i++) { 247 const mangled = functions.name[i]; 248 const old_id = d.functions.mangledToId[mangled] 249 if (old_id) { 250 remap[i] = old_id; 251 } else { 252 const newid = d.functions.name.length; 253 d.functions.mangledToId[mangled] = newid; 254 d.functions.name.push(mangled); 255 remap[i] = newid; 256 assert(!(mangled in d.functions.readableName), mangled + " readable name is already found"); 257 const readables = functions.readableName[mangled]; 258 if (readables !== undefined) 259 d.functions.readableName[mangled] = readables; 260 } 261 } 262 263 for (const [fullfield, attrs] of Object.entries(fieldCallAttrs)) 264 d.fieldCallAttrs[remap[fullfield]] = attrs; 265 for (const [fullfield, csu] of fieldCallCSU.entries()) 266 d.fieldCallCSU.set(remap[fullfield], csu); 267 for (const call of gcCalls) 268 d.gcCalls.push(remap[call]); 269 for (const [caller, name, attrs] of indirectCalls) 270 d.indirectCalls.push([remap[caller], name, attrs]); 271 for (const [caller, callees] of rawCallees) { 272 for (const {callee, any, all} of callees) { 273 addToMappedList(d.rawCallees, remap[caller]|0, {callee:remap[callee], any, all}); 274 } 275 } 276 } 277 278 return d; 279 } 280 281 function loadCallgraph(files, verbose) 282 { 283 const { 284 fieldCallAttrs, 285 fieldCallCSU, 286 gcCalls, 287 indirectCalls, 288 rawCallees, 289 functions 290 } = mergeRawCallgraphs(files, verbose); 291 292 assert(ID.jscode == functions.mangledToId["(js-code)"]); 293 assert(ID.anyfunc == functions.mangledToId["(any-function)"]); 294 assert(ID.nogcfunc == functions.mangledToId["(nogc-function)"]); 295 assert(ID.gc == functions.mangledToId["(GC)"]); 296 297 addToMappedList(rawCallees, functions.mangledToId["(any-function)"], {callee:ID.gc, any:0, all:0}); 298 299 // Compute functionAttrs: it should contain the set of functions that 300 // are *always* called within some sort of limited context (eg GC 301 // suppression). 302 303 // set of mangled names (map from mangled name => [any,all]) 304 const functionAttrs = {}; 305 306 // Initialize to field calls with attrs set. 307 for (var [name, attrs] of Object.entries(fieldCallAttrs)) 308 functionAttrs[name] = [attrs, attrs]; 309 310 // map from ID => reason 311 const gcFunctions = { [ID.gc]: 'internal' }; 312 313 // Add in any extra functions at the end. (If we did this early, it would 314 // mess up the id <-> name correspondence. Also, we need to know if the 315 // functions even exist in the first place.) 316 for (var func of extraGCFunctions(functions.readableName)) { 317 addGCFunction(functions.mangledToId[func], "annotation", gcFunctions, functionAttrs, functions); 318 } 319 320 for (const func of gcCalls) 321 addToMappedList(rawCallees, func, {callee:ID.gc, any:0, all:0}); 322 323 for (const [caller, indirect, attrs] of indirectCalls) { 324 const id = functions.name.length; 325 functions.name.push(indirect); 326 functions.mangledToId[indirect] = id; 327 addToMappedList(rawCallees, caller, {callee:id, any:attrs, all:attrs}); 328 addToMappedList(rawCallees, id, {callee:ID.anyfunc, any:0, all:0}); 329 } 330 331 // Callers have a list of callees, with duplicates (if the same function is 332 // called more than once.) Merge the repeated calls, only keeping attrs 333 // that are in force for *every* callsite of that callee. Also, generate 334 // the callersOf table at the same time. 335 // 336 // calleesOf : map from mangled => {mangled callee => {'any':intset, 'all':intset}} 337 // callersOf : map from mangled => {mangled caller => {'any':intset, 'all':intset}} 338 const {callersOf, calleesOf} = generate_callgraph(rawCallees); 339 340 // Compute functionAttrs: it should contain the set of functions that 341 // are *always* called within some sort of limited context (eg GC 342 // suppression). 343 344 // Initialize to field calls with attrs set. 345 for (var [name, attrs] of Object.entries(fieldCallAttrs)) 346 functionAttrs[name] = [attrs, attrs]; 347 348 // Initialize functionAttrs to the set of all functions, where each one is 349 // maximally attributed, and return a worklist containing all simple roots 350 // (nodes with no callers). 351 const simple_roots = gather_simple_roots(functionAttrs, calleesOf, callersOf); 352 353 // Traverse the graph, spreading the attrs down from the roots. 354 propagate_attrs(simple_roots, functionAttrs, calleesOf); 355 356 // There are a surprising number of "recursive roots", where there is a 357 // cycle of functions calling each other but not called by anything else, 358 // and these roots may also have descendants. Now that the above traversal 359 // has eliminated everything reachable from simple roots, traverse the 360 // remaining graph to gather up a representative function from each root 361 // cycle. 362 // 363 // Simple example: in the JS shell build, moz_xstrdup calls itself, but 364 // there are no calls to it from within js/src. 365 const recursive_roots = gather_recursive_roots(functionAttrs, calleesOf, callersOf, functions); 366 367 // And do a final traversal starting with the recursive roots. 368 propagate_attrs(recursive_roots, functionAttrs, calleesOf); 369 370 for (const [f, [any, all]] of Object.entries(functionAttrs)) { 371 // Throw out all functions with no attrs set, to reduce the size of the 372 // output. From now on, "not in functionAttrs" means [any=0, all=0]. 373 if (any == 0 && all == 0) 374 delete functionAttrs[f]; 375 376 // Remove GC-suppressed functions from the set of functions known to GC. 377 // Also remove functions only reachable through calls that have been 378 // replaced. 379 if (all & (ATTR_GC_SUPPRESSED | ATTR_REPLACED)) 380 delete gcFunctions[name]; 381 } 382 383 // functionAttrs now contains all functions that are ever called in an 384 // attributed context, based on the known callgraph (i.e., calls through 385 // function pointers are not taken into consideration.) 386 387 // Sanity check to make sure the callgraph has some functions annotated as 388 // GC Calls. This is mostly a check to be sure the earlier processing 389 // succeeded (as opposed to, say, running on empty xdb files because you 390 // didn't actually compile anything interesting.) 391 assert(gcCalls.length > 0, "No GC functions found!"); 392 393 // Initialize the worklist to all known gcFunctions. 394 const worklist = [ID.gc]; 395 396 // Include all field calls (but not virtual method calls). 397 for (const [name, csuName] of fieldCallCSU) { 398 const fullFieldName = functions.name[name]; 399 if (!fieldCallCannotGC(csuName, fullFieldName)) { 400 gcFunctions[name] = 'arbitrary function pointer ' + fullFieldName; 401 worklist.push(name); 402 } 403 } 404 405 // Recursively find all callers not always called in a GC suppression 406 // context, and add them to the set of gcFunctions. 407 while (worklist.length) { 408 name = worklist.shift(); 409 assert(name in gcFunctions, "gcFunctions does not contain " + name); 410 if (!callersOf.has(name)) 411 continue; 412 for (const [caller, {any, all}] of callersOf.get(name)) { 413 if ((all & (ATTR_GC_SUPPRESSED | ATTR_REPLACED)) == 0) { 414 if (addGCFunction(caller, name, gcFunctions, functionAttrs, functions)) 415 worklist.push(caller); 416 } 417 } 418 } 419 420 // Convert functionAttrs to limitedFunctions (using mangled names instead 421 // of ids.) 422 423 // set of mangled names (map from mangled name => {any,all,recursive_root:bool} 424 var limitedFunctions = {}; 425 426 for (const [id, [any, all]] of Object.entries(functionAttrs)) { 427 if (all) { 428 limitedFunctions[functions.name[id]] = { attributes: all }; 429 } 430 } 431 432 for (const [id, limits, label] of recursive_roots) { 433 const name = functions.name[id]; 434 const s = limitedFunctions[name] || (limitedFunctions[name] = {}); 435 s.recursive_root = true; 436 } 437 438 // Remap ids to mangled names. 439 const namedGCFunctions = {}; 440 for (const [caller, reason] of Object.entries(gcFunctions)) { 441 namedGCFunctions[functions.name[caller]] = functions.name[reason] || reason; 442 } 443 444 return { 445 gcFunctions: namedGCFunctions, 446 functions, 447 calleesOf, 448 callersOf, 449 limitedFunctions 450 }; 451 } 452 453 function saveCallgraph(functions, calleesOf) { 454 // Write out all the ids and their readable names. 455 let id = -1; 456 for (const name of functions.name) { 457 id += 1; 458 if (id == 0) continue; 459 print(`#${id} ${name}`); 460 for (const readable of (functions.readableName[name] || [])) { 461 if (readable != name) 462 print(`= ${id} ${readable}`); 463 } 464 } 465 466 // Omit field calls for now; let them appear as if they were functions. 467 468 const attrstring = range => range.any || range.all ? `${range.all}:${range.any} ` : ''; 469 for (const [caller, callees] of calleesOf) { 470 for (const [callee, attrs] of callees) { 471 print(`D ${attrstring(attrs)}${caller} ${callee}`); 472 } 473 } 474 475 // Omit tags for now. This really should preserve all tags. The "GC Call" 476 // tag will already be represented in the graph by having an edge to the 477 // "(GC)" node. 478 } 479 480 // Return a worklist of functions with no callers, and also initialize 481 // functionAttrs to the set of all functions, each mapped to 482 // [ATTRS_NONE, ATTRS_UNVISITED]. 483 function gather_simple_roots(functionAttrs, calleesOf, callersOf) { 484 const roots = []; 485 for (const callee of callersOf.keys()) 486 functionAttrs[callee] = [ATTRS_NONE, ATTRS_UNVISITED]; 487 for (const caller of calleesOf.keys()) { 488 functionAttrs[caller] = [ATTRS_NONE, ATTRS_UNVISITED]; 489 if (!callersOf.has(caller)) 490 roots.push([caller, ATTRS_NONE, 'root']); 491 } 492 493 return roots; 494 } 495 496 // Recursively traverse the callgraph from the roots. Recurse through every 497 // edge that weakens the attrs. (Attrs that entirely disappear, ie go to a zero 498 // intset, will be removed from functionAttrs.) 499 function propagate_attrs(roots, functionAttrs, calleesOf) { 500 const worklist = Array.from(roots); 501 let top = worklist.length; 502 while (top > 0) { 503 // Consider caller where (graph) -> caller -> (0 or more callees) 504 // 'callercaller' is for debugging. 505 const [caller, edge_attrs, callercaller] = worklist[--top]; 506 assert(caller in functionAttrs); 507 const [prev_any, prev_all] = functionAttrs[caller]; 508 assert(prev_any !== undefined); 509 assert(prev_all !== undefined); 510 const [new_any, new_all] = [prev_any | edge_attrs, prev_all & edge_attrs]; 511 if (prev_any != new_any || prev_all != new_all) { 512 // Update function attrs, then recurse to the children if anything 513 // was updated. 514 functionAttrs[caller] = [new_any, new_all]; 515 for (const [callee, {any, all}] of (calleesOf.get(caller) || new Map)) 516 worklist[top++] = [callee, all | edge_attrs, caller]; 517 } 518 } 519 } 520 521 // Mutually-recursive roots and their descendants will not have been visited, 522 // and will still be set to [0, ATTRS_UNVISITED]. Scan through and gather them. 523 function gather_recursive_roots(functionAttrs, calleesOf, callersOf, functions) { 524 const roots = []; 525 526 // Pick any node. Mark everything reachable by adding to a 'seen' set. At 527 // the end, if there are any incoming edges to that node from an unmarked 528 // node, then it is not a root. Otherwise, mark the node as a root. (There 529 // will be at least one back edge coming into the node from a marked node 530 // in this case, since otherwise it would have already been considered to 531 // be a root.) 532 // 533 // Repeat with remaining unmarked nodes until all nodes are marked. 534 const seen = new Set(); 535 for (let [func, [any, all]] of Object.entries(functionAttrs)) { 536 func = func|0; 537 if (all != ATTRS_UNVISITED) 538 continue; 539 540 // We should only be looking at nodes with callers, since otherwise 541 // they would have been handled in the previous pass! 542 assert(callersOf.has(func)); 543 assert(callersOf.get(func).size > 0); 544 545 if (seen.has(func)) 546 continue; 547 548 const work = [func]; 549 while (work.length > 0) { 550 const f = work.pop(); 551 if (!calleesOf.has(f)) continue; 552 for (const callee of calleesOf.get(f).keys()) { 553 if (!seen.has(callee) && 554 callee != func && 555 functionAttrs[callee][1] == ATTRS_UNVISITED) 556 { 557 work.push(callee); 558 seen.add(callee); 559 } 560 } 561 } 562 563 assert(!seen.has(func)); 564 seen.add(func); 565 if ([...callersOf.get(func).keys()].findIndex(f => !seen.has(f)) == -1) { 566 // No unmarked incoming edges, including self-edges, so this is a 567 // (recursive) root. 568 roots.push([func, ATTRS_NONE, 'recursive-root']); 569 } 570 } 571 572 return roots; 573 574 tmp = calleesOf; 575 calleesOf = {}; 576 for (const [callerId, callees] of Object.entries(calleesOf)) { 577 const caller = functionNames[callerId]; 578 for (const {calleeId, limits} of callees) 579 calleesOf[caller][functionNames[calleeId]] = limits; 580 } 581 582 tmp = callersOf; 583 callersOf = {}; 584 for (const [calleeId, callers] of Object.entries(callersOf)) { 585 const callee = functionNames[calleeId]; 586 callersOf[callee] = {}; 587 for (const {callerId, limits} of callers) 588 callersOf[callee][functionNames[caller]] = limits; 589 } 590 }