CFG.js (43244B)
1 /* This Source Code Form is subject to the terms of the Mozilla Public 2 * License, v. 2.0. If a copy of the MPL was not distributed with this file, 3 * You can obtain one at http://mozilla.org/MPL/2.0/. */ 4 5 /* -*- indent-tabs-mode: nil; js-indent-level: 4 -*- */ 6 7 // Utility code for traversing the JSON data structures produced by sixgill. 8 9 "use strict"; 10 11 var TRACING = false; 12 13 // When edge.Kind == "Pointer", these are the meanings of the edge.Reference field. 14 var PTR_POINTER = 0; 15 var PTR_REFERENCE = 1; 16 var PTR_RVALUE_REF = 2; 17 18 // Find all points (positions within the code) of the body given by the list of 19 // bodies and the blockId to match (which will specify an outer function or a 20 // loop within it), recursing into loops if needed. 21 function findAllPoints(bodies, blockId, bits) 22 { 23 var points = []; 24 var body; 25 26 for (var xbody of bodies) { 27 if (sameBlockId(xbody.BlockId, blockId)) { 28 assert(!body); 29 body = xbody; 30 } 31 } 32 assert(body); 33 34 if (!("PEdge" in body)) 35 return; 36 for (var edge of body.PEdge) { 37 points.push([body, edge.Index[0], bits]); 38 if (edge.Kind == "Loop") 39 points.push(...findAllPoints(bodies, edge.BlockId, bits)); 40 } 41 42 return points; 43 } 44 45 // Visitor of a graph of <body, ppoint> vertexes and sixgill-generated edges, 46 // where the edges represent the actual computation happening. 47 // 48 // Uses the syntax `var Visitor = class { ... }` rather than `class Visitor` 49 // to allow reloading this file with the JS debugger. 50 var Visitor = class { 51 constructor(bodies) { 52 this.visited_bodies = new Map(); 53 for (const body of bodies) { 54 this.visited_bodies.set(body, new Map()); 55 } 56 } 57 58 // Prepend `edge` to the info stored at the successor node, returning 59 // the updated info value. This should be overridden by pretty much any 60 // subclass, as a traversal's semantics are largely determined by this method. 61 extend_path(edge, body, ppoint, successor_value) { return true; } 62 63 // Default implementation does a basic "only visit nodes once" search. 64 // (Whether this is BFS/DFS/other is determined by the caller.) 65 66 // Override if you need to revisit nodes. Valid actions are "continue", 67 // "prune", and "done". "continue" means continue with the search. "prune" 68 // means do not continue to predecessors of this node, only continue with 69 // the remaining entries in the work queue. "done" means the 70 // whole search is complete even if unvisited nodes remain. 71 next_action(prev, current) { return prev ? "prune" : "continue"; } 72 73 // Update the info at a node. If this is the first time the node has been 74 // seen, `prev` will be undefined. `current` will be the info computed by 75 // `extend_path`. The node will be updated with the return value. 76 merge_info(prev, current) { return true; } 77 78 // Default visit() implementation. Subclasses will usually leave this alone 79 // and use the other methods as extension points. 80 // 81 // Take a body, a point within that body, and the info computed by 82 // extend_path() for that point when traversing an edge. Return whether the 83 // search should continue ("continue"), the search should be pruned and 84 // other paths followed ("prune"), or that the whole search is complete and 85 // it is time to return a value ("done", and the value returned by 86 // merge_info() will be returned by the overall search). 87 // 88 // Persistently record the value computed so far at each point, and call 89 // (overridable) next_action() and merge_info() methods with the previous 90 // and freshly-computed value for each point. 91 // 92 // Often, extend_path() will decide how/whether to continue the search and 93 // will return the search action to take, and next_action() will blindly 94 // return it if the point has not yet been visited. (And if it has, it will 95 // prune this branch of the search so that no point is visited multiple 96 // times.) 97 visit(body, ppoint, info) { 98 const visited_value_table = this.visited_bodies.get(body); 99 const existing_value_if_visited = visited_value_table.get(ppoint); 100 const action = this.next_action(existing_value_if_visited, info); 101 const merged = this.merge_info(existing_value_if_visited, info); 102 visited_value_table.set(ppoint, merged); 103 return [action, merged]; 104 } 105 }; 106 107 function findMatchingBlock(bodies, blockId) { 108 for (const body of bodies) { 109 if (sameBlockId(body.BlockId, blockId)) { 110 return body; 111 } 112 } 113 assert(false); 114 } 115 116 // For a given function containing a set of bodies, each containing a set of 117 // ppoints, perform a mostly breadth-first traversal through the complete graph 118 // of all <body, ppoint> nodes throughout all the bodies of the function. 119 // 120 // When traversing, every <body, ppoint> node is associated with a value that 121 // is assigned or updated whenever it is visited. The overall traversal 122 // terminates when a given condition is reached, and an arbitrary custom value 123 // is returned. If the search completes without the termination condition 124 // being reached, it will return the value associated with the entrypoint 125 // node, which is initialized to `entrypoint_fallback_value` (and thus serves as 126 // the fallback return value if all search paths are pruned before reaching 127 // the entrypoint.) 128 // 129 // The traversal is only *mostly* breadth-first because the visitor decides 130 // whether to stop searching when it sees a node. If a node is visited for a 131 // second time, the visitor can choose to continue (and thus revisit the node) 132 // in order to find "better" paths that may include a node more than once. 133 // The search is done in the "upwards" direction -- as in, it starts at the 134 // exit point and searches through predecessors. 135 // 136 // Override visitor.visit() to return an action and a value. The action 137 // determines whether the overall search should terminate ('done'), or 138 // continues looking through the predecessors of the current node ('continue'), 139 // or whether it should just continue processing the work queue without 140 // looking at predecessors ('prune'). 141 // 142 // This allows this function to be used in different ways. If the visitor 143 // associates a value with each node that chains onto its forward-flow successors 144 // (predecessors in the "upwards" search order), then a complete path through 145 // the graph will be returned. 146 // 147 // Alternatively, BFS_upwards() can be used to test whether a condition holds 148 // (eg "the exit point is reachable only after calling SomethingImportant()"), 149 // in which case no path is needed and the visitor can compute a simple boolean 150 // every time it encounters a point. Note that `entrypoint_fallback_value` will 151 // still be returned if the search terminates without ever reaching the 152 // entrypoint, which is useful for dominator analyses. 153 // 154 // See the Visitor base class's implementation of visit(), above, for the 155 // most commonly used visit logic. 156 function BFS_upwards(start_body, start_ppoint, bodies, visitor, 157 initial_successor_value = {}, 158 entrypoint_fallback_value=null) 159 { 160 let entrypoint_value = entrypoint_fallback_value; 161 162 const work = [[start_body, start_ppoint, null, initial_successor_value]]; 163 if (TRACING) { 164 printErr(`BFS start at ${blockIdentifier(start_body)}:${start_ppoint}`); 165 } 166 167 while (work.length > 0) { 168 const [body, ppoint, edgeToAdd, successor_value] = work.shift(); 169 if (TRACING) { 170 const s = edgeToAdd ? " : " + str(edgeToAdd) : ""; 171 printErr(`prepending edge from ${ppoint} to state '${successor_value}'${s}`); 172 } 173 let value = visitor.extend_path(edgeToAdd, body, ppoint, successor_value); 174 175 const [action, merged_value] = visitor.visit(body, ppoint, value); 176 if (action === "done") { 177 return merged_value; 178 } 179 if (action === "prune") { 180 // Do not push anything else to the work queue, but continue processing 181 // other branches. 182 continue; 183 } 184 assert(action == "continue"); 185 value = merged_value; 186 187 const predecessors = getPredecessors(body); 188 for (const edge of (predecessors[ppoint] || [])) { 189 if (edge.Kind == "Loop") { 190 // Propagate the search into the exit point of the loop body. 191 const loopBody = findMatchingBlock(bodies, edge.BlockId); 192 const loopEnd = loopBody.Index[1]; 193 work.push([loopBody, loopEnd, null, value]); 194 // Don't continue to predecessors here without going through 195 // the loop. (The points in this body that enter the loop will 196 // be traversed when we reach the entry point of the loop.) 197 } 198 work.push([body, edge.Index[0], edge, value]); 199 } 200 201 // Check for hitting the entry point of a loop body. 202 if (ppoint == body.Index[0] && body.BlockId.Kind == "Loop") { 203 // Propagate to outer body parents that enter the loop body. 204 for (const parent of (body.BlockPPoint || [])) { 205 const parentBody = findMatchingBlock(bodies, parent.BlockId); 206 work.push([parentBody, parent.Index, null, value]); 207 } 208 209 // This point is also preceded by the *end* of this loop, for the 210 // previous iteration. 211 work.push([body, body.Index[1], null, value]); 212 } 213 214 // Check for reaching the entrypoint of the function. 215 if (body === start_body && ppoint == body.Index[0]) { 216 entrypoint_value = value; 217 } 218 } 219 220 // The search space was exhausted without finding a 'done' state. That 221 // might be because all search paths were pruned before reaching the entry 222 // point of the function, in which case entrypoint_value will still be its initial 223 // value. (If entrypoint_value has been set, then we may still not have visited the 224 // entire graph, if some paths were pruned but at least one made it to the entrypoint.) 225 return entrypoint_value; 226 } 227 228 // Given the CFG for the constructor call of some RAII, return whether the 229 // given edge is the matching destructor call. 230 function isMatchingDestructor(constructor, edge) 231 { 232 if (edge.Kind != "Call") 233 return false; 234 var callee = edge.Exp[0]; 235 if (callee.Kind != "Var") 236 return false; 237 var variable = callee.Variable; 238 assert(variable.Kind == "Func"); 239 if (variable.Name[1].charAt(0) != '~') 240 return false; 241 242 // Note that in some situations, a regular function can begin with '~', so 243 // we don't necessarily have a destructor in hand. This is probably a 244 // sixgill artifact, but in js::wasm::ModuleGenerator::~ModuleGenerator, a 245 // templatized static inline EraseIf is invoked, and it gets named ~EraseIf 246 // for some reason. 247 if (!("PEdgeCallInstance" in edge)) 248 return false; 249 250 var constructExp = constructor.PEdgeCallInstance.Exp; 251 assert(constructExp.Kind == "Var"); 252 253 var destructExp = edge.PEdgeCallInstance.Exp; 254 if (destructExp.Kind != "Var") 255 return false; 256 257 return sameVariable(constructExp.Variable, destructExp.Variable); 258 } 259 260 // Return all calls within the RAII scope of any constructor matched by 261 // isConstructor(). (Note that this would be insufficient if you needed to 262 // treat each instance separately, such as when different regions of a function 263 // body were guarded by these constructors and you needed to do something 264 // different with each.) 265 function allRAIIGuardedCallPoints(typeInfo, bodies, body, isConstructor) 266 { 267 if (!("PEdge" in body)) 268 return []; 269 270 var points = []; 271 272 for (var edge of body.PEdge) { 273 if (edge.Kind != "Call") 274 continue; 275 var callee = edge.Exp[0]; 276 if (callee.Kind != "Var") 277 continue; 278 var variable = callee.Variable; 279 assert(variable.Kind == "Func"); 280 const bits = isConstructor(typeInfo, edge.Type, variable.Name); 281 if (!bits) 282 continue; 283 if (!("PEdgeCallInstance" in edge)) 284 continue; 285 if (edge.PEdgeCallInstance.Exp.Kind != "Var") 286 continue; 287 288 points.push(...pointsInRAIIScope(bodies, body, edge, bits)); 289 } 290 291 return points; 292 } 293 294 // Test whether the given edge is the constructor corresponding to the given 295 // destructor edge. 296 function isMatchingConstructor(destructor, edge) 297 { 298 if (edge.Kind != "Call") 299 return false; 300 var callee = edge.Exp[0]; 301 if (callee.Kind != "Var") 302 return false; 303 var variable = callee.Variable; 304 if (variable.Kind != "Func") 305 return false; 306 var name = readable(variable.Name[0]); 307 var destructorName = readable(destructor.Exp[0].Variable.Name[0]); 308 var match = destructorName.match(/^(.*?::)~(\w+)\(/); 309 if (!match) { 310 printErr("Unhandled destructor syntax: " + destructorName); 311 return false; 312 } 313 var constructorSubstring = match[1] + match[2]; 314 if (name.indexOf(constructorSubstring) == -1) 315 return false; 316 317 var destructExp = destructor.PEdgeCallInstance.Exp; 318 if (destructExp.Kind != "Var") 319 return false; 320 321 var constructExp = edge.PEdgeCallInstance.Exp; 322 if (constructExp.Kind != "Var") 323 return false; 324 325 return sameVariable(constructExp.Variable, destructExp.Variable); 326 } 327 328 function findMatchingConstructor(destructorEdge, body, warnIfNotFound=true) 329 { 330 var worklist = [destructorEdge]; 331 var predecessors = getPredecessors(body); 332 while(worklist.length > 0) { 333 var edge = worklist.pop(); 334 if (isMatchingConstructor(destructorEdge, edge)) 335 return edge; 336 if (edge.Index[0] in predecessors) { 337 for (var e of predecessors[edge.Index[0]]) 338 worklist.push(e); 339 } 340 } 341 if (warnIfNotFound) 342 printErr("Could not find matching constructor!"); 343 return undefined; 344 } 345 346 function pointsInRAIIScope(bodies, body, constructorEdge, bits) { 347 var seen = {}; 348 var worklist = [constructorEdge.Index[1]]; 349 var points = []; 350 while (worklist.length) { 351 var point = worklist.pop(); 352 if (point in seen) 353 continue; 354 seen[point] = true; 355 points.push([body, point, bits]); 356 var successors = getSuccessors(body); 357 if (!(point in successors)) 358 continue; 359 for (var nedge of successors[point]) { 360 if (isMatchingDestructor(constructorEdge, nedge)) 361 continue; 362 if (nedge.Kind == "Loop") 363 points.push(...findAllPoints(bodies, nedge.BlockId, bits)); 364 worklist.push(nedge.Index[1]); 365 } 366 } 367 368 return points; 369 } 370 371 function isImmobileValue(exp) { 372 if (exp.Kind == "Int" && exp.String == "0") { 373 return true; 374 } 375 return false; 376 } 377 378 // Returns whether decl is a body.DefineVariable[] entry for a non-temporary reference. 379 function isReferenceDecl(decl) { 380 return decl.Type.Kind == "Pointer" && decl.Type.Reference != PTR_POINTER && decl.Variable.Kind != "Temp"; 381 } 382 383 function expressionIsVariableAddress(exp, variable) 384 { 385 while (exp.Kind == "Fld") 386 exp = exp.Exp[0]; 387 return exp.Kind == "Var" && sameVariable(exp.Variable, variable); 388 } 389 390 function edgeTakesVariableAddress(edge, variable, body) 391 { 392 if (ignoreEdgeUse(edge, variable, body)) 393 return false; 394 if (ignoreEdgeAddressTaken(edge)) 395 return false; 396 switch (edge.Kind) { 397 case "Assign": 398 return expressionIsVariableAddress(edge.Exp[1], variable); 399 case "Call": 400 if ("PEdgeCallArguments" in edge) { 401 for (var exp of edge.PEdgeCallArguments.Exp) { 402 if (expressionIsVariableAddress(exp, variable)) 403 return true; 404 } 405 } 406 return false; 407 default: 408 return false; 409 } 410 } 411 412 // Look at an invocation of a virtual method or function pointer contained in a 413 // field, and return the static type of the invocant (or the containing struct, 414 // for a function pointer field.) 415 function getFieldCallInstanceCSU(edge, field) 416 { 417 if ("FieldInstanceFunction" in field) { 418 // We have a 'this'. 419 const instanceExp = edge.PEdgeCallInstance.Exp; 420 if (instanceExp.Kind == 'Drf') { 421 // somevar->foo() 422 return edge.Type.TypeFunctionCSU.Type.Name; 423 } else if (instanceExp.Kind == 'Fld') { 424 // somevar.foo() 425 return instanceExp.Field.FieldCSU.Type.Name; 426 } else if (instanceExp.Kind == 'Index') { 427 // A strange construct. 428 // C++ code: static_cast<JS::CustomAutoRooter*>(this)->trace(trc); 429 // CFG: Call(21,30, this*[-1]{JS::CustomAutoRooter}.trace*(trc*)) 430 return instanceExp.Type.Name; 431 } else if (instanceExp.Kind == 'Var') { 432 // C++: reinterpret_cast<SimpleTimeZone*>(gRawGMT)->~SimpleTimeZone(); 433 // CFG: 434 // # icu_64::SimpleTimeZone::icu_64::SimpleTimeZone.__comp_dtor 435 // [6,7] Call gRawGMT.icu_64::SimpleTimeZone.__comp_dtor () 436 return field.FieldCSU.Type.Name; 437 } else { 438 printErr("------------------ edge -------------------"); 439 printErr(JSON.stringify(edge, null, 4)); 440 printErr("------------------ field -------------------"); 441 printErr(JSON.stringify(field, null, 4)); 442 assert(false, `unrecognized FieldInstanceFunction Kind ${instanceExp.Kind}`); 443 } 444 } else { 445 // somefar.foo() where somevar is a field of some CSU. 446 return field.FieldCSU.Type.Name; 447 } 448 } 449 450 function expressionUsesVariable(exp, variable) 451 { 452 if (exp.Kind == "Var" && sameVariable(exp.Variable, variable)) 453 return true; 454 if (!("Exp" in exp)) 455 return false; 456 for (var childExp of exp.Exp) { 457 if (expressionUsesVariable(childExp, variable)) 458 return true; 459 } 460 return false; 461 } 462 463 function expressionUsesVariableContents(exp, variable) 464 { 465 if (!("Exp" in exp)) 466 return false; 467 for (var childExp of exp.Exp) { 468 if (childExp.Kind == 'Drf') { 469 if (expressionUsesVariable(childExp, variable)) 470 return true; 471 } else if (expressionUsesVariableContents(childExp, variable)) { 472 return true; 473 } 474 } 475 return false; 476 } 477 478 // Detect simple |return nullptr;| statements. 479 function isReturningImmobileValue(edge, variable) 480 { 481 if (variable.Kind == "Return") { 482 if (edge.Exp[0].Kind == "Var" && sameVariable(edge.Exp[0].Variable, variable)) { 483 if (isImmobileValue(edge.Exp[1])) 484 return true; 485 } 486 } 487 return false; 488 } 489 490 // If the edge uses the given variable's value, return the earliest point at 491 // which the use is definite. Usually, that means the source of the edge 492 // (anything that reaches that source point will end up using the variable, but 493 // there may be other ways to reach the destination of the edge.) 494 // 495 // Return values are implicitly used at the very last point in the function. 496 // This makes a difference: if an RAII class GCs in its destructor, we need to 497 // start looking at the final point in the function, not one point back from 498 // that, since that would skip over the GCing call. 499 // 500 // Certain references may be annotated to be live to the end of the function 501 // as well (eg AutoCheckCannotGC&& parameters). 502 // 503 // Note that this returns a nonzero value only if the variable's incoming value is used. 504 // So this would return 0 for 'obj': 505 // 506 // obj = someFunction(); 507 // 508 // but these would return a positive value: 509 // 510 // obj = someFunction(obj); 511 // obj->foo = someFunction(); 512 // 513 function edgeUsesVariable(edge, variable, body, liveToEnd=false) 514 { 515 if (ignoreEdgeUse(edge, variable, body)) 516 return 0; 517 518 if (variable.Kind == "Return") { 519 liveToEnd = true; 520 } 521 522 if (liveToEnd && body.Index[1] == edge.Index[1] && body.BlockId.Kind == "Function") { 523 // The last point in the function body is treated as using the return 524 // value. This is the only time the destination point is returned 525 // rather than the source point. 526 return edge.Index[1]; 527 } 528 529 var src = edge.Index[0]; 530 531 switch (edge.Kind) { 532 533 case "Assign": { 534 // Detect `Return := nullptr`. 535 if (isReturningImmobileValue(edge, variable)) 536 return 0; 537 const [lhs, rhs] = edge.Exp; 538 // Detect `lhs := ...variable...` 539 if (expressionUsesVariable(rhs, variable)) 540 return src; 541 // Detect `...variable... := rhs` but not `variable := rhs`. The latter 542 // overwrites the previous value of `variable` without using it. 543 if (expressionUsesVariable(lhs, variable) && !expressionIsVariable(lhs, variable)) 544 return src; 545 return 0; 546 } 547 548 case "Assume": 549 return expressionUsesVariableContents(edge.Exp[0], variable) ? src : 0; 550 551 case "Call": { 552 const callee = edge.Exp[0]; 553 if (expressionUsesVariable(callee, variable)) 554 return src; 555 if ("PEdgeCallInstance" in edge) { 556 if (expressionUsesVariable(edge.PEdgeCallInstance.Exp, variable)) { 557 if (edgeStartsValueLiveRange(edge, variable)) { 558 // If the variable is being constructed, then the incoming 559 // value is not used here; it didn't exist before 560 // construction. (The analysis doesn't get told where 561 // variables are defined, so must infer it from 562 // construction. If the variable does not have a 563 // constructor, its live range may be larger than it really 564 // ought to be if it is defined within a loop body, but 565 // that is conservative.) 566 } else { 567 return src; 568 } 569 } 570 } 571 if ("PEdgeCallArguments" in edge) { 572 for (var exp of edge.PEdgeCallArguments.Exp) { 573 if (expressionUsesVariable(exp, variable)) 574 return src; 575 } 576 } 577 if (edge.Exp.length == 1) 578 return 0; 579 580 // Assigning call result to a variable. 581 const lhs = edge.Exp[1]; 582 if (expressionUsesVariable(lhs, variable) && !expressionIsVariable(lhs, variable)) 583 return src; 584 return 0; 585 } 586 587 case "Loop": 588 return 0; 589 590 case "Assembly": 591 return 0; 592 593 default: 594 assert(false); 595 } 596 } 597 598 // If `decl` is the body.DefineVariable[] declaration of a reference type, then 599 // return the expression without the outer dereference. Otherwise, return the 600 // original expression. 601 function maybeDereference(exp, decl) { 602 if (exp.Kind == "Drf" && exp.Exp[0].Kind == "Var") { 603 if (isReferenceDecl(decl)) { 604 return exp.Exp[0]; 605 } 606 } 607 return exp; 608 } 609 610 function expressionIsVariable(exp, variable) 611 { 612 return exp.Kind == "Var" && sameVariable(exp.Variable, variable); 613 } 614 615 // Similar to the above, except treat uses of a reference as if they were uses 616 // of the dereferenced contents. This requires knowing the type of the 617 // variable, and so takes its declaration rather than the variable itself. 618 function expressionIsDeclaredVariable(exp, decl) 619 { 620 exp = maybeDereference(exp, decl); 621 return expressionIsVariable(exp, decl.Variable); 622 } 623 624 function expressionIsMethodOnVariableDecl(exp, decl) 625 { 626 // This might be calling a method on a base class, in which case exp will 627 // be an unnamed field of the variable instead of the variable itself. 628 while (exp.Kind == "Fld" && exp.Field.Name[0].startsWith("field:")) 629 exp = exp.Exp[0]; 630 return expressionIsDeclaredVariable(exp, decl); 631 } 632 633 // Return whether the edge starts the live range of a variable's value, by setting 634 // it to some new value. Examples of starting obj's live range: 635 // 636 // obj = foo; 637 // obj = foo(); 638 // obj = foo(obj); // uses previous value but then sets to new value 639 // SomeClass obj(true, 1); // constructor 640 // 641 function edgeStartsValueLiveRange(edge, variable) 642 { 643 // Direct assignments start live range of lhs: var = value 644 if (edge.Kind == "Assign") { 645 const [lhs, rhs] = edge.Exp; 646 return (expressionIsVariable(lhs, variable) && 647 !isReturningImmobileValue(edge, variable)); 648 } 649 650 if (edge.Kind != "Call") 651 return false; 652 653 // Assignments of call results start live range: var = foo() 654 if (1 in edge.Exp) { 655 var lhs = edge.Exp[1]; 656 if (expressionIsVariable(lhs, variable)) 657 return true; 658 } 659 660 // Constructor calls start live range of instance: SomeClass var(...) 661 if ("PEdgeCallInstance" in edge) { 662 var instance = edge.PEdgeCallInstance.Exp; 663 664 // Kludge around incorrect dereference on some constructor calls. 665 if (instance.Kind == "Drf") 666 instance = instance.Exp[0]; 667 668 if (!expressionIsVariable(instance, variable)) 669 return false; 670 671 var callee = edge.Exp[0]; 672 if (callee.Kind != "Var") 673 return false; 674 675 assert(callee.Variable.Kind == "Func"); 676 var calleeName = readable(callee.Variable.Name[0]); 677 678 // Constructor calls include the text 'Name::Name(' or 'Name<...>::Name('. 679 var openParen = calleeName.indexOf('('); 680 if (openParen < 0) 681 return false; 682 calleeName = calleeName.substring(0, openParen); 683 684 var lastColon = calleeName.lastIndexOf('::'); 685 if (lastColon < 0) 686 return false; 687 var constructorName = calleeName.substr(lastColon + 2); 688 calleeName = calleeName.substr(0, lastColon); 689 690 var lastTemplateOpen = calleeName.lastIndexOf('<'); 691 if (lastTemplateOpen >= 0) 692 calleeName = calleeName.substr(0, lastTemplateOpen); 693 694 if (calleeName.endsWith(constructorName)) 695 return true; 696 } 697 698 return false; 699 } 700 701 // Return the result of a `matcher` callback on the call found in the given 702 // `edge`, if the edge is a direct call to a named function (if not, return false). 703 // `matcher` is given the name of the callee (actually, a tuple 704 // [fully qualified name, base name]), an array of expressions containing the 705 // arguments, and if the result of the call is assigned to a variable, 706 // the expression representing that variable(the lhs). 707 // 708 // https://firefox-source-docs.mozilla.org/js/HazardAnalysis/CFG.html for 709 // documentation of the data structure used here. 710 function matchEdgeCall(edge, matcher) { 711 if (edge.Kind != "Call") { 712 return false; 713 } 714 715 const callee = edge.Exp[0]; 716 717 if (edge.Type.Kind == 'Function' && 718 edge.Exp[0].Kind == 'Var' && 719 edge.Exp[0].Variable.Kind == 'Func') { 720 const calleeName = edge.Exp[0].Variable.Name; 721 const args = edge.PEdgeCallArguments; 722 const argExprs = args ? args.Exp : []; 723 const lhs = edge.Exp[1]; // May be undefined 724 return matcher(calleeName, argExprs, lhs); 725 } 726 727 return false; 728 } 729 730 function edgeMarksVariableGCSafe(edge, variable) { 731 return matchEdgeCall(edge, (calleeName, argExprs, _lhs) => { 732 // explicit JS_HAZ_VARIABLE_IS_GC_SAFE annotation 733 return (calleeName[1] == 'MarkVariableAsGCSafe' && 734 calleeName[0].includes("JS::detail::MarkVariableAsGCSafe") && 735 argExprs.length == 1 && 736 expressionIsVariable(argExprs[0], variable)); 737 }); 738 } 739 740 // Match an optional <namespace>:: followed by the class name, 741 // and then an optional template parameter marker. 742 // 743 // Example: mozilla::dom::UniquePtr<... 744 // 745 function parseTypeName(typeName) { 746 const m = typeName.match(/^(((?:\w|::)+::)?(\w+))\b(\<)?/); 747 if (!m) { 748 return undefined; 749 } 750 const [, type, raw_namespace, classname, is_specialized] = m; 751 const namespace = raw_namespace === null ? "" : raw_namespace; 752 return { type, namespace, classname, is_specialized } 753 } 754 755 // Return whether an edge "clears out" a variable's value. A simple example 756 // would be 757 // 758 // var = nullptr; 759 // 760 // for analyses for which nullptr is a "safe" value (eg GC rooting hazards; you 761 // can't get in trouble by holding a nullptr live across a GC.) A more complex 762 // example is a Maybe<T> that gets reset: 763 // 764 // Maybe<AutoCheckCannotGC> nogc; 765 // nogc.emplace(cx); 766 // nogc.reset(); 767 // gc(); // <-- not a problem; nogc is invalidated by prev line 768 // nogc.emplace(cx); 769 // foo(nogc); 770 // 771 // Yet another example is a UniquePtr being passed by value, which means the 772 // receiver takes ownership: 773 // 774 // UniquePtr<JSObject*> uobj(obj); 775 // foo(uobj); 776 // gc(); 777 // 778 function edgeEndsValueLiveRange(edge, variable, body) 779 { 780 // var = nullptr; 781 if (edge.Kind == "Assign") { 782 const [lhs, rhs] = edge.Exp; 783 return expressionIsVariable(lhs, variable) && isImmobileValue(rhs); 784 } 785 786 if (edge.Kind != "Call") 787 return false; 788 789 if (edgeMarksVariableGCSafe(edge, variable)) { 790 // explicit JS_HAZ_VARIABLE_IS_GC_SAFE annotation 791 return true; 792 } 793 794 const decl = lookupVariable(body, variable); 795 796 if (matchEdgeCall(edge, (calleeName, argExprs, lhs) => { 797 return calleeName[1] == 'move' && calleeName[0].includes('std::move(') && 798 expressionIsDeclaredVariable(argExprs[0], decl) && 799 lhs && 800 lhs.Kind == 'Var' && 801 lhs.Variable.Kind == 'Temp'; 802 })) { 803 // temp = std::move(var) 804 // 805 // If var is a UniquePtr, and we pass it into something that takes 806 // ownership, then it should be considered to be invalid. Example: 807 // 808 // consume(std::move(var)); 809 // 810 // where consume takes a UniquePtr. This will compile to something like 811 // 812 // UniquePtr* __temp_1 = &std::move(var); 813 // UniquePtr&& __temp_2(*temp_1); // move constructor 814 // consume(__temp_2); 815 // ~UniquePtr(__temp_2); 816 // 817 // The line commented with "// move constructor" is a result of passing 818 // a UniquePtr as a parameter. If consume() took a UniquePtr&& 819 // directly, this would just be: 820 // 821 // UniquePtr* __temp_1 = &std::move(var); 822 // consume(__temp_1); 823 // 824 // which is not guaranteed to move from the reference. It might just 825 // ignore the parameter. We can't predict what consume(UniquePtr&&) 826 // will do. We do know that UniquePtr(UniquePtr&& other) moves out of 827 // `other`. 828 // 829 // The std::move() technically is irrelevant, but because we only care 830 // about bare variables, it has to be used, which is fortunate because 831 // the UniquePtr&& constructor operates on a temporary, not the 832 // variable we care about. 833 834 const lhs = edge.Exp[1].Variable; 835 if (basicBlockEatsVariable(lhs, body, edge.Index[1])) 836 return true; 837 } 838 839 const callee = edge.Exp[0]; 840 841 if (edge.Type.Kind == 'Function' && 842 edge.Type.TypeFunctionCSU && 843 edge.PEdgeCallInstance && 844 expressionIsMethodOnVariableDecl(edge.PEdgeCallInstance.Exp, decl)) 845 { 846 const typeName = edge.Type.TypeFunctionCSU.Type.Name; 847 848 // Synthesize a zero-arg constructor name like 849 // mozilla::dom::UniquePtr<T>::UniquePtr(). Note that the `<T>` is 850 // literal -- the pretty name from sixgill will render the actual 851 // constructor name as something like 852 // 853 // UniquePtr<T>::UniquePtr() [where T = int] 854 // 855 const parsed = parseTypeName(typeName); 856 if (parsed) { 857 const { type, namespace, classname, is_specialized } = parsed; 858 859 // special-case: the initial constructor that doesn't provide a value. 860 // Useful for things like Maybe<T>. 861 const template = is_specialized ? '<T>' : ''; 862 const ctorName = `${namespace}${classname}${template}::${classname}()`; 863 if (callee.Kind == 'Var' && 864 typesWithSafeConstructors.has(type) && 865 callee.Variable.Name[0].includes(ctorName)) 866 { 867 return true; 868 } 869 870 // special-case: UniquePtr::reset() and similar. 871 if (callee.Kind == 'Var' && 872 type in resetterMethods && 873 resetterMethods[type].has(callee.Variable.Name[1])) 874 { 875 return true; 876 } 877 } 878 } 879 880 // special-case: passing UniquePtr<T> by value. 881 if (edge.Type.Kind == 'Function' && 882 edge.Type.TypeFunctionArgument && 883 edge.PEdgeCallArguments) 884 { 885 for (const i in edge.Type.TypeFunctionArgument) { 886 const param = edge.Type.TypeFunctionArgument[i]; 887 if (param.Type.Kind != 'CSU') 888 continue; 889 if (!param.Type.Name.startsWith("mozilla::UniquePtr<")) 890 continue; 891 const arg = edge.PEdgeCallArguments.Exp[i]; 892 if (expressionIsVariable(arg, variable)) { 893 return true; 894 } 895 } 896 } 897 898 return false; 899 } 900 901 // Look up a variable in the list of declarations for this body. 902 function lookupVariable(body, variable) { 903 for (const decl of (body.DefineVariable || [])) { 904 if (sameVariable(decl.Variable, variable)) { 905 return decl; 906 } 907 } 908 return undefined; 909 } 910 911 function edgeMovesVariable(edge, variable, body) 912 { 913 if (edge.Kind != 'Call') 914 return false; 915 const callee = edge.Exp[0]; 916 if (callee.Kind == 'Var' && 917 callee.Variable.Kind == 'Func') 918 { 919 const { Variable: { Name: [ fullname, shortname ] } } = callee; 920 921 // Match an rvalue parameter. 922 923 if (!edge || !edge.PEdgeCallArguments || !edge.PEdgeCallArguments.Exp) { 924 return false; 925 } 926 927 for (const arg of edge.PEdgeCallArguments.Exp) { 928 if (arg.Kind != 'Drf') continue; 929 const val = arg.Exp[0]; 930 if (val.Kind == 'Var' && sameVariable(val.Variable, variable)) { 931 // This argument is the variable we're looking for. Return true 932 // if it is passed as an rvalue reference. 933 const type = lookupVariable(body, variable).Type; 934 if (type.Kind == "Pointer" && type.Reference == PTR_RVALUE_REF) { 935 return true; 936 } 937 } 938 } 939 } 940 941 return false; 942 } 943 944 // Scan forward through the basic block in 'body' starting at 'startpoint', 945 // looking for a call that passes 'variable' to a move constructor that 946 // "consumes" it (eg UniquePtr::UniquePtr(UniquePtr&&)). 947 function basicBlockEatsVariable(variable, body, startpoint) 948 { 949 const successors = getSuccessors(body); 950 let point = startpoint; 951 while (point in successors) { 952 // Only handle a single basic block. If it forks, stop looking. 953 const edges = successors[point]; 954 if (edges.length != 1) { 955 return false; 956 } 957 const edge = edges[0]; 958 959 if (edgeMovesVariable(edge, variable, body)) { 960 return true; 961 } 962 963 // edgeStartsValueLiveRange will find places where 'variable' is given 964 // a new value. Never observed in practice, since this function is only 965 // called with a temporary resulting from std::move(), which is used 966 // immediately for a call. But just to be robust to future uses: 967 if (edgeStartsValueLiveRange(edge, variable)) { 968 return false; 969 } 970 971 point = edge.Index[1]; 972 } 973 974 return false; 975 } 976 977 var PROP_REFCNT = 1 << 0; 978 var PROP_SHARED_PTR_DTOR = 1 << 1; 979 980 function getCalleeProperties(calleeName) { 981 let props = 0; 982 983 if (isRefcountedDtor(calleeName)) { 984 props |= PROP_REFCNT; 985 } 986 if (calleeName.includes("~shared_ptr()")) { 987 props |= PROP_SHARED_PTR_DTOR; 988 } 989 return props; 990 } 991 992 // Basic C++ ABI mangling: prefix an identifier with its length, in decimal. 993 function mangle(name) { 994 return name.length + name; 995 } 996 997 var TriviallyDestructibleTypes = new Set([ 998 // Single-token types from 999 // https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling-builtin 1000 "void", "wchar_t", "bool", "char", "short", "int", "long", "float", "double", 1001 "__int64", "__int128", "__float128", "char32_t", "char16_t", "char8_t", 1002 // Remaining observed cases. These are types T in shared_ptr<T> that have 1003 // been observed, where the types themselves have trivial destructors, and 1004 // the custom deleter doesn't do anything nontrivial that we might care about. 1005 "_IO_FILE" 1006 ]); 1007 function synthesizeDestructorName(className) { 1008 if (className.includes("<") || className.includes(" ") || className.includes("{")) { 1009 return; 1010 } 1011 if (TriviallyDestructibleTypes.has(className)) { 1012 return; 1013 } 1014 const parts = className.split("::"); 1015 const mangled_dtor = "_ZN" + parts.map(p => mangle(p)).join("") + "D2Ev"; 1016 const pretty_dtor = `void ${className}::~${parts.at(-1)}()`; 1017 // Note that there will be a later check to verify that the function name 1018 // synthesized here is an actual function, and assert if not (see 1019 // assertFunctionExists() in computeCallgraph.js.) 1020 return mangled_dtor + "$" + pretty_dtor; 1021 } 1022 1023 function getCallEdgeProperties(body, edge, calleeName, functionBodies) { 1024 let attrs = 0; 1025 let extraCalls = []; 1026 1027 if (edge.Kind !== "Call") { 1028 return { attrs, extraCalls }; 1029 } 1030 1031 const props = getCalleeProperties(calleeName); 1032 if (props & PROP_REFCNT) { 1033 // std::swap of two refcounted values thinks it can drop the 1034 // ref count to zero. Or rather, it just calls operator=() in a context 1035 // where the refcount will never drop to zero. 1036 const blockId = blockIdentifier(body); 1037 if (blockId.includes("std::swap") || blockId.includes("mozilla::Swap")) { 1038 // Replace the refcnt release call with nothing. It's not going to happen. 1039 attrs |= ATTR_REPLACED; 1040 } 1041 } 1042 1043 if (props & PROP_SHARED_PTR_DTOR) { 1044 // Replace shared_ptr<T>::~shared_ptr() calls to T::~T() calls. 1045 // Note that this will only apply to simple cases. 1046 // Any templatized type, in particular, will be ignored and the original 1047 // call tree will be left alone. If this triggers a hazard, then we can 1048 // consider extending the mangling support. 1049 // 1050 // If the call to ~shared_ptr is not replaced, then it might end up calling 1051 // an unknown function pointer. This does not always happen-- in some cases, 1052 // the call tree below ~shared_ptr will invoke the correct destructor without 1053 // going through function pointers. 1054 const m = calleeName.match(/shared_ptr<(.*?)>::~shared_ptr\(\)(?: \[with T = ([\w:]+))?/); 1055 assert(m); 1056 let className = m[1] == "T" ? m[2] : m[1]; 1057 assert(className != ""); 1058 // cv qualification does not apply to destructors. 1059 className = className.replace("const ", ""); 1060 className = className.replace("volatile ", ""); 1061 const dtor = synthesizeDestructorName(className); 1062 if (dtor) { 1063 attrs |= ATTR_REPLACED; 1064 extraCalls.push({ 1065 attrs: ATTR_SYNTHETIC, 1066 name: dtor, 1067 }); 1068 } 1069 } 1070 1071 if ((props & PROP_REFCNT) == 0) { 1072 return { attrs, extraCalls }; 1073 } 1074 1075 let callee = edge.Exp[0]; 1076 while (callee.Kind === "Drf") { 1077 callee = callee.Exp[0]; 1078 } 1079 1080 const instance = edge.PEdgeCallInstance.Exp; 1081 if (instance.Kind !== "Var") { 1082 // TODO: handle field destructors 1083 return { attrs, extraCalls }; 1084 } 1085 1086 // Test whether the dtor call is dominated by operations on the variable 1087 // that mean it will not go to a zero refcount in the dtor: either because 1088 // it's already dead (eg r.forget() was called) or because it can be proven 1089 // to have a ref count of greater than 1. This is implemented by looking 1090 // for the reverse: find a path scanning backwards from the dtor call where 1091 // the variable is used in any way that does *not* ensure that it is 1092 // trivially destructible. 1093 1094 const variable = instance.Variable; 1095 1096 const visitor = new class DominatorVisitor extends Visitor { 1097 // Do not revisit nodes. For new nodes, relay the decision made by 1098 // extend_path. 1099 next_action(seen, current) { return seen ? "prune" : current; } 1100 1101 // We don't revisit, so always use the new. 1102 merge_info(seen, current) { return current; } 1103 1104 // Return the action to take from this node. 1105 extend_path(edge, body, ppoint, successor_value) { 1106 if (!edge) { 1107 // Dummy edge to join two points. 1108 return "continue"; 1109 } 1110 1111 if (!edgeUsesVariable(edge, variable, body)) { 1112 // Nothing of interest on this edge, keep searching. 1113 return "continue"; 1114 } 1115 1116 if (edgeEndsValueLiveRange(edge, variable, body)) { 1117 // This path is safe! 1118 return "prune"; 1119 } 1120 1121 // Unsafe. Found a use that might set the variable to a 1122 // nonzero refcount. 1123 return "done"; 1124 } 1125 }(functionBodies); 1126 1127 // Searching upwards from a destructor call, return the opposite of: is 1128 // there a path to a use or the start of the function that does NOT hit a 1129 // safe assignment like refptr.forget() first? 1130 // 1131 // In graph terms: return whether the destructor call is dominated by forget() calls (or similar). 1132 const edgeIsNonReleasingDtor = !BFS_upwards( 1133 body, edge.Index[0], functionBodies, visitor, "start", 1134 false // Return value if we do not reach the root without finding a non-forget() use. 1135 ); 1136 if (edgeIsNonReleasingDtor) { 1137 attrs |= ATTR_GC_SUPPRESSED | ATTR_NONRELEASING; 1138 } 1139 return { attrs, extraCalls }; 1140 } 1141 1142 // gcc uses something like "__dt_del " for virtual destructors that it 1143 // generates. 1144 function isSyntheticVirtualDestructor(funcName) { 1145 return funcName.endsWith(" "); 1146 } 1147 1148 function typedField(field) 1149 { 1150 if ("FieldInstanceFunction" in field) { 1151 // Virtual call 1152 // 1153 // This makes a minimal attempt at dealing with overloading, by 1154 // incorporating the number of parameters. So far, that is all that has 1155 // been needed. If more is needed, sixgill will need to produce a full 1156 // mangled type. 1157 const {Type, Name: [name]} = field; 1158 1159 // Virtual destructors don't need a type or argument count, 1160 // and synthetic ones don't have them filled in. 1161 if (isSyntheticVirtualDestructor(name)) { 1162 return name; 1163 } 1164 1165 var nargs = 0; 1166 if (Type.Kind == "Function" && "TypeFunctionArguments" in Type) 1167 nargs = Type.TypeFunctionArguments.Type.length; 1168 return name + ":" + nargs; 1169 } else { 1170 // Function pointer field 1171 return field.Name[0]; 1172 } 1173 } 1174 1175 function fieldKey(csuName, field) 1176 { 1177 return csuName + "." + typedField(field); 1178 }