analyze.py (13470B)
#!/usr/bin/env python3

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

"""
Runs the static rooting analysis
"""

import argparse
import os
import subprocess
import sys
from shlex import quote
from subprocess import Popen


def execfile(thefile, globals):
    """Execute the Python source file `thefile` using `globals` as its namespace.

    Used to load defaults.py configuration files into the `config` dict.
    """
    # Use a context manager so the file handle is closed promptly instead of
    # leaking until garbage collection (the original left it open).
    with open(thefile) as f:
        exec(compile(f.read(), filename=thefile, mode="exec"), globals)


# Label a string as an output.
class Output(str):
    pass


# Label a string as a pattern for multiple inputs.
class MultiInput(str):
    pass


# Construct a new environment by merging in some settings needed for running the individual scripts.
def env(config):
    """Return a copy of os.environ extended with PATH/XDB/SOURCE settings.

    Raises KeyError if config lacks 'sixgill_bin' or 'source' (same as before;
    a defaults.py is expected to provide them).
    """
    # Add config['sixgill_bin'] to $PATH if not already there.
    # (Named `sixgill_bin` rather than `dir` to avoid shadowing the builtin.)
    path = os.environ["PATH"].split(":")
    if sixgill_bin := config.get("sixgill_bin"):
        if sixgill_bin not in path:
            path.insert(0, sixgill_bin)

    return dict(
        os.environ,
        PATH=":".join(path),
        XDB=f"{config['sixgill_bin']}/xdb.so",
        SOURCE=config["source"],
    )


def fill(command, config):
    """Substitute config values into each {placeholder} of `command`.

    Output fragments keep their Output label; a MultiInput fragment expands
    into config['jobs'] entries, one per (i, n) chunk. Returns a tuple.
    Raises Exception("substitution failure") when a placeholder is missing.
    """
    filled = []
    for s in command:
        try:
            rep = s.format(**config)
        except KeyError:
            # Report the offending fragment, then fail — raising here directly
            # replaces the old filled=None/break/post-loop-raise dance.
            print("Substitution failed: %s" % s)
            raise Exception("substitution failure") from None

        if isinstance(s, Output):
            filled.append(Output(rep))
        elif isinstance(s, MultiInput):
            # One expansion per parallel chunk: i runs 1..N inclusive.
            N = int(config["jobs"])
            for i in range(1, N + 1):
                filled.append(rep.format(i=i, n=N))
        else:
            filled.append(rep)

    return tuple(filled)


def print_command(job, config, env=None):
    # Display a command to run that has roughly the same effect as what was
    # actually run. The actual command uses temporary files that get renamed at
    # the end, and run some commands in parallel chunks. The printed command
    # will substitute in the actual output and run in a single chunk, so that
    # it is easier to cut & paste and add a --function flag for debugging.
    cfg = dict(config, n=1, i=1, jobs=1)
    cmd = job_command_with_final_output_names(job)
    cmd = fill(cmd, cfg)

    cmd = [quote(s) for s in cmd]
    if outfile := job.get("redirect-output"):
        cmd.extend([">", quote(outfile.format(**cfg))])
    if HOME := os.environ.get("HOME"):
        # Abbreviate the user's home directory for readability.
        cmd = [s.replace(HOME, "~") for s in cmd]

    if env:
        # Try to keep the command as short as possible by only displaying
        # modified environment variable settings.
        e = os.environ
        changed = {key: value for key, value in env.items() if value != e.get(key)}
        if changed:
            settings = []
            for key, value in changed.items():
                if key in e and e[key] in value:
                    # Display modifications as V=prefix${V}suffix when
                    # possible. This can make a huge difference for $PATH.
                    start = value.index(e[key])
                    end = start + len(e[key])
                    setting = '%s="%s${%s}%s"' % (key, value[:start], key, value[end:])
                else:
                    setting = '%s="%s"' % (key, value)
                if HOME:
                    setting = setting.replace(HOME, "$HOME")
                settings.append(setting)

            cmd = settings + cmd

    print(" " + " ".join(cmd))


# Table of analysis steps. Each job has a "command" (with {placeholders}
# resolved by fill()), plus either "outputs" (files written via Output
# fragments) or "redirect-output" (stdout captured to a file). Jobs marked
# "multi-output" run in config['jobs'] parallel chunks.
JOBS = {
    "list-dbs": {"command": ["ls", "-l"]},
    "rawcalls": {
        "command": [
            "{js}",
            "{analysis_scriptdir}/computeCallgraph.js",
            "{typeInfo}",
            Output("{rawcalls}"),
            "{i}",
            "{n}",
        ],
        "multi-output": True,
        "outputs": ["rawcalls.{i}.of.{n}"],
    },
    "gcFunctions": {
        "command": [
            "{js}",
            "{analysis_scriptdir}/computeGCFunctions.js",
            MultiInput("{rawcalls}"),
            "--outputs",
            Output("{callgraph}"),
            Output("{gcFunctions}"),
            Output("{gcFunctions_list}"),
            Output("{limitedFunctions_list}"),
        ],
        "outputs": [
            "callgraph.txt",
            "gcFunctions.txt",
            "gcFunctions.lst",
            "limitedFunctions.lst",
        ],
    },
    "gcTypes": {
        "command": [
            "{js}",
            "{analysis_scriptdir}/computeGCTypes.js",
            Output("{gcTypes}"),
            Output("{typeInfo}"),
        ],
        "outputs": ["gcTypes.txt", "typeInfo.txt"],
    },
    "allFunctions": {
        "command": ["{sixgill_bin}/xdbkeys", "src_body.xdb"],
        "redirect-output": "allFunctions.txt",
    },
    "hazards": {
        "command": [
            "{js}",
            "{analysis_scriptdir}/analyzeRoots.js",
            "{gcFunctions_list}",
            "{limitedFunctions_list}",
            "{gcTypes}",
            "{typeInfo}",
            "{i}",
            "{n}",
            "tmp.{i}.of.{n}",
        ],
        "multi-output": True,
        "redirect-output": "rootingHazards.{i}.of.{n}",
    },
    "gather-hazards": {
        "command": [
            "{js}",
            "{analysis_scriptdir}/mergeJSON.js",
            MultiInput("{hazards}"),
            Output("{all_hazards}"),
        ],
        "outputs": ["rootingHazards.json"],
    },
    "explain": {
        "command": [
            sys.executable,
            "{analysis_scriptdir}/explain.py",
            "{all_hazards}",
            "{gcFunctions}",
            Output("{explained_hazards}"),
            Output("{unnecessary}"),
            Output("{refs}"),
            Output("{html}"),
        ],
        "outputs": ["hazards.txt", "unnecessary.txt", "refs.txt", "hazards.html"],
    },
    "heapwrites": {
        "command": ["{js}", "{analysis_scriptdir}/analyzeHeapWrites.js"],
        "redirect-output": "heapWriteHazards.txt",
    },
}


# Generator of (i, j, item) tuples corresponding to outputs:
# - i is just the index of the yielded tuple (a la enumerate())
# - j is the index of the item in the command list
# - item is command[j]
def out_indexes(command):
    i = 0
    for j, fragment in enumerate(command):
        if isinstance(fragment, Output):
            yield (i, j, fragment)
            i += 1


def job_command_with_final_output_names(job):
    """Return the job's command with Output placeholders replaced by the
    final (post-rename) output filenames, for display purposes."""
    outfiles = job.get("outputs", [])
    command = list(job["command"])
    for i, j, name in out_indexes(job["command"]):
        command[j] = outfiles[i]
    return command


def run_job(name, config):
    """Run the analysis step `name`: spawn its (possibly parallel) commands,
    wait for them, then rename temp outputs to their final names.

    Raises Exception if any child exits with a nonzero wait status.
    """
    job = JOBS[name]
    outs = job.get("outputs") or job.get("redirect-output")
    print("Running " + name + " to generate " + str(outs))
    if "function" in job:
        # A job may be implemented directly in Python instead of a command.
        job["function"](config, job["redirect-output"])
        return

    # multi-output jobs are split into config['jobs'] parallel chunks (i of n).
    N = int(config["jobs"]) if job.get("multi-output") else 1
    config["n"] = N
    jobs = {}
    for i in range(1, N + 1):
        config["i"] = i
        cmd = fill(job["command"], config)
        info = spawn_command(cmd, job, name, config)
        jobs[info["proc"].pid] = info

    if config["verbose"] > 0:
        print_command(job, config, env=env(config))

    # Reap children; remember the first nonzero wait status.
    final_status = 0
    while jobs:
        pid, status = os.wait()
        final_status = final_status or status
        info = jobs.pop(pid)
        if "redirect" in info:
            info["redirect"].close()

        # Rename the temporary files to their final names.
        for temp, final in info["rename_map"].items():
            try:
                if config["verbose"] > 1:
                    print("Renaming %s -> %s" % (temp, final))
                os.rename(temp, final)
            except OSError:
                print("Error renaming %s -> %s" % (temp, final))
                raise

    if final_status != 0:
        raise Exception(f"job {name} returned status {final_status}")


def spawn_command(cmdspec, job, name, config):
    """Start one chunk of a job and return an info dict containing the Popen
    handle, the temp->final rename map, and (if redirected) the open stdout
    file."""
    rename_map = {}

    if "redirect-output" in job:
        # Output goes to stdout: capture it in a temp file and record where it
        # should be renamed to on success.
        stdout_filename = "{}.tmp{}".format(name, config.get("i", ""))
        final_outfile = job["redirect-output"].format(**config)
        rename_map[stdout_filename] = final_outfile
        command = cmdspec
    else:
        outfiles = fill(job["outputs"], config)
        stdout_filename = None

        # Replace the Outputs with temporary filenames, and record a mapping
        # from those temp names to their actual final names that will be used
        # if the command succeeds. (Local renamed from `name` to `outname` so
        # it no longer shadows the job-name parameter.)
        command = list(cmdspec)
        for i, j, raw_name in out_indexes(cmdspec):
            [outname] = fill([raw_name], config)
            command[j] = "{}.tmp{}".format(outname, config.get("i", ""))
            rename_map[command[j]] = outfiles[i]

    # Flush so our progress messages appear before any child output.
    sys.stdout.flush()
    info = {"rename_map": rename_map}
    if stdout_filename:
        info["redirect"] = open(stdout_filename, "w")
        info["proc"] = Popen(command, stdout=info["redirect"], env=env(config))
    else:
        info["proc"] = Popen(command, env=env(config))

    if config["verbose"] > 1:
        print("Spawned process {}".format(info["proc"].pid))

    return info


# Default to conservatively assuming 4GB/job.
def max_parallel_jobs(job_size=4 * 2**30):
    """Return the max number of parallel jobs we can run without overfilling
    memory, assuming heavyweight jobs."""
    from_cores = int(subprocess.check_output(["nproc", "--ignore=1"]).strip())
    mem_bytes = os.sysconf("SC_PAGE_SIZE") * os.sysconf("SC_PHYS_PAGES")
    from_mem = round(mem_bytes / job_size)
    # Clamp to at least one job so a low-memory machine still makes progress.
    return max(1, min(from_cores, from_mem))


config = {"analysis_scriptdir": os.path.dirname(__file__)}

# Optional configuration files, loaded best-effort: one next to this script,
# one in the current directory.
defaults = [
    "%s/defaults.py" % config["analysis_scriptdir"],
    "%s/defaults.py" % os.getcwd(),
]

parser = argparse.ArgumentParser(
    description="Statically analyze build tree for rooting hazards."
)
parser.add_argument(
    "step", metavar="STEP", type=str, nargs="?", help="run only step STEP"
)
parser.add_argument(
    "--source", metavar="SOURCE", type=str, nargs="?", help="source code to analyze"
)
parser.add_argument(
    "--js",
    metavar="JSSHELL",
    type=str,
    nargs="?",
    help="full path to ctypes-capable JS shell",
)
parser.add_argument(
    "--first",
    metavar="STEP",
    type=str,
    nargs="?",
    help="execute all jobs starting with STEP",
)
parser.add_argument(
    "--last", metavar="STEP", type=str, nargs="?", help="stop at step STEP"
)
parser.add_argument(
    "--jobs",
    "-j",
    default=None,
    metavar="JOBS",
    type=int,
    help="number of simultaneous analyzeRoots.js jobs",
)
parser.add_argument(
    "--list", const=True, nargs="?", type=bool, help="display available steps"
)
parser.add_argument(
    "--expect-file",
    type=str,
    nargs="?",
    help="deprecated option, temporarily still present for backwards compatibility",
)
parser.add_argument(
    "--verbose",
    "-v",
    action="count",
    default=1,
    help="Display cut & paste commands to run individual steps (give twice for more output)",
)
parser.add_argument("--quiet", "-q", action="count", default=0, help="Suppress output")

args = parser.parse_args()
args.verbose = max(0, args.verbose - args.quiet)

for default in defaults:
    try:
        execfile(default, config)
        if args.verbose > 1:
            print("Loaded %s" % default)
    except Exception:
        # defaults.py files are optional; loading is deliberately best-effort.
        pass

# execfile() used config as the globals for running the
# defaults.py script, and will have set a __builtins__ key as a side effect.
# Use pop() with a default: if neither defaults.py could be loaded, the key
# was never set and a bare `del` would raise KeyError.
config.pop("__builtins__", None)
data = config.copy()

# Command-line arguments override defaults.py settings.
for k, v in vars(args).items():
    if v is not None:
        data[k] = v

if args.jobs is not None:
    data["jobs"] = args.jobs
if not data.get("jobs"):
    data["jobs"] = max_parallel_jobs()

# Environment variables override everything else for the source location.
if "GECKO_PATH" in os.environ:
    data["source"] = os.environ["GECKO_PATH"]
if "SOURCE" in os.environ:
    data["source"] = os.environ["SOURCE"]

steps = [
    "gcTypes",
    "rawcalls",
    "gcFunctions",
    "allFunctions",
    "hazards",
    "gather-hazards",
    "explain",
    "heapwrites",
]

if args.list:
    for step in steps:
        job = JOBS[step]
        outfiles = job.get("outputs") or job.get("redirect-output")
        if outfiles:
            print(
                "%s\n ->%s %s"
                % (step, "*" if job.get("multi-output") else "", outfiles)
            )
        else:
            print(step)
    sys.exit(0)

# Seed `data` with the output filename of every step, so later steps can refer
# to earlier steps' outputs via {placeholders}.
for step in steps:
    job = JOBS[step]
    if "redirect-output" in job:
        data[step] = job["redirect-output"]
    elif "outputs" in job and "command" in job:
        outfiles = job["outputs"]
        num_outputs = 0
        for i, j, name in out_indexes(job["command"]):
            # Trim the {curly brackets} off of the output keys.
            data[name[1:-1]] = outfiles[i]
            num_outputs += 1
        assert len(outfiles) == num_outputs, (
            'step "%s": mismatched number of output files (%d) and params (%d)'
            % (
                step,
                num_outputs,
                len(outfiles),
            )
        )  # NOQA: E501

if args.step:
    if args.first or args.last:
        raise Exception(
            "--first and --last cannot be used when a step argument is given"
        )
    steps = [args.step]
else:
    if args.first:
        steps = steps[steps.index(args.first) :]
    if args.last:
        steps = steps[: steps.index(args.last) + 1]

for step in steps:
    run_job(step, data)