tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

analyze.py (13470B)


      1 #!/usr/bin/env python3
      2 
      3 #
      4 # This Source Code Form is subject to the terms of the Mozilla Public
      5 # License, v. 2.0. If a copy of the MPL was not distributed with this
      6 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
      7 
      8 """
      9 Runs the static rooting analysis
     10 """
     11 
     12 import argparse
     13 import os
     14 import subprocess
     15 import sys
     16 from shlex import quote
     17 from subprocess import Popen
     18 
     19 
     20 def execfile(thefile, globals):
     21    exec(compile(open(thefile).read(), filename=thefile, mode="exec"), globals)
     22 
     23 
     24 # Label a string as an output.
     25 class Output(str):
     26    pass
     27 
     28 
     29 # Label a string as a pattern for multiple inputs.
     30 class MultiInput(str):
     31    pass
     32 
     33 
     34 # Construct a new environment by merging in some settings needed for running the individual scripts.
     35 def env(config):
     36    # Add config['sixgill_bin'] to $PATH if not already there.
     37    path = os.environ["PATH"].split(":")
     38    if dir := config.get("sixgill_bin"):
     39        if dir not in path:
     40            path.insert(0, dir)
     41 
     42    return dict(
     43        os.environ,
     44        PATH=":".join(path),
     45        XDB=f"{config['sixgill_bin']}/xdb.so",
     46        SOURCE=config["source"],
     47    )
     48 
     49 
     50 def fill(command, config):
     51    filled = []
     52    for s in command:
     53        try:
     54            rep = s.format(**config)
     55        except KeyError:
     56            print("Substitution failed: %s" % s)
     57            filled = None
     58            break
     59 
     60        if isinstance(s, Output):
     61            filled.append(Output(rep))
     62        elif isinstance(s, MultiInput):
     63            N = int(config["jobs"])
     64            for i in range(1, N + 1):
     65                filled.append(rep.format(i=i, n=N))
     66        else:
     67            filled.append(rep)
     68 
     69    if filled is None:
     70        raise Exception("substitution failure")
     71 
     72    return tuple(filled)
     73 
     74 
def print_command(job, config, env=None):
    """Print a shell-pasteable, single-chunk approximation of `job`'s command.

    Display a command to run that has roughly the same effect as what was
    actually run. The actual command uses temporary files that get renamed at
    the end, and run some commands in parallel chunks. The printed command
    will substitute in the actual output and run in a single chunk, so that
    it is easier to cut & paste and add a --function flag for debugging.
    """
    # Force a single chunk (i=1 of n=1) regardless of the real job count.
    cfg = dict(config, n=1, i=1, jobs=1)
    cmd = job_command_with_final_output_names(job)
    cmd = fill(cmd, cfg)

    cmd = [quote(s) for s in cmd]
    if outfile := job.get("redirect-output"):
        cmd.extend([">", quote(outfile.format(**cfg))])
    # Shorten paths under $HOME to "~" for readability.
    if HOME := os.environ.get("HOME"):
        cmd = [s.replace(HOME, "~") for s in cmd]

    if env:
        # Try to keep the command as short as possible by only displaying
        # modified environment variable settings.
        e = os.environ
        changed = {key: value for key, value in env.items() if value != e.get(key)}
        if changed:
            settings = []
            for key, value in changed.items():
                if key in e and e[key] in value:
                    # Display modifications as V=prefix${V}suffix when
                    # possible. This can make a huge difference for $PATH.
                    start = value.index(e[key])
                    end = start + len(e[key])
                    setting = '%s="%s${%s}%s"' % (key, value[:start], key, value[end:])
                else:
                    setting = '%s="%s"' % (key, value)
                if HOME:
                    setting = setting.replace(HOME, "$HOME")
                settings.append(setting)

            # Prefix the command with the changed environment settings.
            cmd = settings + cmd

    print("  " + " ".join(cmd))
    114 
    115 
# Registry of analysis pipeline steps. Each entry may define:
#   command          - argv list; "{key}" placeholders are filled from config.
#                      Output(...) marks an output filename (written to a
#                      temp name and renamed on success); MultiInput(...)
#                      expands to one argument per parallel chunk.
#   outputs          - final filenames for the Output() placeholders, in
#                      order; "{i}"/"{n}" are the chunk index and count.
#   multi-output     - run config['jobs'] parallel chunks of this step.
#   redirect-output  - capture the command's stdout into this file instead
#                      of (or in addition to) explicit outputs.
JOBS = {
    # Debugging helper: list the sixgill database files.
    "list-dbs": {"command": ["ls", "-l"]},
    # Extract the raw callgraph, in parallel chunks.
    "rawcalls": {
        "command": [
            "{js}",
            "{analysis_scriptdir}/computeCallgraph.js",
            "{typeInfo}",
            Output("{rawcalls}"),
            "{i}",
            "{n}",
        ],
        "multi-output": True,
        "outputs": ["rawcalls.{i}.of.{n}"],
    },
    # Merge the rawcalls chunks and compute which functions can GC.
    "gcFunctions": {
        "command": [
            "{js}",
            "{analysis_scriptdir}/computeGCFunctions.js",
            MultiInput("{rawcalls}"),
            "--outputs",
            Output("{callgraph}"),
            Output("{gcFunctions}"),
            Output("{gcFunctions_list}"),
            Output("{limitedFunctions_list}"),
        ],
        "outputs": [
            "callgraph.txt",
            "gcFunctions.txt",
            "gcFunctions.lst",
            "limitedFunctions.lst",
        ],
    },
    # Compute the set of GC-sensitive types.
    "gcTypes": {
        "command": [
            "{js}",
            "{analysis_scriptdir}/computeGCTypes.js",
            Output("{gcTypes}"),
            Output("{typeInfo}"),
        ],
        "outputs": ["gcTypes.txt", "typeInfo.txt"],
    },
    # Dump the keys of the source-body database (all analyzed functions).
    "allFunctions": {
        "command": ["{sixgill_bin}/xdbkeys", "src_body.xdb"],
        "redirect-output": "allFunctions.txt",
    },
    # The core rooting-hazard analysis, in parallel chunks.
    "hazards": {
        "command": [
            "{js}",
            "{analysis_scriptdir}/analyzeRoots.js",
            "{gcFunctions_list}",
            "{limitedFunctions_list}",
            "{gcTypes}",
            "{typeInfo}",
            "{i}",
            "{n}",
            "tmp.{i}.of.{n}",
        ],
        "multi-output": True,
        "redirect-output": "rootingHazards.{i}.of.{n}",
    },
    # Merge the per-chunk hazard results into a single JSON file.
    "gather-hazards": {
        "command": [
            "{js}",
            "{analysis_scriptdir}/mergeJSON.js",
            MultiInput("{hazards}"),
            Output("{all_hazards}"),
        ],
        "outputs": ["rootingHazards.json"],
    },
    # Post-process the hazards into human-readable reports.
    "explain": {
        "command": [
            sys.executable,
            "{analysis_scriptdir}/explain.py",
            "{all_hazards}",
            "{gcFunctions}",
            Output("{explained_hazards}"),
            Output("{unnecessary}"),
            Output("{refs}"),
            Output("{html}"),
        ],
        "outputs": ["hazards.txt", "unnecessary.txt", "refs.txt", "hazards.html"],
    },
    # Separate analysis for heap-write hazards.
    "heapwrites": {
        "command": ["{js}", "{analysis_scriptdir}/analyzeHeapWrites.js"],
        "redirect-output": "heapWriteHazards.txt",
    },
}
    203 
    204 
    205 # Generator of (i, j, item) tuples corresponding to outputs:
    206 #  - i is just the index of the yielded tuple (a la enumerate())
    207 #  - j is the index of the item in the command list
    208 #  - item is command[j]
    209 def out_indexes(command):
    210    i = 0
    211    for j, fragment in enumerate(command):
    212        if isinstance(fragment, Output):
    213            yield (i, j, fragment)
    214            i += 1
    215 
    216 
    217 def job_command_with_final_output_names(job):
    218    outfiles = job.get("outputs", [])
    219    command = list(job["command"])
    220    for i, j, name in out_indexes(job["command"]):
    221        command[j] = outfiles[i]
    222    return command
    223 
    224 
def run_job(name, config):
    """Run pipeline step `name`: spawn its command(s), wait for them all,
    then rename temporary output files to their final names.

    Multi-output steps are split into config['jobs'] parallel chunks.
    Raises Exception if any chunk exits with a nonzero status.
    """
    job = JOBS[name]
    outs = job.get("outputs") or job.get("redirect-output")
    print("Running " + name + " to generate " + str(outs))
    if "function" in job:
        # Step implemented as a Python callable rather than a subprocess.
        job["function"](config, job["redirect-output"])
        return

    # Number of parallel chunks for this step (1 unless multi-output).
    N = int(config["jobs"]) if job.get("multi-output") else 1
    config["n"] = N
    jobs = {}
    for i in range(1, N + 1):
        config["i"] = i
        cmd = fill(job["command"], config)
        info = spawn_command(cmd, job, name, config)
        # Track spawned children by pid so os.wait() results can be matched.
        jobs[info["proc"].pid] = info

    if config["verbose"] > 0:
        print_command(job, config, env=env(config))

    # Reap all children; remember the first nonzero exit status seen.
    final_status = 0
    while jobs:
        pid, status = os.wait()
        final_status = final_status or status
        info = jobs[pid]
        del jobs[pid]
        if "redirect" in info:
            info["redirect"].close()

        # Rename the temporary files to their final names.
        for temp, final in info["rename_map"].items():
            try:
                if config["verbose"] > 1:
                    print("Renaming %s -> %s" % (temp, final))
                os.rename(temp, final)
            except OSError:
                print("Error renaming %s -> %s" % (temp, final))
                raise

    if final_status != 0:
        raise Exception(f"job {name} returned status {final_status}")
    266 
    267 
    268 def spawn_command(cmdspec, job, name, config):
    269    rename_map = {}
    270 
    271    if "redirect-output" in job:
    272        stdout_filename = "{}.tmp{}".format(name, config.get("i", ""))
    273        final_outfile = job["redirect-output"].format(**config)
    274        rename_map[stdout_filename] = final_outfile
    275        command = cmdspec
    276    else:
    277        outfiles = fill(job["outputs"], config)
    278        stdout_filename = None
    279 
    280        # Replace the Outputs with temporary filenames, and record a mapping
    281        # from those temp names to their actual final names that will be used
    282        # if the command succeeds.
    283        command = list(cmdspec)
    284        for i, j, raw_name in out_indexes(cmdspec):
    285            [name] = fill([raw_name], config)
    286            command[j] = "{}.tmp{}".format(name, config.get("i", ""))
    287            rename_map[command[j]] = outfiles[i]
    288 
    289    sys.stdout.flush()
    290    info = {"rename_map": rename_map}
    291    if stdout_filename:
    292        info["redirect"] = open(stdout_filename, "w")
    293        info["proc"] = Popen(command, stdout=info["redirect"], env=env(config))
    294    else:
    295        info["proc"] = Popen(command, env=env(config))
    296 
    297    if config["verbose"] > 1:
    298        print("Spawned process {}".format(info["proc"].pid))
    299 
    300    return info
    301 
    302 
    303 # Default to conservatively assuming 4GB/job.
    304 def max_parallel_jobs(job_size=4 * 2**30):
    305    """Return the max number of parallel jobs we can run without overfilling
    306    memory, assuming heavyweight jobs."""
    307    from_cores = int(subprocess.check_output(["nproc", "--ignore=1"]).strip())
    308    mem_bytes = os.sysconf("SC_PAGE_SIZE") * os.sysconf("SC_PHYS_PAGES")
    309    from_mem = round(mem_bytes / job_size)
    310    return min(from_cores, from_mem)
    311 
    312 
# Seed the config with the directory holding this script; the defaults.py
# files below may add or override further settings.
config = {"analysis_scriptdir": os.path.dirname(__file__)}

# Candidate defaults files: one shipped alongside this script, then one in
# the current working directory (typically the analysis output directory),
# loaded in that order so the local file wins.
defaults = [
    "%s/defaults.py" % config["analysis_scriptdir"],
    "%s/defaults.py" % os.getcwd(),
]
    319 
# Command-line interface. Most options are optional because defaults.py
# and environment variables can supply the same settings.
parser = argparse.ArgumentParser(
    description="Statically analyze build tree for rooting hazards."
)
parser.add_argument(
    "step", metavar="STEP", type=str, nargs="?", help="run only step STEP"
)
parser.add_argument(
    "--source", metavar="SOURCE", type=str, nargs="?", help="source code to analyze"
)
parser.add_argument(
    "--js",
    metavar="JSSHELL",
    type=str,
    nargs="?",
    help="full path to ctypes-capable JS shell",
)
parser.add_argument(
    "--first",
    metavar="STEP",
    type=str,
    nargs="?",
    help="execute all jobs starting with STEP",
)
parser.add_argument(
    "--last", metavar="STEP", type=str, nargs="?", help="stop at step STEP"
)
parser.add_argument(
    "--jobs",
    "-j",
    default=None,
    metavar="JOBS",
    type=int,
    help="number of simultaneous analyzeRoots.js jobs",
)
# NOTE(review): type=bool here means any explicit value ("--list false")
# is still truthy; only bare "--list" vs. absence matters — confirm that is
# the intent before changing to action="store_true".
parser.add_argument(
    "--list", const=True, nargs="?", type=bool, help="display available steps"
)
parser.add_argument(
    "--expect-file",
    type=str,
    nargs="?",
    help="deprecated option, temporarily still present for backwards compatibility",
)
parser.add_argument(
    "--verbose",
    "-v",
    action="count",
    default=1,
    help="Display cut & paste commands to run individual steps (give twice for more output)",
)
parser.add_argument("--quiet", "-q", action="count", default=0, help="Suppress output")

args = parser.parse_args()
# Each -q cancels one -v; verbosity never drops below zero.
args.verbose = max(0, args.verbose - args.quiet)
    374 
    375 for default in defaults:
    376    try:
    377        execfile(default, config)
    378        if args.verbose > 1:
    379            print("Loaded %s" % default)
    380    except Exception:
    381        pass
    382 
    383 # execfile() used config as the globals for running the
    384 # defaults.py script, and will have set a __builtins__ key as a side effect.
    385 del config["__builtins__"]
    386 data = config.copy()
    387 
    388 for k, v in vars(args).items():
    389    if v is not None:
    390        data[k] = v
    391 
    392 if args.jobs is not None:
    393    data["jobs"] = args.jobs
    394 if not data.get("jobs"):
    395    data["jobs"] = max_parallel_jobs()
    396 
    397 if "GECKO_PATH" in os.environ:
    398    data["source"] = os.environ["GECKO_PATH"]
    399 if "SOURCE" in os.environ:
    400    data["source"] = os.environ["SOURCE"]
    401 
# The pipeline steps, in dependency order.
steps = [
    "gcTypes",
    "rawcalls",
    "gcFunctions",
    "allFunctions",
    "hazards",
    "gather-hazards",
    "explain",
    "heapwrites",
]

# --list: print each step and the files it produces, then exit.
if args.list:
    for step in steps:
        job = JOBS[step]
        outfiles = job.get("outputs") or job.get("redirect-output")
        if outfiles:
            # "->*" marks steps that run as parallel chunks.
            print(
                "%s\n    ->%s %s"
                % (step, "*" if job.get("multi-output") else "", outfiles)
            )
        else:
            print(step)
    sys.exit(0)
    425 
# Record every step's output filenames in the config under the placeholder
# name (e.g. data['gcFunctions'] = 'gcFunctions.txt') so that later steps
# can reference earlier steps' outputs in their command templates.
for step in steps:
    job = JOBS[step]
    if "redirect-output" in job:
        data[step] = job["redirect-output"]
    elif "outputs" in job and "command" in job:
        outfiles = job["outputs"]
        num_outputs = 0
        for i, j, name in out_indexes(job["command"]):
            # Trim the {curly brackets} off of the output keys.
            data[name[1:-1]] = outfiles[i]
            num_outputs += 1
        # Internal consistency check on the JOBS table itself.
        assert len(outfiles) == num_outputs, (
            'step "%s": mismatched number of output files (%d) and params (%d)'
            % (
                step,
                num_outputs,
                len(outfiles),
            )
        )  # NOQA: E501
    445 
# Narrow the step list: a bare STEP argument runs exactly that one step;
# otherwise --first/--last select a contiguous slice of the pipeline.
if args.step:
    if args.first or args.last:
        raise Exception(
            "--first and --last cannot be used when a step argument is given"
        )
    steps = [args.step]
else:
    if args.first:
        steps = steps[steps.index(args.first) :]
    if args.last:
        steps = steps[: steps.index(args.last) + 1]

# Run the selected steps in order; run_job raises on any failure.
for step in steps:
    run_job(step, data)