tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

testlib.py (8305B)


      1 import json
      2 import os
      3 import re
      4 import subprocess
      5 import sys
      6 from collections import defaultdict, namedtuple
      7 
      8 from sixgill import Body
      9 
     10 scriptdir = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
     11 
     12 HazardSummary = namedtuple(
     13    "HazardSummary", ["function", "variable", "type", "GCFunction", "location"]
     14 )
     15 
     16 Callgraph = namedtuple(
     17    "Callgraph",
     18    [
     19        "functionNames",
     20        "nameToId",
     21        "mangledToUnmangled",
     22        "unmangledToMangled",
     23        "calleesOf",
     24        "callersOf",
     25        "tags",
     26        "calleeGraph",
     27        "callerGraph",
     28    ],
     29 )
     30 
     31 
     32 def equal(got, expected):
     33    if got != expected:
     34        print("Got '%s', expected '%s'" % (got, expected))
     35 
     36 
     37 def extract_unmangled(func):
     38    return func.split("$")[-1]
     39 
     40 
     41 class Test:
     42    def __init__(self, indir, outdir, cfg, verbose=0):
     43        self.indir = indir
     44        self.outdir = outdir
     45        self.cfg = cfg
     46        self.verbose = verbose
     47 
     48    def infile(self, path):
     49        return os.path.join(self.indir, path)
     50 
     51    def binpath(self, prog):
     52        return os.path.join(self.cfg.sixgill_bin, prog)
     53 
     54    def compile(self, source, options=""):
     55        env = os.environ
     56        env["CCACHE_DISABLE"] = "1"
     57        if "-fexceptions" not in options and "-fno-exceptions" not in options:
     58            options += " -fno-exceptions"
     59        cmd = f"{self.cfg.cxx} -c {self.infile(source)} -O3 -std=c++17 -fplugin={self.cfg.sixgill_plugin} -fplugin-arg-xgill-mangle=1 {options}"
     60        if self.cfg.verbose > 0:
     61            print("Running %s" % cmd)
     62        subprocess.check_call(["sh", "-c", cmd])
     63 
     64    def load_db_entry(self, dbname, pattern):
     65        """Look up an entry from an XDB database file, 'pattern' may be an exact
     66        matching string, or an re pattern object matching a single entry."""
     67 
     68        if hasattr(pattern, "match"):
     69            output = subprocess.check_output(
     70                [self.binpath("xdbkeys"), dbname + ".xdb"], universal_newlines=True
     71            )
     72            matches = list(filter(lambda _: re.search(pattern, _), output.splitlines()))
     73            if len(matches) == 0:
     74                raise Exception("entry not found")
     75            if len(matches) > 1:
     76                raise Exception("multiple entries found")
     77            pattern = matches[0]
     78 
     79        output = subprocess.check_output(
     80            [self.binpath("xdbfind"), "-json", dbname + ".xdb", pattern],
     81            universal_newlines=True,
     82        )
     83        return json.loads(output)
     84 
     85    def run_analysis_script(self, startPhase="gcTypes", upto=None):
     86        open("defaults.py", "w").write(
     87            f"""\
     88 analysis_scriptdir = '{scriptdir}'
     89 sixgill_bin = '{self.cfg.sixgill_bin}'
     90 """
     91        )
     92        cmd = [
     93            sys.executable,
     94            os.path.join(scriptdir, "analyze.py"),
     95            ["-q", "", "-v"][min(self.verbose, 2)],
     96        ]
     97        cmd += ["--first", startPhase]
     98        if upto:
     99            cmd += ["--last", upto]
    100        cmd.append("--source=%s" % self.indir)
    101        cmd.append("--js=%s" % self.cfg.js)
    102        if self.cfg.verbose:
    103            print("Running " + " ".join(cmd))
    104        subprocess.check_call(cmd)
    105 
    106    def computeGCTypes(self):
    107        self.run_analysis_script("gcTypes", upto="gcTypes")
    108 
    109    def computeHazards(self):
    110        self.run_analysis_script("gcTypes")
    111 
    112    def load_text_file(self, filename, extract=lambda l: l):
    113        fullpath = os.path.join(self.outdir, filename)
    114        values = (extract(line.strip()) for line in open(fullpath))
    115        return list(filter(lambda _: _ is not None, values))
    116 
    117    def load_json_file(self, filename, reviver=None):
    118        fullpath = os.path.join(self.outdir, filename)
    119        with open(fullpath) as fh:
    120            return json.load(fh, object_hook=reviver)
    121 
    122    def load_gcTypes(self):
    123        def grab_type(line):
    124            m = re.match(r"^(GC\w+): (.*)", line)
    125            if m:
    126                return (m.group(1) + "s", m.group(2))
    127            return None
    128 
    129        gctypes = defaultdict(list)
    130        for collection, typename in self.load_text_file(
    131            "gcTypes.txt", extract=grab_type
    132        ):
    133            gctypes[collection].append(typename)
    134        return gctypes
    135 
    136    def load_typeInfo(self, filename="typeInfo.txt"):
    137        return self.load_json_file(filename)
    138 
    139    def load_funcInfo(self, filename="limitedFunctions.lst"):
    140        return self.load_json_file(filename)
    141 
    142    def load_gcFunctions(self):
    143        return self.load_text_file("gcFunctions.lst", extract=extract_unmangled)
    144 
    145    def load_callgraph(self):
    146        data = Callgraph(
    147            functionNames=["dummy"],
    148            nameToId={},
    149            mangledToUnmangled={},
    150            unmangledToMangled={},
    151            calleesOf=defaultdict(list),
    152            callersOf=defaultdict(list),
    153            tags=defaultdict(set),
    154            calleeGraph=defaultdict(dict),
    155            callerGraph=defaultdict(dict),
    156        )
    157 
    158        def lookup(id):
    159            mangled = data.functionNames[int(id)]
    160            return data.mangledToUnmangled.get(mangled, mangled)
    161 
    162        def add_call(caller, callee, limit):
    163            data.calleesOf[caller].append(callee)
    164            data.callersOf[callee].append(caller)
    165            data.calleeGraph[caller][callee] = True
    166            data.callerGraph[callee][caller] = True
    167 
    168        def process(line):
    169            if line.startswith("#"):
    170                name = line.split(" ", 1)[1]
    171                data.nameToId[name] = len(data.functionNames)
    172                data.functionNames.append(name)
    173                return
    174 
    175            if line.startswith("="):
    176                m = re.match(r"^= (\d+) (.*)", line)
    177                mangled = data.functionNames[int(m.group(1))]
    178                unmangled = m.group(2)
    179                data.nameToId[unmangled] = id
    180                data.mangledToUnmangled[mangled] = unmangled
    181                data.unmangledToMangled[unmangled] = mangled
    182                return
    183 
    184            # Sample lines:
    185            #   D 10 20
    186            #   D /3 10 20
    187            #   D 3:3 10 20
    188            # All of these mean that there is a direct call from function #10
    189            # to function #20. The latter two mean that the call is made in a
    190            # context where the 0x1 and 0x2 properties (3 == 0x1 | 0x2) are in
    191            # effect. The `/n` syntax was the original, which was then expanded
    192            # to `m:n` to allow multiple calls to be combined together when not
    193            # all calls have the same properties in effect. The `/n` syntax is
    194            # deprecated.
    195            #
    196            # The properties usually refer to "limits", eg "GC is suppressed
    197            # in the scope surrounding this call". For testing purposes, the
    198            # difference between `m` and `n` in `m:n` is currently ignored.
    199            tokens = line.split(" ")
    200            limit = 0
    201            if tokens[1].startswith("/"):
    202                attr_str = tokens.pop(1)
    203                limit = int(attr_str[1:])
    204            elif ":" in tokens[1]:
    205                attr_str = tokens.pop(1)
    206                limit = int(attr_str[0 : attr_str.index(":")])
    207 
    208            if tokens[0] in ("D", "R"):
    209                _, caller, callee = tokens
    210                add_call(lookup(caller), lookup(callee), limit)
    211            elif tokens[0] == "T":
    212                data.tags[tokens[1]].add(line.split(" ", 2)[2])
    213            elif tokens[0] in ("F", "V"):
    214                pass
    215 
    216            elif tokens[0] == "I":
    217                m = re.match(r"^I (\d+) VARIABLE ([^\,]*)", line)
    218                pass
    219 
    220        self.load_text_file("callgraph.txt", extract=process)
    221        return data
    222 
    223    def load_hazards(self):
    224        def grab_hazard(line):
    225            m = re.match(
    226                r"Function '(.*?)' has unrooted '(.*?)' of type '(.*?)' live across GC call '(.*?)' at (.*)",  # NOQA: E501
    227                line,
    228            )
    229            if m:
    230                info = list(m.groups())
    231                info[0] = info[0].split("$")[-1]
    232                info[3] = info[3].split("$")[-1]
    233                return HazardSummary(*info)
    234            return None
    235 
    236        return self.load_text_file("hazards.txt", extract=grab_hazard)
    237 
    238    def process_body(self, body):
    239        return Body(body)
    240 
    241    def process_bodies(self, bodies):
    242        return [self.process_body(b) for b in bodies]