testlib.py (8305B)
"""Helpers used by the analysis test scripts.

Provides the ``Test`` class, which wraps compiling a test source file with
the sixgill plugin, running ``analyze.py``, and loading the text/JSON files
it produces from the output directory.
"""

import json
import os
import re
import subprocess
import sys
from collections import defaultdict, namedtuple

from sixgill import Body

# Parent of the directory containing this file; analyze.py is looked up here.
scriptdir = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))

# One parsed line of hazards.txt (see Test.load_hazards).
HazardSummary = namedtuple(
    "HazardSummary", ["function", "variable", "type", "GCFunction", "location"]
)

# In-memory form of callgraph.txt (see Test.load_callgraph).
Callgraph = namedtuple(
    "Callgraph",
    [
        "functionNames",
        "nameToId",
        "mangledToUnmangled",
        "unmangledToMangled",
        "calleesOf",
        "callersOf",
        "tags",
        "calleeGraph",
        "callerGraph",
    ],
)


def equal(got, expected):
    """Print a diagnostic if `got` differs from `expected`.

    This only reports the mismatch on stdout; it does not raise.
    """
    if got != expected:
        print("Got '%s', expected '%s'" % (got, expected))


def extract_unmangled(func):
    """Return the part of `func` after its last '$' (all of it if none)."""
    return func.split("$")[-1]


class Test:
    """Bundle of helpers for one test: compile, analyze, and load results.

    `indir` is the directory holding the test's input sources, `outdir` the
    directory from which result files are read, and `cfg` a configuration
    object from which this class reads the attributes `cxx`,
    `sixgill_plugin`, `sixgill_bin`, `js` and `verbose`.
    """

    def __init__(self, indir, outdir, cfg, verbose=0):
        self.indir = indir
        self.outdir = outdir
        self.cfg = cfg
        self.verbose = verbose

    def infile(self, path):
        """Return `path` joined onto the test's input directory."""
        return os.path.join(self.indir, path)

    def binpath(self, prog):
        """Return the path of `prog` inside the configured sixgill bin dir."""
        return os.path.join(self.cfg.sixgill_bin, prog)

    def compile(self, source, options=""):
        """Compile `source` (relative to the input dir) with the plugin loaded.

        `options` is appended verbatim to the compiler command line;
        ``-fno-exceptions`` is added unless the caller already chose an
        exceptions setting. Raises subprocess.CalledProcessError if the
        compiler exits with a nonzero status.
        """
        # Set in this process's environment so that the child process
        # launched below inherits it.
        os.environ["CCACHE_DISABLE"] = "1"
        if "-fexceptions" not in options and "-fno-exceptions" not in options:
            options += " -fno-exceptions"
        cmd = f"{self.cfg.cxx} -c {self.infile(source)} -O3 -std=c++17 -fplugin={self.cfg.sixgill_plugin} -fplugin-arg-xgill-mangle=1 {options}"
        if self.cfg.verbose > 0:
            print("Running %s" % cmd)
        subprocess.check_call(["sh", "-c", cmd])

    def load_db_entry(self, dbname, pattern):
        """Look up an entry from an XDB database file, 'pattern' may be an exact
        matching string, or an re pattern object matching a single entry.

        Returns the JSON-decoded output of ``xdbfind``. Raises Exception if a
        pattern object matches zero keys or more than one key.
        """

        if hasattr(pattern, "match"):
            # Resolve the pattern object to exactly one key name first.
            output = subprocess.check_output(
                [self.binpath("xdbkeys"), dbname + ".xdb"], universal_newlines=True
            )
            matches = [key for key in output.splitlines() if re.search(pattern, key)]
            if len(matches) == 0:
                raise Exception("entry not found")
            if len(matches) > 1:
                raise Exception("multiple entries found")
            pattern = matches[0]

        output = subprocess.check_output(
            [self.binpath("xdbfind"), "-json", dbname + ".xdb", pattern],
            universal_newlines=True,
        )
        return json.loads(output)

    def run_analysis_script(self, startPhase="gcTypes", upto=None):
        """Write defaults.py in the current directory and run analyze.py.

        The script is run from `startPhase` (``--first``) and, if `upto` is
        given, stops after that phase (``--last``). Raises
        subprocess.CalledProcessError on a nonzero exit status.
        """
        # Use a context manager so the file is closed (and therefore fully
        # written) before the subprocess below is started.
        with open("defaults.py", "w") as fh:
            fh.write(
                f"""\
analysis_scriptdir = '{scriptdir}'
sixgill_bin = '{self.cfg.sixgill_bin}'
"""
            )
        cmd = [
            sys.executable,
            os.path.join(scriptdir, "analyze.py"),
            # Verbosity 0 -> "-q", 1 -> "", 2 or more -> "-v".
            ["-q", "", "-v"][min(self.verbose, 2)],
        ]
        cmd += ["--first", startPhase]
        if upto:
            cmd += ["--last", upto]
        cmd.append("--source=%s" % self.indir)
        cmd.append("--js=%s" % self.cfg.js)
        if self.cfg.verbose:
            print("Running " + " ".join(cmd))
        subprocess.check_call(cmd)

    def computeGCTypes(self):
        """Run only the "gcTypes" phase of the analysis."""
        self.run_analysis_script("gcTypes", upto="gcTypes")

    def computeHazards(self):
        """Run the analysis starting at "gcTypes" with no last phase given."""
        self.run_analysis_script("gcTypes")

    def load_text_file(self, filename, extract=lambda l: l):
        """Read `filename` from the output dir, mapping each line via `extract`.

        Each line is stripped of surrounding whitespace and passed to
        `extract` in file order; results that are None are dropped. Returns
        the remaining results as a list.
        """
        fullpath = os.path.join(self.outdir, filename)
        with open(fullpath) as fh:
            values = [extract(line.strip()) for line in fh]
        return [value for value in values if value is not None]

    def load_json_file(self, filename, reviver=None):
        """Load `filename` from the output dir as JSON.

        `reviver`, if given, is passed to json.load as `object_hook`.
        """
        fullpath = os.path.join(self.outdir, filename)
        with open(fullpath) as fh:
            return json.load(fh, object_hook=reviver)

    def load_gcTypes(self):
        """Parse gcTypes.txt into a defaultdict(list) of type names.

        Lines of the form ``GCxxx: name`` are collected under the key
        ``"GCxxx" + "s"``; other lines are ignored.
        """

        def grab_type(line):
            m = re.match(r"^(GC\w+): (.*)", line)
            if m:
                return (m.group(1) + "s", m.group(2))
            return None

        gctypes = defaultdict(list)
        for collection, typename in self.load_text_file(
            "gcTypes.txt", extract=grab_type
        ):
            gctypes[collection].append(typename)
        return gctypes

    def load_typeInfo(self, filename="typeInfo.txt"):
        """Load `filename` (default typeInfo.txt) from the output dir as JSON."""
        return self.load_json_file(filename)

    def load_funcInfo(self, filename="limitedFunctions.lst"):
        """Load `filename` (default limitedFunctions.lst) as JSON."""
        return self.load_json_file(filename)

    def load_gcFunctions(self):
        """Return the text after the last '$' of each gcFunctions.lst line."""
        return self.load_text_file("gcFunctions.lst", extract=extract_unmangled)

    def load_callgraph(self):
        """Parse callgraph.txt from the output dir into a Callgraph tuple.

        Function ids are indexes into `functionNames`; index 0 holds a
        placeholder so that the first "#" record receives id 1.
        """
        data = Callgraph(
            functionNames=["dummy"],
            nameToId={},
            mangledToUnmangled={},
            unmangledToMangled={},
            calleesOf=defaultdict(list),
            callersOf=defaultdict(list),
            tags=defaultdict(set),
            calleeGraph=defaultdict(dict),
            callerGraph=defaultdict(dict),
        )

        def lookup(func_id):
            # Prefer the unmangled name when one has been recorded.
            mangled = data.functionNames[int(func_id)]
            return data.mangledToUnmangled.get(mangled, mangled)

        def add_call(caller, callee, limit):
            # `limit` is accepted but not recorded.
            data.calleesOf[caller].append(callee)
            data.callersOf[callee].append(caller)
            data.calleeGraph[caller][callee] = True
            data.callerGraph[callee][caller] = True

        def process(line):
            if line.startswith("#"):
                # "#" record: defines the next function id.
                name = line.split(" ", 1)[1]
                data.nameToId[name] = len(data.functionNames)
                data.functionNames.append(name)
                return

            if line.startswith("="):
                # "= <id> <unmangled>" record: names an already-defined id.
                m = re.match(r"^= (\d+) (.*)", line)
                func_id = int(m.group(1))
                mangled = data.functionNames[func_id]
                unmangled = m.group(2)
                # Map the unmangled name to the same numeric id that the "#"
                # record assigned to the mangled name.
                data.nameToId[unmangled] = func_id
                data.mangledToUnmangled[mangled] = unmangled
                data.unmangledToMangled[unmangled] = mangled
                return

            # Sample lines:
            #   D 10 20
            #   D /3 10 20
            #   D 3:3 10 20
            # All of these mean that there is a direct call from function #10
            # to function #20. The latter two mean that the call is made in a
            # context where the 0x1 and 0x2 properties (3 == 0x1 | 0x2) are in
            # effect. The `/n` syntax was the original, which was then expanded
            # to `m:n` to allow multiple calls to be combined together when not
            # all calls have the same properties in effect. The `/n` syntax is
            # deprecated.
            #
            # The properties usually refer to "limits", eg "GC is suppressed
            # in the scope surrounding this call". For testing purposes, the
            # difference between `m` and `n` in `m:n` is currently ignored.
            tokens = line.split(" ")
            limit = 0
            if tokens[1].startswith("/"):
                attr_str = tokens.pop(1)
                limit = int(attr_str[1:])
            elif ":" in tokens[1]:
                attr_str = tokens.pop(1)
                limit = int(attr_str[0 : attr_str.index(":")])

            if tokens[0] in ("D", "R"):
                _, caller, callee = tokens
                add_call(lookup(caller), lookup(callee), limit)
            elif tokens[0] == "T":
                data.tags[tokens[1]].add(line.split(" ", 2)[2])
            elif tokens[0] in ("F", "V", "I"):
                # Recognized record types whose contents are not stored.
                pass

        # process() always returns None, so the returned list is empty and
        # only the side effects on `data` matter.
        self.load_text_file("callgraph.txt", extract=process)
        return data

    def load_hazards(self):
        """Parse hazards.txt into a list of HazardSummary tuples.

        Lines that do not match the hazard message format are skipped. The
        function and GC-call names are reduced to the text after their last
        '$'.
        """

        def grab_hazard(line):
            m = re.match(
                r"Function '(.*?)' has unrooted '(.*?)' of type '(.*?)' live across GC call '(.*?)' at (.*)",  # NOQA: E501
                line,
            )
            if m:
                info = list(m.groups())
                info[0] = info[0].split("$")[-1]
                info[3] = info[3].split("$")[-1]
                return HazardSummary(*info)
            return None

        return self.load_text_file("hazards.txt", extract=grab_hazard)

    def process_body(self, body):
        """Wrap `body` in a sixgill Body object."""
        return Body(body)

    def process_bodies(self, bodies):
        """Return a list with each element of `bodies` wrapped via process_body."""
        return [self.process_body(b) for b in bodies]