check_vanilla_allocations.py (11924B)
# vim: set ts=8 sts=4 et sw=4 tw=79:
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# ----------------------------------------------------------------------------
# All heap allocations in SpiderMonkey must go through js_malloc, js_calloc,
# js_realloc, and js_free. This is so that any embedder who uses a custom
# allocator (by defining JS_USE_CUSTOM_ALLOCATOR) will see all heap allocation
# go through that custom allocator.
#
# Therefore, the presence of any calls to "vanilla" allocation/free functions
# from within SpiderMonkey itself (e.g. malloc(), free()) is a bug. Calls from
# within mozglue and non-SpiderMonkey locations are fine; there is a list of
# exceptions that can be added to as the need arises.
#
# This script checks for the presence of such disallowed vanilla
# allocation/free function in SpiderMonkey when it's built as a library. It
# relies on |nm| from the GNU binutils, and so only works on Linux, but one
# platform is good enough to catch almost all violations.
#
# This checking is only 100% reliable in a JS_USE_CUSTOM_ALLOCATOR build in
# which the default definitions of js_malloc et al (in Utility.h) -- which call
# malloc et al -- are replaced with empty definitions. This is because the
# presence and possible inlining of the default js_malloc et al can cause
# malloc/calloc/realloc/free calls to show up in unpredictable places.
#
# Unfortunately, that configuration cannot be tested on Mozilla's standard
# testing infrastructure. Instead, by default this script only tests that none
# of the other vanilla allocation/free functions (operator new, memalign, etc)
# are present. If given the --aggressive flag, it will also check for
# malloc/calloc/realloc/free.
#
# Note: We don't check for |operator delete| and |operator delete[]|. These
# can be present somehow due to virtual destructors, but this is not too bad
# because vanilla delete/delete[] calls don't make sense without corresponding
# vanilla new/new[] calls, and any explicit calls will be caught by Valgrind's
# mismatched alloc/free checking.
# ----------------------------------------------------------------------------

import argparse
import re
import subprocess
import sys
from collections import defaultdict

import buildconfig

# The obvious way to implement this script is to search for occurrences of
# malloc et al, succeed if none are found, and fail if some are found.
# However, "none are found" does not necessarily mean "none are present" --
# this script could be buggy. (Or the output format of |nm| might change in
# the future.)
#
# So util/Utility.cpp deliberately contains a (never-called) function that
# contains a single use of all the vanilla allocation/free functions. And this
# script fails if it (a) finds uses of those functions in files other than
# util/Utility.cpp, *or* (b) fails to find them in util/Utility.cpp.

# Tracks overall success of the test.
has_failed = False


def fail(msg):
    """Print a TEST-UNEXPECTED-FAIL line for |msg| and record the failure.

    The script keeps running after a failure so that every problem is
    reported; main() turns the recorded state into the exit status.
    """
    print("TEST-UNEXPECTED-FAIL | check_vanilla_allocations.py |", msg)
    global has_failed
    has_failed = True


def main():
    """Check the library named on the command line for vanilla allocations.

    Runs |nm| on the file, collects every reference to a vanilla
    allocation/free function per object file, and reports a failure for each
    reference outside the allowed files, as well as for each expected
    reference that is missing from util/Utility.cpp.

    Always terminates through sys.exit(): status 1 if any failure was
    reported, status 0 otherwise.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--aggressive",
        action="store_true",
        help="also check for malloc, calloc, realloc, free and strdup",
    )
    parser.add_argument("file", type=str, help="name of the file to check")
    args = parser.parse_args()

    # Run |nm|. Options:
    #   -C: demangle symbol names
    #   -A: show an object filename for each undefined symbol
    nm = buildconfig.substs.get("NM") or "nm"
    cmd = [nm, "-C", "-A", args.file]
    lines = subprocess.check_output(
        cmd, universal_newlines=True, stderr=subprocess.PIPE
    ).split("\n")

    # alloc_fns contains all the vanilla allocation/free functions that we look
    # for. These are the literal (unescaped) names, as they appear in the
    # output of |nm| and in failure messages.

    operator_news = [
        # Matches |operator new(unsigned T)|, where |T| is |int| or |long|.
        r"operator new(unsigned",
        # Matches |operator new[](unsigned T)|, where |T| is |int| or |long|.
        r"operator new[](unsigned",
    ]

    # operator new may end up inlined and replaced with moz_xmalloc.
    inlined_operator_news = [
        r"moz_xmalloc",
    ]

    alloc_fns = (
        operator_news
        + inlined_operator_news
        + [
            r"memalign",
            # These three aren't available on all Linux configurations.
            # r'posix_memalign',
            # r'aligned_alloc',
            # r'valloc',
        ]
    )

    if args.aggressive:
        alloc_fns += [r"malloc", r"calloc", r"realloc", r"free", r"strdup"]

    # This is like alloc_fns, but with regexp chars escaped so that the names
    # can be embedded in a regular expression.
    alloc_fns_escaped = [re.escape(fn) for fn in alloc_fns]

    # This regexp matches the relevant lines in the output of |nm|, which look
    # like the following.
    #
    #   js/src/libjs_static.a:Utility.o:                  U malloc
    #   js/src/libjs_static.a:Utility.o: 00000000000007e0 T js::SetSourceOptions(...)
    #
    # It may also, in LTO builds, look like
    #   js/src/libjs_static.a:Utility.o: ---------------- T js::SetSourceOptions(...)
    #
    nm_line_re = re.compile(r"([^:/ ]+):\s*(?:[0-9a-fA-F]*|-*)\s+([TUw]) (.*)")
    alloc_fns_re = re.compile(r"|".join(alloc_fns_escaped))

    # This tracks which allocation/free functions have been seen, keyed by
    # (object file name, occurrence number of that file name).
    functions = defaultdict(set)
    files = defaultdict(int)

    # Files to ignore allocation/free functions from.
    ignored_files = [
        # Ignore implicit call to operator new in std::condition_variable_any.
        #
        # From intl/icu/source/common/umutex.h:
        # On Linux, the default constructor of std::condition_variable_any
        # produces an in-line reference to global operator new(), [...].
        "umutex.o",
        # Ignore allocations from decimal conversion functions inside mozglue.
        "Decimal.o",
        # Ignore use of std::string in regexp AST debug output.
        "regexp-ast.o",
        # mozglue/misc/Debug.cpp contains a call to `printf_stderr("%s", aStr.str().c_str())`
        # where `aStr` is a `std::stringstream`. In inlined opt builds, this calls
        # `operator new()` and `operator delete` for a temporary.
        "Debug.o",
    ]
    all_ignored_files = {(f, 1) for f in ignored_files}

    # Object files whose name contains any of these substrings are skipped
    # entirely.
    skipped_filename_parts = (
        # The stdc++compat library has an implicit call to operator new in
        # thread::_M_start_thread.
        "stdc++compat",
        # The memory allocator code contains calls to memalign. These are ok,
        # so we whitelist them.
        "_memory_",
        # {fmt} can do transient allocations when formatting strings
        "third_party_fmt",
        # Ignore the fuzzing code imported from m-c
        "Fuzzer",
        # Ignore the profiling pseudo-stack, since it needs to run even when
        # SpiderMonkey's allocator isn't initialized.
        "ProfilingStack",
    )

    # Would it be helpful to emit detailed line number information after a failure?
    emit_line_info = False

    prev_filename = None
    for line in lines:
        m = nm_line_re.search(line)
        if m is None:
            continue

        filename, symtype, fn = m.groups()
        if prev_filename != filename:
            # When the same filename appears multiple times, separated by other
            # file names, this denotes a different file. Thankfully, we can more
            # or less safely assume that dir1/Foo.o and dir2/Foo.o are not going
            # to be next to each other.
            files[filename] += 1
            prev_filename = filename

        # The occurrence counter above is updated before this check on
        # purpose, so skipped files still take part in the counting.
        if any(part in filename for part in skipped_filename_parts):
            continue

        if symtype == "T":
            # We can't match intl/components files by file name because in
            # non-unified builds they overlap with files in js/src.
            # So we check symbols they define, and consider files with symbols
            # in the mozilla::intl namespace to be those.
            if fn.startswith("mozilla::intl::"):
                all_ignored_files.add((filename, files[filename]))
        else:
            m = alloc_fns_re.match(fn)
            if m:
                functions[(filename, files[filename])].add(m.group(0))

    # Use an empty set when nothing at all was found for Utility.o. That is
    # exactly the "detection is broken" situation this script must report, so
    # it has to reach the per-function checks below instead of dying with a
    # KeyError here.
    util_Utility_cpp = functions.pop(("Utility.o", 1), set())
    if ("Utility.o", 2) in functions:
        fail("There should be only one Utility.o file")

    for f, n in all_ignored_files:
        functions.pop((f, n), None)
        if f in ignored_files and (f, 2) in functions:
            fail(f"There should be only one {f} file")

    for filename, n in sorted(functions):
        # Sorted so that the failure output is stable from run to run.
        for fn in sorted(functions[(filename, n)]):
            # An allocation is present in a non-special file. Fail!
            fail("'" + fn + "' present in " + filename)
            # Try to give more precise information about the offending code.
            emit_line_info = True

    # Check that all functions we expect are used in util/Utility.cpp. (This
    # will fail if the function-detection code breaks at any point.)
    # operator new and its inlined equivalent are mutually exclusive.
    has_operator_news = any(fn in operator_news for fn in util_Utility_cpp)
    has_inlined_operator_news = any(
        fn in inlined_operator_news for fn in util_Utility_cpp
    )
    if has_operator_news and has_inlined_operator_news:
        fail(
            "Both operator new and moz_xmalloc aren't expected in util/Utility.cpp at the same time"
        )

    for fn in alloc_fns:
        if fn not in util_Utility_cpp:
            # A missing operator new is fine when its inlined replacement was
            # seen, and vice versa; any other missing function is a failure.
            if (
                (fn in operator_news and not has_inlined_operator_news)
                or (fn in inlined_operator_news and not has_operator_news)
                or (fn not in operator_news and fn not in inlined_operator_news)
            ):
                fail("'" + fn + "' isn't used as expected in util/Utility.cpp")
        else:
            util_Utility_cpp.remove(fn)

    # This should never happen, but check just in case.
    if util_Utility_cpp:
        fail(
            "unexpected allocation fns used in util/Utility.cpp: "
            + ", ".join(sorted(util_Utility_cpp))
        )

    # If we found any improper references to allocation functions, try to use
    # DWARF debug info to get more accurate line number information about the
    # bad calls. This is a lot slower than 'nm -A', and it is not always
    # precise when building with --enable-optimized.
    if emit_line_info:
        print("check_vanilla_allocations.py: Source lines with allocation calls:")
        print(
            "check_vanilla_allocations.py: Accurate in unoptimized builds; "
            "util/Utility.cpp expected."
        )

        # Run |nm| (the same binary as above). Options:
        #   -u: show only undefined symbols
        #   -C: demangle symbol names
        #   -l: show line number information for each undefined symbol
        cmd = [nm, "-u", "-C", "-l", args.file]
        lines = subprocess.check_output(
            cmd, universal_newlines=True, stderr=subprocess.PIPE
        ).split("\n")

        # This regexp matches the relevant lines in the output of |nm -l|,
        # which look like the following.
        #
        #       U malloc util/Utility.cpp:117
        #
        alloc_lines_re = re.compile(
            r"[Uw] ((" + r"|".join(alloc_fns_escaped) + r").*)\s+(\S+:\d+)$"
        )

        for line in lines:
            m = alloc_lines_re.search(line)
            if m:
                print(
                    "check_vanilla_allocations.py:", m.group(1), "called at", m.group(3)
                )

    if has_failed:
        sys.exit(1)

    print("TEST-PASS | check_vanilla_allocations.py | ok")
    sys.exit(0)


if __name__ == "__main__":
    main()