tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

check_vanilla_allocations.py (11924B)


      1 # vim: set ts=8 sts=4 et sw=4 tw=79:
      2 # This Source Code Form is subject to the terms of the Mozilla Public
      3 # License, v. 2.0. If a copy of the MPL was not distributed with this
      4 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
      5 
      6 # ----------------------------------------------------------------------------
      7 # All heap allocations in SpiderMonkey must go through js_malloc, js_calloc,
      8 # js_realloc, and js_free.  This is so that any embedder who uses a custom
      9 # allocator (by defining JS_USE_CUSTOM_ALLOCATOR) will see all heap allocation
     10 # go through that custom allocator.
     11 #
     12 # Therefore, the presence of any calls to "vanilla" allocation/free functions
     13 # from within SpiderMonkey itself (e.g. malloc(), free()) is a bug.  Calls from
     14 # within mozglue and non-SpiderMonkey locations are fine; there is a list of
     15 # exceptions that can be added to as the need arises.
     16 #
     17 # This script checks for the presence of such disallowed vanilla
# allocation/free functions in SpiderMonkey when it's built as a library.  It
     19 # relies on |nm| from the GNU binutils, and so only works on Linux, but one
     20 # platform is good enough to catch almost all violations.
     21 #
     22 # This checking is only 100% reliable in a JS_USE_CUSTOM_ALLOCATOR build in
     23 # which the default definitions of js_malloc et al (in Utility.h) -- which call
     24 # malloc et al -- are replaced with empty definitions.  This is because the
     25 # presence and possible inlining of the default js_malloc et al can cause
# malloc/calloc/realloc/free calls to show up in unpredictable places.
     27 #
     28 # Unfortunately, that configuration cannot be tested on Mozilla's standard
     29 # testing infrastructure.  Instead, by default this script only tests that none
     30 # of the other vanilla allocation/free functions (operator new, memalign, etc)
     31 # are present.  If given the --aggressive flag, it will also check for
     32 # malloc/calloc/realloc/free.
     33 #
     34 # Note:  We don't check for |operator delete| and |operator delete[]|.  These
# can be present somehow due to virtual destructors, but this is not too bad
     36 # because vanilla delete/delete[] calls don't make sense without corresponding
     37 # vanilla new/new[] calls, and any explicit calls will be caught by Valgrind's
     38 # mismatched alloc/free checking.
     39 # ----------------------------------------------------------------------------
     40 
     41 import argparse
     42 import re
     43 import subprocess
     44 import sys
     45 from collections import defaultdict
     46 
     47 import buildconfig
     48 
     49 # The obvious way to implement this script is to search for occurrences of
# malloc et al, succeed if none are found, and fail if some are found.
     51 # However, "none are found" does not necessarily mean "none are present" --
     52 # this script could be buggy.  (Or the output format of |nm| might change in
     53 # the future.)
     54 #
     55 # So util/Utility.cpp deliberately contains a (never-called) function that
     56 # contains a single use of all the vanilla allocation/free functions.  And this
     57 # script fails if it (a) finds uses of those functions in files other than
     58 # util/Utility.cpp, *or* (b) fails to find them in util/Utility.cpp.
     59 
     60 # Tracks overall success of the test.
     61 has_failed = False
     62 
     63 
     64 def fail(msg):
     65    print("TEST-UNEXPECTED-FAIL | check_vanilla_allocations.py |", msg)
     66    global has_failed
     67    has_failed = True
     68 
     69 
     70 def main():
     71    parser = argparse.ArgumentParser()
     72    parser.add_argument(
     73        "--aggressive",
     74        action="store_true",
     75        help="also check for malloc, calloc, realloc and free",
     76    )
     77    parser.add_argument("file", type=str, help="name of the file to check")
     78    args = parser.parse_args()
     79 
     80    # Run |nm|.  Options:
     81    # -C: demangle symbol names
     82    # -A: show an object filename for each undefined symbol
     83    nm = buildconfig.substs.get("NM") or "nm"
     84    cmd = [nm, "-C", "-A", args.file]
     85    lines = subprocess.check_output(
     86        cmd, universal_newlines=True, stderr=subprocess.PIPE
     87    ).split("\n")
     88 
     89    # alloc_fns contains all the vanilla allocation/free functions that we look
     90    # for. Regexp chars are escaped appropriately.
     91 
     92    operator_news = [
     93        # Matches |operator new(unsigned T)|, where |T| is |int| or |long|.
     94        r"operator new(unsigned",
     95        # Matches |operator new[](unsigned T)|, where |T| is |int| or |long|.
     96        r"operator new[](unsigned",
     97    ]
     98 
     99    # operator new may end up inlined and replaced with moz_xmalloc.
    100    inlined_operator_news = [
    101        r"moz_xmalloc",
    102    ]
    103 
    104    alloc_fns = (
    105        operator_news
    106        + inlined_operator_news
    107        + [
    108            r"memalign",
    109            # These three aren't available on all Linux configurations.
    110            # r'posix_memalign',
    111            # r'aligned_alloc',
    112            # r'valloc',
    113        ]
    114    )
    115 
    116    if args.aggressive:
    117        alloc_fns += [r"malloc", r"calloc", r"realloc", r"free", r"strdup"]
    118 
    119    # This is like alloc_fns, but regexp chars are not escaped.
    120    alloc_fns_escaped = [re.escape(fn) for fn in alloc_fns]
    121 
    122    # This regexp matches the relevant lines in the output of |nm|, which look
    123    # like the following.
    124    #
    125    #   js/src/libjs_static.a:Utility.o:                  U malloc
    126    #   js/src/libjs_static.a:Utility.o: 00000000000007e0 T js::SetSourceOptions(...)
    127    #
    128    # It may also, in LTO builds, look like
    129    #   js/src/libjs_static.a:Utility.o: ---------------- T js::SetSourceOptions(...)
    130    #
    131    nm_line_re = re.compile(r"([^:/ ]+):\s*(?:[0-9a-fA-F]*|-*)\s+([TUw]) (.*)")
    132    alloc_fns_re = re.compile(r"|".join(alloc_fns_escaped))
    133 
    134    # This tracks which allocation/free functions have been seen.
    135    functions = defaultdict(set)
    136    files = defaultdict(int)
    137 
    138    # Files to ignore allocation/free functions from.
    139    ignored_files = [
    140        # Ignore implicit call to operator new in std::condition_variable_any.
    141        #
    142        # From intl/icu/source/common/umutex.h:
    143        # On Linux, the default constructor of std::condition_variable_any
    144        # produces an in-line reference to global operator new(), [...].
    145        "umutex.o",
    146        # Ignore allocations from decimal conversion functions inside mozglue.
    147        "Decimal.o",
    148        # Ignore use of std::string in regexp AST debug output.
    149        "regexp-ast.o",
    150        # mozglue/misc/Debug.cpp contains a call to `printf_stderr("%s", aStr.str().c_str())`
    151        # where `aStr` is a `std::stringstream`. In inlined opt builds, this calls
    152        # `operator new()` and `operator delete` for a temporary.
    153        "Debug.o",
    154    ]
    155    all_ignored_files = set((f, 1) for f in ignored_files)
    156 
    157    # Would it be helpful to emit detailed line number information after a failure?
    158    emit_line_info = False
    159 
    160    prev_filename = None
    161    for line in lines:
    162        m = nm_line_re.search(line)
    163        if m is None:
    164            continue
    165 
    166        filename, symtype, fn = m.groups()
    167        if prev_filename != filename:
    168            # When the same filename appears multiple times, separated by other
    169            # file names, this denotes a different file. Thankfully, we can more
    170            # or less safely assume that dir1/Foo.o and dir2/Foo.o are not going
    171            # to be next to each other.
    172            files[filename] += 1
    173            prev_filename = filename
    174 
    175        # The stdc++compat library has an implicit call to operator new in
    176        # thread::_M_start_thread.
    177        if "stdc++compat" in filename:
    178            continue
    179 
    180        # The memory allocator code contains calls to memalign. These are ok, so
    181        # we whitelist them.
    182        if "_memory_" in filename:
    183            continue
    184 
    185        # {fmt} can do transient allocations when formatting strings
    186        if "third_party_fmt" in filename:
    187            continue
    188 
    189        # Ignore the fuzzing code imported from m-c
    190        if "Fuzzer" in filename:
    191            continue
    192 
    193        # Ignore the profiling pseudo-stack, since it needs to run even when
    194        # SpiderMonkey's allocator isn't initialized.
    195        if "ProfilingStack" in filename:
    196            continue
    197 
    198        if symtype == "T":
    199            # We can't match intl/components files by file name because in
    200            # non-unified builds they overlap with files in js/src.
    201            # So we check symbols they define, and consider files with symbols
    202            # in the mozilla::intl namespace to be those.
    203            if fn.startswith("mozilla::intl::"):
    204                all_ignored_files.add((filename, files[filename]))
    205        else:
    206            m = alloc_fns_re.match(fn)
    207            if m:
    208                functions[(filename, files[filename])].add(m.group(0))
    209 
    210    util_Utility_cpp = functions.pop(("Utility.o", 1))
    211    if ("Utility.o", 2) in functions:
    212        fail("There should be only one Utility.o file")
    213 
    214    for f, n in all_ignored_files:
    215        functions.pop((f, n), None)
    216        if f in ignored_files and (f, 2) in functions:
    217            fail(f"There should be only one {f} file")
    218 
    219    for filename, n in sorted(functions):
    220        for fn in functions[(filename, n)]:
    221            # An allocation is present in a non-special file.  Fail!
    222            fail("'" + fn + "' present in " + filename)
    223            # Try to give more precise information about the offending code.
    224            emit_line_info = True
    225 
    226    # Check that all functions we expect are used in util/Utility.cpp.  (This
    227    # will fail if the function-detection code breaks at any point.)
    228    # operator new and its inlined equivalent are mutually exclusive.
    229    has_operator_news = any(fn in operator_news for fn in util_Utility_cpp)
    230    has_inlined_operator_news = any(
    231        fn in inlined_operator_news for fn in util_Utility_cpp
    232    )
    233    if has_operator_news and has_inlined_operator_news:
    234        fail(
    235            "Both operator new and moz_xmalloc aren't expected in util/Utility.cpp at the same time"
    236        )
    237 
    238    for fn in alloc_fns:
    239        if fn not in util_Utility_cpp:
    240            if (
    241                (fn in operator_news and not has_inlined_operator_news)
    242                or (fn in inlined_operator_news and not has_operator_news)
    243                or (fn not in operator_news and fn not in inlined_operator_news)
    244            ):
    245                fail("'" + fn + "' isn't used as expected in util/Utility.cpp")
    246        else:
    247            util_Utility_cpp.remove(fn)
    248 
    249    # This should never happen, but check just in case.
    250    if util_Utility_cpp:
    251        fail(
    252            "unexpected allocation fns used in util/Utility.cpp: "
    253            + ", ".join(util_Utility_cpp)
    254        )
    255 
    256    # If we found any improper references to allocation functions, try to use
    257    # DWARF debug info to get more accurate line number information about the
    258    # bad calls. This is a lot slower than 'nm -A', and it is not always
    259    # precise when building with --enable-optimized.
    260    if emit_line_info:
    261        print("check_vanilla_allocations.py: Source lines with allocation calls:")
    262        print(
    263            "check_vanilla_allocations.py: Accurate in unoptimized builds; "
    264            "util/Utility.cpp expected."
    265        )
    266 
    267        # Run |nm|.  Options:
    268        # -u: show only undefined symbols
    269        # -C: demangle symbol names
    270        # -l: show line number information for each undefined symbol
    271        cmd = ["nm", "-u", "-C", "-l", args.file]
    272        lines = subprocess.check_output(
    273            cmd, universal_newlines=True, stderr=subprocess.PIPE
    274        ).split("\n")
    275 
    276        # This regexp matches the relevant lines in the output of |nm -l|,
    277        # which look like the following.
    278        #
    279        #       U malloc util/Utility.cpp:117
    280        #
    281        alloc_lines_re = (
    282            r"[Uw] ((" + r"|".join(alloc_fns_escaped) + r").*)\s+(\S+:\d+)$"
    283        )
    284 
    285        for line in lines:
    286            m = re.search(alloc_lines_re, line)
    287            if m:
    288                print(
    289                    "check_vanilla_allocations.py:", m.group(1), "called at", m.group(3)
    290                )
    291 
    292    if has_failed:
    293        sys.exit(1)
    294 
    295    print("TEST-PASS | check_vanilla_allocations.py | ok")
    296    sys.exit(0)
    297 
    298 
# Run the check only when this file is executed as a script; main() ends the
# process via sys.exit() with the test's status.
if __name__ == "__main__":
    main()