tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

block_analyzer.py (8177B)


      1 #!/usr/bin/env python3
      2 
      3 # This Source Code Form is subject to the terms of the Mozilla Public
      4 # License, v. 2.0. If a copy of the MPL was not distributed with this
      5 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
      6 
      7 # From a scan mode DMD log, extract some information about a
      8 # particular block, such as its allocation stack or which other blocks
      9 # contain pointers to it. This can be useful when investigating leaks
     10 # caused by unknown references to refcounted objects.
     11 
     12 import argparse
     13 import gzip
     14 import json
     15 import re
     16 import sys
     17 
# The DMD output version this script handles. loadGraph() compares the log's
# "version" property against this value and raises if they differ.
outputVersion = 5

# If --ignore-alloc-fns is specified, stack frames containing functions that
# match these strings will be removed from the *start* of stack traces. (Once
# we hit a non-matching frame, any subsequent frames won't be removed even if
# they do match.)
#
# Entries are literal substrings, not patterns: cleanupTraceTable() passes each
# one through re.escape() before searching the frame descriptions, so
# characters such as "(" and "[" need no escaping here.
allocatorFns = [
    "malloc (",
    "replace_malloc",
    "replace_calloc",
    "replace_realloc",
    "replace_memalign",
    "replace_posix_memalign",
    "malloc_zone_malloc",
    "moz_xmalloc",
    "moz_xcalloc",
    "moz_xrealloc",
    "operator new(",
    "operator new[](",
    "g_malloc",
    "g_slice_alloc",
    "callocCanGC",
    "reallocCanGC",
    "vpx_malloc",
    "vpx_calloc",
    "vpx_realloc",
    "vpx_memalign",
    "js_malloc",
    "js_calloc",
    "js_realloc",
    "pod_malloc",
    "pod_calloc",
    "pod_realloc",
    "nsTArrayInfallibleAllocator::Malloc",
    "Allocator<ReplaceMallocBase>::malloc(",
    "mozilla::dmd::StackTrace::Get(",
    "mozilla::dmd::AllocCallback(",
    "mozilla::dom::DOMArena::Allocate(",
    # This one necessary to fully filter some sequences of allocation functions
    # that happen in practice. Note that ??? entries that follow non-allocation
    # functions won't be stripped, as explained above.
    "???",
]
     62 
     63 ####
     64 
     65 # Command line arguments
     66 
     67 
     68 def range_1_24(string):
     69    value = int(string)
     70    if value < 1 or value > 24:
     71        msg = f"{string:s} is not in the range 1..24"
     72        raise argparse.ArgumentTypeError(msg)
     73    return value
     74 
     75 
     76 parser = argparse.ArgumentParser(
     77    description="Analyze the heap graph to find out things about an object. \
     78 By default this prints out information about blocks that point to the given block."
     79 )
     80 
     81 parser.add_argument("dmd_log_file_name", help="clamped DMD log file name")
     82 
     83 parser.add_argument("block", help="address of the block of interest")
     84 
     85 parser.add_argument(
     86    "--info",
     87    dest="info",
     88    action="store_true",
     89    default=False,
     90    help="Print out information about the block.",
     91 )
     92 
     93 parser.add_argument(
     94    "-sfl",
     95    "--max-stack-frame-length",
     96    type=int,
     97    default=300,
     98    help="Maximum number of characters to print from each stack frame",
     99 )
    100 
    101 parser.add_argument(
    102    "-a",
    103    "--ignore-alloc-fns",
    104    action="store_true",
    105    help="ignore allocation functions at the start of traces",
    106 )
    107 
    108 parser.add_argument(
    109    "-f",
    110    "--max-frames",
    111    type=range_1_24,
    112    default=8,
    113    help="maximum number of frames to consider in each trace",
    114 )
    115 
    116 parser.add_argument(
    117    "-c",
    118    "--chain-reports",
    119    action="store_true",
    120    help="if only one block is found to hold onto the object, report the next one, too",
    121 )
    122 
    123 
    124 ####
    125 
    126 
    127 class BlockData:
    128    def __init__(self, json_block):
    129        self.addr = json_block["addr"]
    130 
    131        if "contents" in json_block:
    132            contents = json_block["contents"]
    133        else:
    134            contents = []
    135        self.contents = []
    136        for c in contents:
    137            self.contents.append(int(c, 16))
    138 
    139        self.req_size = json_block["req"]
    140 
    141        self.alloc_stack = json_block["alloc"]
    142 
    143 
    144 def print_trace_segment(args, stacks, block):
    145    (traceTable, frameTable) = stacks
    146 
    147    for l in traceTable[block.alloc_stack]:
    148        # The 5: is to remove the bogus leading "#00: " from the stack frame.
    149        print("  " + frameTable[l][5 : args.max_stack_frame_length])
    150 
    151 
    152 def show_referrers(args, blocks, stacks, block):
    153    visited = set([])
    154 
    155    anyFound = False
    156 
    157    while True:
    158        referrers = {}
    159 
    160        for b, data in blocks.items():
    161            which_edge = 0
    162            for e in data.contents:
    163                if e == block:
    164                    # 8 is the number of bytes per word on a 64-bit system.
    165                    # XXX This means that this output will be wrong for logs from 32-bit systems!
    166                    referrers.setdefault(b, []).append(8 * which_edge)
    167                    anyFound = True
    168                which_edge += 1
    169 
    170        for r in referrers:
    171            sys.stdout.write(f"0x{blocks[r].addr} size = {blocks[r].req_size} bytes")
    172            plural = "s" if len(referrers[r]) > 1 else ""
    173            print(
    174                " at byte offset"
    175                + plural
    176                + " "
    177                + (", ".join(str(x) for x in referrers[r]))
    178            )
    179            print_trace_segment(args, stacks, blocks[r])
    180            print("")
    181 
    182        if args.chain_reports:
    183            if len(referrers) == 0:
    184                sys.stdout.write("Found no more referrers.\n")
    185                break
    186            if len(referrers) > 1:
    187                sys.stdout.write("Found too many referrers.\n")
    188                break
    189 
    190            sys.stdout.write("Chaining to next referrer.\n\n")
    191            for r in referrers:
    192                block = r
    193            if block in visited:
    194                sys.stdout.write("Found a loop.\n")
    195                break
    196            visited.add(block)
    197        else:
    198            break
    199 
    200    if not anyFound:
    201        print("No referrers found.")
    202 
    203 
    204 def show_block_info(args, blocks, stacks, block):
    205    b = blocks[block]
    206    sys.stdout.write(f"block: 0x{b.addr}\n")
    207    sys.stdout.write(f"requested size: {b.req_size} bytes\n")
    208    sys.stdout.write("\n")
    209    sys.stdout.write("block contents: ")
    210    for c in b.contents:
    211        v = "0" if c == 0 else blocks[c].addr
    212        sys.stdout.write(f"0x{v} ")
    213    sys.stdout.write("\n\n")
    214    sys.stdout.write("allocation stack:\n")
    215    print_trace_segment(args, stacks, b)
    216 
    217 
    218 def cleanupTraceTable(args, frameTable, traceTable):
    219    # Remove allocation functions at the start of traces.
    220    if args.ignore_alloc_fns:
    221        # Build a regexp that matches every function in allocatorFns.
    222        escapedAllocatorFns = map(re.escape, allocatorFns)
    223        fn_re = re.compile("|".join(escapedAllocatorFns))
    224 
    225        # Remove allocator fns from each stack trace.
    226        for traceKey, frameKeys in traceTable.items():
    227            numSkippedFrames = 0
    228            for frameKey in frameKeys:
    229                frameDesc = frameTable[frameKey]
    230                if re.search(fn_re, frameDesc):
    231                    numSkippedFrames += 1
    232                else:
    233                    break
    234            if numSkippedFrames > 0:
    235                traceTable[traceKey] = frameKeys[numSkippedFrames:]
    236 
    237    # Trim the number of frames.
    238    for traceKey, frameKeys in traceTable.items():
    239        if len(frameKeys) > args.max_frames:
    240            traceTable[traceKey] = frameKeys[: args.max_frames]
    241 
    242 
    243 def loadGraph(options):
    244    # Handle gzipped input if necessary.
    245    isZipped = options.dmd_log_file_name.endswith(".gz")
    246    opener = gzip.open if isZipped else open
    247 
    248    with opener(options.dmd_log_file_name, "rb") as f:
    249        j = json.load(f)
    250 
    251    if j["version"] != outputVersion:
    252        raise Exception(f"'version' property isn't '{outputVersion:d}'")
    253 
    254    block_list = j["blockList"]
    255    blocks = {}
    256 
    257    for json_block in block_list:
    258        blocks[int(json_block["addr"], 16)] = BlockData(json_block)
    259 
    260    traceTable = j["traceTable"]
    261    frameTable = j["frameTable"]
    262 
    263    cleanupTraceTable(options, frameTable, traceTable)
    264 
    265    return (blocks, (traceTable, frameTable))
    266 
    267 
    268 def analyzeLogs():
    269    options = parser.parse_args()
    270 
    271    (blocks, stacks) = loadGraph(options)
    272 
    273    block = int(options.block, 16)
    274 
    275    if block not in blocks:
    276        print("Object " + options.block + " not found in traces.")
    277        print("It could still be the target of some nodes.")
    278        return
    279 
    280    if options.info:
    281        show_block_info(options, blocks, stacks, block)
    282        return
    283 
    284    show_referrers(options, blocks, stacks, block)
    285 
    286 
    287 if __name__ == "__main__":
    288    analyzeLogs()