tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

logalloc_munge.py (4042B)


      1 # This Source Code Form is subject to the terms of the Mozilla Public
      2 # License, v. 2.0. If a copy of the MPL was not distributed with this
      3 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
      4 
      5 """
      6 This script takes a log from the replace-malloc logalloc library on stdin
      7 and munges it so that it can be used with the logalloc-replay tool.
      8 
      9 Given the following output:
     10  13663 malloc(42)=0x7f0c33502040
     11  13663 malloc(24)=0x7f0c33503040
     12  13663 free(0x7f0c33502040)
     13 The resulting output is:
     14  1 malloc(42)=#1
     15  1 malloc(24)=#2
     16  1 free(#1)
     17 
     18 See README for more details.
     19 """
     20 
     21 import sys
     22 from collections import defaultdict, deque
     23 
     24 
     25 class IdMapping:
     26    """Class to map values to ids.
     27 
     28    Each value is associated to an increasing id, starting from 1.
     29    When a value is removed, its id is recycled and will be reused for
     30    subsequent values.
     31    """
     32 
     33    def __init__(self):
     34        self.id = 1
     35        self._values = {}
     36        self._recycle = deque()
     37 
     38    def __getitem__(self, value):
     39        if value not in self._values:
     40            if self._recycle:
     41                self._values[value] = self._recycle.popleft()
     42            else:
     43                self._values[value] = self.id
     44                self.id += 1
     45        return self._values[value]
     46 
     47    def __delitem__(self, value):
     48        if value == 0:
     49            return
     50        self._recycle.append(self._values[value])
     51        del self._values[value]
     52 
     53    def __contains__(self, value):
     54        return value == 0 or value in self._values
     55 
     56 
     57 class Ignored(Exception):
     58    pass
     59 
     60 
     61 def split_log_line(line):
     62    try:
     63        # The format for each line is:
     64        # <pid> [<tid>] <function>([<args>])[=<result>]
     65        #
     66        # The original format didn't include the tid, so we try to parse
     67        # lines whether they have one or not.
     68        pid, func_call = line.split(" ", 1)
     69        call, result = func_call.split(")")
     70        func, args = call.split("(")
     71        args = args.split(",") if args else []
     72        if result:
     73            if result[0] != "=":
     74                raise Ignored("Malformed input")
     75            result = result[1:]
     76        if " " in func:
     77            tid, func = func.split(" ", 1)
     78        else:
     79            tid = pid
     80        return pid, tid, func, args, result
     81    except Exception:
     82        raise Ignored("Malformed input")
     83 
     84 
     85 NUM_ARGUMENTS = {
     86    "jemalloc_stats": 0,
     87    "free": 1,
     88    "malloc": 1,
     89    "posix_memalign": 2,
     90    "aligned_alloc": 2,
     91    "calloc": 2,
     92    "realloc": 2,
     93    "memalign": 2,
     94    "valloc": 1,
     95 }
     96 
     97 
     98 def main():
     99    pids = IdMapping()
    100    processes = defaultdict(lambda: {"pointers": IdMapping(), "tids": IdMapping()})
    101    for line in sys.stdin:
    102        line = line.strip()
    103 
    104        try:
    105            pid, tid, func, args, result = split_log_line(line)
    106 
    107            # Replace pid with an id.
    108            pid = pids[int(pid)]
    109 
    110            process = processes[pid]
    111            tid = process["tids"][int(tid)]
    112 
    113            pointers = process["pointers"]
    114 
    115            if func not in NUM_ARGUMENTS:
    116                raise Ignored("Unknown function")
    117 
    118            if len(args) != NUM_ARGUMENTS[func]:
    119                raise Ignored("Malformed input")
    120 
    121            if func in ("jemalloc_stats", "free") and result:
    122                raise Ignored("Malformed input")
    123 
    124            if func in ("free", "realloc"):
    125                ptr = int(args[0], 16)
    126                if ptr and ptr not in pointers:
    127                    raise Ignored("Did not see an alloc for pointer")
    128                args[0] = "#%d" % pointers[ptr]
    129                del pointers[ptr]
    130 
    131            if result:
    132                result = int(result, 16)
    133                if not result:
    134                    raise Ignored("Result is NULL")
    135                result = "#%d" % pointers[result]
    136 
    137            print(
    138                "%d %d %s(%s)%s"
    139                % (pid, tid, func, ",".join(args), "=%s" % result if result else "")
    140            )
    141 
    142        except Exception as e:
    143            print('Ignored "%s": %s' % (line, e), file=sys.stderr)
    144 
    145 
    146 if __name__ == "__main__":
    147    main()