tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

explain.py (10561B)


      1 #!/usr/bin/python3
      2 # This Source Code Form is subject to the terms of the Mozilla Public
      3 # License, v. 2.0. If a copy of the MPL was not distributed with this file,
      4 # You can obtain one at http://mozilla.org/MPL/2.0/.
      5 
      6 
      7 import argparse
      8 import json
      9 import pathlib
     10 import re
     11 from html import escape
     12 
     13 SRCDIR = pathlib.Path(__file__).parent.parent.parent.absolute()
     14 
     15 parser = argparse.ArgumentParser(
     16    description="Convert the JSON output of the hazard analysis into various text files describing the results.",
     17    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
     18 )
     19 parser.add_argument("--verbose", type=bool, default=False, help="verbose output")
     20 
     21 inputs = parser.add_argument_group("Input")
     22 inputs.add_argument(
     23    "rootingHazards",
     24    nargs="?",
     25    default="rootingHazards.json",
     26    help="JSON input file describing the output of the hazard analysis",
     27 )
     28 
     29 outputs = parser.add_argument_group("Output")
     30 outputs.add_argument(
     31    "gcFunctions",
     32    nargs="?",
     33    default="gcFunctions.txt",
     34    help="file containing a list of functions that can GC",
     35 )
     36 outputs.add_argument(
     37    "hazards",
     38    nargs="?",
     39    default="hazards.txt",
     40    help="file containing the rooting hazards found",
     41 )
     42 outputs.add_argument(
     43    "extra",
     44    nargs="?",
     45    default="unnecessary.txt",
     46    help="file containing unnecessary roots",
     47 )
     48 outputs.add_argument(
     49    "refs",
     50    nargs="?",
     51    default="refs.txt",
     52    help="file containing a list of unsafe references to unrooted values",
     53 )
     54 outputs.add_argument(
     55    "html",
     56    nargs="?",
     57    default="hazards.html",
     58    help="HTML-formatted file with the hazards found",
     59 )
     60 
# Parse the command line once, at module level; the rest of the script reads
# the chosen file names and the verbose flag from this namespace.
args = parser.parse_args()
     62 
     63 
     64 # Imitate splitFunction from utility.js.
     65 def splitfunc(full):
     66    idx = full.find("$")
     67    if idx == -1:
     68        return (full, full)
     69    return (full[0:idx], full[idx + 1 :])
     70 
     71 
     72 def print_header(outfh):
     73    print(
     74        """\
     75 <!DOCTYPE html>
     76 <head>
     77 <meta charset="utf-8">
     78 <style>
     79 input {
     80  position: absolute;
     81  opacity: 0;
     82  z-index: -1;
     83 }
     84 tt {
     85  background: #eee;
     86 }
     87 .tab-label {
     88  cursor: s-resize;
     89 }
     90 .tab-label a {
     91  color: #222;
     92 }
     93 .tab-label:hover {
     94  background: #eee;
     95 }
     96 .tab-label::after {
     97  content: " \\25B6";
     98  width: 1em;
     99  height: 1em;
    100  color: #75f;
    101  text-align: center;
    102  transition: all 0.35s;
    103 }
    104 .accorntent {
    105  max-height: 0;
    106  padding: 0 1em;
    107  color: #2c3e50;
    108  overflow: hidden;
    109  background: white;
    110  transition: all 0.35s;
    111 }
    112 
    113 input:checked + .tab-label::after {
    114  transform: rotate(90deg);
    115  content: " \\25BC";
    116 }
    117 input:checked + .tab-label {
    118  cursor: n-resize;
    119 }
    120 input:checked ~ .accorntent {
    121  max-height: 100vh;
    122 }
    123 </style>
    124 </head>
    125 <body>""",
    126        file=outfh,
    127    )
    128 
    129 
    130 def print_footer(outfh):
    131    print("</ol></body>", file=outfh)
    132 
    133 
    134 def sourcelink(symbol=None, loc=None, range=None):
    135    if symbol:
    136        return f"https://searchfox.org/mozilla-central/search?q=symbol:{symbol}"
    137    elif range:
    138        filename, lineno = loc.split(":")
    139        [f0, l0] = range[0]
    140        [f1, l1] = range[1]
    141        if f0 == f1 and l1 > l0:
    142            return f"../{filename}?L={l0}-{l1 - 1}#{l0}"
    143        else:
    144            return f"../{filename}?L={l0}#{l0}"
    145    elif loc:
    146        filename, lineno = loc.split(":")
    147        return f"../{filename}?L={lineno}#{lineno}"
    148    else:
    149        raise Exception("missing argument to sourcelink()")
    150 
    151 
    152 def quoted_dict(d):
    153    return {k: escape(v) for k, v in d.items() if type(v) is str}
    154 
    155 
    156 num_hazards = 0
    157 num_refs = 0
    158 num_missing = 0
    159 
    160 try:
    161    with open(args.rootingHazards) as rootingHazards, open(
    162        args.hazards, "w"
    163    ) as hazards, open(args.extra, "w") as extra, open(args.refs, "w") as refs, open(
    164        args.html, "w"
    165    ) as html:
    166        current_gcFunction = None
    167 
    168        hazardousGCFunctions = set()
    169 
    170        results = json.load(rootingHazards)
    171        print_header(html)
    172 
    173        when = min((r for r in results if r["record"] == "time"), key=lambda r: r["t"])[
    174            "iso"
    175        ]
    176        line = f"Time: {when}"
    177        print(line, file=hazards)
    178        print(line, file=extra)
    179        print(line, file=refs)
    180 
    181        checkboxCounter = 0
    182        hazard_results = []
    183        seen_time = False
    184        for result in results:
    185            if result["record"] == "unrooted":
    186                hazard_results.append(result)
    187                gccall_mangled, _ = splitfunc(result["gccall"])
    188                hazardousGCFunctions.add(gccall_mangled)
    189                if not result.get("expected"):
    190                    num_hazards += 1
    191 
    192            elif result["record"] == "unnecessary":
    193                print(
    194                    "\nFunction '{mangled}' has unnecessary root '{variable}' of type {type} at {loc}".format(
    195                        **result
    196                    ),
    197                    file=extra,
    198                )
    199 
    200            elif result["record"] == "address":
    201                print(
    202                    (
    203                        "\nFunction '{functionName}'"
    204                        " takes unsafe address of unrooted '{variable}'"
    205                        " at {loc}"
    206                    ).format(**result),
    207                    file=refs,
    208                )
    209                num_refs += 1
    210 
    211            elif result["record"] == "missing":
    212                print(
    213                    "\nFunction '{functionName}' expected hazard(s) but none were found at {loc}".format(
    214                        **result
    215                    ),
    216                    file=hazards,
    217                )
    218                num_missing += 1
    219 
    220        readable2mangled = {}
    221        with open(args.gcFunctions) as gcFunctions:
    222            gcExplanations = {}  # gcFunction => stack showing why it can GC
    223 
    224            current_func = None
    225            explanation = []
    226            for line in gcFunctions:
    227                if m := re.match(r"^GC Function: (.*)", line):
    228                    if current_func:
    229                        gcExplanations[splitfunc(current_func)[0]] = explanation
    230                    functionName = m.group(1)
    231                    mangled, readable = splitfunc(functionName)
    232                    if mangled not in hazardousGCFunctions:
    233                        current_func = None
    234                        continue
    235                    current_func = functionName
    236                    if readable != mangled:
    237                        readable2mangled[readable] = mangled
    238                    # TODO: store the mangled name here, and change
    239                    # gcFunctions.txt -> gcFunctions.json and key off of the mangled name.
    240                    explanation = [readable]
    241                elif current_func:
    242                    explanation.append(line.strip())
    243            if current_func:
    244                gcExplanations[splitfunc(current_func)[0]] = explanation
    245 
    246        print(
    247            "Found %d hazards, %d unsafe references, %d missing."
    248            % (num_hazards, num_refs, num_missing),
    249            file=html,
    250        )
    251        print("<ol>", file=html)
    252 
    253        for result in hazard_results:
    254            (result["gccall_mangled"], result["gccall_readable"]) = splitfunc(
    255                result["gccall"]
    256            )
    257            # Attempt to extract out the function name. Won't handle `Foo<int, Bar<int>>::Foo()`.
    258            if m := re.search(r"((?:\w|:|<[^>]*?>)+)\(", result["gccall_readable"]):
    259                result["gccall_short"] = m.group(1) + "()"
    260            else:
    261                result["gccall_short"] = result["gccall_readable"]
    262            if result.get("expected"):
    263                print("\nThis is expected, but ", end="", file=hazards)
    264            else:
    265                print("\nFunction ", end="", file=hazards)
    266            print(
    267                "'{readable}' has unrooted '{variable}'"
    268                " of type '{type}' live across GC call '{gccall_readable}' at {loc}".format(
    269                    **result
    270                ),
    271                file=hazards,
    272            )
    273            for edge in result["trace"]:
    274                print("    {lineText}: {edgeText}".format(**edge), file=hazards)
    275            explanation = gcExplanations.get(result["gccall_mangled"])
    276            explanation = explanation or gcExplanations.get(
    277                readable2mangled.get(
    278                    result["gccall_readable"], result["gccall_readable"]
    279                ),
    280                [],
    281            )
    282            if explanation:
    283                print("GC Function: " + explanation[0], file=hazards)
    284                for func in explanation[1:]:
    285                    print("   " + func, file=hazards)
    286            print(file=hazards)
    287 
    288            if result.get("expected"):
    289                continue
    290 
    291            cfgid = f"CFG_{checkboxCounter}"
    292            gcid = f"GC_{checkboxCounter}"
    293            checkboxCounter += 1
    294            print(
    295                (
    296                    "<li><ul>\n"
    297                    "<li>Function <a href='{symbol_url}'>{readable}</a>\n"
    298                    "<li>has unrooted <tt>{variable}</tt> of type '<tt>{type}</tt>'\n"
    299                    "<li><input type='checkbox' id='{cfgid}'><label class='tab-label' for='{cfgid}'>"
    300                    "live across GC call to"
    301                    "</label>\n"
    302                    "<div class='accorntent'>\n"
    303                ).format(
    304                    **quoted_dict(result),
    305                    symbol_url=sourcelink(symbol=result["mangled"]),
    306                    cfgid=cfgid,
    307                ),
    308                file=html,
    309            )
    310            for edge in result["trace"]:
    311                print(
    312                    "<pre>    {lineText}: {edgeText}</pre>".format(**quoted_dict(edge)),
    313                    file=html,
    314                )
    315            print("</div>", file=html)
    316            print(
    317                "<li><input type='checkbox' id='{gcid}'><label class='tab-label' for='{gcid}'>"
    318                "<a href='{loc_url}'><tt>{gccall_short}</tt></a> at {loc}"
    319                "</label>\n"
    320                "<div class='accorntent'>".format(
    321                    **quoted_dict(result),
    322                    loc_url=sourcelink(range=result["gcrange"], loc=result["loc"]),
    323                    gcid=gcid,
    324                ),
    325                file=html,
    326            )
    327            for func in explanation:
    328                print(f"<pre>{escape(func)}</pre>", file=html)
    329            print("</div><hr></ul>", file=html)
    330 
    331        print_footer(html)
    332 
    333 except OSError as e:
    334    print("Failed: %s" % str(e))
    335 
    336 if args.verbose:
    337    print("Wrote %s" % args.hazards)
    338    print("Wrote %s" % args.extra)
    339    print("Wrote %s" % args.refs)
    340    print("Wrote %s" % args.html)
    341 
    342 print(
    343    "Found %d hazards %d unsafe references %d missing"
    344    % (num_hazards, num_refs, num_missing)
    345 )