tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

getCTKnownLogs.py (18256B)


      1 #!/usr/bin/env python3
      2 # This Source Code Form is subject to the terms of the Mozilla Public
      3 # License, v. 2.0. If a copy of the MPL was not distributed with this
      4 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
      5 
      6 """
      7 Parses a JSON file listing the known Certificate Transparency logs
      8 (log_list.json) and generates a C++ header file to be included in Firefox.
      9 
     10 The current log_list.json file available under security/manager/tools
     11 was originally downloaded from
     12 https://www.gstatic.com/ct/log_list/v3/log_list.json
     13 See more information at https://certificate.transparency.dev/google/
     14 """
     15 
     16 import argparse
     17 import base64
     18 import datetime
     19 import hashlib
     20 import json
     21 import os.path
     22 import ssl
     23 import sys
     24 import textwrap
     25 import time
     26 from string import Template
     27 from urllib.request import urlopen
     28 
     29 import buildconfig
     30 import certifi
     31 import mozpack.path as mozpath
     32 import rsa
     33 from pyasn1.codec.der import decoder
     34 from pyasn1_modules import pem, rfc2314
     35 
# string.Template source for the generated C++ header.  The placeholders
# $prog, $include_guard, $expiration_time, $logs and $operators are filled in
# by generate_cpp_header_file().
OUTPUT_TEMPLATE = """\
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/* This file was automatically generated by $prog. */

#ifndef $include_guard
#define $include_guard

#include "CTLog.h"
#include "prtime.h"

#include <stddef.h>

static const PRTime kCTExpirationTime = INT64_C($expiration_time);

namespace mozilla::ct {

enum class CTLogState {
 Admissible,  // Qualified, Usable, or ReadOnly
 Retired,
};

enum class CTLogFormat {
 RFC6962,
 Tiled,
};

struct CTLogInfo {
 // See bug 1338873 about making these fields const.
 const char* name;
 CTLogState state;
 CTLogFormat format;
 uint64_t timestamp;
 // Index within kCTLogOperatorList.
 size_t operatorIndex;
 const char* key;
 size_t keyLength;
};

struct CTLogOperatorInfo {
 // See bug 1338873 about making these fields const.
 const char* name;
 mozilla::ct::CTLogOperatorId id;
};

const CTLogInfo kCTLogList[] = {
$logs
};

const CTLogOperatorInfo kCTLogOperatorList[] = {
$operators
};

}  // namespace mozilla::ct

#endif  // $include_guard
"""
     97 
     98 
# string.Template source for the generated JavaScript module.  The
# placeholders $prog and $logNameTable are filled in by
# generate_log_name_table_file().
LOG_NAME_TABLE_TEMPLATE = """\
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/* This file was automatically generated by $prog. */

// prettier-ignore
export const logNameTable = $logNameTable
"""
    109 
    110 
    111 def get_timestamp(time_str):
    112    """
    113    Convert a time string such as "2017-01-01T00:00:00Z" to an integer
    114    representing milliseconds since the epoch.
    115    Timezones in the string are not supported and will result in an exception.
    116    """
    117    t = datetime.datetime.strptime(time_str, "%Y-%m-%dT%H:%M:%SZ")
    118    epoch = datetime.datetime.utcfromtimestamp(0)
    119    seconds_since_epoch = (t - epoch).total_seconds()
    120    return int(seconds_since_epoch * 1000)
    121 
    122 
    123 def get_hex_lines(blob, width):
    124    """Convert a binary string to a multiline text of C escape sequences."""
    125    text = "".join([f"\\x{c:02x}" for c in blob])
    126    # When escaped, a single byte takes 4 chars (e.g. "\x00").
    127    # Make sure we don't break an escaped byte between the lines.
    128    return textwrap.wrap(text, width - width % 4)
    129 
    130 
    131 def get_operator_index(json_data, target_name):
    132    """Return operator's entry from the JSON along with its array index."""
    133    matches = [
    134        (operator, index)
    135        for (index, operator) in enumerate(json_data["operators"])
    136        if operator["name"] == target_name
    137    ]
    138    assert len(matches) != 0, f"No operators with id {target_name} defined."
    139    assert len(matches) == 1, f"Found multiple operators with id {target_name}."
    140    return matches[0][1]
    141 
    142 
# string.Template source for one CTLogInfo initializer.  The placeholders are
# filled in by get_initializer_for_log(); $spaces pads the operator index so
# that its trailing comment lines up with the timestamp comment above it.
LOG_INFO_TEMPLATE = """\
   {$description, $state, $log_format,
    $timestamp,  // $timestamp_comment
    $operator_index,$spaces  // $operator_comment
$indented_log_key,
    $log_key_len}"""
    149 
    150 
    151 class UnhandledLogStateException(Exception):
    152    pass
    153 
    154 
    155 def map_state(state):
    156    """
    157    Maps a log state string to the appropriate CTLogState enum value or None,
    158    if the log state indicates that the log should not be included.  Valid
    159    states to be included are 'qualified', 'usable', 'readonly', or 'retired'.
    160    Valid states that are not to be included are 'pending' or 'rejected'.
    161    """
    162    if state in {"qualified", "usable", "readonly"}:
    163        return "CTLogState::Admissible"
    164    elif state == "retired":
    165        return "CTLogState::Retired"
    166    elif state in {"pending", "rejected"}:
    167        return None
    168    else:
    169        raise UnhandledLogStateException(f"unhandled log state '{state}'")
    170 
    171 
    172 def get_initializer_for_log(log, operator, json_data, log_format):
    173    log_key = base64.b64decode(log["key"])
    174    operator_name = operator["name"]
    175    operator_index = get_operator_index(json_data, operator_name)
    176    state = list(log["state"].keys())[0]
    177    timestamp_comment = log["state"][state]["timestamp"]
    178    timestamp = get_timestamp(timestamp_comment)
    179    state = map_state(state)
    180    if state is None:
    181        return None
    182    is_test_log = "test_only" in operator and operator["test_only"]
    183    prefix = ""
    184    suffix = ","
    185    if is_test_log:
    186        prefix = "#ifdef DEBUG\n"
    187        suffix = ",\n#endif  // DEBUG"
    188    num_spaces = len(str(timestamp)) - len(str(operator_index))
    189    spaces = " " * num_spaces
    190    tmpl = Template(LOG_INFO_TEMPLATE)
    191    toappend = tmpl.substitute(
    192        # Use json.dumps for C-escaping strings.
    193        # Not perfect but close enough.
    194        description=json.dumps(log["description"]),
    195        operator_index=operator_index,
    196        operator_comment=f"operated by {operator_name}".replace("/", "|"),
    197        state=state,
    198        log_format=log_format,
    199        timestamp=timestamp,
    200        spaces=spaces,
    201        timestamp_comment=timestamp_comment,
    202        # Maximum line width is 80.
    203        indented_log_key="\n".join([f'     "{l}"' for l in get_hex_lines(log_key, 74)]),
    204        log_key_len=len(log_key),
    205    )
    206    return prefix + toappend + suffix
    207 
    208 
    209 def get_log_info_structs(json_data):
    210    """Return array of CTLogInfo initializers for the known logs."""
    211    initializers = []
    212    for operator in json_data["operators"]:
    213        if "logs" in operator:
    214            for log in operator["logs"]:
    215                initializer = get_initializer_for_log(
    216                    log, operator, json_data, "CTLogFormat::RFC6962"
    217                )
    218                if initializer:
    219                    initializers.append(initializer)
    220        if "tiled_logs" in operator:
    221            for log in operator["tiled_logs"]:
    222                initializer = get_initializer_for_log(
    223                    log, operator, json_data, "CTLogFormat::Tiled"
    224                )
    225                if initializer:
    226                    initializers.append(initializer)
    227    return initializers
    228 
    229 
    230 def get_log_operator_structs(json_data):
    231    """Return array of CTLogOperatorInfo initializers."""
    232    tmpl = Template("    {$name, $id}")
    233    initializers = []
    234    currentId = 0
    235    for operator in json_data["operators"]:
    236        prefix = ""
    237        suffix = ","
    238        is_test_log = "test_only" in operator and operator["test_only"]
    239        if is_test_log:
    240            prefix = "#ifdef DEBUG\n"
    241            suffix = ",\n#endif  // DEBUG"
    242        toappend = tmpl.substitute(name=json.dumps(operator["name"]), id=currentId)
    243        currentId += 1
    244        initializers.append(prefix + toappend + suffix)
    245    return initializers
    246 
    247 
# Added to the generation time by generate_cpp_header_file() to obtain the
# value written as kCTExpirationTime.
TEN_WEEKS_IN_SECONDS = 60 * 60 * 24 * 7 * 10
# Factor applied to that time in seconds to express it in microseconds.
MICROSECONDS_PER_SECOND = 1000000
    250 
    251 
    252 def generate_cpp_header_file(json_data, out_file):
    253    """Generate the C++ header file for the known logs."""
    254    filename = os.path.basename(out_file.name)
    255    include_guard = filename.replace(".", "_").replace("/", "_")
    256    log_info_initializers = get_log_info_structs(json_data)
    257    operator_info_initializers = get_log_operator_structs(json_data)
    258    expiration_time = (
    259        int(time.time()) + TEN_WEEKS_IN_SECONDS
    260    ) * MICROSECONDS_PER_SECOND
    261    out_file.write(
    262        Template(OUTPUT_TEMPLATE).substitute(
    263            prog=os.path.basename(sys.argv[0]),
    264            include_guard=include_guard,
    265            logs="\n".join(log_info_initializers),
    266            operators="\n".join(operator_info_initializers),
    267            expiration_time=expiration_time,
    268        )
    269    )
    270 
    271 
    272 def generate_log_name_table_file(json_data, out_file):
    273    """Generate an mjs file that defines a map from log["log_id"] to log["description"] for each of the known logs."""
    274    entries = {}
    275    for operator in json_data["operators"]:
    276        for log in operator.get("logs", []):
    277            entries[log["log_id"]] = log.get("description", "")
    278        for log in operator.get("tiled_logs", []):
    279            entries[log["log_id"]] = log.get("description", "")
    280    logNameTable = json.dumps(entries, indent=2, sort_keys=True)
    281    out_file.write(
    282        Template(LOG_NAME_TABLE_TEMPLATE).substitute(
    283            prog=os.path.basename(sys.argv[0]), logNameTable=logNameTable
    284        )
    285    )
    286 
    287 
    288 def patch_in_test_logs(json_data):
    289    """Insert Mozilla-specific test log data."""
    290    max_id = len(json_data["operators"])
    291    mozilla_test_operator_1 = {
    292        "name": "Mozilla Test Org 1",
    293        "id": max_id + 1,
    294        "test_only": True,
    295        "logs": [
    296            {
    297                "description": "Mozilla Test RSA Log 1",
    298                # `openssl x509 -noout -pubkey -in <path/to/default-ee.pem>`
    299                "key": """
    300            MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAuohRqESOFtZB/W62iAY2
    301            ED08E9nq5DVKtOz1aFdsJHvBxyWo4NgfvbGcBptuGobya+KvWnVramRxCHqlWqdF
    302            h/cc1SScAn7NQ/weadA4ICmTqyDDSeTbuUzCa2wO7RWCD/F+rWkasdMCOosqQe6n
    303            cOAPDY39ZgsrsCSSpH25iGF5kLFXkD3SO8XguEgfqDfTiEPvJxbYVbdmWqp+ApAv
    304            OnsQgAYkzBxsl62WYVu34pYSwHUxowyR3bTK9/ytHSXTCe+5Fw6naOGzey8ib2nj
    305            tIqVYR3uJtYlnauRCE42yxwkBCy/Fosv5fGPmRcxuLP+SSP6clHEMdUDrNoYCjXt
    306            jQIDAQAB
    307        """,
    308                "state": {
    309                    "qualified": {
    310                        "timestamp": "2024-07-22T16:44:26Z",
    311                    },
    312                },
    313            },
    314            {
    315                "description": "Mozilla Test EC Log",
    316                # `openssl x509 -noout -pubkey -in <path/to/root_secp256r1_256.pem`
    317                "key": """
    318            MFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAET7+7u2Hg+PmxpgpZrIcE4uwFC0I+
    319            PPcukj8sT3lLRVwqadIzRWw2xBGdBwbgDu3I0ZOQ15kbey0HowTqoEqmwA==
    320        """,
    321                "state": {
    322                    "qualified": {
    323                        "timestamp": "2024-07-22T16:44:26Z",
    324                    },
    325                },
    326            },
    327        ],
    328    }
    329    mozilla_test_operator_2 = {
    330        "name": "Mozilla Test Org 2",
    331        "id": max_id + 2,
    332        "test_only": True,
    333        "logs": [
    334            {
    335                "description": "Mozilla Test RSA Log 2",
    336                # `openssl x509 -noout -pubkey -in <path/to/other-test-ca.pem>`
    337                "key": """
    338            MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAwXXGUmYJn3cIKmeR8bh2
    339            w39c5TiwbErNIrHL1G+mWtoq3UHIwkmKxKOzwfYUh/QbaYlBvYClHDwSAkTFhKTE
    340            SDMF5ROMAQbPCL6ahidguuai6PNvI8XZgxO53683g0XazlHU1tzSpss8xwbrzTBw
    341            7JjM5AqlkdcpWn9xxb5maR0rLf7ISURZC8Wj6kn9k7HXU0BfF3N2mZWGZiVHl+1C
    342            aQiICBFCIGmYikP+5Izmh4HdIramnNKDdRMfkysSjOKG+n0lHAYq0n7wFvGHzdVO
    343            gys1uJMPdLqQqovHYWckKrH9bWIUDRjEwLjGj8N0hFcyStfehuZVLx0eGR1xIWjT
    344            uwIDAQAB
    345        """,
    346                "state": {
    347                    "qualified": {
    348                        "timestamp": "2024-07-22T16:44:26Z",
    349                    },
    350                },
    351            }
    352        ],
    353    }
    354    mozilla_test_operator_3 = {
    355        "name": "Mozilla Test Org 3",
    356        "id": max_id + 3,
    357        "test_only": True,
    358        "tiled_logs": [
    359            {
    360                "description": "Mozilla Test RSA Log 4",
    361                # `openssl x509 -noout -pubkey -in <path/to/evroot.pem>`
    362                "key": """
    363            MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAtUmJXJ0AEI0Rofmfh6nj
    364            0aXbXfrs8YjaV79kE2iPLORyLP8QkDjBdALJOipHPb0oau0/3NNjz1opFHcBvdgY
    365            12Fb+TVr1dPIM2qqjAlxNooGw81EYbk+UUJOF0S7LqrUaqs4zhloIZYfh3FKFmNp
    366            Pwl2HN9Na6El6s7HvicNOI94n2zfeK4xRO0oxF55KThjp6IqSJgKNqQOctV5ybkl
    367            3/jHkzYv/WiXp8F1TF6XyWfD6t0aroqizM40igFpuA4ootcMGpYMbzNfLaCbm2Q/
    368            Wr+6SeiqqYHpYOJ9h0gL3VXdlBf6GFCfu1VMz4GkOX6LqBKKNL3yeGXBieVzT7Ip
    369            BQIDAQAB
    370        """,
    371                "state": {
    372                    "qualified": {
    373                        "timestamp": "2025-06-25T12:09:26Z",
    374                    },
    375                },
    376            }
    377        ],
    378    }
    379 
    380    def add_log_id(log):
    381        log["log_id"] = base64.b64encode(
    382            hashlib.sha256(base64.b64decode(log["key"])).digest()
    383        ).decode("utf-8")
    384 
    385    def add_log_ids(operator):
    386        for log in operator.get("tiled_logs", []):
    387            add_log_id(log)
    388        for log in operator.get("logs", []):
    389            add_log_id(log)
    390 
    391    add_log_ids(mozilla_test_operator_1)
    392    add_log_ids(mozilla_test_operator_2)
    393    add_log_ids(mozilla_test_operator_3)
    394 
    395    json_data["operators"].append(mozilla_test_operator_1)
    396    json_data["operators"].append(mozilla_test_operator_2)
    397    json_data["operators"].append(mozilla_test_operator_3)
    398 
    399 
    400 def get_content_at(url):
    401    print("Fetching URL: ", url)
    402    ssl_context = ssl.create_default_context(cafile=certifi.where())
    403    f = urlopen(url, context=ssl_context)
    404    return f.read()
    405 
    406 
    407 def read_rsa_key(path):
    408    """
    409    Read the PEM subject public key info at the given path and
    410    return it as an RSA public key.
    411    """
    412    with open(path) as f:
    413        spki = pem.readPemFromFile(
    414            f, "-----BEGIN PUBLIC KEY-----", "-----END PUBLIC KEY-----"
    415        )
    416    decoded, _ = decoder.decode(spki, rfc2314.SubjectPublicKeyInfo())
    417    return rsa.PublicKey.load_pkcs1(
    418        decoded["subjectPublicKey"].asOctets(), format="DER"
    419    )
    420 
    421 
    422 class UnsupportedSignatureHashAlgorithmException(Exception):
    423    pass
    424 
    425 
    426 def run(args):
    427    """
    428    Load the input JSON file and generate the C++ header according to the
    429    command line arguments.
    430    """
    431    if args.json_file:
    432        print("Reading file: ", args.json_file)
    433        with open(args.json_file, "rb") as json_file:
    434            json_text = json_file.read()
    435    else:
    436        json_text = get_content_at(args.url)
    437        signature = get_content_at(args.signature_url)
    438        key = read_rsa_key(args.key_file)
    439        print("Validating signature...")
    440        hash_alg = rsa.verify(json_text, signature, key)
    441        if hash_alg != "SHA-256":
    442            raise UnsupportedSignatureHashAlgorithmException(
    443                f"unsupported hash algorithm '{hash_alg}'"
    444            )
    445        print("Writing output: ", args.json_file_out)
    446        with open(args.json_file_out, "wb") as json_file_out:
    447            json_file_out.write(json_text)
    448 
    449    json_data = json.loads(json_text)
    450    patch_in_test_logs(json_data)
    451 
    452    print("Writing cpp header output: ", args.out)
    453    with open(args.out, "w") as out_file:
    454        generate_cpp_header_file(json_data, out_file)
    455 
    456    print("Writing log name table output: ", args.log_name_table_out)
    457    with open(args.log_name_table_out, "w") as out_file:
    458        generate_log_name_table_file(json_data, out_file)
    459 
    460    print("Done.")
    461 
    462 
    463 def parse_arguments_and_run():
    464    """Parse the command line arguments and run the program."""
    465    arg_parser = argparse.ArgumentParser(
    466        description="Parses a JSON file listing the known "
    467        "Certificate Transparency logs and generates "
    468        "a C++ header file to be included in Firefox."
    469        "Downloads the JSON file from the known source "
    470        "of truth by default, but can also operate on a "
    471        "previously-downloaded file. See https://certificate.transparency.dev/google/",
    472        epilog=f"Example: ./mach python {os.path.basename(sys.argv[0])}",
    473    )
    474 
    475    arg_parser.add_argument(
    476        "--url",
    477        default="https://www.gstatic.com/ct/log_list/v3/log_list.json",
    478        help="download the known CT logs JSON file from the specified URL (default: %(default)s)",
    479    )
    480    arg_parser.add_argument(
    481        "--signature-url",
    482        default="https://www.gstatic.com/ct/log_list/v3/log_list.sig",
    483        help="download the signature on the known CT logs JSON file from the specified URL (default: %(default)s)",
    484    )
    485    arg_parser.add_argument(
    486        "--key-file",
    487        default=mozpath.join(
    488            buildconfig.topsrcdir, "security", "manager", "tools", "log_list_pubkey.pem"
    489        ),
    490        help="verify the signature on the downloaded CT logs JSON file with the key in the specified file (default: %(default)s)",
    491    )
    492    arg_parser.add_argument(
    493        "--json-file",
    494        nargs="?",
    495        const=mozpath.join(
    496            buildconfig.topsrcdir, "security", "manager", "tools", "log_list.json"
    497        ),
    498        help="read the known CT logs JSON data from the specified file (default: %(const)s)",
    499    )
    500    arg_parser.add_argument(
    501        "--json-file-out",
    502        default=mozpath.join(
    503            buildconfig.topsrcdir, "security", "manager", "tools", "log_list.json"
    504        ),
    505        help="write the known CT logs JSON data to the specified file when downloading it from the given url (default: %(default)s)",
    506    )
    507    arg_parser.add_argument(
    508        "--log-name-table-out",
    509        default=mozpath.join(
    510            buildconfig.topsrcdir,
    511            "toolkit",
    512            "components",
    513            "certviewer",
    514            "content",
    515            "components",
    516            "logNameTable.mjs",
    517        ),
    518        help="path and filename of the log name table to be generated (default: %(default)s)",
    519    )
    520    arg_parser.add_argument(
    521        "--out",
    522        default=mozpath.join(buildconfig.topsrcdir, "security", "ct", "CTKnownLogs.h"),
    523        help="path and filename of the header file to be generated (default: %(default)s)",
    524    )
    525 
    526    run(arg_parser.parse_args())
    527 
    528 
# Run the generator only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    parse_arguments_and_run()