tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

regen_root_ca_metadata.py (12034B)


      1 #!/usr/bin/env python3
      2 
      3 # This Source Code Form is subject to the terms of the Mozilla Public
      4 # License, v. 2.0. If a copy of the MPL was not distributed with this file,
      5 # You can obtain one at http://mozilla.org/MPL/2.0/.
      6 
      7 import base64
      8 import hashlib
      9 import sys
     10 from pathlib import Path
     11 from string import Template
     12 
     13 # This script regenerates telemetry IDs for the TLS server auth trust
     14 # anchors included in the build. This script must be run whenever new
     15 # roots are added (typically when updating NSS). The values for
     16 # previously-known roots are re-used, and new values are assigned to new
     17 # roots. Given the IDs and a list of sha256 hashes of the roots, this
     18 # script outputs both a human-readable list (KnownRootHashes.txt) and a
     19 # C++ source file (RootHashes.inc) that is included in the build.
     20 # Currently these IDs are used in the metrics
     21 # cert.validation_success_by_ca_2, cert_pinning.failures_by_ca_2, and
     22 # ssl.ct_policy_non_compliant_connections_by_ca_2.
     23 
     24 
     25 class Attribute:
     26    """Helper class to keep track of attribute (name, data type,
     27    value) tuples."""
     28 
     29    def __init__(self, name, data_type, value):
     30        self.name = name
     31        self.data_type = data_type
     32        self.value = value
     33 
     34 
     35 def skip_object_separator(stream):
     36    """Objects are separated by one or more blank lines. Advance the stream to
     37    the start of the next object or to the end of file."""
     38    pos = stream.tell()
     39    line = stream.readline()
     40    while line and not line.strip():
     41        pos = stream.tell()
     42        line = stream.readline()
     43    stream.seek(pos)
     44 
     45 
     46 def maybe_read_attribute(stream):
     47    """Skipping any comments (lines starting with '#'), maybe
     48    read a (attribute name, data type, value) tuple from the
     49    stream. For example, 'CKA_CLASS CK_OBJECT_CLASS
     50    CKO_CERTIFICATE' has name 'CKA_CLASS', data type
     51    'CK_OBJECT_CLASS', and value 'CKO_CERTIFICATE'. If the data
     52    type is 'MULTILINE_OCTAL', the value begins on the next line
     53    and consists of a series of octal values until a line
     54    consisting solely of 'END' is encountered. If a blank line
     55    is encountered, there are no more attributes in the current
     56    object being read."""
     57 
     58    line = stream.readline()
     59    while line.startswith("#"):
     60        line = stream.readline()
     61    if not line.strip():
     62        skip_object_separator(stream)
     63        return None
     64    (name, data_type_and_value) = line.strip().split(" ", maxsplit=1)
     65    if data_type_and_value == "MULTILINE_OCTAL":
     66        data_type = "MULTILINE_OCTAL"
     67        value = b""
     68        line = stream.readline()
     69        while line and line.strip() != "END":
     70            octets = [int(octal, base=8) for octal in line.strip().split("\\")[1:]]
     71            value += bytes(octets)
     72            line = stream.readline()
     73    else:
     74        (data_type, value) = data_type_and_value.split(" ", maxsplit=1)
     75    return Attribute(name, data_type, value)
     76 
     77 
     78 class Object:
     79    """Helper class representing objects, each of which consist
     80    of a series of attributes."""
     81 
     82    def __init__(self, attributes):
     83        self.attributes = attributes
     84 
     85    def get_attribute_value(self, name):
     86        """Helper function to get the value of a particular
     87        attribute, if present. Returns None otherwise."""
     88        for attribute in self.attributes:
     89            if attribute.name == name:
     90                return attribute.value
     91        return None
     92 
     93    def clss(self):
     94        """Get the 'CKA_CLASS' attribute."""
     95        return self.get_attribute_value("CKA_CLASS")
     96 
     97    def label(self):
     98        """Get the 'CKA_LABEL' attribute, removing any leading
     99        and trailing '"'."""
    100        return self.get_attribute_value("CKA_LABEL").removeprefix('"').removesuffix('"')
    101 
    102    def sha1(self):
    103        """Get the 'CKA_CERT_SHA1_HASH' attribute. Calculates it
    104        based on the 'CKA_VALUE' attribute if it is not
    105        present."""
    106        digest = self.get_attribute_value("CKA_CERT_SHA1_HASH")
    107        if digest:
    108            return digest
    109        value = self.get_attribute_value("CKA_VALUE")
    110        if value:
    111            return hashlib.sha1(value).digest()
    112        return None
    113 
    114    def sha256(self):
    115        """Calculates and returns the sha256 hash of the
    116        'CKA_CLASS' attribute."""
    117        value = self.get_attribute_value("CKA_VALUE")
    118        if not value:
    119            return None
    120        return hashlib.sha256(value).digest()
    121 
    122    def sha256base64(self):
    123        """Calculates and returns the sha256 hash of the
    124        'CKA_CLASS' attribute, base64-encoded."""
    125        value = self.get_attribute_value("CKA_VALUE")
    126        if not value:
    127            return None
    128        return base64.b64encode(hashlib.sha256(value).digest()).decode("ascii")
    129 
    130    def trust_server_auth(self):
    131        """Get the 'CKA_TRUST_SERVER_AUTH' attribute."""
    132        return self.get_attribute_value("CKA_TRUST_SERVER_AUTH")
    133 
    134 
    135 def maybe_read_object(stream):
    136    """Maybe read an object, which is a series of one or more
    137    attributes. Returns None if no more attributes are in the
    138    stream."""
    139    attributes = []
    140    while True:
    141        attribute = maybe_read_attribute(stream)
    142        if not attribute:
    143            break
    144        attributes.append(attribute)
    145    if attributes:
    146        return Object(attributes)
    147    return None
    148 
    149 
    150 def read_certdata(path):
    151    """Read a certdata.txt file at the given path and return all
    152    certificate objects that are TLS server auth trust anchors,
    153    sorted by sha256 hash."""
    154    certdata = open(path, encoding="utf-8")
    155    line = certdata.readline()
    156    # Discard everything up until the "BEGINDATA" line.
    157    while line and line.strip() != "BEGINDATA":
    158        line = certdata.readline()
    159    objects = []
    160    while True:
    161        object = maybe_read_object(certdata)
    162        if not object:
    163            break
    164        objects.append(object)
    165    # Get all certificate objects.
    166    certificates = [o for o in objects if o.clss() == "CKO_CERTIFICATE"]
    167    # Get a map of all sha1 hashes of certificates to trust objects.
    168    trusts = {o.sha1(): o for o in objects if o.clss() == "CKO_NSS_TRUST"}
    169    # Get a list of certificates where the sha1 hash of each certificate
    170    # corresponds to a trust object indicating that that certificate is
    171    # a trust anchor.
    172    server_auth_trust_anchors = [
    173        c
    174        for c in certificates
    175        if c.sha1() in trusts
    176        and trusts[c.sha1()].trust_server_auth() == "CKT_NSS_TRUSTED_DELEGATOR"
    177    ]
    178    server_auth_trust_anchors.sort(key=Object.sha256)
    179    return server_auth_trust_anchors
    180 
    181 
class RootHash:
    """Helper class to keep track of (certificate sha256 digest,
    bin number, label) tuples.

    NOTE: __init__ stores instance attributes named `digest` and
    `bin_number`, which shadow the methods of the same name on every
    instance. `instance.digest` and `instance.bin_number` are therefore
    the stored values, not bound methods, and calling `instance.digest()`
    would try to call a bytes object. The methods are only reachable
    through the class, e.g. `RootHash.bin_number(instance)`.
    """

    def __init__(self, digest_b64, bin_number, label):
        # Base64 text form of the digest, stored exactly as passed in.
        self.digest_b64 = digest_b64
        # Raw digest bytes decoded from the base64 text. This assignment
        # shadows the digest() method on this instance.
        self.digest = base64.b64decode(digest_b64)
        # Stored as given. This assignment shadows the bin_number() method
        # on this instance.
        self.bin_number = bin_number
        self.label = label

    def digest(self):
        """Return the `digest` instance attribute. Only callable through
        the class (`RootHash.digest(instance)`) because of the shadowing
        described on the class."""
        return self.digest

    def bin_number(self):
        """Return the `bin_number` instance attribute. Only callable
        through the class (`RootHash.bin_number(instance)`), which makes it
        usable as a key function for map() or max()."""
        return self.bin_number
    197 
    198 
    199 def read_known_root_hashes(path):
    200    """Read the known (sha256 digest, bin number, label) tuples
    201    from the file at the given path."""
    202    known_root_hashes_data = open(path, encoding="utf-8")
    203    known_root_hashes = {}
    204    line = known_root_hashes_data.readline()
    205    while line:
    206        # Lines beginning with '#' are comments.
    207        if not line.startswith("#"):
    208            (digest_b64, bin_number, label) = line.strip().split(" ", maxsplit=2)
    209            known_root_hashes[digest_b64] = RootHash(digest_b64, int(bin_number), label)
    210        line = known_root_hashes_data.readline()
    211    return known_root_hashes
    212 
    213 
    214 KNOWN_ROOT_HASHES_HEADER = """\
    215 # This is a generated file.
    216 """
    217 
    218 
    219 def write_known_root_hashes(path, known_root_hashes):
    220    """Write the known root hashes as a flat list of tuples to
    221    the given path."""
    222    with open(path, "w") as f:
    223        f.write(KNOWN_ROOT_HASHES_HEADER)
    224        for root_hash in known_root_hashes.values():
    225            f.write(
    226                f"{root_hash.digest_b64} {root_hash.bin_number} {root_hash.label}\n"
    227            )
    228 
    229 
    230 ROOT_HASHES_HEADER = """\
    231 /* This Source Code Form is subject to the terms of the Mozilla Public
    232 * License, v. 2.0. If a copy of the MPL was not distributed with this
    233 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
    234 
    235 /*****************************************************************************/
    236 /* This is an automatically generated file. If you're not                    */
    237 /* RootCertificateTelemetryUtils.cpp, you shouldn't be #including it.        */
    238 /*****************************************************************************/
    239 
    240 #define HASH_LEN 32
    241 struct CertAuthorityHash {
    242  // See bug 1338873 about making these fields const.
    243  uint8_t hash[HASH_LEN];
    244  int32_t binNumber;
    245 };
    246 
    247 static const struct CertAuthorityHash ROOT_TABLE[] = {
    248 """
    249 
    250 
    251 ROOT_HASHES_ENTRY_TEMPLATE = """\
    252  {
    253    /* $label */
    254    { $digest_half_1
    255      $digest_half_2 },
    256    $bin_number /* Bin Number */
    257  },
    258 """
    259 
    260 
    261 ROOT_HASHES_FOOTER = """\
    262 };
    263 """
    264 
    265 
    266 def write_root_hashes(path, certdata, known_root_hashes):
    267    """Write the known root hashes C++ source file for inclusion
    268    in the build."""
    269    with open(root_hashes_path, "w") as f:
    270        f.write(ROOT_HASHES_HEADER)
    271        tmpl = Template(ROOT_HASHES_ENTRY_TEMPLATE)
    272        for root in certdata:
    273            root_hash = known_root_hashes[root.sha256base64()]
    274            digest_half_1 = "".join([
    275                f"0x{c:02x}, " for c in root_hash.digest[: len(root_hash.digest) >> 1]
    276            ]).removesuffix(" ")
    277            digest_half_2 = "".join([
    278                f"0x{c:02x}, " for c in root_hash.digest[len(root_hash.digest) >> 1 :]
    279            ]).removesuffix(", ")
    280            f.write(
    281                tmpl.substitute(
    282                    label=root_hash.label,
    283                    digest_half_1=digest_half_1,
    284                    digest_half_2=digest_half_2,
    285                    bin_number=root_hash.bin_number,
    286                )
    287            )
    288        f.write(ROOT_HASHES_FOOTER)
    289 
    290 
    291 if __name__ == "__main__":
    292    # Read and parse the certdata.txt file that will be used to build
    293    # the builtin roots module.
    294    certdata_path = Path("security/nss/lib/ckfw/builtins/certdata.txt")
    295    if not certdata_path.exists():
    296        print("Could not find certdata.txt.")
    297        sys.exit(1)
    298    certdata = read_certdata(certdata_path)
    299 
    300    # Read the list of known root hashes.
    301    known_root_hashes_path = Path("security/manager/tools/KnownRootHashes.txt")
    302    if not known_root_hashes_path.exists():
    303        print("Could not read KnownRootHashes.txt.")
    304        sys.exit(1)
    305    known_root_hashes = read_known_root_hashes(known_root_hashes_path)
    306 
    307    # Assign bin numbers to any newly-added roots. If there are no known roots,
    308    # start at 4, because:
    309    # 0 is reserved for "unknown" (likely indicating an error or a non-Mozilla
    310    # builtin roots module).
    311    # 1 is reserved for "softoken/cert9.db".
    312    # 2 is reserved for "external PKCS#11 module".
    313    # 3 is reserved for "third-party root from OS".
    314    # Otherwise, start with one more than the largest currently-known.
    315    next_bin_number = (
    316        max(map(RootHash.bin_number, known_root_hashes.values()), default=3) + 1
    317    )
    318    for root in certdata:
    319        digest_b64 = root.sha256base64()
    320        if digest_b64 not in known_root_hashes:
    321            known_root_hashes[digest_b64] = RootHash(
    322                digest_b64, next_bin_number, root.label()
    323            )
    324            next_bin_number += 1
    325    # Save the (potentially-updated) list of known roots as a flat list
    326    # of tuples.
    327    write_known_root_hashes(known_root_hashes_path, known_root_hashes)
    328 
    329    # Write the array of root telemetry information as a C++ source file to
    330    # include in the build. Whereas the flat list of known root hashes
    331    # (KnownRootHashes.txt) contains all roots ever known (including removed
    332    # ones), this file only needs to include the roots currently in
    333    # certdata.txt.
    334    root_hashes_path = Path("security/manager/ssl/RootHashes.inc")
    335    write_root_hashes(root_hashes_path, certdata, known_root_hashes)