regen_root_ca_metadata.py (12034B)
#!/usr/bin/env python3

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.

import base64
import hashlib
import sys
from pathlib import Path
from string import Template

# This script regenerates telemetry IDs for the TLS server auth trust
# anchors included in the build. This script must be run whenever new
# roots are added (typically when updating NSS). The values for
# previously-known roots are re-used, and new values are assigned to new
# roots. Given the IDs and a list of sha256 hashes of the roots, this
# script outputs both a human-readable list (KnownRootHashes.txt) and a
# C++ source file (RootHashes.inc) that is included in the build.
# Currently these IDs are used in the metrics
# cert.validation_success_by_ca_2, cert_pinning.failures_by_ca_2, and
# ssl.ct_policy_non_compliant_connections_by_ca_2.


class Attribute:
    """Helper class to keep track of attribute (name, data type,
    value) tuples.

    `value` is `bytes` for 'MULTILINE_OCTAL' attributes and the raw
    remainder of the line (a `str`) for every other data type."""

    def __init__(self, name, data_type, value):
        self.name = name
        self.data_type = data_type
        self.value = value


def skip_object_separator(stream):
    """Objects are separated by one or more blank lines. Advance the stream to
    the start of the next object or to the end of file.

    The stream must support `tell()`/`seek()`, and must be read with
    `readline()` rather than iterated: iterating a text file disables
    `tell()`."""
    pos = stream.tell()
    line = stream.readline()
    while line and not line.strip():
        pos = stream.tell()
        line = stream.readline()
    # We have read one line too far (the first line of the next object, or
    # EOF); rewind to just before it.
    stream.seek(pos)


def maybe_read_attribute(stream):
    """Skipping any comments (lines starting with '#'), maybe
    read a (attribute name, data type, value) tuple from the
    stream. For example, 'CKA_CLASS CK_OBJECT_CLASS
    CKO_CERTIFICATE' has name 'CKA_CLASS', data type
    'CK_OBJECT_CLASS', and value 'CKO_CERTIFICATE'. If the data
    type is 'MULTILINE_OCTAL', the value begins on the next line
    and consists of a series of octal values until a line
    consisting solely of 'END' is encountered. If a blank line
    is encountered, there are no more attributes in the current
    object being read.

    Returns an `Attribute`, or None when a blank line or end of file is
    reached (in which case the stream is left at the start of the next
    object). Raises ValueError if a line does not have the expected
    'name type value' shape or contains a malformed octal escape."""

    line = stream.readline()
    while line.startswith("#"):
        line = stream.readline()
    if not line.strip():
        # Blank line or EOF: the current object is finished.
        skip_object_separator(stream)
        return None
    (name, data_type_and_value) = line.strip().split(" ", maxsplit=1)
    if data_type_and_value == "MULTILINE_OCTAL":
        data_type = "MULTILINE_OCTAL"
        octets = bytearray()
        line = stream.readline()
        while line and line.strip() != "END":
            # Each line looks like '\060\202...'; the piece before the
            # first backslash is empty, hence the [1:].
            octets.extend(
                int(octal, base=8) for octal in line.strip().split("\\")[1:]
            )
            line = stream.readline()
        value = bytes(octets)
    else:
        (data_type, value) = data_type_and_value.split(" ", maxsplit=1)
    return Attribute(name, data_type, value)


class Object:
    """Helper class representing objects, each of which consist
    of a series of attributes."""

    def __init__(self, attributes):
        self.attributes = attributes

    def get_attribute_value(self, name):
        """Helper function to get the value of a particular
        attribute, if present. Returns None otherwise. If the
        attribute appears more than once, the first value wins."""
        for attribute in self.attributes:
            if attribute.name == name:
                return attribute.value
        return None

    def clss(self):
        """Get the 'CKA_CLASS' attribute."""
        return self.get_attribute_value("CKA_CLASS")

    def label(self):
        """Get the 'CKA_LABEL' attribute, removing any leading
        and trailing '"'."""
        return self.get_attribute_value("CKA_LABEL").removeprefix('"').removesuffix('"')

    def sha1(self):
        """Get the 'CKA_CERT_SHA1_HASH' attribute. Calculates it
        based on the 'CKA_VALUE' attribute if it is not
        present. Returns None if neither attribute is present."""
        digest = self.get_attribute_value("CKA_CERT_SHA1_HASH")
        if digest:
            return digest
        value = self.get_attribute_value("CKA_VALUE")
        if value:
            return hashlib.sha1(value).digest()
        return None

    def sha256(self):
        """Calculates and returns the sha256 hash of the
        'CKA_VALUE' attribute, or None if it is absent or empty."""
        value = self.get_attribute_value("CKA_VALUE")
        if not value:
            return None
        return hashlib.sha256(value).digest()

    def sha256base64(self):
        """Calculates and returns the sha256 hash of the
        'CKA_VALUE' attribute, base64-encoded, or None if the
        attribute is absent or empty."""
        digest = self.sha256()
        if digest is None:
            return None
        return base64.b64encode(digest).decode("ascii")

    def trust_server_auth(self):
        """Get the 'CKA_TRUST_SERVER_AUTH' attribute."""
        return self.get_attribute_value("CKA_TRUST_SERVER_AUTH")


def maybe_read_object(stream):
    """Maybe read an object, which is a series of one or more
    attributes. Returns None if no more attributes are in the
    stream."""
    attributes = []
    while (attribute := maybe_read_attribute(stream)) is not None:
        attributes.append(attribute)
    if attributes:
        return Object(attributes)
    return None


def read_certdata(path):
    """Read a certdata.txt file at the given path and return all
    certificate objects that are TLS server auth trust anchors,
    sorted by sha256 hash."""
    objects = []
    with open(path, encoding="utf-8") as certdata:
        # Discard everything up until the "BEGINDATA" line. This has to use
        # readline() (not iteration) because the object parser relies on
        # tell()/seek().
        line = certdata.readline()
        while line and line.strip() != "BEGINDATA":
            line = certdata.readline()
        while (obj := maybe_read_object(certdata)) is not None:
            objects.append(obj)
    # Get all certificate objects.
    certificates = [o for o in objects if o.clss() == "CKO_CERTIFICATE"]
    # Get a map of all sha1 hashes of certificates to trust objects.
    trusts = {o.sha1(): o for o in objects if o.clss() == "CKO_NSS_TRUST"}
    # Get a list of certificates where the sha1 hash of each certificate
    # corresponds to a trust object indicating that that certificate is
    # a trust anchor.
    server_auth_trust_anchors = [
        c
        for c in certificates
        if c.sha1() in trusts
        and trusts[c.sha1()].trust_server_auth() == "CKT_NSS_TRUSTED_DELEGATOR"
    ]
    server_auth_trust_anchors.sort(key=Object.sha256)
    return server_auth_trust_anchors


class RootHash:
    """Helper class to keep track of (certificate sha256 digest,
    bin number, label) tuples.

    Attributes:
        digest_b64: the sha256 digest, base64-encoded (str).
        digest: the same digest as raw bytes.
        bin_number: the telemetry bin assigned to this root (int).
        label: the human-readable name of the root (str).
    """

    def __init__(self, digest_b64, bin_number, label):
        self.digest_b64 = digest_b64
        self.digest = base64.b64decode(digest_b64)
        self.bin_number = bin_number
        self.label = label


def read_known_root_hashes(path):
    """Read the known (sha256 digest, bin number, label) tuples
    from the file at the given path.

    Returns a dict mapping the base64 digest to its `RootHash`, in file
    order. Comment lines (starting with '#') and blank lines are
    ignored. Raises ValueError on a malformed line."""
    known_root_hashes = {}
    with open(path, encoding="utf-8") as known_root_hashes_data:
        for line in known_root_hashes_data:
            stripped = line.strip()
            # Lines beginning with '#' are comments.
            if not stripped or line.startswith("#"):
                continue
            (digest_b64, bin_number, label) = stripped.split(" ", maxsplit=2)
            known_root_hashes[digest_b64] = RootHash(
                digest_b64, int(bin_number), label
            )
    return known_root_hashes


KNOWN_ROOT_HASHES_HEADER = """\
# This is a generated file.
"""


def write_known_root_hashes(path, known_root_hashes):
    """Write the known root hashes as a flat list of tuples to
    the given path.

    The file is written as UTF-8 so that it round-trips through
    read_known_root_hashes regardless of the platform's locale."""
    with open(path, "w", encoding="utf-8") as f:
        f.write(KNOWN_ROOT_HASHES_HEADER)
        for root_hash in known_root_hashes.values():
            f.write(
                f"{root_hash.digest_b64} {root_hash.bin_number} {root_hash.label}\n"
            )


ROOT_HASHES_HEADER = """\
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/*****************************************************************************/
/* This is an automatically generated file. If you're not                    */
/* RootCertificateTelemetryUtils.cpp, you shouldn't be #including it.        */
/*****************************************************************************/

#define HASH_LEN 32
struct CertAuthorityHash {
  // See bug 1338873 about making these fields const.
  uint8_t hash[HASH_LEN];
  int32_t binNumber;
};

static const struct CertAuthorityHash ROOT_TABLE[] = {
"""


ROOT_HASHES_ENTRY_TEMPLATE = """\
  {
    /* $label */
    { $digest_half_1
      $digest_half_2 },
    $bin_number /* Bin Number */
  },
"""


ROOT_HASHES_FOOTER = """\
};
"""


def write_root_hashes(path, certdata, known_root_hashes):
    """Write the known root hashes C++ source file for inclusion
    in the build.

    Args:
        path: where to write the generated file.
        certdata: the trust anchor objects to emit, in output order.
        known_root_hashes: dict of base64 sha256 digest -> RootHash; must
            contain an entry for every object in `certdata` (KeyError
            otherwise).
    """
    tmpl = Template(ROOT_HASHES_ENTRY_TEMPLATE)
    with open(path, "w", encoding="utf-8") as f:
        f.write(ROOT_HASHES_HEADER)
        for root in certdata:
            root_hash = known_root_hashes[root.sha256base64()]
            midpoint = len(root_hash.digest) >> 1
            # The first half keeps its trailing ',' (it is continued on the
            # next line); the second half ends the initializer list.
            digest_half_1 = "".join(
                f"0x{c:02x}, " for c in root_hash.digest[:midpoint]
            ).removesuffix(" ")
            digest_half_2 = "".join(
                f"0x{c:02x}, " for c in root_hash.digest[midpoint:]
            ).removesuffix(", ")
            f.write(
                tmpl.substitute(
                    label=root_hash.label,
                    digest_half_1=digest_half_1,
                    digest_half_2=digest_half_2,
                    bin_number=root_hash.bin_number,
                )
            )
        f.write(ROOT_HASHES_FOOTER)


# Bin numbers up to and including this one are reserved:
# 0 is reserved for "unknown" (likely indicating an error or a non-Mozilla
#   builtin roots module).
# 1 is reserved for "softoken/cert9.db".
# 2 is reserved for "external PKCS#11 module".
# 3 is reserved for "third-party root from OS".
LAST_RESERVED_BIN_NUMBER = 3


def main():
    """Regenerate KnownRootHashes.txt and RootHashes.inc. The paths are
    relative to the current working directory. Exits with status 1 if an
    input file is missing."""
    # Read and parse the certdata.txt file that will be used to build
    # the builtin roots module.
    certdata_path = Path("security/nss/lib/ckfw/builtins/certdata.txt")
    if not certdata_path.exists():
        print("Could not find certdata.txt.")
        sys.exit(1)
    certdata = read_certdata(certdata_path)

    # Read the list of known root hashes.
    known_root_hashes_path = Path("security/manager/tools/KnownRootHashes.txt")
    if not known_root_hashes_path.exists():
        print("Could not read KnownRootHashes.txt.")
        sys.exit(1)
    known_root_hashes = read_known_root_hashes(known_root_hashes_path)

    # Assign bin numbers to any newly-added roots. If there are no known
    # roots, start just past the reserved bins. Otherwise, start with one
    # more than the largest currently-known.
    next_bin_number = (
        max(
            (root_hash.bin_number for root_hash in known_root_hashes.values()),
            default=LAST_RESERVED_BIN_NUMBER,
        )
        + 1
    )
    for root in certdata:
        digest_b64 = root.sha256base64()
        if digest_b64 not in known_root_hashes:
            known_root_hashes[digest_b64] = RootHash(
                digest_b64, next_bin_number, root.label()
            )
            next_bin_number += 1
    # Save the (potentially-updated) list of known roots as a flat list
    # of tuples.
    write_known_root_hashes(known_root_hashes_path, known_root_hashes)

    # Write the array of root telemetry information as a C++ source file to
    # include in the build. Whereas the flat list of known root hashes
    # (KnownRootHashes.txt) contains all roots ever known (including removed
    # ones), this file only needs to include the roots currently in
    # certdata.txt.
    root_hashes_path = Path("security/manager/ssl/RootHashes.inc")
    write_root_hashes(root_hashes_path, certdata, known_root_hashes)


if __name__ == "__main__":
    main()