getCTKnownLogs.py (18256B)
#!/usr/bin/env python3
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

"""
Parses a JSON file listing the known Certificate Transparency logs
(log_list.json) and generates a C++ header file to be included in Firefox.

The current log_list.json file available under security/manager/tools
was originally downloaded from
https://www.gstatic.com/ct/log_list/v3/log_list.json
See more information at https://certificate.transparency.dev/google/
"""

import argparse
import base64
import datetime
import hashlib
import json
import os.path
import ssl
import sys
import textwrap
import time
from string import Template
from urllib.request import urlopen

import buildconfig
import certifi
import mozpack.path as mozpath
import rsa
from pyasn1.codec.der import decoder
from pyasn1_modules import pem, rfc2314

# NOTE(review): spacing inside the generated C++ below (indentation, two
# spaces before trailing comments) is assumed to follow clang-format output;
# confirm against the checked-in CTKnownLogs.h.
OUTPUT_TEMPLATE = """\
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/* This file was automatically generated by $prog. */

#ifndef $include_guard
#define $include_guard

#include "CTLog.h"
#include "prtime.h"

#include <stddef.h>

static const PRTime kCTExpirationTime = INT64_C($expiration_time);

namespace mozilla::ct {

enum class CTLogState {
  Admissible,  // Qualified, Usable, or ReadOnly
  Retired,
};

enum class CTLogFormat {
  RFC6962,
  Tiled,
};

struct CTLogInfo {
  // See bug 1338873 about making these fields const.
  const char* name;
  CTLogState state;
  CTLogFormat format;
  uint64_t timestamp;
  // Index within kCTLogOperatorList.
  size_t operatorIndex;
  const char* key;
  size_t keyLength;
};

struct CTLogOperatorInfo {
  // See bug 1338873 about making these fields const.
  const char* name;
  mozilla::ct::CTLogOperatorId id;
};

const CTLogInfo kCTLogList[] = {
$logs
};

const CTLogOperatorInfo kCTLogOperatorList[] = {
$operators
};

}  // namespace mozilla::ct

#endif  // $include_guard
"""


LOG_NAME_TABLE_TEMPLATE = """\
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/* This file was automatically generated by $prog. */

// prettier-ignore
export const logNameTable = $logNameTable
"""


def get_timestamp(time_str):
    """
    Convert a time string such as "2017-01-01T00:00:00Z" to an integer
    representing milliseconds since the epoch.
    Timezones in the string are not supported and will result in an exception.
    """
    t = datetime.datetime.strptime(time_str, "%Y-%m-%dT%H:%M:%SZ")
    # Both values are naive datetimes understood as UTC. An explicit epoch
    # avoids datetime.utcfromtimestamp(), deprecated since Python 3.12.
    epoch = datetime.datetime(1970, 1, 1)
    # Integer division by a 1 ms timedelta keeps the arithmetic exact.
    return (t - epoch) // datetime.timedelta(milliseconds=1)


def get_hex_lines(blob, width):
    """Convert a binary string to a multiline text of C escape sequences."""
    text = "".join(f"\\x{c:02x}" for c in blob)
    # When escaped, a single byte takes 4 chars (e.g. "\x00").
    # Make sure we don't break an escaped byte between the lines.
    return textwrap.wrap(text, width - width % 4)


def get_operator_index(json_data, target_name):
    """
    Return the index within json_data["operators"] of the operator whose
    name is target_name. Exactly one operator must carry that name.
    """
    matches = [
        index
        for (index, operator) in enumerate(json_data["operators"])
        if operator["name"] == target_name
    ]
    assert len(matches) != 0, f"No operators with name {target_name} defined."
    assert len(matches) == 1, f"Found multiple operators with name {target_name}."
    return matches[0]


LOG_INFO_TEMPLATE = """\
    {$description, $state, $log_format,
     $timestamp,  // $timestamp_comment
     $operator_index,$spaces  // $operator_comment
$indented_log_key,
     $log_key_len}"""


class UnhandledLogStateException(Exception):
    """Raised by map_state for a log state string it does not recognize."""


def map_state(state):
    """
    Maps a log state string to the appropriate CTLogState enum value or None,
    if the log state indicates that the log should not be included. Valid
    states to be included are 'qualified', 'usable', 'readonly', or 'retired'.
    Valid states that are not to be included are 'pending' or 'rejected'.
    """
    if state in {"qualified", "usable", "readonly"}:
        return "CTLogState::Admissible"
    if state == "retired":
        return "CTLogState::Retired"
    if state in {"pending", "rejected"}:
        return None
    raise UnhandledLogStateException(f"unhandled log state '{state}'")


def _wrap_if_test_only(operator, initializer):
    """
    Append the trailing comma to an initializer and, when the operator is
    marked "test_only", wrap it in an #ifdef DEBUG / #endif pair.
    """
    if operator.get("test_only"):
        return f"#ifdef DEBUG\n{initializer},\n#endif  // DEBUG"
    return f"{initializer},"


def get_initializer_for_log(log, operator, json_data, log_format):
    """
    Return the CTLogInfo initializer text for one log, or None if the log's
    state means it should not be included (see map_state).

    log is one log entry of operator, operator is an entry of
    json_data["operators"], and log_format is the CTLogFormat enumerator to
    emit verbatim.
    """
    # The first key of the "state" object is taken as the state name.
    state_name = next(iter(log["state"]))
    # Map the state first so excluded or unknown states are dealt with
    # before the timestamp is parsed.
    state = map_state(state_name)
    if state is None:
        return None
    timestamp_comment = log["state"][state_name]["timestamp"]
    timestamp = get_timestamp(timestamp_comment)
    log_key = base64.b64decode(log["key"])
    operator_name = operator["name"]
    operator_index = get_operator_index(json_data, operator_name)
    # Pad after the operator index so its trailing comment lines up with the
    # comment that follows the (longer) timestamp on the previous line.
    spaces = " " * (len(str(timestamp)) - len(str(operator_index)))
    initializer = Template(LOG_INFO_TEMPLATE).substitute(
        # Use json.dumps for C-escaping strings.
        # Not perfect but close enough.
        description=json.dumps(log["description"]),
        operator_index=operator_index,
        operator_comment=f"operated by {operator_name}".replace("/", "|"),
        state=state,
        log_format=log_format,
        timestamp=timestamp,
        spaces=spaces,
        timestamp_comment=timestamp_comment,
        # Maximum line width is 80.
        indented_log_key="\n".join(
            f'     "{line}"' for line in get_hex_lines(log_key, 74)
        ),
        log_key_len=len(log_key),
    )
    return _wrap_if_test_only(operator, initializer)


def get_log_info_structs(json_data):
    """Return array of CTLogInfo initializers for the known logs."""
    log_kinds = (
        ("logs", "CTLogFormat::RFC6962"),
        ("tiled_logs", "CTLogFormat::Tiled"),
    )
    initializers = []
    for operator in json_data["operators"]:
        # Per operator, regular logs are emitted before tiled logs.
        for key, log_format in log_kinds:
            for log in operator.get(key, []):
                initializer = get_initializer_for_log(
                    log, operator, json_data, log_format
                )
                if initializer:
                    initializers.append(initializer)
    return initializers


def get_log_operator_structs(json_data):
    """Return array of CTLogOperatorInfo initializers."""
    tmpl = Template("    {$name, $id}")
    # The id is the operator's position in the list, i.e. the same value
    # get_operator_index() yields for that operator.
    return [
        _wrap_if_test_only(
            operator,
            tmpl.substitute(name=json.dumps(operator["name"]), id=operator_id),
        )
        for operator_id, operator in enumerate(json_data["operators"])
    ]


TEN_WEEKS_IN_SECONDS = 60 * 60 * 24 * 7 * 10
MICROSECONDS_PER_SECOND = 1000000


def generate_cpp_header_file(json_data, out_file):
    """Generate the C++ header file for the known logs."""
    filename = os.path.basename(out_file.name)
    include_guard = filename.replace(".", "_").replace("/", "_")
    log_info_initializers = get_log_info_structs(json_data)
    operator_info_initializers = get_log_operator_structs(json_data)
    # kCTExpirationTime: ten weeks after generation time, in microseconds.
    expiration_time = (
        int(time.time()) + TEN_WEEKS_IN_SECONDS
    ) * MICROSECONDS_PER_SECOND
    out_file.write(
        Template(OUTPUT_TEMPLATE).substitute(
            prog=os.path.basename(sys.argv[0]),
            include_guard=include_guard,
            logs="\n".join(log_info_initializers),
            operators="\n".join(operator_info_initializers),
            expiration_time=expiration_time,
        )
    )


def generate_log_name_table_file(json_data, out_file):
    """Generate an mjs file that defines a map from log["log_id"] to log["description"] for each of the known logs."""
    entries = {}
    for operator in json_data["operators"]:
        for key in ("logs", "tiled_logs"):
            for log in operator.get(key, []):
                entries[log["log_id"]] = log.get("description", "")
    logNameTable = json.dumps(entries, indent=2, sort_keys=True)
    out_file.write(
        Template(LOG_NAME_TABLE_TEMPLATE).substitute(
            prog=os.path.basename(sys.argv[0]), logNameTable=logNameTable
        )
    )


def patch_in_test_logs(json_data):
    """Insert Mozilla-specific test log data."""
    max_id = len(json_data["operators"])
    mozilla_test_operator_1 = {
        "name": "Mozilla Test Org 1",
        "id": max_id + 1,
        "test_only": True,
        "logs": [
            {
                "description": "Mozilla Test RSA Log 1",
                # `openssl x509 -noout -pubkey -in <path/to/default-ee.pem>`
                "key": """
            MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAuohRqESOFtZB/W62iAY2
            ED08E9nq5DVKtOz1aFdsJHvBxyWo4NgfvbGcBptuGobya+KvWnVramRxCHqlWqdF
            h/cc1SScAn7NQ/weadA4ICmTqyDDSeTbuUzCa2wO7RWCD/F+rWkasdMCOosqQe6n
            cOAPDY39ZgsrsCSSpH25iGF5kLFXkD3SO8XguEgfqDfTiEPvJxbYVbdmWqp+ApAv
            OnsQgAYkzBxsl62WYVu34pYSwHUxowyR3bTK9/ytHSXTCe+5Fw6naOGzey8ib2nj
            tIqVYR3uJtYlnauRCE42yxwkBCy/Fosv5fGPmRcxuLP+SSP6clHEMdUDrNoYCjXt
            jQIDAQAB
        """,
                "state": {
                    "qualified": {
                        "timestamp": "2024-07-22T16:44:26Z",
                    },
                },
            },
            {
                "description": "Mozilla Test EC Log",
                # `openssl x509 -noout -pubkey -in <path/to/root_secp256r1_256.pem>`
                "key": """
            MFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAET7+7u2Hg+PmxpgpZrIcE4uwFC0I+
            PPcukj8sT3lLRVwqadIzRWw2xBGdBwbgDu3I0ZOQ15kbey0HowTqoEqmwA==
        """,
                "state": {
                    "qualified": {
                        "timestamp": "2024-07-22T16:44:26Z",
                    },
                },
            },
        ],
    }
    mozilla_test_operator_2 = {
        "name": "Mozilla Test Org 2",
        "id": max_id + 2,
        "test_only": True,
        "logs": [
            {
                "description": "Mozilla Test RSA Log 2",
                # `openssl x509 -noout -pubkey -in <path/to/other-test-ca.pem>`
                "key": """
            MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAwXXGUmYJn3cIKmeR8bh2
            w39c5TiwbErNIrHL1G+mWtoq3UHIwkmKxKOzwfYUh/QbaYlBvYClHDwSAkTFhKTE
            SDMF5ROMAQbPCL6ahidguuai6PNvI8XZgxO53683g0XazlHU1tzSpss8xwbrzTBw
            7JjM5AqlkdcpWn9xxb5maR0rLf7ISURZC8Wj6kn9k7HXU0BfF3N2mZWGZiVHl+1C
            aQiICBFCIGmYikP+5Izmh4HdIramnNKDdRMfkysSjOKG+n0lHAYq0n7wFvGHzdVO
            gys1uJMPdLqQqovHYWckKrH9bWIUDRjEwLjGj8N0hFcyStfehuZVLx0eGR1xIWjT
            uwIDAQAB
        """,
                "state": {
                    "qualified": {
                        "timestamp": "2024-07-22T16:44:26Z",
                    },
                },
            }
        ],
    }
    mozilla_test_operator_3 = {
        "name": "Mozilla Test Org 3",
        "id": max_id + 3,
        "test_only": True,
        "tiled_logs": [
            {
                "description": "Mozilla Test RSA Log 4",
                # `openssl x509 -noout -pubkey -in <path/to/evroot.pem>`
                "key": """
            MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAtUmJXJ0AEI0Rofmfh6nj
            0aXbXfrs8YjaV79kE2iPLORyLP8QkDjBdALJOipHPb0oau0/3NNjz1opFHcBvdgY
            12Fb+TVr1dPIM2qqjAlxNooGw81EYbk+UUJOF0S7LqrUaqs4zhloIZYfh3FKFmNp
            Pwl2HN9Na6El6s7HvicNOI94n2zfeK4xRO0oxF55KThjp6IqSJgKNqQOctV5ybkl
            3/jHkzYv/WiXp8F1TF6XyWfD6t0aroqizM40igFpuA4ootcMGpYMbzNfLaCbm2Q/
            Wr+6SeiqqYHpYOJ9h0gL3VXdlBf6GFCfu1VMz4GkOX6LqBKKNL3yeGXBieVzT7Ip
            BQIDAQAB
        """,
                "state": {
                    "qualified": {
                        "timestamp": "2025-06-25T12:09:26Z",
                    },
                },
            }
        ],
    }

    def add_log_id(log):
        # The log id is the base64 encoding of the SHA-256 digest of the
        # decoded key bytes.
        log["log_id"] = base64.b64encode(
            hashlib.sha256(base64.b64decode(log["key"])).digest()
        ).decode("utf-8")

    def add_log_ids(operator):
        for log in operator.get("tiled_logs", []):
            add_log_id(log)
        for log in operator.get("logs", []):
            add_log_id(log)

    test_operators = (
        mozilla_test_operator_1,
        mozilla_test_operator_2,
        mozilla_test_operator_3,
    )
    for test_operator in test_operators:
        add_log_ids(test_operator)
    json_data["operators"].extend(test_operators)


def get_content_at(url):
    """
    Fetch url over TLS, validating the server against the certifi CA bundle,
    and return the response body as bytes.
    """
    print("Fetching URL: ", url)
    ssl_context = ssl.create_default_context(cafile=certifi.where())
    # Close the response (and its socket) as soon as the body has been read.
    with urlopen(url, context=ssl_context) as response:
        return response.read()


def read_rsa_key(path):
    """
    Read the PEM subject public key info at the given path and
    return it as an RSA public key.
    """
    with open(path) as f:
        spki = pem.readPemFromFile(
            f, "-----BEGIN PUBLIC KEY-----", "-----END PUBLIC KEY-----"
        )
    decoded, _ = decoder.decode(spki, rfc2314.SubjectPublicKeyInfo())
    return rsa.PublicKey.load_pkcs1(
        decoded["subjectPublicKey"].asOctets(), format="DER"
    )


class UnsupportedSignatureHashAlgorithmException(Exception):
    """Raised by run when the log list signature does not use SHA-256."""


def run(args):
    """
    Load the input JSON file and generate the C++ header according to the
    command line arguments.

    With --json-file the JSON is read from disk as-is. Otherwise it is
    downloaded together with its signature, the signature is verified with
    the key in args.key_file, and the JSON is saved to args.json_file_out.
    """
    if args.json_file:
        print("Reading file: ", args.json_file)
        with open(args.json_file, "rb") as json_file:
            json_text = json_file.read()
    else:
        json_text = get_content_at(args.url)
        signature = get_content_at(args.signature_url)
        key = read_rsa_key(args.key_file)
        print("Validating signature...")
        # The value returned by rsa.verify is treated as the name of the
        # hash algorithm used by the signature.
        hash_alg = rsa.verify(json_text, signature, key)
        if hash_alg != "SHA-256":
            raise UnsupportedSignatureHashAlgorithmException(
                f"unsupported hash algorithm '{hash_alg}'"
            )
        print("Writing output: ", args.json_file_out)
        with open(args.json_file_out, "wb") as json_file_out:
            json_file_out.write(json_text)

    json_data = json.loads(json_text)
    patch_in_test_logs(json_data)

    # Operator names are written unescaped into C++ comments, so pin the
    # output encoding instead of relying on the platform default.
    print("Writing cpp header output: ", args.out)
    with open(args.out, "w", encoding="utf-8") as out_file:
        generate_cpp_header_file(json_data, out_file)

    print("Writing log name table output: ", args.log_name_table_out)
    with open(args.log_name_table_out, "w", encoding="utf-8") as out_file:
        generate_log_name_table_file(json_data, out_file)

    print("Done.")


def parse_arguments_and_run():
    """Parse the command line arguments and run the program."""
    arg_parser = argparse.ArgumentParser(
        description="Parses a JSON file listing the known "
        "Certificate Transparency logs and generates "
        "a C++ header file to be included in Firefox. "
        "Downloads the JSON file from the known source "
        "of truth by default, but can also operate on a "
        "previously-downloaded file. See https://certificate.transparency.dev/google/",
        epilog=f"Example: ./mach python {os.path.basename(sys.argv[0])}",
    )

    arg_parser.add_argument(
        "--url",
        default="https://www.gstatic.com/ct/log_list/v3/log_list.json",
        help="download the known CT logs JSON file from the specified URL (default: %(default)s)",
    )
    arg_parser.add_argument(
        "--signature-url",
        default="https://www.gstatic.com/ct/log_list/v3/log_list.sig",
        help="download the signature on the known CT logs JSON file from the specified URL (default: %(default)s)",
    )
    arg_parser.add_argument(
        "--key-file",
        default=mozpath.join(
            buildconfig.topsrcdir, "security", "manager", "tools", "log_list_pubkey.pem"
        ),
        help="verify the signature on the downloaded CT logs JSON file with the key in the specified file (default: %(default)s)",
    )
    arg_parser.add_argument(
        "--json-file",
        nargs="?",
        const=mozpath.join(
            buildconfig.topsrcdir, "security", "manager", "tools", "log_list.json"
        ),
        help="read the known CT logs JSON data from the specified file (default: %(const)s)",
    )
    arg_parser.add_argument(
        "--json-file-out",
        default=mozpath.join(
            buildconfig.topsrcdir, "security", "manager", "tools", "log_list.json"
        ),
        help="write the known CT logs JSON data to the specified file when downloading it from the given url (default: %(default)s)",
    )
    arg_parser.add_argument(
        "--log-name-table-out",
        default=mozpath.join(
            buildconfig.topsrcdir,
            "toolkit",
            "components",
            "certviewer",
            "content",
            "components",
            "logNameTable.mjs",
        ),
        help="path and filename of the log name table to be generated (default: %(default)s)",
    )
    arg_parser.add_argument(
        "--out",
        default=mozpath.join(buildconfig.topsrcdir, "security", "ct", "CTKnownLogs.h"),
        help="path and filename of the header file to be generated (default: %(default)s)",
    )

    run(arg_parser.parse_args())


if __name__ == "__main__":
    parse_arguments_and_run()