generate-checksums.py (9143B)
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

import binascii
import hashlib
import os
import re
import sys
from multiprocessing.pool import ThreadPool

# Make the mozharness package (two directories above this script) importable.
sys.path.insert(1, os.path.dirname(os.path.dirname(sys.path[0])))

from mozharness.base.python import VirtualenvMixin, virtualenv_config_options
from mozharness.base.script import BaseScript
from mozharness.mozilla.checksums import parse_checksums_file
from mozharness.mozilla.merkle import MerkleTree


class ChecksumsGenerator(BaseScript, VirtualenvMixin):
    """Collect the per-upload ``.checksums``/``.beet`` files for a release
    candidate from its S3-compatible bucket, merge them, and write one big
    ``<FMT>SUMS`` file plus a Merkle-tree ``<FMT>SUMMARY`` file per
    configured hash format.
    """

    config_options = [
        [
            ["--stage-product"],
            {
                "dest": "stage_product",
                "help": "Name of product used in file server's directory structure, "
                "e.g.: firefox, mobile",
            },
        ],
        [
            ["--version"],
            {
                "dest": "version",
                "help": "Version of release, e.g.: 59.0b5",
            },
        ],
        [
            ["--build-number"],
            {
                "dest": "build_number",
                "help": "Build number of release, e.g.: 2",
            },
        ],
        [
            ["--bucket-name"],
            {
                "dest": "bucket_name",
                "help": "Full bucket name e.g.: moz-fx-productdelivery-pr-38b5-productdelivery.",
            },
        ],
        [
            ["-j", "--parallelization"],
            {
                "dest": "parallelization",
                "default": 20,
                "type": int,
                "help": "Number of checksums file to download concurrently",
            },
        ],
        [
            ["--branch"],
            {
                "dest": "branch",
                "help": "dummy option",
            },
        ],
    ] + virtualenv_config_options

    def __init__(self):
        BaseScript.__init__(
            self,
            config_options=self.config_options,
            require_config_file=False,
            config={
                "virtualenv_modules": [
                    "boto",
                ],
                "virtualenv_path": "venv",
            },
            all_actions=[
                "create-virtualenv",
                "collect-individual-checksums",
                "create-big-checksums",
                "create-summary",
            ],
            default_actions=[
                "create-virtualenv",
                "collect-individual-checksums",
                "create-big-checksums",
                "create-summary",
            ],
        )

        # Maps released file name -> parsed checksum info (as produced by
        # parse_checksums_file); filled by collect_individual_checksums.
        self.checksums = {}
        self.file_prefix = self._get_file_prefix()

    def _pre_config_lock(self, rw_config):
        super()._pre_config_lock(rw_config)

        # These defaults are set here rather than in the config because
        # default lists cannot be completely overridden, only appended to.
        if not self.config.get("formats"):
            self.config["formats"] = ["sha512", "sha256"]

        if not self.config.get("includes"):
            self.config["includes"] = [
                r"^.*\.tar\.bz2$",
                r"^.*\.tar\.xz$",
                r"^.*\.snap$",
                r"^.*\.dmg$",
                r"^.*\.pkg$",
                r"^.*\.bundle$",
                r"^.*\.mar$",
                r"^.*Setup.*\.exe$",
                r"^.*Installer\.exe$",
                r"^.*\.msi$",
                r"^.*\.xpi$",
                r"^.*fennec.*\.apk$",
                r"^.*/jsshell.*$",
            ]

    def _get_file_prefix(self):
        """Return the bucket key prefix for this release candidate's build."""
        return "pub/{}/candidates/{}-candidates/build{}/".format(
            self.config["stage_product"],
            self.config["version"],
            self.config["build_number"],
        )

    def _get_sums_filename(self, format_):
        """Return the big checksums file name for a format, e.g. SHA512SUMS."""
        return f"{format_.upper()}SUMS"

    def _get_summary_filename(self, format_):
        """Return the summary file name for a format, e.g. SHA512SUMMARY."""
        return f"{format_.upper()}SUMMARY"

    def _get_hash_function(self, format_):
        """Return the hashlib constructor for a supported format, or die."""
        if format_ in ("sha256", "sha384", "sha512"):
            return getattr(hashlib, format_)
        else:
            self.fatal(f"Unsupported format {format_}")

    def _get_bucket(self):
        """Connect anonymously to the (GCS, S3-compatible) bucket and cache it."""
        self.activate_virtualenv()
        # boto is installed into the virtualenv by the create-virtualenv
        # action, so it can only be imported after activation.
        from boto import connect_s3

        self.info("Connecting to S3")
        conn = connect_s3(anon=True, host="storage.googleapis.com")
        self.info("Connecting to bucket {}".format(self.config["bucket_name"]))
        self.bucket = conn.get_bucket(self.config["bucket_name"])
        return self.bucket

    def collect_individual_checksums(self):
        """This step grabs all of the small checksums files for the release,
        filters out any unwanted files from within them, and adds the remainder
        to self.checksums for subsequent steps to use."""
        bucket = self._get_bucket()
        self.info(f"File prefix is: {self.file_prefix}")

        # temporary holding place for checksums
        raw_checksums = []

        def worker(item):
            self.debug(f"Downloading {item}")
            sums = bucket.get_key(item).get_contents_as_string()
            raw_checksums.append(sums)

        def find_checksums_files():
            self.info("Getting key names from bucket")
            checksum_files = {"beets": [], "checksums": []}
            for key in bucket.list(prefix=self.file_prefix):
                if key.key.endswith(".checksums"):
                    self.debug(f"Found checksums file: {key.key}")
                    checksum_files["checksums"].append(key.key)
                elif key.key.endswith(".beet"):
                    self.debug(f"Found beet file: {key.key}")
                    checksum_files["beets"].append(key.key)
                else:
                    self.debug(f"Ignoring non-checksums file: {key.key}")
            # Prefer the beet format whenever any .beet files exist.
            if checksum_files["beets"]:
                self.log("Using beet format")
                return checksum_files["beets"]
            else:
                self.log("Using checksums format")
                return checksum_files["checksums"]

        # Use a context manager so the worker threads are cleaned up once
        # all downloads have completed (the pool was previously leaked).
        with ThreadPool(self.config["parallelization"]) as pool:
            pool.map(worker, find_checksums_files())

        for c in raw_checksums:
            for f, info in parse_checksums_file(c).items():
                for pattern in self.config["includes"]:
                    if re.search(pattern, f):
                        if f in self.checksums:
                            if info == self.checksums[f]:
                                self.debug(
                                    f"Duplicate checksum for file {f}"
                                    " but the data matches;"
                                    " continuing..."
                                )
                                # Already recorded identically; move on to
                                # the next include pattern.
                                continue
                            self.fatal(
                                f"Found duplicate checksum entry for {f}, "
                                "don't know which one to pick."
                            )
                        # Every configured format must be present for the
                        # big checksums/summary files to be writable later.
                        if not set(self.config["formats"]) <= set(info["hashes"]):
                            self.fatal(f"Missing necessary format for file {f}")
                        self.debug(f"Adding checksums for file: {f}")
                        self.checksums[f] = info
                        break
                else:
                    self.debug(f"Ignoring checksums for file: {f}")

    def create_summary(self):
        """
        This step computes a Merkle tree over the checksums for each format
        and writes a file containing the head of the tree and inclusion proofs
        for each file.
        """
        for fmt in self.config["formats"]:
            hash_fn = self._get_hash_function(fmt)
            # Sort file names so the tree (and thus the head/proofs) is
            # deterministic across runs.
            files = [fn for fn in sorted(self.checksums)]
            data = [self.checksums[fn]["hashes"][fmt] for fn in files]

            tree = MerkleTree(hash_fn, data)
            head = binascii.hexlify(tree.head())
            proofs = [
                binascii.hexlify(tree.inclusion_proof(i).to_rfc6962_bis())
                for i in range(len(files))
            ]

            summary = self._get_summary_filename(fmt)
            self.info(f"Creating summary file: {summary}")

            content = "{} TREE_HEAD\n".format(head.decode("ascii"))
            for i in range(len(files)):
                content += "{} {}\n".format(proofs[i].decode("ascii"), files[i])

            self.write_to_file(summary, content)

    def create_big_checksums(self):
        """Write one <FMT>SUMS file per format, listing every collected hash."""
        for fmt in self.config["formats"]:
            sums = self._get_sums_filename(fmt)
            self.info(f"Creating big checksums file: {sums}")
            with open(sums, "w+") as output_file:
                for fn in sorted(self.checksums):
                    output_file.write(
                        "{} {}\n".format(
                            self.checksums[fn]["hashes"][fmt].decode("ascii"), fn
                        )
                    )


if __name__ == "__main__":
    myScript = ChecksumsGenerator()
    myScript.run_and_exit()