tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

generate-checksums.py (9143B)


      1 # This Source Code Form is subject to the terms of the Mozilla Public
      2 # License, v. 2.0. If a copy of the MPL was not distributed with this
      3 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
      4 
      5 import binascii
      6 import hashlib
      7 import os
      8 import re
      9 import sys
     10 from multiprocessing.pool import ThreadPool
     11 
     12 sys.path.insert(1, os.path.dirname(os.path.dirname(sys.path[0])))
     13 
     14 from mozharness.base.python import VirtualenvMixin, virtualenv_config_options
     15 from mozharness.base.script import BaseScript
     16 from mozharness.mozilla.checksums import parse_checksums_file
     17 from mozharness.mozilla.merkle import MerkleTree
     18 
     19 
     20 class ChecksumsGenerator(BaseScript, VirtualenvMixin):
     21    config_options = [
     22        [
     23            ["--stage-product"],
     24            {
     25                "dest": "stage_product",
     26                "help": "Name of product used in file server's directory structure, "
     27                "e.g.: firefox, mobile",
     28            },
     29        ],
     30        [
     31            ["--version"],
     32            {
     33                "dest": "version",
     34                "help": "Version of release, e.g.: 59.0b5",
     35            },
     36        ],
     37        [
     38            ["--build-number"],
     39            {
     40                "dest": "build_number",
     41                "help": "Build number of release, e.g.: 2",
     42            },
     43        ],
     44        [
     45            ["--bucket-name"],
     46            {
     47                "dest": "bucket_name",
     48                "help": "Full bucket name e.g.: moz-fx-productdelivery-pr-38b5-productdelivery.",
     49            },
     50        ],
     51        [
     52            ["-j", "--parallelization"],
     53            {
     54                "dest": "parallelization",
     55                "default": 20,
     56                "type": int,
     57                "help": "Number of checksums file to download concurrently",
     58            },
     59        ],
     60        [
     61            ["--branch"],
     62            {
     63                "dest": "branch",
     64                "help": "dummy option",
     65            },
     66        ],
     67    ] + virtualenv_config_options
     68 
     69    def __init__(self):
     70        BaseScript.__init__(
     71            self,
     72            config_options=self.config_options,
     73            require_config_file=False,
     74            config={
     75                "virtualenv_modules": [
     76                    "boto",
     77                ],
     78                "virtualenv_path": "venv",
     79            },
     80            all_actions=[
     81                "create-virtualenv",
     82                "collect-individual-checksums",
     83                "create-big-checksums",
     84                "create-summary",
     85            ],
     86            default_actions=[
     87                "create-virtualenv",
     88                "collect-individual-checksums",
     89                "create-big-checksums",
     90                "create-summary",
     91            ],
     92        )
     93 
     94        self.checksums = {}
     95        self.file_prefix = self._get_file_prefix()
     96 
     97    def _pre_config_lock(self, rw_config):
     98        super()._pre_config_lock(rw_config)
     99 
    100        # These defaults are set here rather in the config because default
    101        # lists cannot be completely overidden, only appended to.
    102        if not self.config.get("formats"):
    103            self.config["formats"] = ["sha512", "sha256"]
    104 
    105        if not self.config.get("includes"):
    106            self.config["includes"] = [
    107                r"^.*\.tar\.bz2$",
    108                r"^.*\.tar\.xz$",
    109                r"^.*\.snap$",
    110                r"^.*\.dmg$",
    111                r"^.*\.pkg$",
    112                r"^.*\.bundle$",
    113                r"^.*\.mar$",
    114                r"^.*Setup.*\.exe$",
    115                r"^.*Installer\.exe$",
    116                r"^.*\.msi$",
    117                r"^.*\.xpi$",
    118                r"^.*fennec.*\.apk$",
    119                r"^.*/jsshell.*$",
    120            ]
    121 
    122    def _get_file_prefix(self):
    123        return "pub/{}/candidates/{}-candidates/build{}/".format(
    124            self.config["stage_product"],
    125            self.config["version"],
    126            self.config["build_number"],
    127        )
    128 
    129    def _get_sums_filename(self, format_):
    130        return f"{format_.upper()}SUMS"
    131 
    132    def _get_summary_filename(self, format_):
    133        return f"{format_.upper()}SUMMARY"
    134 
    135    def _get_hash_function(self, format_):
    136        if format_ in ("sha256", "sha384", "sha512"):
    137            return getattr(hashlib, format_)
    138        else:
    139            self.fatal(f"Unsupported format {format_}")
    140 
    141    def _get_bucket(self):
    142        self.activate_virtualenv()
    143        from boto import connect_s3
    144 
    145        self.info("Connecting to S3")
    146        conn = connect_s3(anon=True, host="storage.googleapis.com")
    147        self.info("Connecting to bucket {}".format(self.config["bucket_name"]))
    148        self.bucket = conn.get_bucket(self.config["bucket_name"])
    149        return self.bucket
    150 
    151    def collect_individual_checksums(self):
    152        """This step grabs all of the small checksums files for the release,
    153        filters out any unwanted files from within them, and adds the remainder
    154        to self.checksums for subsequent steps to use."""
    155        bucket = self._get_bucket()
    156        self.info(f"File prefix is: {self.file_prefix}")
    157 
    158        # temporary holding place for checksums
    159        raw_checksums = []
    160 
    161        def worker(item):
    162            self.debug(f"Downloading {item}")
    163            sums = bucket.get_key(item).get_contents_as_string()
    164            raw_checksums.append(sums)
    165 
    166        def find_checksums_files():
    167            self.info("Getting key names from bucket")
    168            checksum_files = {"beets": [], "checksums": []}
    169            for key in bucket.list(prefix=self.file_prefix):
    170                if key.key.endswith(".checksums"):
    171                    self.debug(f"Found checksums file: {key.key}")
    172                    checksum_files["checksums"].append(key.key)
    173                elif key.key.endswith(".beet"):
    174                    self.debug(f"Found beet file: {key.key}")
    175                    checksum_files["beets"].append(key.key)
    176                else:
    177                    self.debug(f"Ignoring non-checksums file: {key.key}")
    178            if checksum_files["beets"]:
    179                self.log("Using beet format")
    180                return checksum_files["beets"]
    181            else:
    182                self.log("Using checksums format")
    183                return checksum_files["checksums"]
    184 
    185        pool = ThreadPool(self.config["parallelization"])
    186        pool.map(worker, find_checksums_files())
    187 
    188        for c in raw_checksums:
    189            for f, info in parse_checksums_file(c).items():
    190                for pattern in self.config["includes"]:
    191                    if re.search(pattern, f):
    192                        if f in self.checksums:
    193                            if info == self.checksums[f]:
    194                                self.debug(
    195                                    f"Duplicate checksum for file {f}"
    196                                    " but the data matches;"
    197                                    " continuing..."
    198                                )
    199                                continue
    200                            self.fatal(
    201                                f"Found duplicate checksum entry for {f}, "
    202                                "don't know which one to pick."
    203                            )
    204                        if not set(self.config["formats"]) <= set(info["hashes"]):
    205                            self.fatal(f"Missing necessary format for file {f}")
    206                        self.debug(f"Adding checksums for file: {f}")
    207                        self.checksums[f] = info
    208                        break
    209                else:
    210                    self.debug(f"Ignoring checksums for file: {f}")
    211 
    212    def create_summary(self):
    213        """
    214        This step computes a Merkle tree over the checksums for each format
    215        and writes a file containing the head of the tree and inclusion proofs
    216        for each file.
    217        """
    218        for fmt in self.config["formats"]:
    219            hash_fn = self._get_hash_function(fmt)
    220            files = [fn for fn in sorted(self.checksums)]
    221            data = [self.checksums[fn]["hashes"][fmt] for fn in files]
    222 
    223            tree = MerkleTree(hash_fn, data)
    224            head = binascii.hexlify(tree.head())
    225            proofs = [
    226                binascii.hexlify(tree.inclusion_proof(i).to_rfc6962_bis())
    227                for i in range(len(files))
    228            ]
    229 
    230            summary = self._get_summary_filename(fmt)
    231            self.info(f"Creating summary file: {summary}")
    232 
    233            content = "{} TREE_HEAD\n".format(head.decode("ascii"))
    234            for i in range(len(files)):
    235                content += "{} {}\n".format(proofs[i].decode("ascii"), files[i])
    236 
    237            self.write_to_file(summary, content)
    238 
    239    def create_big_checksums(self):
    240        for fmt in self.config["formats"]:
    241            sums = self._get_sums_filename(fmt)
    242            self.info(f"Creating big checksums file: {sums}")
    243            with open(sums, "w+") as output_file:
    244                for fn in sorted(self.checksums):
    245                    output_file.write(
    246                        "{}  {}\n".format(
    247                            self.checksums[fn]["hashes"][fmt].decode("ascii"), fn
    248                        )
    249                    )
    250 
    251 
    252 if __name__ == "__main__":
    253    myScript = ChecksumsGenerator()
    254    myScript.run_and_exit()