tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

tooltool.py (56358B)


      1 #!/usr/bin/env python3
      2 
      3 # tooltool is a lookaside cache implemented in Python
      4 # Copyright (C) 2011 John H. Ford <john@johnford.info>
      5 #
      6 # This program is free software; you can redistribute it and/or
      7 # modify it under the terms of the GNU General Public License
      8 # as published by the Free Software Foundation version 2
      9 #
     10 # This program is distributed in the hope that it will be useful,
     11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
     12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     13 # GNU General Public License for more details.
     14 #
     15 # You should have received a copy of the GNU General Public License
     16 # along with this program; if not, write to the Free Software
     17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
     18 # 02110-1301, USA.
     19 
     20 # A manifest file specifies files in that directory that are stored
     21 # elsewhere. This file should only list files in the same directory
     22 # in which the manifest file resides and it should be called
     23 # 'manifest.tt'
     24 
     25 import base64
     26 import calendar
     27 import hashlib
     28 import hmac
     29 import json
     30 import logging
     31 import math
     32 import optparse
     33 import os
     34 import pprint
     35 import re
     36 import shutil
     37 import ssl
     38 import stat
     39 import sys
     40 import tarfile
     41 import tempfile
     42 import threading
     43 import time
     44 import zipfile
     45 from contextlib import closing, contextmanager
     46 from functools import wraps
     47 from io import open
     48 from random import random
     49 from subprocess import PIPE, Popen
     50 
     51 if os.name == "nt":
     52    import certifi
     53 
     54 __version__ = "1.4.0"
     55 
     56 # Allowed request header characters:
     57 # !#$%&'()*+,-./:;<=>?@[]^_`{|}~ and space, a-z, A-Z, 0-9, \, "
     58 REQUEST_HEADER_ATTRIBUTE_CHARS = re.compile(
     59    r"^[ a-zA-Z0-9_\!#\$%&'\(\)\*\+,\-\./\:;<\=>\?@\[\]\^`\{\|\}~]*$"
     60 )
     61 DEFAULT_MANIFEST_NAME = "manifest.tt"
     62 TOOLTOOL_PACKAGE_SUFFIX = ".TOOLTOOL-PACKAGE"
     63 HAWK_VER = 1
     64 
     65 import urllib.request as urllib2
     66 from http.client import HTTPConnection, HTTPSConnection
     67 from urllib.error import HTTPError, URLError
     68 from urllib.parse import urljoin, urlparse
     69 from urllib.request import Request
     70 
     71 log = logging.getLogger(__name__)
     72 
     73 
     74 # Vendored code from `redo` module
     75 def retrier(attempts=5, sleeptime=10, max_sleeptime=300, sleepscale=1.5, jitter=1):
     76    """
     77    This function originates from redo 2.0.3 https://github.com/mozilla-releng/redo
     78    A generator function that sleeps between retries, handles exponential
     79    backoff and jitter. The action you are retrying is meant to run after
     80    retrier yields.
     81    """
     82    jitter = jitter or 0  # py35 barfs on the next line if jitter is None
     83    if jitter > sleeptime:
     84        # To prevent negative sleep times
     85        raise Exception(
     86            "jitter ({}) must be less than sleep time ({})".format(jitter, sleeptime)
     87        )
     88 
     89    sleeptime_real = sleeptime
     90    for _ in range(attempts):
     91        log.debug("attempt %i/%i", _ + 1, attempts)
     92 
     93        yield sleeptime_real
     94 
     95        if jitter:
     96            sleeptime_real = sleeptime + random.uniform(-jitter, jitter)
     97            # our jitter should scale along with the sleeptime
     98            jitter = jitter * sleepscale
     99        else:
    100            sleeptime_real = sleeptime
    101 
    102        sleeptime *= sleepscale
    103 
    104        if sleeptime_real > max_sleeptime:
    105            sleeptime_real = max_sleeptime
    106 
    107        # Don't need to sleep the last time
    108        if _ < attempts - 1:
    109            log.debug(
    110                "sleeping for %.2fs (attempt %i/%i)", sleeptime_real, _ + 1, attempts
    111            )
    112            time.sleep(sleeptime_real)
    113 
    114 
def retry(
    action,
    attempts=5,
    sleeptime=60,
    max_sleeptime=5 * 60,
    sleepscale=1.5,
    jitter=1,
    retry_exceptions=(Exception,),
    cleanup=None,
    args=(),
    kwargs={},
    log_args=True,
):
    """
    This function originates from redo 2.0.3 https://github.com/mozilla-releng/redo
    Calls an action function until it succeeds, or we give up.

    action        -- callable to invoke; its return value is returned on success
    attempts      -- maximum number of calls before re-raising the last error
    retry_exceptions -- only these exception types trigger a retry; anything
                     else propagates immediately
    cleanup       -- optional zero-argument callable run after each failure
    args, kwargs  -- forwarded to action on every attempt
                     (NOTE: kwargs={} is a shared mutable default; it is only
                     read here, never mutated, so this is safe as written)
    log_args      -- when False, args/kwargs are kept out of log messages
    """
    assert callable(action)
    assert not cleanup or callable(cleanup)

    # Pre-build the lazy %-style logging arguments once, outside the loop.
    action_name = getattr(action, "__name__", action)
    if log_args and (args or kwargs):
        log_attempt_args = (
            "retry: calling %s with args: %s," " kwargs: %s, attempt #%d",
            action_name,
            args,
            kwargs,
        )
    else:
        log_attempt_args = ("retry: calling %s, attempt #%d", action_name)

    if max_sleeptime < sleeptime:
        log.debug("max_sleeptime %d less than sleeptime %d", max_sleeptime, sleeptime)

    n = 1
    # retrier() handles the backoff/jitter and does the actual sleeping
    # between iterations; this loop body only runs the action.
    for _ in retrier(
        attempts=attempts,
        sleeptime=sleeptime,
        max_sleeptime=max_sleeptime,
        sleepscale=sleepscale,
        jitter=jitter,
    ):
        try:
            # First attempt logs at debug; later (retry) attempts at info.
            logfn = log.info if n != 1 else log.debug
            logfn_args = log_attempt_args + (n,)
            logfn(*logfn_args)
            return action(*args, **kwargs)
        except retry_exceptions:
            log.debug("retry: Caught exception: ", exc_info=True)
            if cleanup:
                cleanup()
            if n == attempts:
                log.info("retry: Giving up on %s", action_name)
                raise
            continue
        finally:
            # Runs on both success and failure paths; on success the return
            # has already been evaluated, so the increment is harmless.
            n += 1
    172 
    173 
    174 def retriable(*retry_args, **retry_kwargs):
    175    """
    176    This function originates from redo 2.0.3 https://github.com/mozilla-releng/redo
    177    A decorator factory for retry(). Wrap your function in @retriable(...) to
    178    give it retry powers!
    179    """
    180 
    181    def _retriable_factory(func):
    182        @wraps(func)
    183        def _retriable_wrapper(*args, **kwargs):
    184            return retry(func, args=args, kwargs=kwargs, *retry_args, **retry_kwargs)
    185 
    186        return _retriable_wrapper
    187 
    188    return _retriable_factory
    189 
    190 
    191 # end of vendored code from redo module
    192 
    193 
    194 def request_has_data(req):
    195    return req.data is not None
    196 
    197 
    198 def get_hexdigest(val):
    199    return hashlib.sha512(val).hexdigest()
    200 
    201 
class FileRecordJSONEncoderException(Exception):
    """Raised when FileRecordJSONEncoder is handed something that is not a
    FileRecord (or a list of FileRecords)."""

    pass
    204 
    205 
class InvalidManifest(Exception):
    """Raised when a manifest file is absent or cannot be parsed as JSON."""

    pass
    208 
    209 
    210 class ExceptionWithFilename(Exception):
    211    def __init__(self, filename):
    212        Exception.__init__(self)
    213        self.filename = filename
    214 
    215 
class BadFilenameException(ExceptionWithFilename):
    """Raised when a FileRecord filename contains path separators."""

    pass
    218 
    219 
class DigestMismatchException(ExceptionWithFilename):
    """Signals a content-digest mismatch for the named file."""

    pass
    222 
    223 
class MissingFileException(ExceptionWithFilename):
    """Raised when validating a file that is not present on disk."""

    pass
    226 
    227 
class InvalidCredentials(Exception):
    """Raised when taskcluster credentials are not dict-like or lack the
    required 'clientId'/'accessToken' keys."""

    pass
    230 
    231 
class BadHeaderValue(Exception):
    """Raised when a header value contains characters outside
    REQUEST_HEADER_ATTRIBUTE_CHARS."""

    pass
    234 
    235 
    236 def parse_url(url):
    237    url_parts = urlparse(url)
    238    url_dict = {
    239        "scheme": url_parts.scheme,
    240        "hostname": url_parts.hostname,
    241        "port": url_parts.port,
    242        "path": url_parts.path,
    243        "resource": url_parts.path,
    244        "query": url_parts.query,
    245    }
    246    if len(url_dict["query"]) > 0:
    247        url_dict["resource"] = "%s?%s" % (
    248            url_dict["resource"],  # pragma: no cover
    249            url_dict["query"],
    250        )
    251 
    252    if url_parts.port is None:
    253        if url_parts.scheme == "http":
    254            url_dict["port"] = 80
    255        elif url_parts.scheme == "https":  # pragma: no cover
    256            url_dict["port"] = 443
    257    return url_dict
    258 
    259 
    260 def utc_now(offset_in_seconds=0.0):
    261    return int(math.floor(calendar.timegm(time.gmtime()) + float(offset_in_seconds)))
    262 
    263 
    264 def random_string(length):
    265    return base64.urlsafe_b64encode(os.urandom(length))[:length]
    266 
    267 
    268 def prepare_header_val(val):
    269    if isinstance(val, bytes):
    270        val = val.decode("utf-8")
    271 
    272    if not REQUEST_HEADER_ATTRIBUTE_CHARS.match(val):
    273        raise BadHeaderValue(  # pragma: no cover
    274            "header value value={val} contained an illegal character".format(
    275                val=repr(val)
    276            )
    277        )
    278 
    279    return val
    280 
    281 
    282 def parse_content_type(content_type):  # pragma: no cover
    283    if content_type:
    284        return content_type.split(";")[0].strip().lower()
    285    else:
    286        return ""
    287 
    288 
    289 def calculate_payload_hash(algorithm, payload, content_type):  # pragma: no cover
    290    parts = [
    291        part if isinstance(part, bytes) else part.encode("utf8")
    292        for part in [
    293            "hawk." + str(HAWK_VER) + ".payload\n",
    294            parse_content_type(content_type) + "\n",
    295            payload or "",
    296            "\n",
    297        ]
    298    ]
    299 
    300    p_hash = hashlib.new(algorithm)
    301    for p in parts:
    302        p_hash.update(p)
    303 
    304    log.debug(
    305        "calculating payload hash from:\n{parts}".format(parts=pprint.pformat(parts))
    306    )
    307 
    308    return base64.b64encode(p_hash.digest())
    309 
    310 
def validate_taskcluster_credentials(credentials):
    """Raise InvalidCredentials unless *credentials* is dict-like and
    contains both 'clientId' and 'accessToken' keys."""
    if not hasattr(credentials, "__getitem__"):
        raise InvalidCredentials(
            "credentials must be a dict-like object"
        )  # pragma: no cover
    try:
        # Probe both required keys; the values themselves are not validated.
        credentials["clientId"]
        credentials["accessToken"]
    except KeyError:  # pragma: no cover
        etype, val, tb = sys.exc_info()
        raise InvalidCredentials("{etype}: {val}".format(etype=etype, val=val))
    322 
    323 
    324 def normalize_header_attr(val):
    325    if isinstance(val, bytes):
    326        return val.decode("utf-8")
    327    return val  # pragma: no cover
    328 
    329 
    330 def normalize_string(
    331    mac_type,
    332    timestamp,
    333    nonce,
    334    method,
    335    name,
    336    host,
    337    port,
    338    content_hash,
    339 ):
    340    return "\n".join(
    341        [
    342            normalize_header_attr(header)
    343            # The blank lines are important. They follow what the Node Hawk lib does.
    344            for header in [
    345                "hawk." + str(HAWK_VER) + "." + mac_type,
    346                timestamp,
    347                nonce,
    348                method or "",
    349                name or "",
    350                host,
    351                port,
    352                content_hash or "",
    353                "",  # for ext which is empty in this case
    354                "",  # Add trailing new line.
    355            ]
    356        ]
    357    )
    358 
    359 
    360 def calculate_mac(
    361    mac_type,
    362    access_token,
    363    algorithm,
    364    timestamp,
    365    nonce,
    366    method,
    367    name,
    368    host,
    369    port,
    370    content_hash,
    371 ):
    372    normalized = normalize_string(
    373        mac_type, timestamp, nonce, method, name, host, port, content_hash
    374    )
    375    log.debug("normalized resource for mac calc: {norm}".format(norm=normalized))
    376    digestmod = getattr(hashlib, algorithm)
    377 
    378    if not isinstance(normalized, bytes):
    379        normalized = normalized.encode("utf8")
    380 
    381    if not isinstance(access_token, bytes):
    382        access_token = access_token.encode("ascii")
    383 
    384    result = hmac.new(access_token, normalized, digestmod)
    385    return base64.b64encode(result.digest())
    386 
    387 
def make_taskcluster_header(credentials, req):
    """Build the Hawk 'Authorization' header value for *req*, signed with
    taskcluster *credentials* (a dict-like with 'clientId'/'accessToken')."""
    validate_taskcluster_credentials(credentials)

    url = req.get_full_url()
    method = req.get_method()
    algorithm = "sha256"
    timestamp = str(utc_now())
    nonce = random_string(6)
    url_parts = parse_url(url)

    # Only requests that carry a body get a payload hash.
    content_hash = None
    if request_has_data(req):
        data = req.data
        content_hash = calculate_payload_hash(  # pragma: no cover
            algorithm,
            data,
            # maybe we should detect this from req.headers but we anyway expect json
            content_type="application/json",
        )

    mac = calculate_mac(
        "header",
        credentials["accessToken"],
        algorithm,
        timestamp,
        nonce,
        method,
        url_parts["resource"],
        url_parts["hostname"],
        str(url_parts["port"]),
        content_hash,
    )

    # Assemble the header piece by piece: mac, optional hash, then identity.
    header = 'Hawk mac="{}"'.format(prepare_header_val(mac))

    if content_hash:  # pragma: no cover
        header = '{}, hash="{}"'.format(header, prepare_header_val(content_hash))

    header = '{header}, id="{id}", ts="{ts}", nonce="{nonce}"'.format(
        header=header,
        id=prepare_header_val(credentials["clientId"]),
        ts=prepare_header_val(timestamp),
        nonce=prepare_header_val(nonce),
    )

    log.debug("Hawk header for URL={} method={}: {}".format(url, method, header))

    return header
    436 
    437 
    438 class FileRecord(object):
    439    def __init__(
    440        self,
    441        filename,
    442        size,
    443        digest,
    444        algorithm,
    445        unpack=False,
    446        version=None,
    447        visibility=None,
    448    ):
    449        object.__init__(self)
    450        if "/" in filename or "\\" in filename:
    451            log.error(
    452                "The filename provided contains path information and is, therefore, invalid."
    453            )
    454            raise BadFilenameException(filename=filename)
    455        self.filename = filename
    456        self.size = size
    457        self.digest = digest
    458        self.algorithm = algorithm
    459        self.unpack = unpack
    460        self.version = version
    461        self.visibility = visibility
    462 
    463    def __eq__(self, other):
    464        if self is other:
    465            return True
    466        if (
    467            self.filename == other.filename
    468            and self.size == other.size
    469            and self.digest == other.digest
    470            and self.algorithm == other.algorithm
    471            and self.version == other.version
    472            and self.visibility == other.visibility
    473        ):
    474            return True
    475        else:
    476            return False
    477 
    478    def __ne__(self, other):
    479        return not self.__eq__(other)
    480 
    481    def __str__(self):
    482        return repr(self)
    483 
    484    def __repr__(self):
    485        return (
    486            "%s.%s(filename='%s', size=%s, digest='%s', algorithm='%s', visibility=%r)"
    487            % (
    488                __name__,
    489                self.__class__.__name__,
    490                self.filename,
    491                self.size,
    492                self.digest,
    493                self.algorithm,
    494                self.visibility,
    495            )
    496        )
    497 
    498    def present(self):
    499        # Doesn't check validity
    500        return os.path.exists(self.filename)
    501 
    502    def validate_size(self):
    503        if self.present():
    504            return self.size == os.path.getsize(self.filename)
    505        else:
    506            log.debug("trying to validate size on a missing file, %s", self.filename)
    507            raise MissingFileException(filename=self.filename)
    508 
    509    def validate_digest(self):
    510        if self.present():
    511            with open(self.filename, "rb") as f:
    512                return self.digest == digest_file(f, self.algorithm)
    513        else:
    514            log.debug("trying to validate digest on a missing file, %s', self.filename")
    515            raise MissingFileException(filename=self.filename)
    516 
    517    def validate(self):
    518        if self.size is None or self.validate_size():
    519            if self.validate_digest():
    520                return True
    521        return False
    522 
    523    def describe(self):
    524        if self.present() and self.validate():
    525            return "'%s' is present and valid" % self.filename
    526        elif self.present():
    527            return "'%s' is present and invalid" % self.filename
    528        else:
    529            return "'%s' is absent" % self.filename
    530 
    531 
    532 def create_file_record(filename, algorithm):
    533    fo = open(filename, "rb")
    534    stored_filename = os.path.split(filename)[1]
    535    fr = FileRecord(
    536        stored_filename,
    537        os.path.getsize(filename),
    538        digest_file(fo, algorithm),
    539        algorithm,
    540    )
    541    fo.close()
    542    return fr
    543 
    544 
    545 class FileRecordJSONEncoder(json.JSONEncoder):
    546    def encode_file_record(self, obj):
    547        if not issubclass(type(obj), FileRecord):
    548            err = (
    549                "FileRecordJSONEncoder is only for FileRecord and lists of FileRecords, "
    550                "not %s" % obj.__class__.__name__
    551            )
    552            log.warning(err)
    553            raise FileRecordJSONEncoderException(err)
    554        else:
    555            rv = {
    556                "filename": obj.filename,
    557                "size": obj.size,
    558                "algorithm": obj.algorithm,
    559                "digest": obj.digest,
    560            }
    561            if obj.unpack:
    562                rv["unpack"] = True
    563            if obj.version:
    564                rv["version"] = obj.version
    565            if obj.visibility is not None:
    566                rv["visibility"] = obj.visibility
    567            return rv
    568 
    569    def default(self, f):
    570        if issubclass(type(f), list):
    571            record_list = []
    572            for i in f:
    573                record_list.append(self.encode_file_record(i))
    574            return record_list
    575        else:
    576            return self.encode_file_record(f)
    577 
    578 
    579 class FileRecordJSONDecoder(json.JSONDecoder):
    580 
    581    """I help the json module materialize a FileRecord from
    582    a JSON file.  I understand FileRecords and lists of
    583    FileRecords.  I ignore things that I don't expect for now"""
    584 
    585    # TODO: make this more explicit in what it's looking for
    586    # and error out on unexpected things
    587 
    588    def process_file_records(self, obj):
    589        if isinstance(obj, list):
    590            record_list = []
    591            for i in obj:
    592                record = self.process_file_records(i)
    593                if issubclass(type(record), FileRecord):
    594                    record_list.append(record)
    595            return record_list
    596        required_fields = [
    597            "filename",
    598            "size",
    599            "algorithm",
    600            "digest",
    601        ]
    602        if isinstance(obj, dict):
    603            missing = False
    604            for req in required_fields:
    605                if req not in obj:
    606                    missing = True
    607                    break
    608 
    609            if not missing:
    610                unpack = obj.get("unpack", False)
    611                version = obj.get("version", None)
    612                visibility = obj.get("visibility", None)
    613                rv = FileRecord(
    614                    obj["filename"],
    615                    obj["size"],
    616                    obj["digest"],
    617                    obj["algorithm"],
    618                    unpack,
    619                    version,
    620                    visibility,
    621                )
    622                log.debug("materialized %s" % rv)
    623                return rv
    624        return obj
    625 
    626    def decode(self, s):
    627        decoded = json.JSONDecoder.decode(self, s)
    628        rv = self.process_file_records(decoded)
    629        return rv
    630 
    631 
class Manifest(object):
    """An in-memory list of FileRecords with JSON (de)serialization."""

    # The only serialization format currently supported.
    valid_formats = ("json",)

    def __init__(self, file_records=None):
        self.file_records = file_records or []

    def __eq__(self, other):
        """Order-insensitive comparison of the two record lists."""
        if self is other:
            return True
        if len(self.file_records) != len(other.file_records):
            log.debug("Manifests differ in number of files")
            return False
        # sort the file records by filename before comparing
        # NOTE(review): with duplicate filenames the tuple sort would fall
        # back to ordering FileRecord objects themselves (no __lt__ defined),
        # which raises TypeError — assumes filenames are unique per manifest.
        mine = sorted((fr.filename, fr) for fr in self.file_records)
        theirs = sorted((fr.filename, fr) for fr in other.file_records)
        return mine == theirs

    def __ne__(self, other):
        return not self.__eq__(other)

    def __deepcopy__(self, memo):
        # This is required for a deep copy
        # (copies the list only; the FileRecord objects are shared)
        return Manifest(self.file_records[:])

    def __copy__(self):
        return Manifest(self.file_records)

    def copy(self):
        # Same list-copy semantics as __deepcopy__.
        return Manifest(self.file_records[:])

    def present(self):
        """True when every recorded file exists on disk."""
        return all(i.present() for i in self.file_records)

    def validate_sizes(self):
        """True when every file's on-disk size matches its record."""
        return all(i.validate_size() for i in self.file_records)

    def validate_digests(self):
        """True when every file's on-disk digest matches its record."""
        return all(i.validate_digest() for i in self.file_records)

    def validate(self):
        """True when every file passes both size and digest validation."""
        return all(i.validate() for i in self.file_records)

    def load(self, data_file, fmt="json"):
        """Append records parsed from the open file *data_file*.

        Raises InvalidManifest on malformed JSON.
        """
        assert fmt in self.valid_formats
        if fmt == "json":
            try:
                self.file_records.extend(
                    json.load(data_file, cls=FileRecordJSONDecoder)
                )
            except ValueError:
                raise InvalidManifest("trying to read invalid manifest file")

    def loads(self, data_string, fmt="json"):
        """Append records parsed from the string *data_string*.

        Raises InvalidManifest on malformed JSON.
        """
        assert fmt in self.valid_formats
        if fmt == "json":
            try:
                self.file_records.extend(
                    json.loads(data_string, cls=FileRecordJSONDecoder)
                )
            except ValueError:
                raise InvalidManifest("trying to read invalid manifest file")

    def dump(self, output_file, fmt="json"):
        """Serialize all records to the open file *output_file*."""
        assert fmt in self.valid_formats
        if fmt == "json":
            return json.dump(
                self.file_records,
                output_file,
                indent=2,
                separators=(",", ": "),
                cls=FileRecordJSONEncoder,
            )

    def dumps(self, fmt="json"):
        """Serialize all records and return the resulting JSON string."""
        assert fmt in self.valid_formats
        if fmt == "json":
            return json.dumps(
                self.file_records,
                indent=2,
                separators=(",", ": "),
                cls=FileRecordJSONEncoder,
            )
    714 
    715 
    716 def digest_file(f, a):
    717    """I take a file like object 'f' and return a hex-string containing
    718    of the result of the algorithm 'a' applied to 'f'."""
    719    h = hashlib.new(a)
    720    chunk_size = 1024 * 10
    721    data = f.read(chunk_size)
    722    while data:
    723        h.update(data)
    724        data = f.read(chunk_size)
    725    name = repr(f.name) if hasattr(f, "name") else "a file"
    726    log.debug("hashed %s with %s to be %s", name, a, h.hexdigest())
    727    return h.hexdigest()
    728 
    729 
    730 def execute(cmd):
    731    """Execute CMD, logging its stdout at the info level"""
    732    process = Popen(cmd, shell=True, stdout=PIPE)
    733    while True:
    734        line = process.stdout.readline()
    735        if not line:
    736            break
    737        log.info(line.replace("\n", " "))
    738    return process.wait() == 0
    739 
    740 
    741 def open_manifest(manifest_file):
    742    """I know how to take a filename and load it into a Manifest object"""
    743    if os.path.exists(manifest_file):
    744        manifest = Manifest()
    745        with open(manifest_file, "r") as f:
    746            manifest.load(f)
    747            log.debug("loaded manifest from file '%s'" % manifest_file)
    748        return manifest
    749    else:
    750        log.debug("tried to load absent file '%s' as manifest" % manifest_file)
    751        raise InvalidManifest("manifest file '%s' does not exist" % manifest_file)
    752 
    753 
    754 def list_manifest(manifest_file):
    755    """I know how print all the files in a location"""
    756    try:
    757        manifest = open_manifest(manifest_file)
    758    except InvalidManifest as e:
    759        log.error(
    760            "failed to load manifest file at '%s': %s"
    761            % (
    762                manifest_file,
    763                str(e),
    764            )
    765        )
    766        return False
    767    for f in manifest.file_records:
    768        print(
    769            "{}\t{}\t{}".format(
    770                "P" if f.present() else "-",
    771                "V" if f.present() and f.validate() else "-",
    772                f.filename,
    773            )
    774        )
    775    return True
    776 
    777 
    778 def validate_manifest(manifest_file):
    779    """I validate that all files in a manifest are present and valid but
    780    don't fetch or delete them if they aren't"""
    781    try:
    782        manifest = open_manifest(manifest_file)
    783    except InvalidManifest as e:
    784        log.error(
    785            "failed to load manifest file at '%s': %s"
    786            % (
    787                manifest_file,
    788                str(e),
    789            )
    790        )
    791        return False
    792    invalid_files = []
    793    absent_files = []
    794    for f in manifest.file_records:
    795        if not f.present():
    796            absent_files.append(f)
    797        elif not f.validate():
    798            invalid_files.append(f)
    799    if len(invalid_files + absent_files) == 0:
    800        return True
    801    else:
    802        return False
    803 
    804 
def add_files(manifest_file, algorithm, filenames, version, visibility, unpack):
    """Hash each of *filenames* and record them in *manifest_file*,
    creating the manifest if it does not yet exist."""
    # returns True if all files successfully added, False if not
    # and doesn't catch library Exceptions.  If any files are already
    # tracked in the manifest, return will be False because they weren't
    # added
    all_files_added = True
    # Create a old_manifest object to add to
    if os.path.exists(manifest_file):
        old_manifest = open_manifest(manifest_file)
    else:
        old_manifest = Manifest()
        log.debug("creating a new manifest file")
    new_manifest = Manifest()  # use a different manifest for the output
    for filename in filenames:
        log.debug("adding %s" % filename)
        path, name = os.path.split(filename)
        new_fr = create_file_record(filename, algorithm)
        new_fr.version = version
        new_fr.visibility = visibility
        new_fr.unpack = unpack
        log.debug("appending a new file record to manifest file")
        add = True
        # Skip records that are already tracked (identical record) or that
        # clash by name with a different record.
        for fr in old_manifest.file_records:
            # NOTE(review): this logs the full manifest contents once per
            # existing record — looks like it was meant to sit outside the
            # loop; harmless, just noisy at debug level.
            log.debug(
                "manifest file has '%s'"
                % "', ".join([x.filename for x in old_manifest.file_records])
            )
            if new_fr == fr:
                log.info("file already in old_manifest")
                add = False
            elif filename == fr.filename:
                log.error(
                    "manifest already contains a different file named %s" % filename
                )
                add = False
        if add:
            new_manifest.file_records.append(new_fr)
            log.debug("added '%s' to manifest" % filename)
        else:
            all_files_added = False
    # copy any files in the old manifest that aren't in the new one
    new_filenames = set(fr.filename for fr in new_manifest.file_records)
    for old_fr in old_manifest.file_records:
        if old_fr.filename not in new_filenames:
            new_manifest.file_records.append(old_fr)

    with open(manifest_file, mode="w") as output:
        new_manifest.dump(output, fmt="json")

    return all_files_added
    855 
    856 
    857 def touch(f):
    858    """Used to modify mtime in cached files;
    859    mtime is used by the purge command"""
    860    try:
    861        os.utime(f, None)
    862    except OSError:
    863        log.warning("impossible to update utime of file %s" % f)
    864 
    865 
def _urlopen(req):
    # Open *req* with urllib, using an explicit SSL context on Windows.
    # NOTE(review): certifi is only imported at module level when
    # os.name == "nt", so this branch is the only consumer — presumably a
    # workaround for the default cert store on Windows; confirm rationale.
    ssl_context = None
    if os.name == "nt":
        ssl_context = ssl.create_default_context(cafile=certifi.where())
    # context=None falls back to urllib's default SSL handling elsewhere.
    return urllib2.urlopen(req, context=ssl_context)
    871 
    872 
@contextmanager
@retriable(sleeptime=2)
def request(url, auth_file=None):
    """Context manager that opens *url* (authorized via *auth_file* when
    given) and yields the response object, closing it on exit.

    NOTE(review): @retriable wraps the generator function itself, so the
    retry applies to creating the generator, not to the body that runs
    under @contextmanager — confirm this is the intended retry scope.
    """
    req = Request(url)
    _authorize(req, auth_file)
    with closing(_urlopen(req)) as f:
        log.debug("opened %s for reading" % url)
        yield f
    881 
    882 
    883 def fetch_file(base_urls, file_record, grabchunk=1024 * 4, auth_file=None, region=None):
    884    # A file which is requested to be fetched that exists locally will be
    885    # overwritten by this function
    886    fd, temp_path = tempfile.mkstemp(dir=os.getcwd())
    887    os.close(fd)
    888    fetched_path = None
    889    for base_url in base_urls:
    890        # Generate the URL for the file on the server side
    891        url = urljoin(base_url, "%s/%s" % (file_record.algorithm, file_record.digest))
    892        if region is not None:
    893            url += "?region=" + region
    894 
    895        log.info("Attempting to fetch from '%s'..." % base_url)
    896 
    897        # Well, the file doesn't exist locally.  Let's fetch it.
    898        try:
    899            with request(url, auth_file) as f, open(temp_path, mode="wb") as out:
    900                k = True
    901                size = 0
    902                while k:
    903                    # TODO: print statistics as file transfers happen both for info and to stop
    904                    # buildbot timeouts
    905                    indata = f.read(grabchunk)
    906                    out.write(indata)
    907                    size += len(indata)
    908                    if len(indata) == 0:
    909                        k = False
    910                log.info(
    911                    "File %s fetched from %s as %s"
    912                    % (file_record.filename, base_url, temp_path)
    913                )
    914                fetched_path = temp_path
    915                break
    916        except (URLError, HTTPError, ValueError):
    917            log.info(
    918                "...failed to fetch '%s' from %s" % (file_record.filename, base_url),
    919                exc_info=True,
    920            )
    921        except IOError:  # pragma: no cover
    922            log.info(
    923                "failed to write to temporary file for '%s'" % file_record.filename,
    924                exc_info=True,
    925            )
    926 
    927    # cleanup temp file in case of issues
    928    if fetched_path:
    929        return os.path.split(fetched_path)[1]
    930    else:
    931        try:
    932            os.remove(temp_path)
    933        except OSError:  # pragma: no cover
    934            pass
    935        return None
    936 
    937 
    938 def clean_path(dirname):
    939    """Remove a subtree if is exists. Helper for unpack_file()."""
    940    if os.path.exists(dirname):
    941        log.info("rm tree: %s" % dirname)
    942        shutil.rmtree(dirname)
    943 
    944 
# Filename suffix for checksum files (consumers live outside this chunk).
CHECKSUM_SUFFIX = ".checksum"
    946 
    947 
    948 def validate_tar_member(member, path):
    949    def _is_within_directory(directory, target):
    950        real_directory = os.path.realpath(directory)
    951        real_target = os.path.realpath(target)
    952        prefix = os.path.commonprefix([real_directory, real_target])
    953        return prefix == real_directory
    954 
    955    member_path = os.path.join(path, member.name)
    956    if not _is_within_directory(path, member_path):
    957        raise Exception("Attempted path traversal in tar file: " + member.name)
    958    if member.issym():
    959        link_path = os.path.join(os.path.dirname(member_path), member.linkname)
    960        if not _is_within_directory(path, link_path):
    961            raise Exception("Attempted link path traversal in tar file: " + member.name)
    962    if member.mode & (stat.S_ISUID | stat.S_ISGID):
    963        raise Exception("Attempted setuid or setgid in tar file: " + member.name)
    964 
    965 
    966 class TarFile(tarfile.TarFile):
    967    def _tooltool_do_extract(
    968        self, extract, member, path="", set_attrs=True, numeric_owner=False, **kwargs
    969    ):
    970        deferred_links = getattr(self, "_deferred_links", None)
    971        if not isinstance(member, tarfile.TarInfo):
    972            member = self.getmember(member)
    973        targetpath = os.path.normcase(os.path.join(path, member.name))
    974 
    975        if deferred_links is not None and member.issym():
    976            if os.path.lexists(targetpath):
    977                # Avoid FileExistsError on following os.symlink.
    978                os.unlink(targetpath)
    979            try:
    980                os.symlink(member.linkname, targetpath)
    981            except (NotImplementedError, OSError):
    982                # On Windows, os.symlink can fail, in this case fallback to
    983                # creating a copy. If the destination was not already created,
    984                # defer the link creation.
    985                source = os.path.normcase(
    986                    os.path.join(os.path.dirname(targetpath), member.linkname)
    987                )
    988 
    989                if source in self._extracted_members:
    990                    shutil.copy(source, targetpath)
    991                    self.chown(member, targetpath, numeric_owner)
    992                else:
    993                    deferred_links.setdefault(source, []).append(
    994                        (member, targetpath, numeric_owner)
    995                    )
    996            return
    997 
    998        extract(member, path, set_attrs, numeric_owner=numeric_owner, **kwargs)
    999        if deferred_links is not None:
   1000            for tarinfo, linkpath, numeric_owner in deferred_links.pop(targetpath, []):
   1001                shutil.copy(targetpath, linkpath)
   1002                self.chown(tarinfo, linkpath, numeric_owner)
   1003            self._extracted_members.add(targetpath)
   1004 
   1005    def extract(self, *args, **kwargs):
   1006        self._tooltool_do_extract(super(TarFile, self).extract, *args, **kwargs)
   1007 
   1008    # extractall in versions for cpython that implement PEP 706 call _extract_one
   1009    # instead of extract.
   1010    def _extract_one(self, *args, **kwargs):
   1011        self._tooltool_do_extract(super(TarFile, self)._extract_one, *args, **kwargs)
   1012 
   1013    def extractall(self, *args, **kwargs):
   1014        self._deferred_links = {}
   1015        self._extracted_members = set()
   1016        super(TarFile, self).extractall(*args, **kwargs)
   1017        for links in self._deferred_links.values():
   1018            for tarinfo, linkpath, numeric_owner in links:
   1019                log.warn("Cannot create dangling symbolic link: %s", linkpath)
   1020        delattr(self, "_deferred_links")
   1021        delattr(self, "_extracted_members")
   1022 
   1023 
   1024 def safe_extract(tar, path=".", *, numeric_owner=False):
   1025    def _files(tar, path):
   1026        for member in tar:
   1027            validate_tar_member(member, path)
   1028            yield member
   1029 
   1030    tar.extractall(path, members=_files(tar, path), numeric_owner=numeric_owner)
   1031 
   1032 
   1033 def unpack_file(filename):
   1034    """Untar `filename`, assuming it is uncompressed or compressed with bzip2,
   1035    xz, gzip, zst, or unzip a zip file. The file is assumed to contain a single
   1036    directory with a name matching the base of the given filename.
   1037    Xz support is handled by shelling out to 'tar'."""
   1038    if os.path.isfile(filename) and tarfile.is_tarfile(filename):
   1039        tar_file, zip_ext = os.path.splitext(filename)
   1040        base_file, tar_ext = os.path.splitext(tar_file)
   1041        clean_path(base_file)
   1042        log.info('untarring "%s"' % filename)
   1043        with TarFile.open(filename) as tar:
   1044            safe_extract(tar)
   1045    elif os.path.isfile(filename) and filename.endswith(".tar.zst"):
   1046        import zstandard
   1047 
   1048        base_file = filename.replace(".tar.zst", "")
   1049        clean_path(base_file)
   1050        log.info('untarring "%s"' % filename)
   1051        dctx = zstandard.ZstdDecompressor()
   1052        with dctx.stream_reader(open(filename, "rb")) as fileobj:
   1053            with TarFile.open(fileobj=fileobj, mode="r|") as tar:
   1054                safe_extract(tar)
   1055    elif os.path.isfile(filename) and zipfile.is_zipfile(filename):
   1056        base_file = filename.replace(".zip", "")
   1057        clean_path(base_file)
   1058        log.info('unzipping "%s"' % filename)
   1059        z = zipfile.ZipFile(filename)
   1060        z.extractall()
   1061        z.close()
   1062    else:
   1063        log.error("Unknown archive extension for filename '%s'" % filename)
   1064        return False
   1065    return True
   1066 
   1067 
   1068 def fetch_files(
   1069    manifest_file,
   1070    base_urls,
   1071    filenames=[],
   1072    cache_folder=None,
   1073    auth_file=None,
   1074    region=None,
   1075 ):
   1076    # Lets load the manifest file
   1077    try:
   1078        manifest = open_manifest(manifest_file)
   1079    except InvalidManifest as e:
   1080        log.error(
   1081            "failed to load manifest file at '%s': %s"
   1082            % (
   1083                manifest_file,
   1084                str(e),
   1085            )
   1086        )
   1087        return False
   1088 
   1089    # we want to track files already in current working directory AND valid
   1090    # we will not need to fetch these
   1091    present_files = []
   1092 
   1093    # We want to track files that fail to be fetched as well as
   1094    # files that are fetched
   1095    failed_files = []
   1096    fetched_files = []
   1097 
   1098    # Files that we want to unpack.
   1099    unpack_files = []
   1100 
   1101    # Lets go through the manifest and fetch the files that we want
   1102    for f in manifest.file_records:
   1103        # case 1: files are already present
   1104        if f.present():
   1105            if f.validate():
   1106                present_files.append(f.filename)
   1107                if f.unpack:
   1108                    unpack_files.append(f.filename)
   1109            else:
   1110                # we have an invalid file here, better to cleanup!
   1111                # this invalid file needs to be replaced with a good one
   1112                # from the local cash or fetched from a tooltool server
   1113                log.info(
   1114                    "File %s is present locally but it is invalid, so I will remove it "
   1115                    "and try to fetch it" % f.filename
   1116                )
   1117                os.remove(os.path.join(os.getcwd(), f.filename))
   1118 
   1119        # check if file is already in cache
   1120        if cache_folder and f.filename not in present_files:
   1121            try:
   1122                shutil.copy(
   1123                    os.path.join(cache_folder, f.digest),
   1124                    os.path.join(os.getcwd(), f.filename),
   1125                )
   1126                log.info(
   1127                    "File %s retrieved from local cache %s" % (f.filename, cache_folder)
   1128                )
   1129                touch(os.path.join(cache_folder, f.digest))
   1130 
   1131                filerecord_for_validation = FileRecord(
   1132                    f.filename, f.size, f.digest, f.algorithm
   1133                )
   1134                if filerecord_for_validation.validate():
   1135                    present_files.append(f.filename)
   1136                    if f.unpack:
   1137                        unpack_files.append(f.filename)
   1138                else:
   1139                    # the file copied from the cache is invalid, better to
   1140                    # clean up the cache version itself as well
   1141                    log.warning(
   1142                        "File %s retrieved from cache is invalid! I am deleting it from the "
   1143                        "cache as well" % f.filename
   1144                    )
   1145                    os.remove(os.path.join(os.getcwd(), f.filename))
   1146                    os.remove(os.path.join(cache_folder, f.digest))
   1147            except IOError:
   1148                log.info(
   1149                    "File %s not present in local cache folder %s"
   1150                    % (f.filename, cache_folder)
   1151                )
   1152 
   1153        # now I will try to fetch all files which are not already present and
   1154        # valid, appending a suffix to avoid race conditions
   1155        temp_file_name = None
   1156        # 'filenames' is the list of filenames to be managed, if this variable
   1157        # is a non empty list it can be used to filter if filename is in
   1158        # present_files, it means that I have it already because it was already
   1159        # either in the working dir or in the cache
   1160        if (
   1161            f.filename in filenames or len(filenames) == 0
   1162        ) and f.filename not in present_files:
   1163            log.debug("fetching %s" % f.filename)
   1164            temp_file_name = fetch_file(
   1165                base_urls, f, auth_file=auth_file, region=region
   1166            )
   1167            if temp_file_name:
   1168                fetched_files.append((f, temp_file_name))
   1169            else:
   1170                failed_files.append(f.filename)
   1171        else:
   1172            log.debug("skipping %s" % f.filename)
   1173 
   1174    # lets ensure that fetched files match what the manifest specified
   1175    for localfile, temp_file_name in fetched_files:
   1176        # since I downloaded to a temp file, I need to perform all validations on the temp file
   1177        # this is why filerecord_for_validation is created
   1178 
   1179        filerecord_for_validation = FileRecord(
   1180            temp_file_name, localfile.size, localfile.digest, localfile.algorithm
   1181        )
   1182 
   1183        if filerecord_for_validation.validate():
   1184            # great!
   1185            # I can rename the temp file
   1186            log.info(
   1187                "File integrity verified, renaming %s to %s"
   1188                % (temp_file_name, localfile.filename)
   1189            )
   1190            os.rename(
   1191                os.path.join(os.getcwd(), temp_file_name),
   1192                os.path.join(os.getcwd(), localfile.filename),
   1193            )
   1194 
   1195            if localfile.unpack:
   1196                unpack_files.append(localfile.filename)
   1197 
   1198            # if I am using a cache and a new file has just been retrieved from a
   1199            # remote location, I need to update the cache as well
   1200            if cache_folder:
   1201                log.info("Updating local cache %s..." % cache_folder)
   1202                try:
   1203                    if not os.path.exists(cache_folder):
   1204                        log.info("Creating cache in %s..." % cache_folder)
   1205                        os.makedirs(cache_folder, 0o0700)
   1206                    shutil.copy(
   1207                        os.path.join(os.getcwd(), localfile.filename),
   1208                        os.path.join(cache_folder, localfile.digest),
   1209                    )
   1210                    log.info(
   1211                        "Local cache %s updated with %s"
   1212                        % (cache_folder, localfile.filename)
   1213                    )
   1214                    touch(os.path.join(cache_folder, localfile.digest))
   1215                except (OSError, IOError):
   1216                    log.warning(
   1217                        "Impossible to add file %s to cache folder %s"
   1218                        % (localfile.filename, cache_folder),
   1219                        exc_info=False,
   1220                    )
   1221        else:
   1222            failed_files.append(localfile.filename)
   1223            log.error("'%s'" % filerecord_for_validation.describe())
   1224            os.remove(temp_file_name)
   1225 
   1226    # Unpack files that need to be unpacked.
   1227    for filename in unpack_files:
   1228        if not unpack_file(filename):
   1229            failed_files.append(filename)
   1230 
   1231    # If we failed to fetch or validate a file, we need to fail
   1232    if len(failed_files) > 0:
   1233        log.error("The following files failed: '%s'" % "', ".join(failed_files))
   1234        return False
   1235    return True
   1236 
   1237 
   1238 def freespace(p):
   1239    "Returns the number of bytes free under directory `p`"
   1240    if sys.platform == "win32":  # pragma: no cover
   1241        # os.statvfs doesn't work on Windows
   1242        import win32file
   1243 
   1244        secsPerClus, bytesPerSec, nFreeClus, totClus = win32file.GetDiskFreeSpace(p)
   1245        return secsPerClus * bytesPerSec * nFreeClus
   1246    else:
   1247        r = os.statvfs(p)
   1248        return r.f_frsize * r.f_bavail
   1249 
   1250 
   1251 def purge(folder, gigs):
   1252    """If gigs is non 0, it deletes files in `folder` until `gigs` GB are free,
   1253    starting from older files.  If gigs is 0, a full purge will be performed.
   1254    No recursive deletion of files in subfolder is performed."""
   1255 
   1256    full_purge = bool(gigs == 0)
   1257    gigs *= 1024 * 1024 * 1024
   1258 
   1259    if not full_purge and freespace(folder) >= gigs:
   1260        log.info("No need to cleanup")
   1261        return
   1262 
   1263    files = []
   1264    for f in os.listdir(folder):
   1265        p = os.path.join(folder, f)
   1266        # it deletes files in folder without going into subfolders,
   1267        # assuming the cache has a flat structure
   1268        if not os.path.isfile(p):
   1269            continue
   1270        mtime = os.path.getmtime(p)
   1271        files.append((mtime, p))
   1272 
   1273    # iterate files sorted by mtime
   1274    for _, f in sorted(files):
   1275        log.info("removing %s to free up space" % f)
   1276        try:
   1277            os.remove(f)
   1278        except OSError:
   1279            log.info("Impossible to remove %s" % f, exc_info=True)
   1280        if not full_purge and freespace(folder) >= gigs:
   1281            break
   1282 
   1283 
   1284 def _log_api_error(e):
   1285    if hasattr(e, "hdrs") and e.hdrs["content-type"] == "application/json":
   1286        json_resp = json.load(e.fp)
   1287        log.error(
   1288            "%s: %s" % (json_resp["error"]["name"], json_resp["error"]["description"])
   1289        )
   1290    else:
   1291        log.exception("Error making RelengAPI request:")
   1292 
   1293 
   1294 def _authorize(req, auth_file):
   1295    is_taskcluster_auth = False
   1296 
   1297    if not auth_file:
   1298        try:
   1299            taskcluster_env_keys = {
   1300                "clientId": "TASKCLUSTER_CLIENT_ID",
   1301                "accessToken": "TASKCLUSTER_ACCESS_TOKEN",
   1302            }
   1303            auth_content = {k: os.environ[v] for k, v in taskcluster_env_keys.items()}
   1304            is_taskcluster_auth = True
   1305        except KeyError:
   1306            return
   1307    else:
   1308        with open(auth_file) as f:
   1309            auth_content = f.read().strip()
   1310            try:
   1311                auth_content = json.loads(auth_content)
   1312                is_taskcluster_auth = True
   1313            except Exception:
   1314                pass
   1315 
   1316    if is_taskcluster_auth:
   1317        taskcluster_header = make_taskcluster_header(auth_content, req)
   1318        log.debug("Using taskcluster credentials in %s" % auth_file)
   1319        req.add_unredirected_header("Authorization", taskcluster_header)
   1320    else:
   1321        log.debug("Using Bearer token in %s" % auth_file)
   1322        req.add_unredirected_header("Authorization", "Bearer %s" % auth_content)
   1323 
   1324 
   1325 def _send_batch(base_url, auth_file, batch, region):
   1326    url = urljoin(base_url, "upload")
   1327    if region is not None:
   1328        url += "?region=" + region
   1329    data = json.dumps(batch).encode("utf-8")
   1330    req = Request(url, data, {"Content-Type": "application/json"})
   1331    _authorize(req, auth_file)
   1332    try:
   1333        resp = _urlopen(req)
   1334    except (URLError, HTTPError) as e:
   1335        _log_api_error(e)
   1336        return None
   1337    return json.load(resp)["result"]
   1338 
   1339 
   1340 def _s3_upload(filename, file):
   1341    # urllib2 does not support streaming, so we fall back to good old httplib
   1342    url = urlparse(file["put_url"])
   1343    cls = HTTPSConnection if url.scheme == "https" else HTTPConnection
   1344    host, port = url.netloc.split(":") if ":" in url.netloc else (url.netloc, 443)
   1345    port = int(port)
   1346    conn = cls(host, port)
   1347    try:
   1348        req_path = "%s?%s" % (url.path, url.query) if url.query else url.path
   1349        with open(filename, "rb") as f:
   1350            content_length = file["size"]
   1351            conn.request(
   1352                "PUT",
   1353                req_path,
   1354                f,
   1355                {
   1356                    "Content-Type": "application/octet-stream",
   1357                    "Content-Length": str(content_length),
   1358                },
   1359            )
   1360            resp = conn.getresponse()
   1361            resp_body = resp.read()
   1362            conn.close()
   1363        if resp.status != 200:
   1364            raise RuntimeError(
   1365                "Non-200 return from AWS: %s %s\n%s"
   1366                % (resp.status, resp.reason, resp_body)
   1367            )
   1368    except Exception:
   1369        file["upload_exception"] = sys.exc_info()
   1370        file["upload_ok"] = False
   1371    else:
   1372        file["upload_ok"] = True
   1373 
   1374 
   1375 def _notify_upload_complete(base_url, auth_file, file):
   1376    req = Request(urljoin(base_url, "upload/complete/%(algorithm)s/%(digest)s" % file))
   1377    _authorize(req, auth_file)
   1378    try:
   1379        _urlopen(req)
   1380    except HTTPError as e:
   1381        if e.code != 409:
   1382            _log_api_error(e)
   1383            return
   1384        # 409 indicates that the upload URL hasn't expired yet and we
   1385        # should retry after a delay
   1386        to_wait = int(e.headers.get("X-Retry-After", 60))
   1387        log.warning("Waiting %d seconds for upload URLs to expire" % to_wait)
   1388        time.sleep(to_wait)
   1389        _notify_upload_complete(base_url, auth_file, file)
   1390    except Exception:
   1391        log.exception("While notifying server of upload completion:")
   1392 
   1393 
   1394 def upload(manifest, message, base_urls, auth_file, region):
   1395    try:
   1396        manifest = open_manifest(manifest)
   1397    except InvalidManifest:
   1398        log.exception("failed to load manifest file at '%s'")
   1399        return False
   1400 
   1401    # verify the manifest, since we'll need the files present to upload
   1402    if not manifest.validate():
   1403        log.error("manifest is invalid")
   1404        return False
   1405 
   1406    if any(fr.visibility is None for fr in manifest.file_records):
   1407        log.error("All files in a manifest for upload must have a visibility set")
   1408 
   1409    # convert the manifest to an upload batch
   1410    batch = {
   1411        "message": message,
   1412        "files": {},
   1413    }
   1414    for fr in manifest.file_records:
   1415        batch["files"][fr.filename] = {
   1416            "size": fr.size,
   1417            "digest": fr.digest,
   1418            "algorithm": fr.algorithm,
   1419            "visibility": fr.visibility,
   1420        }
   1421 
   1422    # make the upload request
   1423    resp = _send_batch(base_urls[0], auth_file, batch, region)
   1424    if not resp:
   1425        return None
   1426    files = resp["files"]
   1427 
   1428    # Upload the files, each in a thread.  This allows us to start all of the
   1429    # uploads before any of the URLs expire.
   1430    threads = {}
   1431    for filename, file in files.items():
   1432        if "put_url" in file:
   1433            log.info("%s: starting upload" % (filename,))
   1434            thd = threading.Thread(target=_s3_upload, args=(filename, file))
   1435            thd.daemon = 1
   1436            thd.start()
   1437            threads[filename] = thd
   1438        else:
   1439            log.info("%s: already exists on server" % (filename,))
   1440 
   1441    # re-join all of those threads as they exit
   1442    success = True
   1443    while threads:
   1444        for filename, thread in list(threads.items()):
   1445            if not thread.is_alive():
   1446                # _s3_upload has annotated file with result information
   1447                file = files[filename]
   1448                thread.join()
   1449                if file["upload_ok"]:
   1450                    log.info("%s: uploaded" % filename)
   1451                else:
   1452                    log.error(
   1453                        "%s: failed" % filename, exc_info=file["upload_exception"]
   1454                    )
   1455                    success = False
   1456                del threads[filename]
   1457 
   1458    # notify the server that the uploads are completed.  If the notification
   1459    # fails, we don't consider that an error (the server will notice
   1460    # eventually)
   1461    for filename, file in files.items():
   1462        if "put_url" in file and file["upload_ok"]:
   1463            log.info("notifying server of upload completion for %s" % (filename,))
   1464            _notify_upload_complete(base_urls[0], auth_file, file)
   1465 
   1466    return success
   1467 
   1468 
   1469 def send_operation_on_file(data, base_urls, digest, auth_file):
   1470    url = base_urls[0]
   1471    url = urljoin(url, "file/sha512/" + digest)
   1472 
   1473    data = json.dumps(data)
   1474 
   1475    req = Request(url, data, {"Content-Type": "application/json"})
   1476    req.get_method = lambda: "PATCH"
   1477 
   1478    _authorize(req, auth_file)
   1479 
   1480    try:
   1481        _urlopen(req)
   1482    except (URLError, HTTPError) as e:
   1483        _log_api_error(e)
   1484        return False
   1485    return True
   1486 
   1487 
   1488 def change_visibility(base_urls, digest, visibility, auth_file):
   1489    data = [
   1490        {
   1491            "op": "set_visibility",
   1492            "visibility": visibility,
   1493        }
   1494    ]
   1495    return send_operation_on_file(data, base_urls, digest, auth_file)
   1496 
   1497 
   1498 def delete_instances(base_urls, digest, auth_file):
   1499    data = [
   1500        {
   1501            "op": "delete_instances",
   1502        }
   1503    ]
   1504    return send_operation_on_file(data, base_urls, digest, auth_file)
   1505 
   1506 
   1507 def process_command(options, args):
   1508    """I know how to take a list of program arguments and
   1509    start doing the right thing with them"""
   1510    cmd = args[0]
   1511    cmd_args = args[1:]
   1512    log.debug("processing '%s' command with args '%s'" % (cmd, '", "'.join(cmd_args)))
   1513    log.debug("using options: %s" % options)
   1514 
   1515    if cmd == "list":
   1516        return list_manifest(options["manifest"])
   1517    if cmd == "validate":
   1518        return validate_manifest(options["manifest"])
   1519    elif cmd == "add":
   1520        return add_files(
   1521            options["manifest"],
   1522            options["algorithm"],
   1523            cmd_args,
   1524            options["version"],
   1525            options["visibility"],
   1526            options["unpack"],
   1527        )
   1528    elif cmd == "purge":
   1529        if options["cache_folder"]:
   1530            purge(folder=options["cache_folder"], gigs=options["size"])
   1531        else:
   1532            log.critical("please specify the cache folder to be purged")
   1533            return False
   1534    elif cmd == "fetch":
   1535        return fetch_files(
   1536            options["manifest"],
   1537            options["base_url"],
   1538            cmd_args,
   1539            cache_folder=options["cache_folder"],
   1540            auth_file=options.get("auth_file"),
   1541            region=options.get("region"),
   1542        )
   1543    elif cmd == "upload":
   1544        if not options.get("message"):
   1545            log.critical("upload command requires a message")
   1546            return False
   1547        return upload(
   1548            options.get("manifest"),
   1549            options.get("message"),
   1550            options.get("base_url"),
   1551            options.get("auth_file"),
   1552            options.get("region"),
   1553        )
   1554    elif cmd == "change-visibility":
   1555        if not options.get("digest"):
   1556            log.critical("change-visibility command requires a digest option")
   1557            return False
   1558        if not options.get("visibility"):
   1559            log.critical("change-visibility command requires a visibility option")
   1560            return False
   1561        return change_visibility(
   1562            options.get("base_url"),
   1563            options.get("digest"),
   1564            options.get("visibility"),
   1565            options.get("auth_file"),
   1566        )
   1567    elif cmd == "delete":
   1568        if not options.get("digest"):
   1569            log.critical("delete command requires a digest option")
   1570            return False
   1571        return delete_instances(
   1572            options.get("base_url"),
   1573            options.get("digest"),
   1574            options.get("auth_file"),
   1575        )
   1576    else:
   1577        log.critical('command "%s" is not implemented' % cmd)
   1578        return False
   1579 
   1580 
   1581 def main(argv, _skip_logging=False):
   1582    # Set up option parsing
   1583    usage = """usage: %prog [options] command [FILES]
   1584 
   1585 Supported commands are:
   1586    - list: list files in the manifest
   1587    - validate: validate the manifest
   1588    - add: add records for FILES to the manifest
   1589    - purge: cleans up the cache folder
   1590    - fetch: retrieve files listed in the manifest (or FILES if specified)
   1591    - upload: upload files listed in the manifest; message is required
   1592    - change-visibility: sets the visibility of the file identified by the given digest
   1593    - delete: deletes the file identified by the given digest"""
   1594    parser = optparse.OptionParser(usage=usage)
   1595    parser.add_option(
   1596        "-q",
   1597        "--quiet",
   1598        default=logging.INFO,
   1599        dest="loglevel",
   1600        action="store_const",
   1601        const=logging.ERROR,
   1602    )
   1603    parser.add_option(
   1604        "-v", "--verbose", dest="loglevel", action="store_const", const=logging.DEBUG
   1605    )
   1606    parser.add_option(
   1607        "-m",
   1608        "--manifest",
   1609        default=DEFAULT_MANIFEST_NAME,
   1610        dest="manifest",
   1611        action="store",
   1612        help="specify the manifest file to be operated on",
   1613    )
   1614    parser.add_option(
   1615        "-d",
   1616        "--algorithm",
   1617        default="sha512",
   1618        dest="algorithm",
   1619        action="store",
   1620        help="hashing algorithm to use (only sha512 is allowed)",
   1621    )
   1622    parser.add_option(
   1623        "--digest",
   1624        default=None,
   1625        dest="digest",
   1626        action="store",
   1627        help="digest hash to change visibility for",
   1628    )
   1629    parser.add_option(
   1630        "--visibility",
   1631        default=None,
   1632        dest="visibility",
   1633        choices=["internal", "public"],
   1634        help='Visibility level of this file; "internal" is for '
   1635        "files that cannot be distributed out of the company "
   1636        'but not for secrets; "public" files are available to '
   1637        "anyone without restriction",
   1638    )
   1639    parser.add_option(
   1640        "--unpack",
   1641        default=False,
   1642        dest="unpack",
   1643        action="store_true",
   1644        help="Request unpacking this file after fetch."
   1645        " This is helpful with tarballs.",
   1646    )
   1647    parser.add_option(
   1648        "--version",
   1649        default=None,
   1650        dest="version",
   1651        action="store",
   1652        help="Version string for this file. This annotates the "
   1653        "manifest entry with a version string to help "
   1654        "identify the contents.",
   1655    )
   1656    parser.add_option(
   1657        "-o",
   1658        "--overwrite",
   1659        default=False,
   1660        dest="overwrite",
   1661        action="store_true",
   1662        help="UNUSED; present for backward compatibility",
   1663    )
   1664    parser.add_option(
   1665        "--url",
   1666        dest="base_url",
   1667        action="append",
   1668        help="RelengAPI URL ending with /tooltool/; default "
   1669        "is appropriate for Mozilla",
   1670    )
   1671    parser.add_option(
   1672        "-c", "--cache-folder", dest="cache_folder", help="Local cache folder"
   1673    )
   1674    parser.add_option(
   1675        "-s",
   1676        "--size",
   1677        help="free space required (in GB)",
   1678        dest="size",
   1679        type="float",
   1680        default=0.0,
   1681    )
   1682    parser.add_option(
   1683        "-r",
   1684        "--region",
   1685        help="Preferred AWS region for upload or fetch; " "example: --region=us-west-2",
   1686    )
   1687    parser.add_option(
   1688        "--message",
   1689        help='The "commit message" for an upload; format with a bug number '
   1690        "and brief comment",
   1691        dest="message",
   1692    )
   1693    parser.add_option(
   1694        "--authentication-file",
   1695        help="Use the RelengAPI token found in the given file to "
   1696        "authenticate to the RelengAPI server.",
   1697        dest="auth_file",
   1698    )
   1699 
   1700    (options_obj, args) = parser.parse_args(argv[1:])
   1701 
   1702    if not options_obj.base_url:
   1703        tooltool_host = os.environ.get("TOOLTOOL_HOST", "tooltool.mozilla-releng.net")
   1704        taskcluster_proxy_url = os.environ.get("TASKCLUSTER_PROXY_URL")
   1705        if taskcluster_proxy_url:
   1706            tooltool_url = "{}/{}".format(taskcluster_proxy_url, tooltool_host)
   1707        else:
   1708            tooltool_url = "https://{}".format(tooltool_host)
   1709 
   1710        options_obj.base_url = [tooltool_url]
   1711 
   1712    # ensure all URLs have a trailing slash
   1713    def add_slash(url):
   1714        return url if url.endswith("/") else (url + "/")
   1715 
   1716    options_obj.base_url = [add_slash(u) for u in options_obj.base_url]
   1717 
   1718    # expand ~ in --authentication-file
   1719    if options_obj.auth_file:
   1720        options_obj.auth_file = os.path.expanduser(options_obj.auth_file)
   1721 
   1722    # Dictionaries are easier to work with
   1723    options = vars(options_obj)
   1724 
   1725    log.setLevel(options["loglevel"])
   1726 
   1727    # Set up logging, for now just to the console
   1728    if not _skip_logging:  # pragma: no cover
   1729        ch = logging.StreamHandler()
   1730        cf = logging.Formatter("%(levelname)s - %(message)s")
   1731        ch.setFormatter(cf)
   1732        log.addHandler(ch)
   1733 
   1734    if options["algorithm"] != "sha512":
   1735        parser.error("only --algorithm sha512 is supported")
   1736 
   1737    if len(args) < 1:
   1738        parser.error("You must specify a command")
   1739 
   1740    return 0 if process_command(options, args) else 1
   1741 
   1742 
   1743 if __name__ == "__main__":  # pragma: no cover
   1744    sys.exit(main(sys.argv))