tooltool.py (56358B)
#!/usr/bin/env python3

# tooltool is a lookaside cache implemented in Python
# Copyright (C) 2011 John H. Ford <john@johnford.info>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation version 2
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.

# A manifest file specifies files in that directory that are stored
# elsewhere. This file should only list files in the same directory
# in which the manifest file resides and it should be called
# 'manifest.tt'

import base64
import calendar
import hashlib
import hmac
import json
import logging
import math
import optparse
import os
import pprint
import re
import shutil
import ssl
import stat
import sys
import tarfile
import tempfile
import threading
import time
import urllib.request as urllib2
import zipfile
from contextlib import closing, contextmanager
from functools import wraps
from io import open
from random import random
from subprocess import PIPE, Popen

# On Windows the system certificate store is not usable through
# ssl.create_default_context() the way we need, so fall back to certifi.
if os.name == "nt":
    import certifi

__version__ = "1.4.0"

# Allowed request header characters:
# !#$%&'()*+,-./:;<=>?@[]^_`{|}~ and space, a-z, A-Z, 0-9, \, "
REQUEST_HEADER_ATTRIBUTE_CHARS = re.compile(
    r"^[ a-zA-Z0-9_\!#\$%&'\(\)\*\+,\-\./\:;<\=>\?@\[\]\^`\{\|\}~]*$"
)
DEFAULT_MANIFEST_NAME = "manifest.tt"
TOOLTOOL_PACKAGE_SUFFIX = ".TOOLTOOL-PACKAGE"
HAWK_VER = 1
66 from http.client import HTTPConnection, HTTPSConnection 67 from urllib.error import HTTPError, URLError 68 from urllib.parse import urljoin, urlparse 69 from urllib.request import Request 70 71 log = logging.getLogger(__name__) 72 73 74 # Vendored code from `redo` module 75 def retrier(attempts=5, sleeptime=10, max_sleeptime=300, sleepscale=1.5, jitter=1): 76 """ 77 This function originates from redo 2.0.3 https://github.com/mozilla-releng/redo 78 A generator function that sleeps between retries, handles exponential 79 backoff and jitter. The action you are retrying is meant to run after 80 retrier yields. 81 """ 82 jitter = jitter or 0 # py35 barfs on the next line if jitter is None 83 if jitter > sleeptime: 84 # To prevent negative sleep times 85 raise Exception( 86 "jitter ({}) must be less than sleep time ({})".format(jitter, sleeptime) 87 ) 88 89 sleeptime_real = sleeptime 90 for _ in range(attempts): 91 log.debug("attempt %i/%i", _ + 1, attempts) 92 93 yield sleeptime_real 94 95 if jitter: 96 sleeptime_real = sleeptime + random.uniform(-jitter, jitter) 97 # our jitter should scale along with the sleeptime 98 jitter = jitter * sleepscale 99 else: 100 sleeptime_real = sleeptime 101 102 sleeptime *= sleepscale 103 104 if sleeptime_real > max_sleeptime: 105 sleeptime_real = max_sleeptime 106 107 # Don't need to sleep the last time 108 if _ < attempts - 1: 109 log.debug( 110 "sleeping for %.2fs (attempt %i/%i)", sleeptime_real, _ + 1, attempts 111 ) 112 time.sleep(sleeptime_real) 113 114 115 def retry( 116 action, 117 attempts=5, 118 sleeptime=60, 119 max_sleeptime=5 * 60, 120 sleepscale=1.5, 121 jitter=1, 122 retry_exceptions=(Exception,), 123 cleanup=None, 124 args=(), 125 kwargs={}, 126 log_args=True, 127 ): 128 """ 129 This function originates from redo 2.0.3 https://github.com/mozilla-releng/redo 130 Calls an action function until it succeeds, or we give up. 
131 """ 132 assert callable(action) 133 assert not cleanup or callable(cleanup) 134 135 action_name = getattr(action, "__name__", action) 136 if log_args and (args or kwargs): 137 log_attempt_args = ( 138 "retry: calling %s with args: %s," " kwargs: %s, attempt #%d", 139 action_name, 140 args, 141 kwargs, 142 ) 143 else: 144 log_attempt_args = ("retry: calling %s, attempt #%d", action_name) 145 146 if max_sleeptime < sleeptime: 147 log.debug("max_sleeptime %d less than sleeptime %d", max_sleeptime, sleeptime) 148 149 n = 1 150 for _ in retrier( 151 attempts=attempts, 152 sleeptime=sleeptime, 153 max_sleeptime=max_sleeptime, 154 sleepscale=sleepscale, 155 jitter=jitter, 156 ): 157 try: 158 logfn = log.info if n != 1 else log.debug 159 logfn_args = log_attempt_args + (n,) 160 logfn(*logfn_args) 161 return action(*args, **kwargs) 162 except retry_exceptions: 163 log.debug("retry: Caught exception: ", exc_info=True) 164 if cleanup: 165 cleanup() 166 if n == attempts: 167 log.info("retry: Giving up on %s", action_name) 168 raise 169 continue 170 finally: 171 n += 1 172 173 174 def retriable(*retry_args, **retry_kwargs): 175 """ 176 This function originates from redo 2.0.3 https://github.com/mozilla-releng/redo 177 A decorator factory for retry(). Wrap your function in @retriable(...) to 178 give it retry powers! 
179 """ 180 181 def _retriable_factory(func): 182 @wraps(func) 183 def _retriable_wrapper(*args, **kwargs): 184 return retry(func, args=args, kwargs=kwargs, *retry_args, **retry_kwargs) 185 186 return _retriable_wrapper 187 188 return _retriable_factory 189 190 191 # end of vendored code from redo module 192 193 194 def request_has_data(req): 195 return req.data is not None 196 197 198 def get_hexdigest(val): 199 return hashlib.sha512(val).hexdigest() 200 201 202 class FileRecordJSONEncoderException(Exception): 203 pass 204 205 206 class InvalidManifest(Exception): 207 pass 208 209 210 class ExceptionWithFilename(Exception): 211 def __init__(self, filename): 212 Exception.__init__(self) 213 self.filename = filename 214 215 216 class BadFilenameException(ExceptionWithFilename): 217 pass 218 219 220 class DigestMismatchException(ExceptionWithFilename): 221 pass 222 223 224 class MissingFileException(ExceptionWithFilename): 225 pass 226 227 228 class InvalidCredentials(Exception): 229 pass 230 231 232 class BadHeaderValue(Exception): 233 pass 234 235 236 def parse_url(url): 237 url_parts = urlparse(url) 238 url_dict = { 239 "scheme": url_parts.scheme, 240 "hostname": url_parts.hostname, 241 "port": url_parts.port, 242 "path": url_parts.path, 243 "resource": url_parts.path, 244 "query": url_parts.query, 245 } 246 if len(url_dict["query"]) > 0: 247 url_dict["resource"] = "%s?%s" % ( 248 url_dict["resource"], # pragma: no cover 249 url_dict["query"], 250 ) 251 252 if url_parts.port is None: 253 if url_parts.scheme == "http": 254 url_dict["port"] = 80 255 elif url_parts.scheme == "https": # pragma: no cover 256 url_dict["port"] = 443 257 return url_dict 258 259 260 def utc_now(offset_in_seconds=0.0): 261 return int(math.floor(calendar.timegm(time.gmtime()) + float(offset_in_seconds))) 262 263 264 def random_string(length): 265 return base64.urlsafe_b64encode(os.urandom(length))[:length] 266 267 268 def prepare_header_val(val): 269 if isinstance(val, bytes): 270 val = 
val.decode("utf-8") 271 272 if not REQUEST_HEADER_ATTRIBUTE_CHARS.match(val): 273 raise BadHeaderValue( # pragma: no cover 274 "header value value={val} contained an illegal character".format( 275 val=repr(val) 276 ) 277 ) 278 279 return val 280 281 282 def parse_content_type(content_type): # pragma: no cover 283 if content_type: 284 return content_type.split(";")[0].strip().lower() 285 else: 286 return "" 287 288 289 def calculate_payload_hash(algorithm, payload, content_type): # pragma: no cover 290 parts = [ 291 part if isinstance(part, bytes) else part.encode("utf8") 292 for part in [ 293 "hawk." + str(HAWK_VER) + ".payload\n", 294 parse_content_type(content_type) + "\n", 295 payload or "", 296 "\n", 297 ] 298 ] 299 300 p_hash = hashlib.new(algorithm) 301 for p in parts: 302 p_hash.update(p) 303 304 log.debug( 305 "calculating payload hash from:\n{parts}".format(parts=pprint.pformat(parts)) 306 ) 307 308 return base64.b64encode(p_hash.digest()) 309 310 311 def validate_taskcluster_credentials(credentials): 312 if not hasattr(credentials, "__getitem__"): 313 raise InvalidCredentials( 314 "credentials must be a dict-like object" 315 ) # pragma: no cover 316 try: 317 credentials["clientId"] 318 credentials["accessToken"] 319 except KeyError: # pragma: no cover 320 etype, val, tb = sys.exc_info() 321 raise InvalidCredentials("{etype}: {val}".format(etype=etype, val=val)) 322 323 324 def normalize_header_attr(val): 325 if isinstance(val, bytes): 326 return val.decode("utf-8") 327 return val # pragma: no cover 328 329 330 def normalize_string( 331 mac_type, 332 timestamp, 333 nonce, 334 method, 335 name, 336 host, 337 port, 338 content_hash, 339 ): 340 return "\n".join( 341 [ 342 normalize_header_attr(header) 343 # The blank lines are important. They follow what the Node Hawk lib does. 344 for header in [ 345 "hawk." + str(HAWK_VER) + "." 
+ mac_type, 346 timestamp, 347 nonce, 348 method or "", 349 name or "", 350 host, 351 port, 352 content_hash or "", 353 "", # for ext which is empty in this case 354 "", # Add trailing new line. 355 ] 356 ] 357 ) 358 359 360 def calculate_mac( 361 mac_type, 362 access_token, 363 algorithm, 364 timestamp, 365 nonce, 366 method, 367 name, 368 host, 369 port, 370 content_hash, 371 ): 372 normalized = normalize_string( 373 mac_type, timestamp, nonce, method, name, host, port, content_hash 374 ) 375 log.debug("normalized resource for mac calc: {norm}".format(norm=normalized)) 376 digestmod = getattr(hashlib, algorithm) 377 378 if not isinstance(normalized, bytes): 379 normalized = normalized.encode("utf8") 380 381 if not isinstance(access_token, bytes): 382 access_token = access_token.encode("ascii") 383 384 result = hmac.new(access_token, normalized, digestmod) 385 return base64.b64encode(result.digest()) 386 387 388 def make_taskcluster_header(credentials, req): 389 validate_taskcluster_credentials(credentials) 390 391 url = req.get_full_url() 392 method = req.get_method() 393 algorithm = "sha256" 394 timestamp = str(utc_now()) 395 nonce = random_string(6) 396 url_parts = parse_url(url) 397 398 content_hash = None 399 if request_has_data(req): 400 data = req.data 401 content_hash = calculate_payload_hash( # pragma: no cover 402 algorithm, 403 data, 404 # maybe we should detect this from req.headers but we anyway expect json 405 content_type="application/json", 406 ) 407 408 mac = calculate_mac( 409 "header", 410 credentials["accessToken"], 411 algorithm, 412 timestamp, 413 nonce, 414 method, 415 url_parts["resource"], 416 url_parts["hostname"], 417 str(url_parts["port"]), 418 content_hash, 419 ) 420 421 header = 'Hawk mac="{}"'.format(prepare_header_val(mac)) 422 423 if content_hash: # pragma: no cover 424 header = '{}, hash="{}"'.format(header, prepare_header_val(content_hash)) 425 426 header = '{header}, id="{id}", ts="{ts}", nonce="{nonce}"'.format( 427 
def make_taskcluster_header(credentials, req):
    """Build a Hawk Authorization header for *req* signed with *credentials*."""
    validate_taskcluster_credentials(credentials)

    url = req.get_full_url()
    method = req.get_method()
    algorithm = "sha256"
    timestamp = str(utc_now())
    nonce = random_string(6)
    url_parts = parse_url(url)

    # Only hash a payload when the request actually carries one.
    content_hash = None
    if request_has_data(req):
        content_hash = calculate_payload_hash(  # pragma: no cover
            algorithm,
            req.data,
            # maybe we should detect this from req.headers but we anyway expect json
            content_type="application/json",
        )

    mac = calculate_mac(
        "header",
        credentials["accessToken"],
        algorithm,
        timestamp,
        nonce,
        method,
        url_parts["resource"],
        url_parts["hostname"],
        str(url_parts["port"]),
        content_hash,
    )

    # Assemble the header piece by piece; the hash attribute is optional.
    header = 'Hawk mac="{}"'.format(prepare_header_val(mac))
    if content_hash:  # pragma: no cover
        header = '{}, hash="{}"'.format(header, prepare_header_val(content_hash))
    header = '{header}, id="{id}", ts="{ts}", nonce="{nonce}"'.format(
        header=header,
        id=prepare_header_val(credentials["clientId"]),
        ts=prepare_header_val(timestamp),
        nonce=prepare_header_val(nonce),
    )

    log.debug("Hawk header for URL={} method={}: {}".format(url, method, header))
    return header
class FileRecord(object):
    """One manifest entry: a filename plus size/digest used to verify it."""

    def __init__(
        self,
        filename,
        size,
        digest,
        algorithm,
        unpack=False,
        version=None,
        visibility=None,
    ):
        object.__init__(self)
        # Records may only name files in the manifest's own directory.
        if "/" in filename or "\\" in filename:
            log.error(
                "The filename provided contains path information and is, therefore, invalid."
            )
            raise BadFilenameException(filename=filename)
        self.filename = filename
        self.size = size
        self.digest = digest
        self.algorithm = algorithm
        self.unpack = unpack
        self.version = version
        self.visibility = visibility

    def __eq__(self, other):
        # NOTE: `unpack` is deliberately excluded from equality; two records
        # for the same stored file compare equal even if only one is marked
        # for unpacking (add_files relies on this).
        if self is other:
            return True
        return (
            self.filename == other.filename
            and self.size == other.size
            and self.digest == other.digest
            and self.algorithm == other.algorithm
            and self.version == other.version
            and self.visibility == other.visibility
        )

    def __ne__(self, other):
        return not self.__eq__(other)

    def __str__(self):
        return repr(self)

    def __repr__(self):
        return (
            "%s.%s(filename='%s', size=%s, digest='%s', algorithm='%s', visibility=%r)"
            % (
                __name__,
                self.__class__.__name__,
                self.filename,
                self.size,
                self.digest,
                self.algorithm,
                self.visibility,
            )
        )

    def present(self):
        # Doesn't check validity
        return os.path.exists(self.filename)

    def validate_size(self):
        """True when the on-disk size matches; raises if the file is absent."""
        if self.present():
            return self.size == os.path.getsize(self.filename)
        else:
            log.debug("trying to validate size on a missing file, %s", self.filename)
            raise MissingFileException(filename=self.filename)

    def validate_digest(self):
        """True when the on-disk digest matches; raises if the file is absent."""
        if self.present():
            with open(self.filename, "rb") as f:
                return self.digest == digest_file(f, self.algorithm)
        else:
            # BUGFIX: the closing quote used to sit after `self.filename`,
            # turning the whole call into one literal string so the
            # filename was never interpolated into the log message.
            log.debug("trying to validate digest on a missing file, %s", self.filename)
            raise MissingFileException(filename=self.filename)

    def validate(self):
        """Check size (when known) and digest; True only when both pass."""
        if self.size is None or self.validate_size():
            if self.validate_digest():
                return True
        return False

    def describe(self):
        """One-line, human-readable status of this record's file."""
        if self.present() and self.validate():
            return "'%s' is present and valid" % self.filename
        elif self.present():
            return "'%s' is present and invalid" % self.filename
        else:
            return "'%s' is absent" % self.filename


def create_file_record(filename, algorithm):
    """Build a FileRecord (size + digest) for an existing local file."""
    stored_filename = os.path.split(filename)[1]
    # Context manager guarantees the handle is closed even if hashing fails
    # (the old code leaked the handle on any exception in digest_file).
    with open(filename, "rb") as fo:
        return FileRecord(
            stored_filename,
            os.path.getsize(filename),
            digest_file(fo, algorithm),
            algorithm,
        )


class FileRecordJSONEncoder(json.JSONEncoder):
    """Serialize FileRecord objects (or lists of them) to manifest JSON."""

    def encode_file_record(self, obj):
        if not issubclass(type(obj), FileRecord):
            err = (
                "FileRecordJSONEncoder is only for FileRecord and lists of FileRecords, "
                "not %s" % obj.__class__.__name__
            )
            log.warning(err)
            raise FileRecordJSONEncoderException(err)
        rv = {
            "filename": obj.filename,
            "size": obj.size,
            "algorithm": obj.algorithm,
            "digest": obj.digest,
        }
        # Optional fields are emitted only when set, keeping manifests minimal.
        if obj.unpack:
            rv["unpack"] = True
        if obj.version:
            rv["version"] = obj.version
        if obj.visibility is not None:
            rv["visibility"] = obj.visibility
        return rv

    def default(self, f):
        if issubclass(type(f), list):
            return [self.encode_file_record(i) for i in f]
        return self.encode_file_record(f)
I understand FileRecords and lists of 583 FileRecords. I ignore things that I don't expect for now""" 584 585 # TODO: make this more explicit in what it's looking for 586 # and error out on unexpected things 587 588 def process_file_records(self, obj): 589 if isinstance(obj, list): 590 record_list = [] 591 for i in obj: 592 record = self.process_file_records(i) 593 if issubclass(type(record), FileRecord): 594 record_list.append(record) 595 return record_list 596 required_fields = [ 597 "filename", 598 "size", 599 "algorithm", 600 "digest", 601 ] 602 if isinstance(obj, dict): 603 missing = False 604 for req in required_fields: 605 if req not in obj: 606 missing = True 607 break 608 609 if not missing: 610 unpack = obj.get("unpack", False) 611 version = obj.get("version", None) 612 visibility = obj.get("visibility", None) 613 rv = FileRecord( 614 obj["filename"], 615 obj["size"], 616 obj["digest"], 617 obj["algorithm"], 618 unpack, 619 version, 620 visibility, 621 ) 622 log.debug("materialized %s" % rv) 623 return rv 624 return obj 625 626 def decode(self, s): 627 decoded = json.JSONDecoder.decode(self, s) 628 rv = self.process_file_records(decoded) 629 return rv 630 631 632 class Manifest(object): 633 valid_formats = ("json",) 634 635 def __init__(self, file_records=None): 636 self.file_records = file_records or [] 637 638 def __eq__(self, other): 639 if self is other: 640 return True 641 if len(self.file_records) != len(other.file_records): 642 log.debug("Manifests differ in number of files") 643 return False 644 # sort the file records by filename before comparing 645 mine = sorted((fr.filename, fr) for fr in self.file_records) 646 theirs = sorted((fr.filename, fr) for fr in other.file_records) 647 return mine == theirs 648 649 def __ne__(self, other): 650 return not self.__eq__(other) 651 652 def __deepcopy__(self, memo): 653 # This is required for a deep copy 654 return Manifest(self.file_records[:]) 655 656 def __copy__(self): 657 return 
class Manifest(object):
    """A collection of FileRecords plus (de)serialization helpers."""

    valid_formats = ("json",)

    def __init__(self, file_records=None):
        self.file_records = file_records or []

    def __eq__(self, other):
        if self is other:
            return True
        if len(self.file_records) != len(other.file_records):
            log.debug("Manifests differ in number of files")
            return False
        # sort the file records by filename before comparing
        mine = sorted((fr.filename, fr) for fr in self.file_records)
        theirs = sorted((fr.filename, fr) for fr in other.file_records)
        return mine == theirs

    def __ne__(self, other):
        return not self.__eq__(other)

    def __deepcopy__(self, memo):
        # This is required for a deep copy
        return Manifest(self.file_records[:])

    def __copy__(self):
        return Manifest(self.file_records)

    def copy(self):
        return Manifest(self.file_records[:])

    def present(self):
        return all(record.present() for record in self.file_records)

    def validate_sizes(self):
        return all(record.validate_size() for record in self.file_records)

    def validate_digests(self):
        return all(record.validate_digest() for record in self.file_records)

    def validate(self):
        return all(record.validate() for record in self.file_records)

    def load(self, data_file, fmt="json"):
        """Append records parsed from the open file *data_file*."""
        assert fmt in self.valid_formats
        try:
            records = json.load(data_file, cls=FileRecordJSONDecoder)
        except ValueError:
            raise InvalidManifest("trying to read invalid manifest file")
        self.file_records.extend(records)

    def loads(self, data_string, fmt="json"):
        """Append records parsed from the string *data_string*."""
        assert fmt in self.valid_formats
        try:
            records = json.loads(data_string, cls=FileRecordJSONDecoder)
        except ValueError:
            raise InvalidManifest("trying to read invalid manifest file")
        self.file_records.extend(records)

    def dump(self, output_file, fmt="json"):
        assert fmt in self.valid_formats
        return json.dump(
            self.file_records,
            output_file,
            indent=2,
            separators=(",", ": "),
            cls=FileRecordJSONEncoder,
        )

    def dumps(self, fmt="json"):
        assert fmt in self.valid_formats
        return json.dumps(
            self.file_records,
            indent=2,
            separators=(",", ": "),
            cls=FileRecordJSONEncoder,
        )


def digest_file(f, a):
    """I take a file like object 'f' and return a hex-string containing
    of the result of the algorithm 'a' applied to 'f'."""
    h = hashlib.new(a)
    chunk_size = 1024 * 10
    while True:
        data = f.read(chunk_size)
        if not data:
            break
        h.update(data)
    name = repr(f.name) if hasattr(f, "name") else "a file"
    log.debug("hashed %s with %s to be %s", name, a, h.hexdigest())
    return h.hexdigest()
def execute(cmd):
    """Execute CMD, logging its stdout at the info level"""
    process = Popen(cmd, shell=True, stdout=PIPE)
    # Popen was opened in binary mode, so stdout yields bytes; decode before
    # logging. BUGFIX: the old code called str.replace on the raw bytes,
    # raising TypeError as soon as the command produced any output.
    for line in iter(process.stdout.readline, b""):
        log.info(line.decode(errors="replace").replace("\n", " "))
    return process.wait() == 0


def open_manifest(manifest_file):
    """I know how to take a filename and load it into a Manifest object"""
    if os.path.exists(manifest_file):
        manifest = Manifest()
        with open(manifest_file, "r") as f:
            manifest.load(f)
            log.debug("loaded manifest from file '%s'" % manifest_file)
        return manifest
    else:
        log.debug("tried to load absent file '%s' as manifest" % manifest_file)
        raise InvalidManifest("manifest file '%s' does not exist" % manifest_file)


def list_manifest(manifest_file):
    """I know how print all the files in a location"""
    try:
        manifest = open_manifest(manifest_file)
    except InvalidManifest as e:
        log.error(
            "failed to load manifest file at '%s': %s"
            % (
                manifest_file,
                str(e),
            )
        )
        return False
    # One line per record: present flag, valid flag, filename.
    for f in manifest.file_records:
        print(
            "{}\t{}\t{}".format(
                "P" if f.present() else "-",
                "V" if f.present() and f.validate() else "-",
                f.filename,
            )
        )
    return True


def validate_manifest(manifest_file):
    """I validate that all files in a manifest are present and valid but
    don't fetch or delete them if they aren't"""
    try:
        manifest = open_manifest(manifest_file)
    except InvalidManifest as e:
        log.error(
            "failed to load manifest file at '%s': %s"
            % (
                manifest_file,
                str(e),
            )
        )
        return False
    invalid_files = []
    absent_files = []
    for f in manifest.file_records:
        if not f.present():
            absent_files.append(f)
        elif not f.validate():
            invalid_files.append(f)
    # The manifest is valid only when nothing is missing or corrupt.
    return not invalid_files and not absent_files
def add_files(manifest_file, algorithm, filenames, version, visibility, unpack):
    # returns True if all files successfully added, False if not
    # and doesn't catch library Exceptions. If any files are already
    # tracked in the manifest, return will be False because they weren't
    # added
    all_files_added = True
    # Create a old_manifest object to add to
    if os.path.exists(manifest_file):
        old_manifest = open_manifest(manifest_file)
    else:
        old_manifest = Manifest()
        log.debug("creating a new manifest file")
    new_manifest = Manifest()  # use a different manifest for the output
    for filename in filenames:
        log.debug("adding %s" % filename)
        path, name = os.path.split(filename)
        new_fr = create_file_record(filename, algorithm)
        new_fr.version = version
        new_fr.visibility = visibility
        new_fr.unpack = unpack
        log.debug("appending a new file record to manifest file")
        add = True
        # Log the existing contents once per file, not once per record.
        log.debug(
            "manifest file has '%s'"
            % "', ".join([x.filename for x in old_manifest.file_records])
        )
        for fr in old_manifest.file_records:
            if new_fr == fr:
                log.info("file already in old_manifest")
                add = False
            elif name == fr.filename:
                # BUGFIX: compare the stored base name, not the caller's
                # full input path; records only ever hold base names, so the
                # old `filename == fr.filename` check silently missed
                # conflicts for inputs like "dir/foo".
                log.error(
                    "manifest already contains a different file named %s" % filename
                )
                add = False
        if add:
            new_manifest.file_records.append(new_fr)
            log.debug("added '%s' to manifest" % filename)
        else:
            all_files_added = False
    # copy any files in the old manifest that aren't in the new one
    new_filenames = set(fr.filename for fr in new_manifest.file_records)
    for old_fr in old_manifest.file_records:
        if old_fr.filename not in new_filenames:
            new_manifest.file_records.append(old_fr)

    with open(manifest_file, mode="w") as output:
        new_manifest.dump(output, fmt="json")

    return all_files_added
def touch(f):
    """Used to modify mtime in cached files;
    mtime is used by the purge command"""
    try:
        os.utime(f, None)
    except OSError:
        # Best effort: a failed mtime update only affects purge ordering.
        log.warning("impossible to update utime of file %s" % f)


def _urlopen(req):
    """Open *req*; on Windows, use certifi's CA bundle for TLS verification."""
    if os.name != "nt":
        return urllib2.urlopen(req, context=None)
    ssl_context = ssl.create_default_context(cafile=certifi.where())
    return urllib2.urlopen(req, context=ssl_context)


@contextmanager
@retriable(sleeptime=2)
def request(url, auth_file=None):
    """Context manager yielding an authorized, open response for *url*."""
    req = Request(url)
    _authorize(req, auth_file)
    with closing(_urlopen(req)) as f:
        log.debug("opened %s for reading" % url)
        yield f
def fetch_file(base_urls, file_record, grabchunk=1024 * 4, auth_file=None, region=None):
    # A file which is requested to be fetched that exists locally will be
    # overwritten by this function
    fd, temp_path = tempfile.mkstemp(dir=os.getcwd())
    os.close(fd)
    fetched_path = None
    for base_url in base_urls:
        # Generate the URL for the file on the server side
        url = urljoin(base_url, "%s/%s" % (file_record.algorithm, file_record.digest))
        if region is not None:
            url += "?region=" + region

        log.info("Attempting to fetch from '%s'..." % base_url)

        # Well, the file doesn't exist locally. Let's fetch it.
        try:
            with request(url, auth_file) as f, open(temp_path, mode="wb") as out:
                size = 0
                while True:
                    # TODO: print statistics as file transfers happen both for info and to stop
                    # buildbot timeouts
                    indata = f.read(grabchunk)
                    out.write(indata)
                    size += len(indata)
                    if not indata:
                        break
                log.info(
                    "File %s fetched from %s as %s"
                    % (file_record.filename, base_url, temp_path)
                )
                fetched_path = temp_path
            break
        except (URLError, HTTPError, ValueError):
            log.info(
                "...failed to fetch '%s' from %s" % (file_record.filename, base_url),
                exc_info=True,
            )
        except IOError:  # pragma: no cover
            log.info(
                "failed to write to temporary file for '%s'" % file_record.filename,
                exc_info=True,
            )

    if fetched_path:
        # Return just the temp file's name; callers join it with the cwd.
        return os.path.split(fetched_path)[1]
    # cleanup temp file in case of issues
    try:
        os.remove(temp_path)
    except OSError:  # pragma: no cover
        pass
    return None
def clean_path(dirname):
    """Remove a subtree if is exists. Helper for unpack_file()."""
    if os.path.exists(dirname):
        log.info("rm tree: %s" % dirname)
        shutil.rmtree(dirname)


CHECKSUM_SUFFIX = ".checksum"


def validate_tar_member(member, path):
    """Reject tar members that could escape *path* or carry setuid/setgid.

    Raises Exception for path traversal via the member name, for symlink
    targets that resolve outside *path*, and for setuid/setgid mode bits.
    """

    def _is_within_directory(directory, target):
        real_directory = os.path.realpath(directory)
        real_target = os.path.realpath(target)
        # BUGFIX: os.path.commonprefix compares character by character, so
        # a sibling such as "<dir>2/evil" shares a string prefix with
        # "<dir>" and was wrongly accepted. Compare whole path components
        # instead.
        return real_target == real_directory or real_target.startswith(
            real_directory + os.sep
        )

    member_path = os.path.join(path, member.name)
    if not _is_within_directory(path, member_path):
        raise Exception("Attempted path traversal in tar file: " + member.name)
    if member.issym():
        link_path = os.path.join(os.path.dirname(member_path), member.linkname)
        if not _is_within_directory(path, link_path):
            raise Exception("Attempted link path traversal in tar file: " + member.name)
    if member.mode & (stat.S_ISUID | stat.S_ISGID):
        raise Exception("Attempted setuid or setgid in tar file: " + member.name)


class TarFile(tarfile.TarFile):
    """tarfile.TarFile that copes with symlinks on platforms without them.

    On Windows os.symlink can fail; extraction then falls back to copying
    the link target, deferring the copy until the target itself has been
    extracted. Dangling links are reported and skipped.
    """

    def _tooltool_do_extract(
        self, extract, member, path="", set_attrs=True, numeric_owner=False, **kwargs
    ):
        # _deferred_links/_extracted_members only exist while extractall()
        # is running; a bare extract() call skips the symlink fallback.
        deferred_links = getattr(self, "_deferred_links", None)
        if not isinstance(member, tarfile.TarInfo):
            member = self.getmember(member)
        targetpath = os.path.normcase(os.path.join(path, member.name))

        if deferred_links is not None and member.issym():
            if os.path.lexists(targetpath):
                # Avoid FileExistsError on following os.symlink.
                os.unlink(targetpath)
            try:
                os.symlink(member.linkname, targetpath)
            except (NotImplementedError, OSError):
                # On Windows, os.symlink can fail, in this case fallback to
                # creating a copy. If the destination was not already created,
                # defer the link creation.
                source = os.path.normcase(
                    os.path.join(os.path.dirname(targetpath), member.linkname)
                )

                if source in self._extracted_members:
                    shutil.copy(source, targetpath)
                    self.chown(member, targetpath, numeric_owner)
                else:
                    deferred_links.setdefault(source, []).append(
                        (member, targetpath, numeric_owner)
                    )
            return

        extract(member, path, set_attrs, numeric_owner=numeric_owner, **kwargs)
        if deferred_links is not None:
            # Any deferred links waiting on this file can be materialized now.
            for tarinfo, linkpath, numeric_owner in deferred_links.pop(targetpath, []):
                shutil.copy(targetpath, linkpath)
                self.chown(tarinfo, linkpath, numeric_owner)
            self._extracted_members.add(targetpath)

    def extract(self, *args, **kwargs):
        self._tooltool_do_extract(super(TarFile, self).extract, *args, **kwargs)

    # extractall in versions for cpython that implement PEP 706 call _extract_one
    # instead of extract.
    def _extract_one(self, *args, **kwargs):
        self._tooltool_do_extract(super(TarFile, self)._extract_one, *args, **kwargs)

    def extractall(self, *args, **kwargs):
        self._deferred_links = {}
        self._extracted_members = set()
        super(TarFile, self).extractall(*args, **kwargs)
        for links in self._deferred_links.values():
            for tarinfo, linkpath, numeric_owner in links:
                # log.warn is deprecated in the logging module; use warning.
                log.warning("Cannot create dangling symbolic link: %s", linkpath)
        delattr(self, "_deferred_links")
        delattr(self, "_extracted_members")


def safe_extract(tar, path=".", *, numeric_owner=False):
    """Extract *tar* into *path*, validating every member before it is written."""

    def _files(tar, path):
        for member in tar:
            validate_tar_member(member, path)
            yield member

    tar.extractall(path, members=_files(tar, path), numeric_owner=numeric_owner)


def unpack_file(filename):
    """Untar `filename`, assuming it is uncompressed or compressed with bzip2,
    xz, gzip, zst, or unzip a zip file. The file is assumed to contain a single
    directory with a name matching the base of the given filename.
    Xz support is handled by shelling out to 'tar'."""
    if os.path.isfile(filename) and tarfile.is_tarfile(filename):
        tar_file, zip_ext = os.path.splitext(filename)
        base_file, tar_ext = os.path.splitext(tar_file)
        clean_path(base_file)
        log.info('untarring "%s"' % filename)
        with TarFile.open(filename) as tar:
            safe_extract(tar)
    elif os.path.isfile(filename) and filename.endswith(".tar.zst"):
        # zstandard is imported lazily: it is only required for .tar.zst.
        import zstandard

        base_file = filename.replace(".tar.zst", "")
        clean_path(base_file)
        log.info('untarring "%s"' % filename)
        dctx = zstandard.ZstdDecompressor()
        with dctx.stream_reader(open(filename, "rb")) as fileobj:
            with TarFile.open(fileobj=fileobj, mode="r|") as tar:
                safe_extract(tar)
    elif os.path.isfile(filename) and zipfile.is_zipfile(filename):
        base_file = filename.replace(".zip", "")
        clean_path(base_file)
        log.info('unzipping "%s"' % filename)
        z = zipfile.ZipFile(filename)
        z.extractall()
        z.close()
    else:
        log.error("Unknown archive extension for filename '%s'" % filename)
        return False
    return True
unpack. 1099 unpack_files = [] 1100 1101 # Lets go through the manifest and fetch the files that we want 1102 for f in manifest.file_records: 1103 # case 1: files are already present 1104 if f.present(): 1105 if f.validate(): 1106 present_files.append(f.filename) 1107 if f.unpack: 1108 unpack_files.append(f.filename) 1109 else: 1110 # we have an invalid file here, better to cleanup! 1111 # this invalid file needs to be replaced with a good one 1112 # from the local cash or fetched from a tooltool server 1113 log.info( 1114 "File %s is present locally but it is invalid, so I will remove it " 1115 "and try to fetch it" % f.filename 1116 ) 1117 os.remove(os.path.join(os.getcwd(), f.filename)) 1118 1119 # check if file is already in cache 1120 if cache_folder and f.filename not in present_files: 1121 try: 1122 shutil.copy( 1123 os.path.join(cache_folder, f.digest), 1124 os.path.join(os.getcwd(), f.filename), 1125 ) 1126 log.info( 1127 "File %s retrieved from local cache %s" % (f.filename, cache_folder) 1128 ) 1129 touch(os.path.join(cache_folder, f.digest)) 1130 1131 filerecord_for_validation = FileRecord( 1132 f.filename, f.size, f.digest, f.algorithm 1133 ) 1134 if filerecord_for_validation.validate(): 1135 present_files.append(f.filename) 1136 if f.unpack: 1137 unpack_files.append(f.filename) 1138 else: 1139 # the file copied from the cache is invalid, better to 1140 # clean up the cache version itself as well 1141 log.warning( 1142 "File %s retrieved from cache is invalid! 
I am deleting it from the " 1143 "cache as well" % f.filename 1144 ) 1145 os.remove(os.path.join(os.getcwd(), f.filename)) 1146 os.remove(os.path.join(cache_folder, f.digest)) 1147 except IOError: 1148 log.info( 1149 "File %s not present in local cache folder %s" 1150 % (f.filename, cache_folder) 1151 ) 1152 1153 # now I will try to fetch all files which are not already present and 1154 # valid, appending a suffix to avoid race conditions 1155 temp_file_name = None 1156 # 'filenames' is the list of filenames to be managed, if this variable 1157 # is a non empty list it can be used to filter if filename is in 1158 # present_files, it means that I have it already because it was already 1159 # either in the working dir or in the cache 1160 if ( 1161 f.filename in filenames or len(filenames) == 0 1162 ) and f.filename not in present_files: 1163 log.debug("fetching %s" % f.filename) 1164 temp_file_name = fetch_file( 1165 base_urls, f, auth_file=auth_file, region=region 1166 ) 1167 if temp_file_name: 1168 fetched_files.append((f, temp_file_name)) 1169 else: 1170 failed_files.append(f.filename) 1171 else: 1172 log.debug("skipping %s" % f.filename) 1173 1174 # lets ensure that fetched files match what the manifest specified 1175 for localfile, temp_file_name in fetched_files: 1176 # since I downloaded to a temp file, I need to perform all validations on the temp file 1177 # this is why filerecord_for_validation is created 1178 1179 filerecord_for_validation = FileRecord( 1180 temp_file_name, localfile.size, localfile.digest, localfile.algorithm 1181 ) 1182 1183 if filerecord_for_validation.validate(): 1184 # great! 
1185 # I can rename the temp file 1186 log.info( 1187 "File integrity verified, renaming %s to %s" 1188 % (temp_file_name, localfile.filename) 1189 ) 1190 os.rename( 1191 os.path.join(os.getcwd(), temp_file_name), 1192 os.path.join(os.getcwd(), localfile.filename), 1193 ) 1194 1195 if localfile.unpack: 1196 unpack_files.append(localfile.filename) 1197 1198 # if I am using a cache and a new file has just been retrieved from a 1199 # remote location, I need to update the cache as well 1200 if cache_folder: 1201 log.info("Updating local cache %s..." % cache_folder) 1202 try: 1203 if not os.path.exists(cache_folder): 1204 log.info("Creating cache in %s..." % cache_folder) 1205 os.makedirs(cache_folder, 0o0700) 1206 shutil.copy( 1207 os.path.join(os.getcwd(), localfile.filename), 1208 os.path.join(cache_folder, localfile.digest), 1209 ) 1210 log.info( 1211 "Local cache %s updated with %s" 1212 % (cache_folder, localfile.filename) 1213 ) 1214 touch(os.path.join(cache_folder, localfile.digest)) 1215 except (OSError, IOError): 1216 log.warning( 1217 "Impossible to add file %s to cache folder %s" 1218 % (localfile.filename, cache_folder), 1219 exc_info=False, 1220 ) 1221 else: 1222 failed_files.append(localfile.filename) 1223 log.error("'%s'" % filerecord_for_validation.describe()) 1224 os.remove(temp_file_name) 1225 1226 # Unpack files that need to be unpacked. 
1227 for filename in unpack_files: 1228 if not unpack_file(filename): 1229 failed_files.append(filename) 1230 1231 # If we failed to fetch or validate a file, we need to fail 1232 if len(failed_files) > 0: 1233 log.error("The following files failed: '%s'" % "', ".join(failed_files)) 1234 return False 1235 return True 1236 1237 1238 def freespace(p): 1239 "Returns the number of bytes free under directory `p`" 1240 if sys.platform == "win32": # pragma: no cover 1241 # os.statvfs doesn't work on Windows 1242 import win32file 1243 1244 secsPerClus, bytesPerSec, nFreeClus, totClus = win32file.GetDiskFreeSpace(p) 1245 return secsPerClus * bytesPerSec * nFreeClus 1246 else: 1247 r = os.statvfs(p) 1248 return r.f_frsize * r.f_bavail 1249 1250 1251 def purge(folder, gigs): 1252 """If gigs is non 0, it deletes files in `folder` until `gigs` GB are free, 1253 starting from older files. If gigs is 0, a full purge will be performed. 1254 No recursive deletion of files in subfolder is performed.""" 1255 1256 full_purge = bool(gigs == 0) 1257 gigs *= 1024 * 1024 * 1024 1258 1259 if not full_purge and freespace(folder) >= gigs: 1260 log.info("No need to cleanup") 1261 return 1262 1263 files = [] 1264 for f in os.listdir(folder): 1265 p = os.path.join(folder, f) 1266 # it deletes files in folder without going into subfolders, 1267 # assuming the cache has a flat structure 1268 if not os.path.isfile(p): 1269 continue 1270 mtime = os.path.getmtime(p) 1271 files.append((mtime, p)) 1272 1273 # iterate files sorted by mtime 1274 for _, f in sorted(files): 1275 log.info("removing %s to free up space" % f) 1276 try: 1277 os.remove(f) 1278 except OSError: 1279 log.info("Impossible to remove %s" % f, exc_info=True) 1280 if not full_purge and freespace(folder) >= gigs: 1281 break 1282 1283 1284 def _log_api_error(e): 1285 if hasattr(e, "hdrs") and e.hdrs["content-type"] == "application/json": 1286 json_resp = json.load(e.fp) 1287 log.error( 1288 "%s: %s" % (json_resp["error"]["name"], 
def _authorize(req, auth_file):
    """Attach an Authorization header to `req`.

    Credentials come either from the TASKCLUSTER_CLIENT_ID /
    TASKCLUSTER_ACCESS_TOKEN environment variables (when no `auth_file` is
    given) or from `auth_file`, whose content is treated as Taskcluster JSON
    credentials if it parses as JSON and as a plain Bearer token otherwise.
    If neither source is available the request is left unauthenticated.
    """
    is_taskcluster_auth = False

    if not auth_file:
        try:
            taskcluster_env_keys = {
                "clientId": "TASKCLUSTER_CLIENT_ID",
                "accessToken": "TASKCLUSTER_ACCESS_TOKEN",
            }
            auth_content = {k: os.environ[v] for k, v in taskcluster_env_keys.items()}
            is_taskcluster_auth = True
        except KeyError:
            # no env credentials either: leave the request unauthenticated
            return
    else:
        with open(auth_file) as f:
            auth_content = f.read().strip()
        try:
            auth_content = json.loads(auth_content)
            is_taskcluster_auth = True
        except Exception:
            # not JSON: fall through and treat the content as a Bearer token
            pass

    if is_taskcluster_auth:
        taskcluster_header = make_taskcluster_header(auth_content, req)
        log.debug("Using taskcluster credentials in %s" % auth_file)
        req.add_unredirected_header("Authorization", taskcluster_header)
    else:
        log.debug("Using Bearer token in %s" % auth_file)
        req.add_unredirected_header("Authorization", "Bearer %s" % auth_content)


def _send_batch(base_url, auth_file, batch, region):
    """POST an upload batch to the server; return the parsed 'result'
    payload, or None on a request error."""
    url = urljoin(base_url, "upload")
    if region is not None:
        url += "?region=" + region
    data = json.dumps(batch).encode("utf-8")
    req = Request(url, data, {"Content-Type": "application/json"})
    _authorize(req, auth_file)
    try:
        resp = _urlopen(req)
    except (URLError, HTTPError) as e:
        _log_api_error(e)
        return None
    return json.load(resp)["result"]


def _s3_upload(filename, file):
    """PUT `filename` to file['put_url'], annotating `file` with
    'upload_ok' and, on failure, 'upload_exception'."""
    # urllib2 does not support streaming, so we fall back to good old httplib
    url = urlparse(file["put_url"])
    cls = HTTPSConnection if url.scheme == "https" else HTTPConnection
    # FIX: the fallback port used to be 443 regardless of scheme; derive
    # the default from the scheme instead (80 for plain http).
    default_port = 443 if url.scheme == "https" else 80
    if ":" in url.netloc:
        host, port = url.netloc.split(":")
        port = int(port)
    else:
        host, port = url.netloc, default_port
    conn = cls(host, port)
    try:
        req_path = "%s?%s" % (url.path, url.query) if url.query else url.path
        with open(filename, "rb") as f:
            content_length = file["size"]
            conn.request(
                "PUT",
                req_path,
                f,
                {
                    "Content-Type": "application/octet-stream",
                    "Content-Length": str(content_length),
                },
            )
            resp = conn.getresponse()
            resp_body = resp.read()
            conn.close()
            if resp.status != 200:
                raise RuntimeError(
                    "Non-200 return from AWS: %s %s\n%s"
                    % (resp.status, resp.reason, resp_body)
                )
    except Exception:
        file["upload_exception"] = sys.exc_info()
        file["upload_ok"] = False
    else:
        file["upload_ok"] = True


def _notify_upload_complete(base_url, auth_file, file):
    """Tell the server that the S3 upload for `file` finished; on a 409
    (URL not yet expired) wait as instructed and retry."""
    req = Request(urljoin(base_url, "upload/complete/%(algorithm)s/%(digest)s" % file))
    _authorize(req, auth_file)
    try:
        _urlopen(req)
    except HTTPError as e:
        if e.code != 409:
            _log_api_error(e)
            return
        # 409 indicates that the upload URL hasn't expired yet and we
        # should retry after a delay
        to_wait = int(e.headers.get("X-Retry-After", 60))
        log.warning("Waiting %d seconds for upload URLs to expire" % to_wait)
        time.sleep(to_wait)
        _notify_upload_complete(base_url, auth_file, file)
    except Exception:
        log.exception("While notifying server of upload completion:")


def upload(manifest, message, base_urls, auth_file, region):
    """Upload every file in `manifest` to the tooltool server.

    Validates the manifest locally, requests upload URLs in one batch,
    uploads each new file to S3 in its own thread, then notifies the
    server of completion. Returns True on success, False/None on failure.
    """
    try:
        manifest = open_manifest(manifest)
    except InvalidManifest:
        # FIX: the '%s' placeholder was never filled in; include the
        # manifest path in the message.
        log.exception("failed to load manifest file at '%s'" % manifest)
        return False

    # verify the manifest, since we'll need the files present to upload
    if not manifest.validate():
        log.error("manifest is invalid")
        return False

    if any(fr.visibility is None for fr in manifest.file_records):
        log.error("All files in a manifest for upload must have a visibility set")
        # FIX: this error previously did not abort the upload even though
        # the condition is mandatory.
        return False

    # convert the manifest to an upload batch
    batch = {
        "message": message,
        "files": {},
    }
    for fr in manifest.file_records:
        batch["files"][fr.filename] = {
            "size": fr.size,
            "digest": fr.digest,
            "algorithm": fr.algorithm,
            "visibility": fr.visibility,
        }

    # make the upload request
    resp = _send_batch(base_urls[0], auth_file, batch, region)
    if not resp:
        return None
    files = resp["files"]

    # Upload the files, each in a thread. This allows us to start all of the
    # uploads before any of the URLs expire.
    threads = {}
    for filename, file in files.items():
        if "put_url" in file:
            log.info("%s: starting upload" % (filename,))
            thd = threading.Thread(target=_s3_upload, args=(filename, file))
            thd.daemon = True  # FIX: was the int 1; daemon expects a bool
            thd.start()
            threads[filename] = thd
        else:
            log.info("%s: already exists on server" % (filename,))

    # re-join all of those threads as they exit
    success = True
    while threads:
        for filename, thread in list(threads.items()):
            if not thread.is_alive():
                # _s3_upload has annotated file with result information
                file = files[filename]
                thread.join()
                if file["upload_ok"]:
                    log.info("%s: uploaded" % filename)
                else:
                    log.error(
                        "%s: failed" % filename, exc_info=file["upload_exception"]
                    )
                    success = False
                del threads[filename]

    # notify the server that the uploads are completed. If the notification
    # fails, we don't consider that an error (the server will notice
    # eventually)
    for filename, file in files.items():
        if "put_url" in file and file["upload_ok"]:
            log.info("notifying server of upload completion for %s" % (filename,))
            _notify_upload_complete(base_urls[0], auth_file, file)

    return success


def send_operation_on_file(data, base_urls, digest, auth_file):
    """PATCH the given JSON operation list against the file identified by
    `digest`; return True on success."""
    url = base_urls[0]
    url = urljoin(url, "file/sha512/" + digest)

    # FIX: Request data must be bytes on Python 3 (matches _send_batch);
    # a plain str here makes urlopen raise a TypeError.
    data = json.dumps(data).encode("utf-8")

    req = Request(url, data, {"Content-Type": "application/json"})
    req.get_method = lambda: "PATCH"

    _authorize(req, auth_file)

    try:
        _urlopen(req)
    except (URLError, HTTPError) as e:
        _log_api_error(e)
        return False
    return True


def change_visibility(base_urls, digest, visibility, auth_file):
    """Set the visibility of the file identified by `digest`."""
    data = [
        {
            "op": "set_visibility",
            "visibility": visibility,
        }
    ]
    return send_operation_on_file(data, base_urls, digest, auth_file)


def delete_instances(base_urls, digest, auth_file):
    """Delete all stored instances of the file identified by `digest`."""
    data = [
        {
            "op": "delete_instances",
        }
    ]
    return send_operation_on_file(data, base_urls, digest, auth_file)


def process_command(options, args):
    """I know how to take a list of program arguments and
    start doing the right thing with them"""
    cmd = args[0]
    cmd_args = args[1:]
    log.debug("processing '%s' command with args '%s'" % (cmd, '", "'.join(cmd_args)))
    log.debug("using options: %s" % options)

    if cmd == "list":
        return list_manifest(options["manifest"])
    if cmd == "validate":
        return validate_manifest(options["manifest"])
    elif cmd == "add":
        return add_files(
            options["manifest"],
            options["algorithm"],
            cmd_args,
            options["version"],
            options["visibility"],
            options["unpack"],
        )
    elif cmd == "purge":
        if options["cache_folder"]:
            purge(folder=options["cache_folder"], gigs=options["size"])
            # FIX: a successful purge used to fall through and return None,
            # which main() turned into exit code 1.
            return True
        else:
            log.critical("please specify the cache folder to be purged")
            return False
    elif cmd == "fetch":
        return fetch_files(
            options["manifest"],
            options["base_url"],
            cmd_args,
            cache_folder=options["cache_folder"],
            auth_file=options.get("auth_file"),
            region=options.get("region"),
        )
    elif cmd == "upload":
        if not options.get("message"):
            log.critical("upload command requires a message")
            return False
        return upload(
            options.get("manifest"),
            options.get("message"),
            options.get("base_url"),
            options.get("auth_file"),
            options.get("region"),
        )
    elif cmd == "change-visibility":
        if not options.get("digest"):
            log.critical("change-visibility command requires a digest option")
            return False
        if not options.get("visibility"):
            log.critical("change-visibility command requires a visibility option")
            return False
        return change_visibility(
            options.get("base_url"),
            options.get("digest"),
            options.get("visibility"),
            options.get("auth_file"),
        )
    elif cmd == "delete":
        if not options.get("digest"):
            log.critical("delete command requires a digest option")
            return False
        return delete_instances(
            options.get("base_url"),
            options.get("digest"),
            options.get("auth_file"),
        )
    else:
        log.critical('command "%s" is not implemented' % cmd)
        return False
def main(argv, _skip_logging=False):
    """Command-line entry point.

    Parses the options in `argv`, configures logging, and dispatches to
    process_command(). Returns 0 on success and 1 on failure.
    """
    usage = """usage: %prog [options] command [FILES]

Supported commands are:
- list: list files in the manifest
- validate: validate the manifest
- add: add records for FILES to the manifest
- purge: cleans up the cache folder
- fetch: retrieve files listed in the manifest (or FILES if specified)
- upload: upload files listed in the manifest; message is required
- change-visibility: sets the visibility of the file identified by the given digest
- delete: deletes the file identified by the given digest"""
    parser = optparse.OptionParser(usage=usage)

    # logging verbosity: -q raises the threshold to ERROR, -v lowers it to
    # DEBUG; the default is INFO
    parser.add_option(
        "-q",
        "--quiet",
        default=logging.INFO,
        dest="loglevel",
        action="store_const",
        const=logging.ERROR,
    )
    parser.add_option(
        "-v", "--verbose", dest="loglevel", action="store_const", const=logging.DEBUG
    )
    parser.add_option(
        "-m",
        "--manifest",
        default=DEFAULT_MANIFEST_NAME,
        dest="manifest",
        action="store",
        help="specify the manifest file to be operated on",
    )
    parser.add_option(
        "-d",
        "--algorithm",
        default="sha512",
        dest="algorithm",
        action="store",
        help="hashing algorithm to use (only sha512 is allowed)",
    )
    parser.add_option(
        "--digest",
        default=None,
        dest="digest",
        action="store",
        help="digest hash to change visibility for",
    )
    parser.add_option(
        "--visibility",
        default=None,
        dest="visibility",
        choices=["internal", "public"],
        help='Visibility level of this file; "internal" is for '
        "files that cannot be distributed out of the company "
        'but not for secrets; "public" files are available to '
        "anyone without restriction",
    )
    parser.add_option(
        "--unpack",
        default=False,
        dest="unpack",
        action="store_true",
        help="Request unpacking this file after fetch."
        " This is helpful with tarballs.",
    )
    parser.add_option(
        "--version",
        default=None,
        dest="version",
        action="store",
        help="Version string for this file. This annotates the "
        "manifest entry with a version string to help "
        "identify the contents.",
    )
    parser.add_option(
        "-o",
        "--overwrite",
        default=False,
        dest="overwrite",
        action="store_true",
        help="UNUSED; present for backward compatibility",
    )
    parser.add_option(
        "--url",
        dest="base_url",
        action="append",
        help="RelengAPI URL ending with /tooltool/; default "
        "is appropriate for Mozilla",
    )
    parser.add_option(
        "-c", "--cache-folder", dest="cache_folder", help="Local cache folder"
    )
    parser.add_option(
        "-s",
        "--size",
        help="free space required (in GB)",
        dest="size",
        type="float",
        default=0.0,
    )
    parser.add_option(
        "-r",
        "--region",
        help="Preferred AWS region for upload or fetch; " "example: --region=us-west-2",
    )
    parser.add_option(
        "--message",
        help='The "commit message" for an upload; format with a bug number '
        "and brief comment",
        dest="message",
    )
    parser.add_option(
        "--authentication-file",
        help="Use the RelengAPI token found in the given file to "
        "authenticate to the RelengAPI server.",
        dest="auth_file",
    )

    (opts, args) = parser.parse_args(argv[1:])

    # pick a default server URL when none was supplied, preferring the
    # taskcluster proxy if one is configured in the environment
    if not opts.base_url:
        tooltool_host = os.environ.get("TOOLTOOL_HOST", "tooltool.mozilla-releng.net")
        taskcluster_proxy_url = os.environ.get("TASKCLUSTER_PROXY_URL")
        if taskcluster_proxy_url:
            tooltool_url = f"{taskcluster_proxy_url}/{tooltool_host}"
        else:
            tooltool_url = f"https://{tooltool_host}"
        opts.base_url = [tooltool_url]

    # ensure all URLs have a trailing slash
    def with_trailing_slash(url):
        return url if url.endswith("/") else (url + "/")

    opts.base_url = [with_trailing_slash(u) for u in opts.base_url]

    # expand ~ in --authentication-file
    if opts.auth_file:
        opts.auth_file = os.path.expanduser(opts.auth_file)

    # Dictionaries are easier to work with
    options = vars(opts)

    log.setLevel(options["loglevel"])

    # Set up logging, for now just to the console
    if not _skip_logging:  # pragma: no cover
        handler = logging.StreamHandler()
        handler.setFormatter(logging.Formatter("%(levelname)s - %(message)s"))
        log.addHandler(handler)

    if options["algorithm"] != "sha512":
        parser.error("only --algorithm sha512 is supported")

    if len(args) < 1:
        parser.error("You must specify a command")

    return 0 if process_command(options, args) else 1


if __name__ == "__main__":  # pragma: no cover
    sys.exit(main(sys.argv))