tbbutils.py (4422B)
import re
from urllib.request import Request, urlopen

# Matches the target of every double-quoted `<a href="...">` in an index page.
_HREF_RE = re.compile(r'<a href="([^"]+)"')


def list_files_http(url):
    """
    Return the link targets found in the HTML page served at `url`.

    The page is fetched with a GET request and every `<a href="...">` target
    is collected in document order, except for the parent-directory link
    (`../`). Entries whose name contains "tor-expert-bundle-aar" or
    "tor-expert-bundle" are folders, so they are rewritten to point at the
    fixed-name file expected inside them.

    This is best-effort: an empty list is returned when the request fails,
    when the response status is not 200, or when the body cannot be decoded.
    """
    try:
        req = Request(url, method="GET")
        with urlopen(req) as response:
            if response.status != 200:
                return []
            html = response.read().decode()
    except Exception:
        # Deliberately broad: callers treat "could not list" and "nothing
        # listed" the same way, so any failure maps to an empty listing.
        return []

    links = []
    for href in _HREF_RE.findall(html):
        if href == "../":
            continue

        # The "-aar" check must come first, because "tor-expert-bundle" is a
        # substring of "tor-expert-bundle-aar".
        if "tor-expert-bundle-aar" in href:
            href = f"{href.rstrip('/')}/tor-expert-bundle.aar"
        elif "tor-expert-bundle" in href:
            href = f"{href.rstrip('/')}/tor-expert-bundle.tar.gz"

        links.append(href)

    return links


TOR_BROWSER_BUILD_ARTIFACTS = [
    # Tor Browser Build-only artifacts, these artifacts are not common with Firefox.
    "noscript",
    "fonts",
    "tor-expert-bundle",
    "tor-expert-bundle-aar",
    "application-services",
]

# Mapping of artifacts from taskcluster to tor-browser-build.
ARTIFACT_NAME_MAP = {
    "cbindgen": "cbindgen",
    # FIXME (tor-browser-build#41471): nasm is more or less ready to go, but it needs to have the
    # executable in the root of the artifact folder instead of nasm/bin.
    # "nasm": "nasm",
    # FIXME (tor-browser-build#41421): the clang project as is, is not ready to use. It needs
    # to be repackaged with a bunch of things that differ per platform. Fun stuff.
    # "clang": "clang",
    "node": "node",
}


def get_artifact_index(artifact_path, artifact):
    """
    Return a unique identifier for the given artifact based on its path.

    In most cases, artifacts built by tor-browser-build include part of their
    SHA sum or version in the filename, so the file name itself serves as a unique
    identifier. However, some artifacts are stored within subfolders where the file
    name alone is not unique — in those cases, the name of the parent directory
    provides the unique identifier instead.
    """
    # NOTE(review): list_files_http also rewrites "tor-expert-bundle-aar"
    # entries to a fixed file name inside a sub-folder; confirm whether that
    # artifact should be indexed by its parent directory as well.
    if artifact in ["tor-expert-bundle"]:
        return artifact_path.rsplit("/", 2)[-2]

    return artifact_path.rsplit("/", 1)[-1]


def get_artifact_name(original_artifact_name, host):
    """
    Map an artifact name to its tor-browser-build name.

    Returns `original_artifact_name` unchanged when it is listed in
    TOR_BROWSER_BUILD_ARTIFACTS, the ARTIFACT_NAME_MAP entry when `host` is
    "linux64", and None otherwise (including unmapped names).
    """
    # These are not build artifacts, they are pre-built artifacts to be added to the final build,
    # therefore this check can come before the host check.
    if original_artifact_name in TOR_BROWSER_BUILD_ARTIFACTS:
        return original_artifact_name

    if host != "linux64":
        # Tor browser build only has development artifacts for linux64 host systems.
        return None

    return ARTIFACT_NAME_MAP.get(original_artifact_name)


def get_artifact_path(
    url, artifact, target, prefix="", log=lambda *args, **kwargs: {}
):
    """
    Return the URL of the file to download for `artifact`, or None.

    The directory `{url}/{prefix}` (or `{url}/{artifact}` when `prefix` is
    empty) is listed, and the entries whose name starts with `artifact` are
    the candidates. A single candidate is returned as is; otherwise the
    candidates are narrowed by `target.tor_browser_build_alias` and then by
    `target.cpu`.

    `log` is called with a message when no candidate is found. None is
    returned in that case, and also when several files match the OS but none
    of them matches the CPU.
    """
    path = prefix or artifact
    # Every returned URL must live in the directory that was actually listed,
    # which is `prefix` when one is given and not necessarily `artifact`.
    base_url = f"{url}/{path}"

    # The `?C=M;O=D` parameters make it so links are ordered by
    # the last modified date. This is here to make us get the latest
    # version of the file in the case there are multiple and we just
    # grab the first one.
    files = list_files_http(f"{base_url}?C=M;O=D")

    if not files:
        log(f"No files found in {url} for {artifact}.")
        return None

    artifact_files = [file for file in files if file.startswith(artifact)]

    if not artifact_files:
        log(f"No files found in {url} for {artifact}.")
        return None

    if len(artifact_files) == 1:
        return f"{base_url}/{artifact_files[0]}"

    files_per_os = [
        file for file in artifact_files if target.tor_browser_build_alias in file
    ]

    # If there are files in the folder, but they don't have the OS in the name
    # it probably means we can get any of them because they can be used to build
    # for any OS. So let's just get the first one.
    #
    # Note: It could be the case that the artifact _is_ OS dependent, but there
    # just are no files for the OS we are looking for. In that case, this will
    # return an incorrect artifact. This should not happen often though and is
    # something we cannot address until artifact names are standardized on tbb.
    if not files_per_os:
        return f"{base_url}/{artifact_files[0]}"

    if len(files_per_os) == 1:
        return f"{base_url}/{files_per_os[0]}"

    matches = [file for file in files_per_os if target.cpu in file]

    return f"{base_url}/{matches[0]}" if matches else None