tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

tbbutils.py (4422B)


      1 import re
      2 from urllib.request import Request, urlopen
      3 
      4 
      5 def list_files_http(url):
      6    try:
      7        req = Request(url, method="GET")
      8        with urlopen(req) as response:
      9            if response.status != 200:
     10                return []
     11            html = response.read().decode()
     12    except Exception:
     13        return []
     14 
     15    links = []
     16    for href in re.findall(r'<a href="([^"]+)"', html):
     17        if href == "../":
     18            continue
     19 
     20        if "tor-expert-bundle-aar" in href:
     21            href = f"{href.rstrip('/')}/tor-expert-bundle.aar"
     22        elif "tor-expert-bundle" in href:
     23            href = f"{href.rstrip('/')}/tor-expert-bundle.tar.gz"
     24 
     25        links.append(href)
     26 
     27    return links
     28 
     29 
     30 TOR_BROWSER_BUILD_ARTIFACTS = [
     31    # Tor Browser Build-only artifacts, these artifacts are not common with Firefox.
     32    "noscript",
     33    "fonts",
     34    "tor-expert-bundle",
     35    "tor-expert-bundle-aar",
     36    "application-services",
     37 ]
     38 
     39 # Mapping of artifacts from taskcluster to tor-browser-build.
     40 ARTIFACT_NAME_MAP = {
     41    "cbindgen": "cbindgen",
     42    # FIXME (tor-browser-build#41471): nasm is more or less ready to go, but it needs to have the
     43    # executable in the root of the artifact folder instead of nasm/bin.
     44    # "nasm": "nasm",
     45    # FIXME (tor-browser-build#41421): the clang project as is, is not ready to use. It needs
     46    # to be repackaged with a bunch of things that differ per platform. Fun stuff.
     47    # "clang": "clang",
     48    "node": "node",
     49 }
     50 
     51 
     52 def get_artifact_index(artifact_path, artifact):
     53    """
     54    Return a unique identifier for the given artifact based on its path.
     55 
     56    In most cases, artifacts built by tor-browser-build include part of their
     57    SHA sum or version in the filename, so the file name itself serves as a unique
     58    identifier. However, some artifacts are stored within subfolders where the file
     59    name alone is not unique — in those cases, the name of the parent directory
     60    provides the unique identifier instead.
     61    """
     62    if artifact in ["tor-expert-bundle"]:
     63        return artifact_path.rsplit("/", 2)[-2]
     64 
     65    return artifact_path.rsplit("/", 1)[-1]
     66 
     67 
     68 def get_artifact_name(original_artifact_name, host):
     69    # These are not build artifacts, they are pre-built artifacts to be added to the final build,
     70    # therefore this check can come before the host check.
     71    if original_artifact_name in TOR_BROWSER_BUILD_ARTIFACTS:
     72        return original_artifact_name
     73 
     74    if host != "linux64":
     75        # Tor browser build only has development artifacts for linux64 host systems.
     76        return None
     77 
     78    return ARTIFACT_NAME_MAP.get(original_artifact_name)
     79 
     80 
     81 def get_artifact_path(url, artifact, target, prefix="", log=lambda *args, **kwargs: {}):
     82    if prefix:
     83        path = prefix
     84    else:
     85        path = artifact
     86 
     87    # The `?C=M;O=D` parameters make it so links are ordered by
     88    # the last modified date. This here to make us get the latest
     89    # version of file in the case there are multiple and we just
     90    # grab the first one.
     91    files = list_files_http(f"{url}/{path}?C=M;O=D")
     92 
     93    if not files:
     94        log(f"No files found in {url} for {artifact}.")
     95        return None
     96 
     97    def filter_files(files, keyword):
     98        return [file for file in files if keyword in file]
     99 
    100    artifact_files = [file for file in files if file.startswith(artifact)]
    101 
    102    if len(artifact_files) == 0:
    103        log(f"No files found in {url} for {artifact}.")
    104        return None
    105 
    106    if len(artifact_files) == 1:
    107        return f"{url}/{path}/{artifact_files[0]}"
    108 
    109    files_per_os = filter_files(artifact_files, target.tor_browser_build_alias)
    110 
    111    # If there are files in the folder, but they don't have the OS in the name
    112    # it probably means we can get any of them because they can be used to build
    113    # for any OS. So let's just get the first one.
    114    #
    115    # Note: It could be the case that the artifact _is_ OS dependant, but there
    116    # just are no files for the OS we are looking for. In that case, this will
    117    # return an incorrect artifact. This should not happen often though and is
    118    # something we cannot address until artifact names are standardized on tbb.
    119    if len(files_per_os) == 0:
    120        return f"{url}/{artifact}/{artifact_files[0]}"
    121 
    122    elif len(files_per_os) == 1:
    123        return f"{url}/{artifact}/{files_per_os[0]}"
    124 
    125    matches = filter_files(files_per_os, target.cpu)
    126 
    127    return f"{url}/{artifact}/{matches[0]}" if matches else None