tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

partners.py (21348B)


      1 # This Source Code Form is subject to the terms of the Mozilla Public
      2 # License, v. 2.0. If a copy of the MPL was not distributed with this
      3 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
      4 
      5 
      6 import logging
      7 import os
      8 import xml.etree.ElementTree as ET
      9 from urllib.parse import quote, urlencode
     10 
     11 import requests
     12 import yaml
     13 from redo import retry
     14 from taskgraph.util import json
     15 from taskgraph.util.copy import deepcopy
     16 from taskgraph.util.schema import resolve_keyed_by
     17 
     18 from gecko_taskgraph.util.attributes import release_level
     19 
# Suppress chatty requests logging
logging.getLogger("requests").setLevel(logging.WARNING)

# Module-level logger used by the helpers in this file
log = logging.getLogger(__name__)

# Endpoint that query_api() POSTs every GraphQL query to
GITHUB_API_ENDPOINT = "https://api.github.com/graphql"
     26 
     27 """
LOGIN_QUERY, MANIFEST_QUERY, and REPACK_CFG_QUERY are all written against the GitHub v4 API,
which uses GraphQL. See https://developer.github.com/v4/
     30 """
     31 
# Asks for the login and name of the account that owns the token. check_login()
# sends this query purely to confirm the token is accepted; the response body
# is discarded.
LOGIN_QUERY = """query {
 viewer {
   login
   name
 }
}
"""
     39 
# Returns the text of a single file from the master branch of a manifest
# repository. Callers fill in %(owner)s, %(repo)s and %(file)s; in this module
# the file is either default.xml or attribution_config.yml.
MANIFEST_QUERY = """query {
 repository(owner:"%(owner)s", name:"%(repo)s") {
   object(expression: "master:%(file)s") {
     ... on Blob {
       text
     }
   }
 }
}
"""
     51 # Example response:
     52 # {
     53 #   "data": {
     54 #     "repository": {
     55 #       "object": {
     56 #         "text": "<?xml version=\"1.0\" ?>\n<manifest>\n  " +
     57 #           "<remote fetch=\"git@github.com:mozilla-partners/\" name=\"mozilla-partners\"/>\n  " +
     58 #           "<remote fetch=\"git@github.com:mozilla/\" name=\"mozilla\"/>\n\n  " +
     59 #           "<project name=\"repack-scripts\" path=\"scripts\" remote=\"mozilla-partners\" " +
     60 #           "revision=\"master\"/>\n  <project name=\"build-tools\" path=\"scripts/tools\" " +
     61 #           "remote=\"mozilla\" revision=\"master\"/>\n  <project name=\"mozilla-EME-free\" " +
     62 #           "path=\"partners/mozilla-EME-free\" remote=\"mozilla-partners\" " +
     63 #           "revision=\"master\"/>\n</manifest>\n"
     64 #       }
     65 #     }
     66 #   }
     67 # }
     68 
# Returns the contents of desktop/*/repack.cfg for a partner repository.
# Callers fill in %(owner)s, %(repo)s and %(revision)s. The query walks two
# directory levels below desktop/ and returns the text of every blob found
# there; get_repack_configs() keeps only the entries named repack.cfg.
REPACK_CFG_QUERY = """query{
 repository(owner:"%(owner)s", name:"%(repo)s") {
   object(expression: "%(revision)s:desktop/"){
     ... on Tree {
       entries {
         name
         object {
           ... on Tree {
             entries {
               name
               object {
                 ... on Blob {
                   text
                 }
               }
             }
           }
         }
       }
     }
   }
 }
}
"""
     94 # Example response:
     95 # {
     96 #   "data": {
     97 #     "repository": {
     98 #       "object": {
     99 #         "entries": [
    100 #           {
    101 #             "name": "mozilla-EME-free",
    102 #             "object": {
    103 #               "entries": [
    104 #                 {
    105 #                   "name": "distribution",
    106 #                   "object": {}
    107 #                 },
    108 #                 {
    109 #                   "name": "repack.cfg",
    110 #                   "object": {
    111 #                     "text": "aus=\"mozilla-EMEfree\"\ndist_id=\"mozilla-EMEfree\"\n" +
    112 #                             "dist_version=\"1.0\"\nlinux-i686=true\nlinux-x86_64=true\n" +
    113 #                             " locales=\"ach af de en-US\"\nmac=true\nwin32=true\nwin64=true\n" +
    114 #                             "output_dir=\"%(platform)s-EME-free/%(locale)s\"\n\n" +
    115 #                             "# Upload params\nbucket=\"net-mozaws-prod-delivery-firefox\"\n" +
    116 #                             "upload_to_candidates=true\n"
    117 #                   }
    118 #                 }
    119 #               ]
    120 #             }
    121 #           }
    122 #         ]
    123 #       }
    124 #     }
    125 #   }
    126 # }
    127 
# Map platforms in repack.cfg into their equivalents in taskcluster.
# parse_config() ignores the value of any platform key that is listed here but
# not set to "true"; keys absent from this map are not treated as platforms.
TC_PLATFORM_PER_FTP = {
    "linux-x86_64": "linux64-shippable",
    "mac": "macosx64-shippable",
    "win32": "win32-shippable",
    "win64": "win64-shippable",
    "win64-aarch64": "win64-aarch64-shippable",
}

# Base url of the secrets service as seen through the taskcluster proxy;
# get_token() appends the secret path to it.
TASKCLUSTER_PROXY_SECRET_ROOT = "http://taskcluster/secrets/v1/secret"

# browser/locales/l10n-changesets.json, located four directory levels above
# this file. fix_partner_config() reads its keys as the list of known locales.
LOCALES_FILE = os.path.join(
    os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))),
    "browser",
    "locales",
    "l10n-changesets.json",
)

# cache data at the module level: maps kind -> partner data, filled in by
# get_partner_config_by_url()
partner_configs = {}
    148 
    149 
    150 def get_token(params):
    151    """We use a Personal Access Token from Github to lookup partner config. No extra scopes are
    152    needed on the token to read public repositories, but need the 'repo' scope to see private
    153    repositories. This is not fine grained and also grants r/w access, but is revoked at the repo
    154    level.
    155    """
    156 
    157    # Allow for local taskgraph debugging
    158    if os.environ.get("GITHUB_API_TOKEN"):
    159        return os.environ["GITHUB_API_TOKEN"]
    160 
    161    # The 'usual' method - via taskClusterProxy for decision tasks
    162    url = "{secret_root}/project/releng/gecko/build/level-{level}/partner-github-api".format(
    163        secret_root=TASKCLUSTER_PROXY_SECRET_ROOT, **params
    164    )
    165    try:
    166        resp = retry(
    167            requests.get,
    168            attempts=2,
    169            sleeptime=10,
    170            args=(url,),
    171            kwargs={"timeout": 60, "headers": ""},
    172        )
    173        j = resp.json()
    174        return j["secret"]["key"]
    175    except (requests.ConnectionError, ValueError, KeyError):
    176        raise RuntimeError("Could not get Github API token to lookup partner data")
    177 
    178 
    179 def query_api(query, token):
    180    """Make a query with a Github auth header, returning the json"""
    181    headers = {"Authorization": "bearer %s" % token}
    182    r = requests.post(GITHUB_API_ENDPOINT, json={"query": query}, headers=headers)
    183    r.raise_for_status()
    184 
    185    j = r.json()
    186    if "errors" in j:
    187        raise RuntimeError("Github query error - %s", j["errors"])
    188    return j
    189 
    190 
def check_login(token):
    """Verify that token is accepted by the Github API.

    Sends LOGIN_QUERY through query_api() and ignores the response body, so a
    rejected login surfaces as whatever exception query_api() raises.
    """
    log.debug("Checking we have a valid login")
    query_api(LOGIN_QUERY, token)
    194 
    195 
    196 def get_repo_params(repo):
    197    """Parse the organisation and repo name from an https or git url for a repo"""
    198    if repo.startswith("https"):
    199        # eg https://github.com/mozilla-partners/mozilla-EME-free
    200        return repo.rsplit("/", 2)[-2:]
    201    if repo.startswith("git@"):
    202        # eg git@github.com:mozilla-partners/mailru.git
    203        repo = repo.replace(".git", "")
    204        return repo.split(":")[-1].split("/")
    205 
    206 
    207 def get_partners(manifestRepo, token):
    208    """Given the url to a manifest repository, retrieve the default.xml and parse it into a
    209    list of partner repos.
    210    """
    211    log.debug("Querying for manifest default.xml in %s", manifestRepo)
    212    owner, repo = get_repo_params(manifestRepo)
    213    query = MANIFEST_QUERY % {"owner": owner, "repo": repo, "file": "default.xml"}
    214    raw_manifest = query_api(query, token)
    215    log.debug("Raw manifest: %s", raw_manifest)
    216    if not raw_manifest["data"]["repository"]:
    217        raise RuntimeError(
    218            "Couldn't load partner manifest at %s, insufficient permissions ?"
    219            % manifestRepo
    220        )
    221    e = ET.fromstring(raw_manifest["data"]["repository"]["object"]["text"])
    222 
    223    remotes = {}
    224    partners = {}
    225    for child in e:
    226        if child.tag == "remote":
    227            name = child.attrib["name"]
    228            url = child.attrib["fetch"]
    229            remotes[name] = url
    230            log.debug("Added remote %s at %s", name, url)
    231        elif child.tag == "project":
    232            # we don't need to check any code repos
    233            if "scripts" in child.attrib["path"]:
    234                continue
    235            owner, _ = get_repo_params(remotes[child.attrib["remote"]] + "_")
    236            partner_url = {
    237                "owner": owner,
    238                "repo": child.attrib["name"],
    239                "revision": child.attrib["revision"],
    240            }
    241            partners[child.attrib["name"]] = partner_url
    242            log.debug(
    243                "Added partner %s at revision %s"
    244                % (partner_url["repo"], partner_url["revision"])
    245            )
    246    return partners
    247 
    248 
    249 def parse_config(data):
    250    """Parse a single repack.cfg file into a python dictionary.
    251    data is contents of the file, in "foo=bar\nbaz=buzz" style. We do some translation on
    252    locales and platforms data, otherwise passthrough
    253    """
    254    ALLOWED_KEYS = (
    255        "locales",
    256        "platforms",
    257        "upload_to_candidates",
    258        "repack_stub_installer",
    259        "publish_to_releases",
    260    )
    261    config = {"platforms": []}
    262    for l in data.splitlines():
    263        if "=" in l:
    264            l = str(l)
    265            key, value = l.split("=", 1)
    266            value = value.strip("'\"").rstrip("'\"")
    267            if key in TC_PLATFORM_PER_FTP.keys():
    268                if value.lower() == "true":
    269                    config["platforms"].append(TC_PLATFORM_PER_FTP[key])
    270                continue
    271            if key not in ALLOWED_KEYS:
    272                continue
    273            if key == "locales":
    274                # a list please
    275                value = value.split(" ")
    276            config[key] = value
    277    return config
    278 
    279 
    280 def get_repack_configs(repackRepo, token):
    281    """For a partner repository, retrieve all the repack.cfg files and parse them into a dict"""
    282    log.debug("Querying for configs in %s", repackRepo)
    283    query = REPACK_CFG_QUERY % repackRepo
    284    raw_configs = query_api(query, token)
    285    raw_configs = raw_configs["data"]["repository"]["object"]["entries"]
    286 
    287    configs = {}
    288    for sub_config in raw_configs:
    289        name = sub_config["name"]
    290        for file in sub_config["object"].get("entries", []):
    291            if file["name"] != "repack.cfg":
    292                continue
    293            configs[name] = parse_config(file["object"]["text"])
    294    return configs
    295 
    296 
    297 def get_attribution_config(manifestRepo, token):
    298    log.debug("Querying for manifest attribution_config.yml in %s", manifestRepo)
    299    owner, repo = get_repo_params(manifestRepo)
    300    query = MANIFEST_QUERY % {
    301        "owner": owner,
    302        "repo": repo,
    303        "file": "attribution_config.yml",
    304    }
    305    raw_manifest = query_api(query, token)
    306    if not raw_manifest["data"]["repository"]:
    307        raise RuntimeError(
    308            "Couldn't load partner manifest at %s, insufficient permissions ?"
    309            % manifestRepo
    310        )
    311    # no file has been set up, gracefully continue
    312    if raw_manifest["data"]["repository"]["object"] is None:
    313        log.debug("No attribution_config.yml file found")
    314        return {}
    315 
    316    return yaml.safe_load(raw_manifest["data"]["repository"]["object"]["text"])
    317 
    318 
    319 def get_partner_config_by_url(manifest_url, kind, token, partner_subset=None):
    320    """Retrieve partner data starting from the manifest url, which points to a repository
    321    containing a default.xml that is intended to be drive the Google tool 'repo'. It
    322    descends into each partner repo to lookup and parse the repack.cfg file(s).
    323 
    324    If partner_subset is a list of sub_config names only return data for those.
    325 
    326    Supports caching data by kind to avoid repeated requests, relying on the related kinds for
    327    partner repacking, signing, repackage, repackage signing all having the same kind prefix.
    328    """
    329    if not manifest_url:
    330        raise RuntimeError(f"Manifest url for {kind} not defined")
    331    if kind not in partner_configs:
    332        log.info("Looking up data for %s from %s", kind, manifest_url)
    333        check_login(token)
    334        if kind == "release-partner-attribution":
    335            partner_configs[kind] = get_attribution_config(manifest_url, token)
    336        else:
    337            partners = get_partners(manifest_url, token)
    338 
    339            partner_configs[kind] = {}
    340            for partner, partner_url in partners.items():
    341                if partner_subset and partner not in partner_subset:
    342                    continue
    343                partner_configs[kind][partner] = get_repack_configs(partner_url, token)
    344 
    345    return partner_configs[kind]
    346 
    347 
    348 def check_if_partners_enabled(config, tasks):
    349    if (
    350        (
    351            config.params["release_enable_partner_repack"]
    352            and config.kind.startswith("release-partner-repack")
    353        )
    354        or (
    355            config.params["release_enable_partner_attribution"]
    356            and config.kind.startswith("release-partner-attribution")
    357        )
    358        or (
    359            config.params["release_enable_emefree"]
    360            and config.kind.startswith("release-eme-free-")
    361        )
    362    ):
    363        yield from tasks
    364 
    365 
    366 def get_partner_config_by_kind(config, kind):
    367    """Retrieve partner data starting from the manifest url, which points to a repository
    368    containing a default.xml that is intended to be drive the Google tool 'repo'. It
    369    descends into each partner repo to lookup and parse the repack.cfg file(s).
    370 
    371    Supports caching data by kind to avoid repeated requests, relying on the related kinds for
    372    partner repacking, signing, repackage, repackage signing all having the same kind prefix.
    373    """
    374    partner_subset = config.params["release_partners"]
    375    partner_configs = config.params["release_partner_config"] or {}
    376 
    377    # TODO eme-free should be a partner; we shouldn't care about per-kind
    378    for k in partner_configs:
    379        if kind.startswith(k):
    380            kind_config = partner_configs[k]
    381            break
    382    else:
    383        return {}
    384    # if we're only interested in a subset of partners we remove the rest
    385    if partner_subset:
    386        if kind.startswith("release-partner-repack"):
    387            # TODO - should be fatal to have an unknown partner in partner_subset
    388            for partner in [p for p in kind_config.keys() if p not in partner_subset]:
    389                del kind_config[partner]
    390        elif kind.startswith("release-partner-attribution") and isinstance(
    391            kind_config, dict
    392        ):
    393            all_configs = deepcopy(kind_config.get("configs", []))
    394            kind_config["configs"] = []
    395            for this_config in all_configs:
    396                if this_config["campaign"] in partner_subset:
    397                    kind_config["configs"].append(this_config)
    398    return kind_config
    399 
    400 
    401 def _fix_subpartner_locales(orig_config, all_locales):
    402    subpartner_config = deepcopy(orig_config)
    403    # Get an ordered list of subpartner locales that is a subset of all_locales
    404    subpartner_config["locales"] = sorted(
    405        list(set(orig_config["locales"]) & set(all_locales))
    406    )
    407    return subpartner_config
    408 
    409 
    410 def fix_partner_config(orig_config):
    411    pc = {}
    412    with open(LOCALES_FILE) as fh:
    413        all_locales = list(json.load(fh).keys())
    414    # l10n-changesets.json doesn't include en-US, but the repack list does
    415    if "en-US" not in all_locales:
    416        all_locales.append("en-US")
    417    for kind, kind_config in orig_config.items():
    418        if kind == "release-partner-attribution":
    419            pc[kind] = {}
    420            if kind_config:
    421                pc[kind] = {"defaults": kind_config["defaults"]}
    422                for config in kind_config["configs"]:
    423                    # Make sure our locale list is a subset of all_locales
    424                    pc[kind].setdefault("configs", []).append(
    425                        _fix_subpartner_locales(config, all_locales)
    426                    )
    427        else:
    428            for partner, partner_config in kind_config.items():
    429                for subpartner, subpartner_config in partner_config.items():
    430                    # get rid of empty subpartner configs
    431                    if not subpartner_config:
    432                        continue
    433                    # Make sure our locale list is a subset of all_locales
    434                    pc.setdefault(kind, {}).setdefault(partner, {})[subpartner] = (
    435                        _fix_subpartner_locales(subpartner_config, all_locales)
    436                    )
    437    return pc
    438 
    439 
    440 # seems likely this exists elsewhere already
    441 def get_ftp_platform(platform):
    442    if platform.startswith("win32"):
    443        return "win32"
    444    if platform.startswith("win64-aarch64"):
    445        return "win64-aarch64"
    446    if platform.startswith("win64"):
    447        return "win64"
    448    if platform.startswith("macosx"):
    449        return "mac"
    450    if platform.startswith("linux64"):
    451        return "linux-x86_64"
    452    raise ValueError(f"Unimplemented platform {platform}")
    453 
    454 
    455 # Ugh
    456 def locales_per_build_platform(build_platform, locales):
    457    if build_platform.startswith("mac"):
    458        exclude = ["ja"]
    459    else:
    460        exclude = ["ja-JP-mac"]
    461    return [locale for locale in locales if locale not in exclude]
    462 
    463 
    464 def get_partner_url_config(parameters, graph_config):
    465    partner_url_config = deepcopy(graph_config["partner-urls"])
    466    substitutions = {
    467        "release-product": parameters["release_product"],
    468        "release-level": release_level(parameters),
    469        "release-type": parameters["release_type"],
    470    }
    471    resolve_keyed_by(
    472        partner_url_config,
    473        "release-eme-free-repack",
    474        "eme-free manifest_url",
    475        **substitutions,
    476    )
    477    resolve_keyed_by(
    478        partner_url_config,
    479        "release-partner-repack",
    480        "partner manifest url",
    481        **substitutions,
    482    )
    483    resolve_keyed_by(
    484        partner_url_config,
    485        "release-partner-attribution",
    486        "partner attribution url",
    487        **substitutions,
    488    )
    489    return partner_url_config
    490 
    491 
    492 def get_repack_ids_by_platform(config, build_platform):
    493    partner_config = get_partner_config_by_kind(config, config.kind)
    494    combinations = []
    495    for partner, subconfigs in partner_config.items():
    496        for sub_config_name, sub_config in subconfigs.items():
    497            if build_platform not in sub_config.get("platforms", []):
    498                continue
    499            locales = locales_per_build_platform(
    500                build_platform, sub_config.get("locales", [])
    501            )
    502            for locale in locales:
    503                combinations.append(f"{partner}/{sub_config_name}/{locale}")
    504    return sorted(combinations)
    505 
    506 
def get_partners_to_be_published(config):
    """Return the repack partners followed by the attribution partners to publish.

    Each item is a 3-tuple: (partner, sub_config_name, platforms) for repack
    partners and (campaign, content, platforms) for attribution partners.
    """
    return _get_repack_partners(config) + _get_attribution_partners(config)
    509 
    510 
    511 def _get_repack_partners(config):
    512    repack_partner_config = get_partner_config_by_kind(config, "release-partner-repack")
    513    partners = []
    514 
    515    for partner, subconfigs in repack_partner_config.items():
    516        for sub_config_name, sub_config in subconfigs.items():
    517            if sub_config.get("publish_to_releases"):
    518                partners.append((partner, sub_config_name, sub_config["platforms"]))
    519 
    520    return partners
    521 
    522 
    523 def _get_attribution_partners(config):
    524    attribution_partner_config = get_partner_config_by_kind(
    525        config, "release-partner-attribution"
    526    )
    527    partners = []
    528 
    529    for entry in attribution_partner_config.get("configs", []):
    530        if entry.get("publish_to_releases"):
    531            partners.append((entry["campaign"], entry["content"], entry["platforms"]))
    532 
    533    return partners
    534 
    535 
    536 def apply_partner_priority(config, jobs):
    537    priority = None
    538    # Reduce the priority of the partner repack jobs because they don't block QE. Meanwhile
    539    # leave EME-free jobs alone because they do, and they'll get the branch priority like the rest
    540    # of the release. Only bother with this in production, not on staging releases on try.
    541    # medium is the same as mozilla-central, see taskcluster/config.yml. ie higher than
    542    # integration branches because we don't want to wait a lot for the graph to be done, but
    543    # for multiple releases the partner tasks always wait for non-partner.
    544    if (
    545        config.kind.startswith((
    546            "release-partner-repack",
    547            "release-partner-attribution",
    548        ))
    549        and release_level(config.params) == "production"
    550    ):
    551        priority = "medium"
    552    for job in jobs:
    553        if priority:
    554            job["priority"] = priority
    555        yield job
    556 
    557 
    558 def generate_attribution_code(defaults, partner):
    559    params = {
    560        "medium": defaults["medium"],
    561        "source": defaults["source"],
    562        "campaign": partner["campaign"],
    563        "content": partner["content"],
    564    }
    565    if partner.get("variation"):
    566        params["variation"] = partner["variation"]
    567    if partner.get("experiment"):
    568        params["experiment"] = partner["experiment"]
    569 
    570    code = urlencode(params)
    571    return code
    572 
    573 
# Marker passed as its own argument to the dmg tool's "attribute" command and
# also used as the prefix of the attribution string that follows it.
MACOS_ATTRIBUTION_SENTINEL = "__MOZCUSTOM__"
    575 
    576 
    577 def build_macos_attribution_dmg_command(dmg_app_path, attributions):
    578    command = []
    579    for a in attributions:
    580        output_dir = os.path.dirname(os.path.abspath(a["output"]))
    581        create_dir_command = f"mkdir -p {output_dir}"
    582        if create_dir_command not in command:
    583            command.append(create_dir_command)
    584 
    585        command.append(
    586            " ".join([
    587                dmg_app_path,
    588                "attribute",
    589                a["input"],
    590                a["output"],
    591                MACOS_ATTRIBUTION_SENTINEL,
    592                _build_macos_attribution_string(attribution_code=a["attribution"]),
    593            ])
    594        )
    595    return " && ".join(command)
    596 
    597 
    598 def _build_macos_attribution_string(attribution_code):
    599    quoted_attribution_code = quote(attribution_code)
    600    attribution_string = f"{MACOS_ATTRIBUTION_SENTINEL}{quoted_attribution_code}"
    601    # Padding must happen after string is URL-quoted, otherwise the tabs themselves
    602    # are quoted as well.
    603    padded_attribution_string = _pad_macos_attribution_code(attribution_string)
    604    return f"'{padded_attribution_string}'"
    605 
    606 
    607 def _pad_macos_attribution_code(attribution_string):
    608    # Attribution length should be aligned with ATTR_CODE_MAX_LENGTH
    609    #   from browser/components/attribution/AttributionCode.sys
    610    while len(attribution_string) < 1010:
    611        attribution_string += "\t"
    612    return attribution_string