tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

extract-for-git.py (7593B)


      1 # This Source Code Form is subject to the terms of the Mozilla Public
      2 # License, v. 2.0. If a copy of the MPL was not distributed with this
      3 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
      4 import argparse
      5 import os
      6 import re
      7 import subprocess
      8 import sys
      9 
     10 from run_operations import (
     11    RepoType,
     12    detect_repo_type,
     13    run_git,
     14 )
     15 
# This script extracts commits that touch third party webrtc code so they can
# be imported into Git. It filters out commits that are not part of upstream
# code and rewrites the paths to match upstream. Finally, the commits are
# combined into a mailbox file that can be applied with `git am`.

# Repository-relative directory whose commits are extracted. The __main__
# block rebinds this when a --target other than "libwebrtc" is selected.
LIBWEBRTC_DIR = "third_party/libwebrtc"

# Detected once at import time; compared against RepoType.GIT below to pick
# between the git and hg code paths.
repo_type = detect_repo_type()
     23 
     24 
     25 def build_commit_list(revset, env):
     26    """Build commit list from the specified revset.
     27 
     28    The revset can be a single revision, e.g. 52bb9bb94661, or a range,
     29    e.g. 8c08a5bb8a99::52bb9bb94661, or any other valid revset
     30    (check hg help revset). Only commits that touch libwebrtc are included.
     31    """
     32    if repo_type == RepoType.GIT:
     33        if ".." not in revset and "^" not in revset:
     34            # Single commit shas in git need to be converted to a range
     35            # otherwise git log will return all the commits from the
     36            # given sha back in history.
     37            revset = f"{revset}^..{revset}"
     38        cmd = f"git log --reverse --oneline --format=%h -r {revset} ."
     39        commits = run_git(cmd, LIBWEBRTC_DIR)
     40        return commits
     41 
     42    res = subprocess.run(
     43        ["hg", "log", "-r", revset, "-M", "--template", "{node}\n", LIBWEBRTC_DIR],
     44        capture_output=True,
     45        text=True,
     46        env=env,
     47        check=False,
     48    )
     49    # return empty list instead of a list with one empty element if no
     50    # libwebrtc changing commits are found in the given range
     51    if len(res.stdout) == 0:
     52        return []
     53    return [line.strip() for line in res.stdout.strip().split("\n")]
     54 
     55 
     56 def extract_git_author_date(sha1, env):
     57    res = subprocess.run(
     58        ["git", "show", "--no-patch", "--format=%aN <%aE>|%ai", sha1],
     59        capture_output=True,
     60        text=True,
     61        env=env,
     62        check=False,
     63    )
     64    # we don't want a line break at the end of date, rstrip() before split
     65    return res.stdout.rstrip().split("|")
     66 
     67 
     68 def extract_hg_author_date(sha1, env):
     69    res = subprocess.run(
     70        ["hg", "log", "-r", sha1, "--template", "{author}|{date|isodate}"],
     71        capture_output=True,
     72        text=True,
     73        env=env,
     74        check=False,
     75    )
     76    return res.stdout.split("|")
     77 
     78 
     79 def extract_git_description(sha1, env):
     80    res = subprocess.run(
     81        ["git", "show", "--no-patch", "--format=%B", sha1],
     82        capture_output=True,
     83        text=True,
     84        env=env,
     85        check=False,
     86    )
     87    return res.stdout.rstrip()
     88 
     89 
     90 def extract_hg_description(sha1, env):
     91    res = subprocess.run(
     92        ["hg", "log", "-r", sha1, "--template", "{desc}"],
     93        capture_output=True,
     94        text=True,
     95        env=env,
     96        check=False,
     97    )
     98    return res.stdout
     99 
    100 
    101 def extract_git_commit(sha1, env):
    102    # an empty format string just gives us the raw diffs, cutting out all
    103    # the header info like author, date, etc.
    104    res = subprocess.run(
    105        ["git", "show", "--format=", sha1],
    106        capture_output=True,
    107        text=True,
    108        env=env,
    109        check=False,
    110    )
    111    return "\n" + res.stdout
    112 
    113 
    114 def extract_hg_commit(sha1, env):
    115    res = subprocess.run(
    116        ["hg", "log", "-r", sha1, "-pg", "--template", "\n"],
    117        capture_output=True,
    118        text=True,
    119        env=env,
    120        check=False,
    121    )
    122    return res.stdout
    123 
    124 
    125 def filter_nonwebrtc(commit):
    126    filtered = []
    127    skipping = False
    128    for line in commit.split("\n"):
    129        # Extract only patches affecting libwebrtc, but avoid commits that
    130        # touch build, which is tracked by a separate repo, or that affect
    131        # moz.build files which are code generated.
    132        if (
    133            line.startswith("diff --git a/" + LIBWEBRTC_DIR)
    134            and not line.startswith("diff --git a/" + LIBWEBRTC_DIR + "/third_party/")
    135            and not line.startswith("diff --git a/" + LIBWEBRTC_DIR + "/README.moz")
    136            and not line.startswith(
    137                "diff --git a/" + LIBWEBRTC_DIR + "/moz-patch-stack/"
    138            )
    139            and not line.endswith("moz.build")
    140        ):
    141            skipping = False
    142        elif line.startswith("diff --git"):
    143            skipping = True
    144 
    145        if not skipping:
    146            filtered.append(line)
    147    return "\n".join(filtered)
    148 
    149 
    150 def fixup_paths(commit, search_path):
    151    # make sure we only rewrite paths in the diff-related or rename lines
    152    commit = re.sub(
    153        f"^rename (from|to) {search_path}/", "rename \\1 ", commit, flags=re.MULTILINE
    154    )
    155    return re.sub(f"( [ab])/{search_path}/", "\\1/", commit)
    156 
    157 
    158 def write_as_mbox(sha1, author, date, description, commit, ofile):
    159    # Use same magic date as git format-patch
    160    ofile.write(f"From {sha1} Mon Sep 17 00:00:00 2001\n")
    161    ofile.write(f"From: {author}\n")
    162    ofile.write(f"Date: {date}\n")
    163    description = description.split("\n")
    164    ofile.write(f"Subject: {description[0]}\n")
    165    ofile.write("\n".join(description[1:]))
    166    ofile.write(
    167        f"\nMercurial Revision: https://hg.mozilla.org/mozilla-central/rev/{sha1}\n"
    168    )
    169    ofile.write(commit)
    170    ofile.write("\n")
    171    ofile.write("\n")
    172 
    173 
    174 if __name__ == "__main__":
    175    # first, check which repo we're in, git or hg
    176    if repo_type is None or not isinstance(repo_type, RepoType):
    177        print("Unable to detect repo (git or hg)")
    178        sys.exit(1)
    179 
    180    commits = []
    181    parser = argparse.ArgumentParser(
    182        description="Format commits for upstream libwebrtc"
    183    )
    184    parser.add_argument(
    185        "revsets", metavar="revset", type=str, nargs="+", help="A revset to process"
    186    )
    187    parser.add_argument(
    188        "--target",
    189        choices=("libwebrtc", "build", "third_party", "abseil-cpp"),
    190        default="libwebrtc",
    191    )
    192    args = parser.parse_args()
    193 
    194    if args.target == "build":
    195        LIBWEBRTC_DIR = "third_party/chromium/build"
    196    elif args.target == "abseil-cpp":
    197        LIBWEBRTC_DIR = "third_party/abseil-cpp"
    198    elif args.target == "third_party":
    199        LIBWEBRTC_DIR = os.path.join(LIBWEBRTC_DIR, args.target)
    200 
    201    # must run 'hg' with HGPLAIN=1 to ensure aliases don't interfere with
    202    # command output.
    203    env = os.environ.copy()
    204    env["HGPLAIN"] = "1"
    205 
    206    for revset in args.revsets:
    207        commits.extend(build_commit_list(revset, env))
    208 
    209    if len(commits) == 0:
    210        print(f"No commits modifying {LIBWEBRTC_DIR} found in provided revsets")
    211        sys.exit(1)
    212 
    213    with open("mailbox.patch", "w") as ofile:
    214        for sha1 in commits:
    215            if repo_type == RepoType.GIT:
    216                author, date = extract_git_author_date(sha1, env)
    217                description = extract_git_description(sha1, env)
    218                commit = extract_git_commit(sha1, env)
    219            else:
    220                author, date = extract_hg_author_date(sha1, env)
    221                description = extract_hg_description(sha1, env)
    222                commit = extract_hg_commit(sha1, env)
    223 
    224            filtered_commit = filter_nonwebrtc(commit)
    225            if len(filtered_commit) == 0:
    226                continue
    227            if args.target == "abseil-cpp":
    228                fixedup_commit = fixup_paths(filtered_commit, "third_party")
    229            else:
    230                fixedup_commit = fixup_paths(filtered_commit, LIBWEBRTC_DIR)
    231            write_as_mbox(sha1, author, date, description, fixedup_commit, ofile)