# extract-for-git.py (7593B)
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import argparse
import os
import re
import subprocess
import sys

from run_operations import (
    RepoType,
    detect_repo_type,
    run_git,
)

# This script extracts commits that touch third party webrtc code so they can
# be imported into Git. It filters out commits that are not part of upstream
# code and rewrites the paths to match upstream. Finally, the commits are
# combined into a mailbox file that can be applied with `git am`.
LIBWEBRTC_DIR = "third_party/libwebrtc"

repo_type = detect_repo_type()


def build_commit_list(revset, env):
    """Build commit list from the specified revset.

    The revset can be a single revision, e.g. 52bb9bb94661, or a range,
    e.g. 8c08a5bb8a99::52bb9bb94661, or any other valid revset
    (check hg help revset). Only commits that touch libwebrtc are included.

    In a git repo the work is delegated to ``run_git`` and its result is
    returned unchanged (presumably a list of short hashes, one per output
    line -- confirm against run_operations). In an hg repo a list of full
    node ids is returned; it is empty when hg prints nothing.

    ``env`` is only used for the hg invocation.
    """
    if repo_type == RepoType.GIT:
        if ".." not in revset and "^" not in revset:
            # Single commit shas in git need to be converted to a range
            # otherwise git log will return all the commits from the
            # given sha back in history.
            revset = f"{revset}^..{revset}"
        cmd = f"git log --reverse --oneline --format=%h -r {revset} ."
        commits = run_git(cmd, LIBWEBRTC_DIR)
        return commits

    # -M (--no-merges) leaves merge changesets out of the list.
    res = subprocess.run(
        ["hg", "log", "-r", revset, "-M", "--template", "{node}\n", LIBWEBRTC_DIR],
        capture_output=True,
        text=True,
        env=env,
        check=False,
    )
    # return empty list instead of a list with one empty element if no
    # libwebrtc changing commits are found in the given range; blank lines
    # are dropped so whitespace-only output also yields an empty list
    return [line.strip() for line in res.stdout.splitlines() if line.strip()]


def extract_git_author_date(sha1, env):
    """Return ``[author, date]`` for git commit ``sha1``.

    The author is formatted as ``Name <email>`` and the date is git's
    ``%ai`` (ISO-like) author date.
    """
    res = subprocess.run(
        ["git", "show", "--no-patch", "--format=%aN <%aE>|%ai", sha1],
        capture_output=True,
        text=True,
        env=env,
        check=False,
    )
    # we don't want a line break at the end of date, rstrip() before split.
    # Split on the last "|" only: the date never contains one, but an author
    # name might, and callers unpack exactly two values.
    return res.stdout.rstrip().rsplit("|", 1)


def extract_hg_author_date(sha1, env):
    """Return ``[author, date]`` for hg changeset ``sha1``.

    The date uses hg's ``isodate`` template filter.
    """
    res = subprocess.run(
        ["hg", "log", "-r", sha1, "--template", "{author}|{date|isodate}"],
        capture_output=True,
        text=True,
        env=env,
        check=False,
    )
    # Split on the last "|" only so an author containing "|" still yields
    # exactly two fields.
    return res.stdout.rsplit("|", 1)


def extract_git_description(sha1, env):
    """Return the full commit message of git commit ``sha1``.

    Trailing whitespace is stripped.
    """
    res = subprocess.run(
        ["git", "show", "--no-patch", "--format=%B", sha1],
        capture_output=True,
        text=True,
        env=env,
        check=False,
    )
    return res.stdout.rstrip()


def extract_hg_description(sha1, env):
    """Return the commit description of hg changeset ``sha1`` as printed by hg."""
    res = subprocess.run(
        ["hg", "log", "-r", sha1, "--template", "{desc}"],
        capture_output=True,
        text=True,
        env=env,
        check=False,
    )
    return res.stdout


def extract_git_commit(sha1, env):
    """Return the raw diff of git commit ``sha1``, preceded by a newline."""
    # an empty format string just gives us the raw diffs, cutting out all
    # the header info like author, date, etc.
    res = subprocess.run(
        ["git", "show", "--format=", sha1],
        capture_output=True,
        text=True,
        env=env,
        check=False,
    )
    # The leading newline mirrors the "\n" template used on the hg side.
    return "\n" + res.stdout


def extract_hg_commit(sha1, env):
    """Return the git-style diff (``-pg``) of hg changeset ``sha1``.

    The template is a bare newline, so no changeset header is printed.
    """
    res = subprocess.run(
        ["hg", "log", "-r", sha1, "-pg", "--template", "\n"],
        capture_output=True,
        text=True,
        env=env,
        check=False,
    )
    return res.stdout


def filter_nonwebrtc(commit):
    """Drop every per-file diff in ``commit`` that should not go upstream.

    ``commit`` is the text of a patch. A per-file diff is kept only when its
    ``diff --git`` header names a path inside ``LIBWEBRTC_DIR`` that is not
    under ``third_party/`` or ``moz-patch-stack/``, does not start with
    ``README.moz``, and whose header does not end in ``moz.build``. Lines
    preceding the first ``diff --git`` header are always kept.

    Returns the remaining lines joined with newlines.
    """
    # The trailing "/" keeps sibling directories that merely share a name
    # prefix with LIBWEBRTC_DIR from being accepted.
    wanted_prefix = f"diff --git a/{LIBWEBRTC_DIR}/"
    excluded_prefixes = (
        wanted_prefix + "third_party/",
        wanted_prefix + "README.moz",
        wanted_prefix + "moz-patch-stack/",
    )

    filtered = []
    skipping = False
    for line in commit.split("\n"):
        # Extract only patches affecting libwebrtc, but avoid commits that
        # touch build, which is tracked by a separate repo, or that affect
        # moz.build files which are code generated.
        if line.startswith("diff --git"):
            keep = (
                line.startswith(wanted_prefix)
                and not line.startswith(excluded_prefixes)
                and not line.endswith("moz.build")
            )
            skipping = not keep

        if not skipping:
            filtered.append(line)
    return "\n".join(filtered)


def fixup_paths(commit, search_path):
    """Strip the ``search_path/`` prefix from the paths in a patch.

    ``rename from|to`` and ``copy from|to`` headers are rewritten at the
    start of a line. In addition every occurrence of `` a/search_path/`` or
    `` b/search_path/`` is shortened to `` a/`` / `` b/``; that substitution
    is not anchored to header lines, so matching text elsewhere in the patch
    is rewritten as well.
    """
    # search_path is literal text, not a pattern.
    escaped_path = re.escape(search_path)
    # make sure we only rewrite paths in the rename/copy header lines
    commit = re.sub(
        rf"^(rename|copy) (from|to) {escaped_path}/",
        r"\1 \2 ",
        commit,
        flags=re.MULTILINE,
    )
    return re.sub(rf"( [ab])/{escaped_path}/", r"\1/", commit)


def write_as_mbox(sha1, author, date, description, commit, ofile):
    """Write one commit to ``ofile`` as an mbox entry.

    The first line of ``description`` becomes the Subject header and the
    remaining lines the body, followed by a ``Mercurial Revision:`` line
    and the patch text in ``commit``.
    """
    # Use same magic date as git format-patch
    ofile.write(f"From {sha1} Mon Sep 17 00:00:00 2001\n")
    ofile.write(f"From: {author}\n")
    ofile.write(f"Date: {date}\n")
    description = description.split("\n")
    ofile.write(f"Subject: {description[0]}\n")
    ofile.write("\n".join(description[1:]))
    ofile.write(
        f"\nMercurial Revision: https://hg.mozilla.org/mozilla-central/rev/{sha1}\n"
    )
    ofile.write(commit)
    ofile.write("\n")
    ofile.write("\n")


if __name__ == "__main__":
    # first, check which repo we're in, git or hg
    if repo_type is None or not isinstance(repo_type, RepoType):
        print("Unable to detect repo (git or hg)")
        sys.exit(1)

    commits = []
    parser = argparse.ArgumentParser(
        description="Format commits for upstream libwebrtc"
    )
    parser.add_argument(
        "revsets", metavar="revset", type=str, nargs="+", help="A revset to process"
    )
    parser.add_argument(
        "--target",
        choices=("libwebrtc", "build", "third_party", "abseil-cpp"),
        default="libwebrtc",
    )
    args = parser.parse_args()

    # Rebinding the module-level LIBWEBRTC_DIR here also changes what
    # build_commit_list() and filter_nonwebrtc() look for.
    if args.target == "build":
        LIBWEBRTC_DIR = "third_party/chromium/build"
    elif args.target == "abseil-cpp":
        LIBWEBRTC_DIR = "third_party/abseil-cpp"
    elif args.target == "third_party":
        LIBWEBRTC_DIR = os.path.join(LIBWEBRTC_DIR, args.target)

    # must run 'hg' with HGPLAIN=1 to ensure aliases don't interfere with
    # command output.
    env = os.environ.copy()
    env["HGPLAIN"] = "1"

    for revset in args.revsets:
        commits.extend(build_commit_list(revset, env))

    if not commits:
        print(f"No commits modifying {LIBWEBRTC_DIR} found in provided revsets")
        sys.exit(1)

    with open("mailbox.patch", "w") as ofile:
        for sha1 in commits:
            if repo_type == RepoType.GIT:
                author, date = extract_git_author_date(sha1, env)
                description = extract_git_description(sha1, env)
                commit = extract_git_commit(sha1, env)
            else:
                author, date = extract_hg_author_date(sha1, env)
                description = extract_hg_description(sha1, env)
                commit = extract_hg_commit(sha1, env)

            filtered_commit = filter_nonwebrtc(commit)
            if not filtered_commit:
                continue
            # For abseil-cpp only the leading "third_party" component is
            # stripped, so paths keep their "abseil-cpp/" prefix.
            if args.target == "abseil-cpp":
                fixedup_commit = fixup_paths(filtered_commit, "third_party")
            else:
                fixedup_commit = fixup_paths(filtered_commit, LIBWEBRTC_DIR)
            write_as_mbox(sha1, author, date, description, fixedup_commit, ofile)