testfiles.py (17440B)
import argparse
import logging
import os
import re
import subprocess
import sys

from collections import OrderedDict

try:
    from ..manifest import manifest
    from ..manifest.utils import git as get_git_cmd
except (ImportError, ValueError):
    # if we're not within the tools package, the above is an import from above
    # the top-level, so reimport it with an absolute reference. Older Python
    # versions raise ValueError for that case; Python 3.9+ raises ImportError.
    #
    # note we need both because depending on caller we may/may not have the
    # paths set up correctly to handle both and mypy has no knowledge of our
    # sys.path magic
    from manifest import manifest  # type: ignore
    from manifest.utils import git as get_git_cmd  # type: ignore

from typing import Any, Dict, Iterable, List, Optional, Pattern, Sequence, Set, Text, Tuple

DEFAULT_IGNORE_RULES = ("resources/testharness*", "resources/testdriver*")

here = os.path.dirname(__file__)
wpt_root = os.path.abspath(os.path.join(here, os.pardir, os.pardir))

logger = logging.getLogger()


def display_branch_point() -> None:
    """Print the result of :func:`branch_point` to stdout."""
    print(branch_point())


def branch_point() -> Optional[Text]:
    """Return the commit at which the current branch diverged.

    The strategy depends on the ``GITHUB_PULL_REQUEST`` and ``GITHUB_BRANCH``
    environment variables:

    * not a pull request and ``GITHUB_BRANCH == "master"``: the HEAD commit;
    * a pull request: the merge base of HEAD and ``GITHUB_BRANCH``;
    * otherwise: the first parent of a HEAD-only commit that is reachable from
      some other branch, falling back to the merge base with ``origin/master``.

    The returned commit id has surrounding whitespace removed.

    Raises:
        Exception: if no git command is available for ``wpt_root``.
        subprocess.CalledProcessError: if ``git rev-list`` exits non-zero.
    """
    git = get_git_cmd(wpt_root)
    if git is None:
        raise Exception("git not found")

    if (os.environ.get("GITHUB_PULL_REQUEST", "false") == "false" and
        os.environ.get("GITHUB_BRANCH") == "master"):
        # For builds on the master branch just return the HEAD commit.
        # Strip the trailing newline so callers can embed the id in a revish.
        return git("rev-parse", "HEAD").strip()
    elif os.environ.get("GITHUB_PULL_REQUEST", "false") != "false":
        # This is a PR, so the base branch is in GITHUB_BRANCH
        base_branch = os.environ.get("GITHUB_BRANCH")
        assert base_branch, "GITHUB_BRANCH environment variable is defined"
        branch_point: Optional[Text] = git("merge-base", "HEAD", base_branch)
    else:
        # Otherwise we aren't on a PR, so we try to find commits that are only in the
        # current branch c.f.
        # http://stackoverflow.com/questions/13460152/find-first-ancestor-commit-in-another-branch

        # parse HEAD into an object ref
        head = git("rev-parse", "HEAD")

        # get everything in refs/heads and refs/remotes that doesn't include HEAD
        not_heads = [item for item in git("rev-parse", "--not", "--branches", "--remotes").split("\n")
                     if item and item != "^%s" % head]

        # get all commits on HEAD but not reachable from anything in not_heads
        cmd = ["git", "rev-list", "--topo-order", "--parents", "--stdin", "HEAD"]
        proc = subprocess.Popen(cmd,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                cwd=wpt_root)
        commits_bytes, _ = proc.communicate(b"\n".join(item.encode("ascii") for item in not_heads))
        if proc.returncode != 0:
            raise subprocess.CalledProcessError(proc.returncode,
                                                cmd,
                                                commits_bytes)

        # Maps each HEAD-only commit to its parents, in rev-list output order.
        commit_parents: Dict[Text, List[Text]] = OrderedDict()
        commits = commits_bytes.decode("ascii")
        for line in commits.splitlines():
            if not line:
                continue
            line_commits = line.split(" ")
            commit_parents[line_commits[0]] = line_commits[1:]

        branch_point = None

        # if there are any commits, take the first parent that is not in commits
        for commit, parents in commit_parents.items():
            for parent in parents:
                if parent not in commit_parents:
                    branch_point = parent
                    break

            if branch_point:
                break

        # if we had any commits, we should now have a branch point
        assert branch_point or not commit_parents

        # The above heuristic will fail in the following cases:
        #
        # - The current branch has fallen behind the remote version
        # - Changes on the current branch were rebased and therefore do not exist on any
        #   other branch. This will result in the selection of a commit that is earlier
        #   in the history than desired (as determined by calculating the later of the
        #   branch point and the merge base)
        #
        # In either case, fall back to using the merge base as the branch point.
        merge_base = git("merge-base", "HEAD", "origin/master")
        if (branch_point is None or
            (branch_point != merge_base and
             not git("log", "--oneline", f"{merge_base}..{branch_point}").strip())):
            logger.debug("Using merge-base as the branch point")
            branch_point = merge_base
        else:
            logger.debug("Using first commit on another branch as the branch point")

    logger.debug("Branch point from master: %s", branch_point)
    if branch_point:
        branch_point = branch_point.strip()
    return branch_point


def compile_ignore_rule(rule: Text) -> Pattern[Text]:
    """Compile an ignore rule into an anchored regular expression.

    The rule is split on ``/`` (after normalising ``os.path.sep``). A path
    component ending in ``**`` matches its literal prefix followed by anything;
    a component ending in ``*`` matches its literal prefix followed by anything
    other than ``/``; any other component is matched literally.
    """
    rule = rule.replace(os.path.sep, "/")
    parts = rule.split("/")
    re_parts = []
    for part in parts:
        if part.endswith("**"):
            re_parts.append(re.escape(part[:-2]) + ".*")
        elif part.endswith("*"):
            re_parts.append(re.escape(part[:-1]) + "[^/]*")
        else:
            re_parts.append(re.escape(part))
    return re.compile("^%s$" % "/".join(re_parts))


def repo_files_changed(revish: Text, include_uncommitted: bool = False, include_new: bool = False) -> Set[Text]:
    """Return the set of paths changed in ``revish``, as reported by git.

    Args:
        revish: Revision or ``A..B`` range handed to ``git diff``. ``A..B`` is
            rewritten to ``A...B``; an explicit ``...`` is rejected.
        include_uncommitted: Also include paths listed by ``git status``.
        include_new: With ``include_uncommitted``, also include untracked
            (``??``) entries; untracked directories are expanded to the files
            they contain.

    Raises:
        Exception: if git is unavailable or ``revish`` contains ``...``.
    """
    git = get_git_cmd(wpt_root)
    if git is None:
        raise Exception("git not found")

    if "..." in revish:
        raise Exception(f"... not supported when finding files changed (revish: {revish!r})")

    if ".." in revish:
        # ".." isn't treated as a range for git-diff; what we want is
        # everything reachable from B but not A, and git diff A...B
        # gives us that (via the merge-base)
        revish = revish.replace("..", "...")

    files_list = git("diff", "--no-renames", "--name-only", "--diff-filter=d", "-z", revish).split("\0")
    assert not files_list[-1], f"final item should be empty, got: {files_list[-1]!r}"
    files = set(files_list[:-1])

    if include_uncommitted:
        entries = git("status", "-z").split("\0")
        assert not entries[-1]
        entry_iter = iter(entries[:-1])
        for item in entry_iter:
            # `git status -z` emits porcelain v1 entries: a two-character
            # status (either character may be a space), one space, then the
            # path. Slice by column rather than splitting on the first space.
            status, path = item[:2], item[3:]
            if "R" in status or "C" in status:
                # Renames/copies are followed by a separate NUL-terminated
                # field holding the source path; it is not an entry itself.
                next(entry_iter, None)
            if status == "??" and not include_new:
                continue
            # Resolve against wpt_root (where git was run), not the process cwd.
            full_path = os.path.join(wpt_root, path)
            if not os.path.isdir(full_path):
                files.add(path)
            else:
                for dirpath, _dirnames, filenames in os.walk(full_path):
                    for filename in filenames:
                        files.add(os.path.relpath(os.path.join(dirpath, filename), wpt_root))

    return files


def exclude_ignored(files: Iterable[Text], ignore_rules: Optional[Sequence[Text]]) -> Tuple[List[Text], List[Text]]:
    """Partition ``files`` into those kept and those matched by an ignore rule.

    Args:
        files: Paths relative to ``wpt_root``.
        ignore_rules: Rules understood by :func:`compile_ignore_rule`;
            ``None`` selects ``DEFAULT_IGNORE_RULES``.

    Returns:
        ``(changed, ignored)``: two lists of paths joined onto ``wpt_root``,
        each in sorted input order.
    """
    if ignore_rules is None:
        ignore_rules = DEFAULT_IGNORE_RULES
    compiled_ignore_rules = [compile_ignore_rule(item) for item in set(ignore_rules)]

    changed = []
    ignored = []
    for item in sorted(files):
        fullpath = os.path.join(wpt_root, item)
        rule_path = item.replace(os.path.sep, "/")
        for rule in compiled_ignore_rules:
            if rule.match(rule_path):
                ignored.append(fullpath)
                break
        else:
            changed.append(fullpath)

    return changed, ignored


def files_changed(revish: Text,
                  ignore_rules: Optional[Sequence[Text]] = None,
                  include_uncommitted: bool = False,
                  include_new: bool = False
                  ) -> Tuple[List[Text], List[Text]]:
    """Find files changed in certain revisions.

    The function passes `revish` directly to `git diff`, so `revish` can have a
    variety of forms; see `git diff --help` for details. Files in the diff that
    are matched by `ignore_rules` are excluded.

    Returns:
        ``(changed, ignored)`` as produced by :func:`exclude_ignored`, or two
        empty lists when nothing changed.
    """
    files = repo_files_changed(revish,
                               include_uncommitted=include_uncommitted,
                               include_new=include_new)
    if not files:
        return [], []

    return exclude_ignored(files, ignore_rules)


def _in_repo_root(full_path: Text) -> bool:
    """Return True if ``full_path`` has fewer than two components relative to ``wpt_root``."""
    rel_path = os.path.relpath(full_path, wpt_root)
    path_components = rel_path.split(os.sep)
    return len(path_components) < 2


def load_manifest(manifest_path: Optional[Text] = None, manifest_update: bool = True) -> manifest.Manifest:
    """Load the manifest via ``manifest.load_and_update`` for ``wpt_root``.

    Args:
        manifest_path: Manifest location; defaults to ``MANIFEST.json`` inside
            ``wpt_root``.
        manifest_update: Forwarded as the ``update`` argument.
    """
    if manifest_path is None:
        manifest_path = os.path.join(wpt_root, "MANIFEST.json")
    return manifest.load_and_update(wpt_root, manifest_path, "/",
                                    update=manifest_update)


def affected_testfiles(files_changed: Iterable[Text],
                       skip_dirs: Optional[Set[Text]] = None,
                       manifest_path: Optional[Text] = None,
                       manifest_update: bool = True
                       ) -> Tuple[Set[Text], Set[Text]]:
    """Determine which test files are changed or reference changed files.

    Args:
        files_changed: Full paths of changed files.
        skip_dirs: Top-level directory names whose changes are disregarded and
            which are not searched; defaults to
            ``{"conformance-checkers", "docs", "tools"}``.
        manifest_path: Passed to :func:`load_manifest`.
        manifest_update: Passed to :func:`load_manifest`.

    Returns:
        ``(tests_changed, affected)``: the changed files that are themselves
        tests, and the test files found to depend on a changed support file.
    """
    if skip_dirs is None:
        skip_dirs = {"conformance-checkers", "docs", "tools"}
    affected: Set[Text] = set()
    # Exclude files that are in the repo root, because
    # they are not part of any test.
    files_changed = [f for f in files_changed if not _in_repo_root(f)]
    nontests_changed = set(files_changed)
    wpt_manifest = load_manifest(manifest_path, manifest_update)

    test_types = ["crashtest", "print-reftest", "reftest", "testharness", "wdspec"]
    support_files = {os.path.join(wpt_root, path)
                     for _, path, _ in wpt_manifest.itertypes("support")}
    wdspec_test_files = {os.path.join(wpt_root, path)
                         for _, path, _ in wpt_manifest.itertypes("wdspec")}
    test_files = {os.path.join(wpt_root, path)
                  for _, path, _ in wpt_manifest.itertypes(*test_types)}

    interface_dir = os.path.join(wpt_root, 'interfaces')
    interfaces_files = {os.path.join(wpt_root, 'interfaces', filename)
                        for filename in os.listdir(interface_dir)}

    interfaces_changed = interfaces_files.intersection(nontests_changed)
    nontests_changed = nontests_changed.intersection(support_files)

    tests_changed = {item for item in files_changed if item in test_files}

    nontest_changed_paths = set()
    rewrites: Dict[Text, Text] = {"/resources/webidl2/lib/webidl2.js": "/resources/WebIDLParser.js"}
    for full_path in nontests_changed:
        rel_path = os.path.relpath(full_path, wpt_root)
        path_components = rel_path.split(os.sep)
        top_level_subdir = path_components[0]
        if top_level_subdir in skip_dirs:
            continue
        repo_path = "/" + os.path.relpath(full_path, wpt_root).replace(os.path.sep, "/")
        if repo_path in rewrites:
            repo_path = rewrites[repo_path]
            full_path = os.path.join(wpt_root, repo_path[1:].replace("/", os.path.sep))
        nontest_changed_paths.add((full_path, repo_path))

    interfaces_changed_names = [os.path.splitext(os.path.basename(interface))[0]
                                for interface in interfaces_changed]

    def affected_by_wdspec(test: Text) -> bool:
        """Return True if ``test`` is a wdspec test below a changed support file."""
        affected = False
        if test in wdspec_test_files:
            for support_full_path, _ in nontest_changed_paths:
                # parent of support file or of "support" directory
                parent = os.path.dirname(support_full_path)
                if os.path.basename(parent) == "support":
                    parent = os.path.dirname(parent)
                relpath = os.path.relpath(test, parent)
                if not relpath.startswith(os.pardir):
                    # testfile is in subtree of support file
                    affected = True
                    break
        return affected

    def affected_by_interfaces(file_contents: Text) -> bool:
        """Return True if an idlharness test quotes a changed interface name."""
        if len(interfaces_changed_names) > 0:
            if 'idlharness.js' in file_contents:
                for interface in interfaces_changed_names:
                    # Escape the name so characters such as "." match literally.
                    regex = '[\'"]' + re.escape(interface) + '(\\.idl)?[\'"]'
                    if re.search(regex, file_contents):
                        return True
        return False

    for root, dirs, fnames in os.walk(wpt_root):
        # Walk top_level_subdir looking for test files containing either the
        # relative filepath or absolute filepath to the changed files.
        if root == wpt_root:
            # Prune in place so os.walk does not descend into skipped
            # directories; tolerate skip entries that do not exist on disk.
            dirs[:] = [dir_name for dir_name in dirs if dir_name not in skip_dirs]
        for fname in fnames:
            test_full_path = os.path.join(root, fname)
            # Skip any file that's not a test file.
            if test_full_path not in test_files:
                continue
            if affected_by_wdspec(test_full_path):
                affected.add(test_full_path)
                continue

            with open(test_full_path, "rb") as fh:
                raw_file_contents: bytes = fh.read()
            # Pick a decoding from the byte-order mark, defaulting to UTF-8.
            if raw_file_contents.startswith(b"\xfe\xff"):
                file_contents: Text = raw_file_contents.decode("utf-16be", "replace")
            elif raw_file_contents.startswith(b"\xff\xfe"):
                file_contents = raw_file_contents.decode("utf-16le", "replace")
            else:
                file_contents = raw_file_contents.decode("utf8", "replace")
            for full_path, repo_path in nontest_changed_paths:
                rel_path = os.path.relpath(full_path, root).replace(os.path.sep, "/")
                if rel_path in file_contents or repo_path in file_contents or affected_by_interfaces(file_contents):
                    affected.add(test_full_path)
                    # One match is enough; no need to scan the remaining paths.
                    break

    return tests_changed, affected


def get_parser() -> argparse.ArgumentParser:
    """Build the argument parser shared by the changed-files commands."""
    parser = argparse.ArgumentParser()
    parser.add_argument("revish", nargs="?",
                        help="Commits to consider. Defaults to the "
                        "commits on the current branch")
    parser.add_argument("--ignore-rule", action="append",
                        help="Override the rules for paths to exclude from lists of changes. "
                        "Rules are paths relative to the test root, with * before a separator "
                        "or the end matching anything other than a path separator and ** in that "
                        "position matching anything. This flag can be used multiple times for "
                        "multiple rules. Specifying this flag overrides the default: " +
                        ", ".join(DEFAULT_IGNORE_RULES))
    parser.add_argument("--modified", action="store_true",
                        help="Include files under version control that have been "
                        "modified or staged")
    parser.add_argument("--new", action="store_true",
                        help="Include files in the worktree that are not in version control")
    parser.add_argument("--show-type", action="store_true",
                        help="Print the test type along with each affected test")
    parser.add_argument("--null", action="store_true",
                        help="Separate items with a null byte")
    return parser


def get_parser_affected() -> argparse.ArgumentParser:
    """Build the parser from :func:`get_parser` plus the ``--metadata`` option."""
    parser = get_parser()
    parser.add_argument("--metadata",
                        dest="metadata_root",
                        default=wpt_root,
                        help="Directory that will contain MANIFEST.json")
    return parser


def get_revish(**kwargs: Any) -> Text:
    """Return ``kwargs["revish"]`` stripped, defaulting to ``<branch point>..HEAD``."""
    revish = kwargs.get("revish")
    if revish is None:
        revish = "%s..HEAD" % branch_point()
    return revish.strip()


def run_changed_files(**kwargs: Any) -> None:
    """Write the changed, non-ignored files to stdout, relative to ``wpt_root``.

    Reads the ``revish``, ``ignore_rule``, ``modified``, ``new`` and ``null``
    keyword arguments (as produced by :func:`get_parser`).
    """
    revish = get_revish(**kwargs)
    changed, _ = files_changed(revish,
                               kwargs["ignore_rule"],
                               include_uncommitted=kwargs["modified"],
                               include_new=kwargs["new"])

    separator = "\0" if kwargs["null"] else "\n"

    for item in sorted(changed):
        line = os.path.relpath(item, wpt_root) + separator
        sys.stdout.write(line)


def run_tests_affected(**kwargs: Any) -> None:
    """Write the changed and affected test files to stdout.

    Reads the keyword arguments produced by :func:`get_parser_affected`. With
    ``show_type`` set, each path is followed by a tab and the item type(s) the
    manifest reports for it.
    """
    revish = get_revish(**kwargs)
    changed, _ = files_changed(revish,
                               kwargs["ignore_rule"],
                               include_uncommitted=kwargs["modified"],
                               include_new=kwargs["new"])
    manifest_path = os.path.join(kwargs["metadata_root"], "MANIFEST.json")
    tests_changed, dependents = affected_testfiles(
        changed,
        {"conformance-checkers", "docs", "tools"},
        manifest_path=manifest_path
    )

    message = "{path}"
    if kwargs["show_type"]:
        wpt_manifest = load_manifest(manifest_path)
        message = "{path}\t{item_type}"

    message += "\0" if kwargs["null"] else "\n"

    for item in sorted(tests_changed | dependents):
        results = {
            "path": os.path.relpath(item, wpt_root)
        }
        if kwargs["show_type"]:
            item_types = {i.item_type for i in wpt_manifest.iterpath(results["path"])}
            if len(item_types) != 1:
                # Sort so the output does not depend on set iteration order.
                item_types = {" ".join(sorted(item_types))}
            results["item_type"] = item_types.pop()
        sys.stdout.write(message.format(**results))