tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

testfiles.py (17440B)


      1 import argparse
      2 import logging
      3 import os
      4 import re
      5 import subprocess
      6 import sys
      7 
      8 from collections import OrderedDict
      9 
     10 try:
     11    from ..manifest import manifest
     12    from ..manifest.utils import git as get_git_cmd
     13 except ValueError:
     14    # if we're not within the tools package, the above is an import from above
     15    # the top-level which raises ValueError, so reimport it with an absolute
     16    # reference
     17    #
     18    # note we need both because depending on caller we may/may not have the
     19    # paths set up correctly to handle both and mypy has no knowledge of our
     20    # sys.path magic
     21    from manifest import manifest  # type: ignore
     22    from manifest.utils import git as get_git_cmd  # type: ignore
     23 
     24 from typing import Any, Dict, Iterable, List, Optional, Pattern, Sequence, Set, Text, Tuple
     25 
     26 DEFAULT_IGNORE_RULES = ("resources/testharness*", "resources/testdriver*")
     27 
     28 here = os.path.dirname(__file__)
     29 wpt_root = os.path.abspath(os.path.join(here, os.pardir, os.pardir))
     30 
     31 logger = logging.getLogger()
     32 
     33 
     34 def display_branch_point() -> None:
     35    print(branch_point())
     36 
     37 
def branch_point() -> Optional[Text]:
    """Determine the commit at which the current checkout branched from master.

    Returns a (stripped) commit sha, or None when no branch point can be
    determined. Behaviour depends on the GITHUB_PULL_REQUEST / GITHUB_BRANCH
    environment variables:

    * master-branch CI build: the HEAD commit itself.
    * PR build: ``git merge-base HEAD <base branch>``.
    * otherwise: heuristic search for the first ancestor commit reachable
      from another branch, with a merge-base fallback.

    Raises Exception when git cannot be found.
    """
    git = get_git_cmd(wpt_root)
    if git is None:
        raise Exception("git not found")

    if (os.environ.get("GITHUB_PULL_REQUEST", "false") == "false" and
        os.environ.get("GITHUB_BRANCH") == "master"):
        # For builds on the master branch just return the HEAD commit
        # NOTE(review): this early return skips the .strip() applied at the
        # end of the function — presumably the git() wrapper already strips
        # its output; confirm against manifest.utils.git.
        return git("rev-parse", "HEAD")
    elif os.environ.get("GITHUB_PULL_REQUEST", "false") != "false":
        # This is a PR, so the base branch is in GITHUB_BRANCH
        base_branch = os.environ.get("GITHUB_BRANCH")
        assert base_branch, "GITHUB_BRANCH environment variable is defined"
        branch_point: Optional[Text] = git("merge-base", "HEAD", base_branch)
    else:
        # Otherwise we aren't on a PR, so we try to find commits that are only in the
        # current branch c.f.
        # http://stackoverflow.com/questions/13460152/find-first-ancestor-commit-in-another-branch

        # parse HEAD into an object ref
        head = git("rev-parse", "HEAD")

        # get everything in refs/heads and refs/remotes that doesn't include HEAD
        not_heads = [item for item in git("rev-parse", "--not", "--branches", "--remotes").split("\n")
                     if item and item != "^%s" % head]

        # get all commits on HEAD but not reachable from anything in not_heads
        # (the ^-prefixed exclusions are fed to rev-list via --stdin)
        cmd = ["git", "rev-list", "--topo-order", "--parents", "--stdin", "HEAD"]
        proc = subprocess.Popen(cmd,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                cwd=wpt_root)
        commits_bytes, _ = proc.communicate(b"\n".join(item.encode("ascii") for item in not_heads))
        if proc.returncode != 0:
            raise subprocess.CalledProcessError(proc.returncode,
                                                cmd,
                                                commits_bytes)

        # Map each commit sha to its parent shas, preserving the
        # topological order emitted by rev-list.
        commit_parents: Dict[Text, List[Text]] = OrderedDict()
        commits = commits_bytes.decode("ascii")
        if commits:
            for line in commits.split("\n"):
                line_commits = line.split(" ")
                commit_parents[line_commits[0]] = line_commits[1:]

        branch_point = None

        # if there are any commits, take the first parent that is not in commits
        for commit, parents in commit_parents.items():
            for parent in parents:
                if parent not in commit_parents:
                    branch_point = parent
                    break

            if branch_point:
                break

        # if we had any commits, we should now have a branch point
        assert branch_point or not commit_parents

        # The above heuristic will fail in the following cases:
        #
        # - The current branch has fallen behind the remote version
        # - Changes on the current branch were rebased and therefore do not exist on any
        #   other branch. This will result in the selection of a commit that is earlier
        #   in the history than desired (as determined by calculating the later of the
        #   branch point and the merge base)
        #
        # In either case, fall back to using the merge base as the branch point.
        merge_base = git("merge-base", "HEAD", "origin/master")
        if (branch_point is None or
            (branch_point != merge_base and
             not git("log", "--oneline", f"{merge_base}..{branch_point}").strip())):
            logger.debug("Using merge-base as the branch point")
            branch_point = merge_base
        else:
            logger.debug("Using first commit on another branch as the branch point")

    logger.debug("Branch point from master: %s" % branch_point)
    if branch_point:
        branch_point = branch_point.strip()
    return branch_point
    120 
    121 
    122 def compile_ignore_rule(rule: Text) -> Pattern[Text]:
    123    rule = rule.replace(os.path.sep, "/")
    124    parts = rule.split("/")
    125    re_parts = []
    126    for part in parts:
    127        if part.endswith("**"):
    128            re_parts.append(re.escape(part[:-2]) + ".*")
    129        elif part.endswith("*"):
    130            re_parts.append(re.escape(part[:-1]) + "[^/]*")
    131        else:
    132            re_parts.append(re.escape(part))
    133    return re.compile("^%s$" % "/".join(re_parts))
    134 
    135 
    136 def repo_files_changed(revish: Text, include_uncommitted: bool = False, include_new: bool = False) -> Set[Text]:
    137    git = get_git_cmd(wpt_root)
    138    if git is None:
    139        raise Exception("git not found")
    140 
    141    if "..." in revish:
    142        raise Exception(f"... not supported when finding files changed (revish: {revish!r}")
    143 
    144    if ".." in revish:
    145        # ".." isn't treated as a range for git-diff; what we want is
    146        # everything reachable from B but not A, and git diff A...B
    147        # gives us that (via the merge-base)
    148        revish = revish.replace("..", "...")
    149 
    150    files_list = git("diff", "--no-renames", "--name-only", "--diff-filter=d", "-z", revish).split("\0")
    151    assert not files_list[-1], f"final item should be empty, got: {files_list[-1]!r}"
    152    files = set(files_list[:-1])
    153 
    154    if include_uncommitted:
    155        entries = git("status", "-z").split("\0")
    156        assert not entries[-1]
    157        entries = entries[:-1]
    158        for item in entries:
    159            status, path = item.split(" ", 1)
    160            if status == "??" and not include_new:
    161                continue
    162            else:
    163                if not os.path.isdir(path):
    164                    files.add(path)
    165                else:
    166                    for dirpath, dirnames, filenames in os.walk(path):
    167                        for filename in filenames:
    168                            files.add(os.path.join(dirpath, filename))
    169 
    170    return files
    171 
    172 
    173 def exclude_ignored(files: Iterable[Text], ignore_rules: Optional[Sequence[Text]]) -> Tuple[List[Text], List[Text]]:
    174    if ignore_rules is None:
    175        ignore_rules = DEFAULT_IGNORE_RULES
    176    compiled_ignore_rules = [compile_ignore_rule(item) for item in set(ignore_rules)]
    177 
    178    changed = []
    179    ignored = []
    180    for item in sorted(files):
    181        fullpath = os.path.join(wpt_root, item)
    182        rule_path = item.replace(os.path.sep, "/")
    183        for rule in compiled_ignore_rules:
    184            if rule.match(rule_path):
    185                ignored.append(fullpath)
    186                break
    187        else:
    188            changed.append(fullpath)
    189 
    190    return changed, ignored
    191 
    192 
    193 def files_changed(revish: Text,
    194                  ignore_rules: Optional[Sequence[Text]] = None,
    195                  include_uncommitted: bool = False,
    196                  include_new: bool = False
    197                  ) -> Tuple[List[Text], List[Text]]:
    198    """Find files changed in certain revisions.
    199 
    200    The function passes `revish` directly to `git diff`, so `revish` can have a
    201    variety of forms; see `git diff --help` for details. Files in the diff that
    202    are matched by `ignore_rules` are excluded.
    203    """
    204    files = repo_files_changed(revish,
    205                               include_uncommitted=include_uncommitted,
    206                               include_new=include_new)
    207    if not files:
    208        return [], []
    209 
    210    return exclude_ignored(files, ignore_rules)
    211 
    212 
    213 def _in_repo_root(full_path: Text) -> bool:
    214    rel_path = os.path.relpath(full_path, wpt_root)
    215    path_components = rel_path.split(os.sep)
    216    return len(path_components) < 2
    217 
    218 
    219 def load_manifest(manifest_path: Optional[Text] = None, manifest_update: bool = True) -> manifest.Manifest:
    220    if manifest_path is None:
    221        manifest_path = os.path.join(wpt_root, "MANIFEST.json")
    222    return manifest.load_and_update(wpt_root, manifest_path, "/",
    223                                    update=manifest_update)
    224 
    225 
def affected_testfiles(files_changed: Iterable[Text],
                       skip_dirs: Optional[Set[Text]] = None,
                       manifest_path: Optional[Text] = None,
                       manifest_update: bool = True
                       ) -> Tuple[Set[Text], Set[Text]]:
    """Determine and return list of test files that reference changed files.

    :param files_changed: Absolute paths of changed files.
    :param skip_dirs: Top-level directories to ignore entirely (defaults to
        conformance-checkers, docs, tools).
    :param manifest_path: Manifest location passed through to load_manifest.
    :param manifest_update: Whether to update the manifest before use.
    :returns: ``(tests_changed, affected_testfiles)`` — changed files that are
        themselves tests, and test files that reference a changed support file
        (or a changed IDL interface via idlharness).
    """
    if skip_dirs is None:
        skip_dirs = {"conformance-checkers", "docs", "tools"}
    affected_testfiles = set()
    # Exclude files that are in the repo root, because
    # they are not part of any test.
    files_changed = [f for f in files_changed if not _in_repo_root(f)]
    nontests_changed = set(files_changed)
    wpt_manifest = load_manifest(manifest_path, manifest_update)

    test_types = ["crashtest", "print-reftest", "reftest", "testharness", "wdspec"]
    # Absolute paths for each manifest category we need to distinguish.
    support_files = {os.path.join(wpt_root, path)
                     for _, path, _ in wpt_manifest.itertypes("support")}
    wdspec_test_files = {os.path.join(wpt_root, path)
                         for _, path, _ in wpt_manifest.itertypes("wdspec")}
    test_files = {os.path.join(wpt_root, path)
                  for _, path, _ in wpt_manifest.itertypes(*test_types)}

    interface_dir = os.path.join(wpt_root, 'interfaces')
    interfaces_files = {os.path.join(wpt_root, 'interfaces', filename)
                        for filename in os.listdir(interface_dir)}

    # Changed IDL files are handled separately (idlharness references),
    # while everything else is narrowed down to manifest "support" files.
    interfaces_changed = interfaces_files.intersection(nontests_changed)
    nontests_changed = nontests_changed.intersection(support_files)

    tests_changed = {item for item in files_changed if item in test_files}

    nontest_changed_paths = set()
    # Known alias: tests reference the webidl2 bundle via this legacy name.
    rewrites: Dict[Text, Text] = {"/resources/webidl2/lib/webidl2.js": "/resources/WebIDLParser.js"}
    for full_path in nontests_changed:
        rel_path = os.path.relpath(full_path, wpt_root)
        path_components = rel_path.split(os.sep)
        top_level_subdir = path_components[0]
        if top_level_subdir in skip_dirs:
            continue
        # repo_path is the "/"-rooted, "/"-separated form used in test content.
        repo_path = "/" + os.path.relpath(full_path, wpt_root).replace(os.path.sep, "/")
        if repo_path in rewrites:
            repo_path = rewrites[repo_path]
            full_path = os.path.join(wpt_root, repo_path[1:].replace("/", os.path.sep))
        nontest_changed_paths.add((full_path, repo_path))

    # Interface names are the IDL file basenames without extension.
    interfaces_changed_names = [os.path.splitext(os.path.basename(interface))[0]
                                for interface in interfaces_changed]

    def affected_by_wdspec(test: Text) -> bool:
        # A wdspec test is affected when a changed support file lives in an
        # ancestor directory (support files apply to their whole subtree).
        affected = False
        if test in wdspec_test_files:
            for support_full_path, _ in nontest_changed_paths:
                # parent of support file or of "support" directory
                parent = os.path.dirname(support_full_path)
                if os.path.basename(parent) == "support":
                    parent = os.path.dirname(parent)
                relpath = os.path.relpath(test, parent)
                if not relpath.startswith(os.pardir):
                    # testfile is in subtree of support file
                    affected = True
                    break
        return affected

    def affected_by_interfaces(file_contents: Text) -> bool:
        # Only idlharness tests can be affected by IDL changes; look for a
        # quoted reference to any changed interface name (optionally ".idl").
        if len(interfaces_changed_names) > 0:
            if 'idlharness.js' in file_contents:
                for interface in interfaces_changed_names:
                    regex = '[\'"]' + interface + '(\\.idl)?[\'"]'
                    if re.search(regex, file_contents):
                        return True
        return False

    for root, dirs, fnames in os.walk(wpt_root):
        # Walk top_level_subdir looking for test files containing either the
        # relative filepath or absolute filepath to the changed files.
        if root == wpt_root:
            for dir_name in skip_dirs:
                dirs.remove(dir_name)
        for fname in fnames:
            test_full_path = os.path.join(root, fname)
            # Skip any file that's not a test file.
            if test_full_path not in test_files:
                continue
            if affected_by_wdspec(test_full_path):
                affected_testfiles.add(test_full_path)
                continue

            with open(test_full_path, "rb") as fh:
                raw_file_contents: bytes = fh.read()
                # Decode honouring a UTF-16 BOM if present, else assume UTF-8.
                if raw_file_contents.startswith(b"\xfe\xff"):
                    file_contents: Text = raw_file_contents.decode("utf-16be", "replace")
                elif raw_file_contents.startswith(b"\xff\xfe"):
                    file_contents = raw_file_contents.decode("utf-16le", "replace")
                else:
                    file_contents = raw_file_contents.decode("utf8", "replace")
                for full_path, repo_path in nontest_changed_paths:
                    rel_path = os.path.relpath(full_path, root).replace(os.path.sep, "/")
                    if rel_path in file_contents or repo_path in file_contents or affected_by_interfaces(file_contents):
                        affected_testfiles.add(test_full_path)
                        # NOTE(review): this `continue` applies to the inner
                        # loop over changed paths, not the file loop — a
                        # `break` would stop scanning once the file is known
                        # to be affected; results are the same (set add),
                        # but confirm whether the extra scans are intended.
                        continue

    return tests_changed, affected_testfiles
    329 
    330 
    331 def get_parser() -> argparse.ArgumentParser:
    332    parser = argparse.ArgumentParser()
    333    parser.add_argument("revish", nargs="?",
    334                        help="Commits to consider. Defaults to the "
    335                        "commits on the current branch")
    336    parser.add_argument("--ignore-rule", action="append",
    337                        help="Override the rules for paths to exclude from lists of changes. "
    338                        "Rules are paths relative to the test root, with * before a separator "
    339                        "or the end matching anything other than a path separator and ** in that "
    340                        "position matching anything. This flag can be used multiple times for "
    341                        "multiple rules. Specifying this flag overrides the default: " +
    342                        ", ".join(DEFAULT_IGNORE_RULES))
    343    parser.add_argument("--modified", action="store_true",
    344                        help="Include files under version control that have been "
    345                        "modified or staged")
    346    parser.add_argument("--new", action="store_true",
    347                        help="Include files in the worktree that are not in version control")
    348    parser.add_argument("--show-type", action="store_true",
    349                        help="Print the test type along with each affected test")
    350    parser.add_argument("--null", action="store_true",
    351                        help="Separate items with a null byte")
    352    return parser
    353 
    354 
    355 def get_parser_affected() -> argparse.ArgumentParser:
    356    parser = get_parser()
    357    parser.add_argument("--metadata",
    358                        dest="metadata_root",
    359                        default=wpt_root,
    360                        help="Directory that will contain MANIFEST.json")
    361    return parser
    362 
    363 
    364 def get_revish(**kwargs: Any) -> Text:
    365    revish = kwargs.get("revish")
    366    if revish is None:
    367        revish = "%s..HEAD" % branch_point()
    368    return revish.strip()
    369 
    370 
    371 def run_changed_files(**kwargs: Any) -> None:
    372    revish = get_revish(**kwargs)
    373    changed, _ = files_changed(revish,
    374                               kwargs["ignore_rule"],
    375                               include_uncommitted=kwargs["modified"],
    376                               include_new=kwargs["new"])
    377 
    378    separator = "\0" if kwargs["null"] else "\n"
    379 
    380    for item in sorted(changed):
    381        line = os.path.relpath(item, wpt_root) + separator
    382        sys.stdout.write(line)
    383 
    384 
def run_tests_affected(**kwargs: Any) -> None:
    """Write the tests affected by the changes in the given revish to stdout.

    Output is one path (optionally "path\\ttype" with --show-type) per
    separator, where the separator is NUL with --null and newline otherwise.
    """
    revish = get_revish(**kwargs)
    changed, _ = files_changed(revish,
                               kwargs["ignore_rule"],
                               include_uncommitted=kwargs["modified"],
                               include_new=kwargs["new"])
    manifest_path = os.path.join(kwargs["metadata_root"], "MANIFEST.json")
    tests_changed, dependents = affected_testfiles(
        changed,
        {"conformance-checkers", "docs", "tools"},
        manifest_path=manifest_path
    )

    message = "{path}"
    if kwargs["show_type"]:
        # The manifest is only needed (and only loaded) to resolve item types.
        wpt_manifest = load_manifest(manifest_path)
        message = "{path}\t{item_type}"

    message += "\0" if kwargs["null"] else "\n"

    for item in sorted(tests_changed | dependents):
        results = {
            "path": os.path.relpath(item, wpt_root)
        }
        if kwargs["show_type"]:
            item_types = {i.item_type for i in wpt_manifest.iterpath(results["path"])}
            if len(item_types) != 1:
                # Collapse multiple types into one space-joined entry.
                # NOTE(review): joining a set iterates in arbitrary order, so
                # the combined string is not deterministic — confirm whether
                # callers depend on a stable ordering here.
                item_types = {" ".join(item_types)}
            results["item_type"] = item_types.pop()
        sys.stdout.write(message.format(**results))