files_changed.py (4267B)
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

"""
Support for optimizing tasks based on the set of files that have changed.
"""

import logging
import os
from subprocess import CalledProcessError

from mozbuild.util import memoize
from mozpack.path import join as join_path
from mozpack.path import match as mozpackmatch
from mozversioncontrol import InvalidRepoPath, get_repository_object

from gecko_taskgraph import GECKO
from gecko_taskgraph.util.hg import get_json_pushchangedfiles

logger = logging.getLogger(__name__)


@memoize
def get_changed_files(repository, revision):
    """
    Get the set of files changed in the push headed by the given revision.
    Responses are cached, so multiple calls with the same arguments are OK.

    Because the result is cached and shared between callers, it must be
    treated as read-only: copy it before modifying it.

    If the push metadata has no "files" entry, a KeyError is re-raised when
    the MOZ_AUTOMATION environment variable is set; otherwise the locally
    changed files of the GECKO checkout are returned instead.
    """
    try:
        return get_json_pushchangedfiles(repository, revision)["files"]
    except KeyError:
        # We shouldn't hit this error in CI.
        if os.environ.get("MOZ_AUTOMATION"):
            raise

        # We're likely on an unpublished commit, grab changed files from
        # version control.
        return get_locally_changed_files(GECKO)


def check(params, file_patterns):
    """Determine whether any of the files changed in the indicated push to
    https://hg.mozilla.org match any of the given file patterns.

    `params` is read for `head_repository` / `head_rev` and, when
    `comm_head_repository` is present, for `comm_head_rev` as well; files
    from the comm repository are considered under a `comm/` prefix.

    Returns True when a changed file matches one of `file_patterns`, and also
    when required parameters are missing (in which case all files are assumed
    to have changed). Returns False otherwise.
    """
    repository = params.get("head_repository")
    revision = params.get("head_rev")
    if not repository or not revision:
        logger.warning(
            "Missing `head_repository` or `head_rev` parameters; "
            "assuming all files have changed"
        )
        return True

    # Work on a private copy: get_changed_files() hands back its memoized
    # value, so extending it in place would leak `comm/` paths into the cache
    # (and would fail outright if the cached value is a list).
    changed_files = set(get_changed_files(repository, revision))

    if "comm_head_repository" in params:
        repository = params.get("comm_head_repository")
        revision = params.get("comm_head_rev")
        if not revision:
            logger.warning(
                "Missing `comm_head_rev` parameters; assuming all files have changed"
            )
            return True

        changed_files |= {
            join_path("comm", file) for file in get_changed_files(repository, revision)
        }

    return any(
        mozpackmatch(path, pattern)
        for pattern in file_patterns
        for path in changed_files
    )


def _get_locally_changed_files(repo):
    """
    Return the set of outgoing files reported by version control for `repo`.

    An empty set is returned when `repo` is not a valid repository path or
    when the underlying version control command fails.
    """
    try:
        vcs = get_repository_object(repo)
        # NOTE(review): "AM" presumably selects added and modified files;
        # confirm against mozversioncontrol.
        return set(vcs.get_outgoing_files("AM"))
    except (InvalidRepoPath, CalledProcessError):
        return set()


class PreloadedGetLocallyChangedFiles:
    """
    Function-like class that performs eager computation of
    _get_locally_changed_files for what looks like the default repo.

    The rationale is the following:
    - computing _get_locally_changed_files is relatively slow (~600ms)
    - it's already done through an external command

    So we do that in a background thread as soon as possible, so that at the
    point when we need the result, it's already `prefetched'.
    """

    def __init__(self):
        # Path of the single repo being preloaded, or None if preload() has
        # not been called.
        self.preloaded_repo = None
        # Background thread computing the answer for `preloaded_repo`.
        self.preloading_thread = None
        # Result stored by the background thread; None until it finishes.
        self.preloaded_answer = None

    def preload(self, repo):
        """
        Fire off preloading of get_locally_changed_files(repo).

        For the sake of simplicity, there can be only one preloaded repo.
        Raises ValueError if a repo has already been preloaded.
        """
        import threading
        from pathlib import Path

        if self.preloaded_repo is not None:
            raise ValueError("Can only preload one repo")

        self.preloaded_repo = Path(repo)

        def preloading():
            self.preloaded_answer = _get_locally_changed_files(self.preloaded_repo)

        self.preloading_thread = threading.Thread(target=preloading, daemon=True)
        self.preloading_thread.start()

    @memoize
    def __call__(self, repo):
        """
        Return the locally changed files for `repo`, using the preloaded
        answer when `repo` designates the preloaded repository.
        """
        from pathlib import Path

        # preload() stores the repo as a Path, and a plain string never
        # compares equal to a Path, so normalize before comparing. The None
        # guard keeps us from joining a thread that was never started.
        if self.preloaded_repo is not None and Path(repo) == self.preloaded_repo:
            # A thread can be joined many times, but it's going to happen only
            # once, thanks to @memoize.
            self.preloading_thread.join()
            if self.preloaded_answer is not None:
                return self.preloaded_answer
            # The background thread ended without storing a result (it raised
            # something unexpected); fall through and compute synchronously so
            # the caller gets a real answer or a real exception, not None.
        return _get_locally_changed_files(repo)


get_locally_changed_files = PreloadedGetLocallyChangedFiles()