gitignore.py (10142B)
import re
import os
import itertools
from collections import defaultdict
from typing import (Any, Dict, Iterable, List, MutableMapping, Optional, Pattern, Tuple, TypeVar,
                    Union, cast)


T = TypeVar('T')

# NOTE(review): not referenced anywhere in the code visible here; kept in case
# something outside this module imports it.
end_space = re.compile(r"([^\\]\s)*$")


def fnmatch_translate(pat: bytes) -> Tuple[bool, Pattern[bytes]]:
    """Translate a gitignore-style glob into a compiled bytes regular expression.

    Supported syntax: ``*`` and ``?`` (neither matches ``/``), ``**`` as a
    whole path component, ``[...]`` character classes (negated with ``!`` or
    ``^``) and backslash escapes.

    :param pat: The glob pattern. A leading ``/`` anchors the expression at the
                start of the path; a trailing ``/`` makes the expression match
                the directory itself or anything below it.
    :returns: ``(name_pattern, regex)``. ``name_pattern`` is True when the
              pattern contains no ``/`` (other than a stripped leading or
              trailing one); such a regex is meant to be matched against a
              bare file name rather than a full path.
    :raises ValueError: If the pattern is malformed (trailing backslash,
                        unterminated or invalid character class, ``**`` not
                        delimited by ``/``) or the resulting expression does
                        not compile.
    """
    original = pat
    parts = []
    # Index at which the currently open ``[...]`` class started, or None when
    # we are not inside a class. Only ever compared against None.
    seq: Optional[int] = None
    i = 0
    any_char = b"[^/]"
    if pat[0:1] == b"/":
        parts.append(b"^")
        pat = pat[1:]
    else:
        # By default match the entire path up to a /
        # but if / doesn't appear in the pattern we will mark it as
        # a name pattern and just produce a pattern that matches against
        # the filename
        parts.append(b"^(?:.*/)?")

    name_pattern = True
    if pat[-1:] == b"/":
        # If the last character is / match this directory or any subdirectory
        pat = pat[:-1]
        suffix = b"(?:/|$)"
    else:
        suffix = b"$"
    while i < len(pat):
        c = pat[i:i+1]
        if c == b"\\":
            if i < len(pat) - 1:
                # Escaped character: emit it literally and skip over it.
                i += 1
                c = pat[i:i+1]
                parts.append(re.escape(c))
            else:
                raise ValueError("Trailing backslash in pattern %r" % (original,))
        elif seq is not None:
            # TODO: this doesn't really handle invalid sequences in the right way
            if c == b"]":
                seq = None
                if parts[-1] == b"[":
                    # Empty class ``[]``: drop it so that it matches nothing extra.
                    parts = parts[:-1]
                elif parts[-1] == b"^" and parts[-2] == b"[":
                    raise ValueError("Empty negated character class in pattern %r" % (original,))
                else:
                    parts.append(c)
            elif c == b"-":
                parts.append(c)
            elif c == b"[":
                raise ValueError("Nested '[' in character class in pattern %r" % (original,))
            else:
                parts.append(re.escape(c))
        elif c == b"[":
            parts.append(b"[")
            if i < len(pat) - 1 and pat[i+1:i+2] in (b"!", b"^"):
                parts.append(b"^")
                i += 1
            seq = i
        elif c == b"*":
            if i < len(pat) - 1 and pat[i+1:i+2] == b"*":
                # ``**`` must be a complete path component.
                if i > 0 and pat[i-1:i] != b"/":
                    raise ValueError("'**' must follow a '/' in pattern %r" % (original,))
                parts.append(b".*")
                i += 1
                if i < len(pat) - 1 and pat[i+1:i+2] != b"/":
                    raise ValueError("'**' must be followed by '/' in pattern %r" % (original,))
            else:
                parts.append(any_char + b"*")
        elif c == b"?":
            parts.append(any_char)
        elif c == b"/":
            # We can only get here outside a character class (``seq`` is None).
            name_pattern = False
            parts.append(c)
        else:
            parts.append(re.escape(c))
        i += 1

    if name_pattern:
        # Name patterns are matched against the bare name, so the optional
        # leading-directories prefix is replaced by a plain anchor.
        parts[0] = b"^"

    if seq is not None:
        raise ValueError("Unterminated character class in pattern %r" % (original,))
    parts.append(suffix)
    try:
        return name_pattern, re.compile(b"".join(parts))
    except Exception as e:
        raise ValueError("Pattern %r does not translate to a valid regexp" % (original,)) from e


# Regexp matching rules that have to be converted to patterns
pattern_re = re.compile(br".*[\*\[\?]")


def parse_line(line: bytes) -> Optional[Tuple[bool, bool, bool, Union[Tuple[bytes, ...], Tuple[bool, Pattern[bytes]]]]]:
    """Parse one line of a gitignore file.

    :param line: The raw line; trailing whitespace is stripped.
    :returns: None for blank lines and ``#`` comments, otherwise a tuple
              ``(invert, dir_only, literal, pattern)`` where

              * ``invert`` is True for ``!`` rules,
              * ``dir_only`` is True when the rule ended with ``/``,
              * ``literal`` is True when the rule has no wildcards and is not
                inverted; ``pattern`` is then ``(name,)`` for a bare name or
                ``(directory, name)`` for a rule containing ``/``. The
                directory is relative, with ``b""`` denoting the root,
              * otherwise ``pattern`` is the result of :func:`fnmatch_translate`.
    :raises ValueError: Propagated from :func:`fnmatch_translate`.
    """
    line = line.rstrip()
    if not line or line[0:1] == b"#":
        return None

    invert = line[0:1] == b"!"
    if invert:
        line = line[1:]

    dir_only = line[-1:] == b"/"

    if dir_only:
        line = line[:-1]

    pattern: Union[Tuple[bytes, ...], Tuple[bool, Pattern[bytes]]]
    # Could make a special case for **/foo, but we don't have any patterns like that
    if not invert and not pattern_re.match(line):
        literal = True
        directory, sep, name = line.rpartition(b"/")
        if not sep:
            pattern = (name,)
        else:
            # Literal rules are looked up by relative directory path, so the
            # root anchor must not end up in the key: "/a/b" and "a/b" both
            # refer to "b" inside directory "a".
            if directory[0:1] == b"/":
                directory = directory[1:]
            pattern = (directory, name)
    else:
        pattern = fnmatch_translate(line)
        literal = False

    return invert, dir_only, literal, pattern


class PathFilter:
    """Filter for walk-style iterators that drops entries matched by gitignore rules.

    Rules are read from ``<root>/.gitignore`` (when it exists) followed by any
    ``extras`` lines. Instances are callable: calling one with an iterator of
    ``(dirpath, dirnames, filenames)`` returns an iterator with ignored
    entries removed.
    """

    def __init__(self, root: bytes, extras: Optional[List[bytes]] = None, cache: Optional[MutableMapping[bytes, bool]] = None) -> None:
        """
        :param root: Directory containing the ``.gitignore`` file; a falsy
                     value means that no file is read.
        :param extras: Additional rule lines, processed after the file.
        :param cache: Optional mapping from path to "is ignored" that is both
                      consulted and updated while filtering.
        """
        # Literal (wildcard-free) rules, keyed by directory (None meaning "any
        # directory") and then by name. The value is the list of later
        # inverted rules that may override the entry.
        self.literals_file: Dict[Optional[bytes], Dict[bytes, List[Tuple[bool, Pattern[bytes]]]]] = defaultdict(dict)
        self.literals_dir: Dict[Optional[bytes], Dict[bytes, List[Tuple[bool, Pattern[bytes]]]]] = defaultdict(dict)
        # Wildcard rules, each paired with the inverted rules that follow it.
        self.patterns_file: List[Tuple[Tuple[bool, Pattern[bytes]], List[Tuple[bool, Pattern[bytes]]]]] = []
        self.patterns_dir: List[Tuple[Tuple[bool, Pattern[bytes]], List[Tuple[bool, Pattern[bytes]]]]] = []

        if cache is None:
            cache = {}
        self.cache: MutableMapping[bytes, bool] = cache

        if root:
            ignore_path: Optional[bytes] = os.path.join(root, b".gitignore")
        else:
            ignore_path = None

        # With neither a file nor extra rules there is nothing to filter; the
        # state above is still initialised so filter() stays usable.
        self.trivial = not ignore_path and not extras
        if self.trivial:
            return

        if extras is None:
            extras = []

        if ignore_path and os.path.exists(ignore_path):
            args: Tuple[Optional[bytes], List[bytes]] = (ignore_path, extras)
        else:
            args = None, extras
        self._read_ignore(*args)

    def _read_ignore(self, ignore_path: Optional[bytes], extras: List[bytes]) -> None:
        """Load rules from ``ignore_path`` (if not None) and then from ``extras``."""
        if ignore_path is not None:
            with open(ignore_path, "rb") as f:
                for line in f:
                    self._read_line(line)
        for line in extras:
            self._read_line(line)

    def _read_line(self, line: bytes) -> None:
        """Parse a single rule line and record it in the rule tables."""
        parsed = parse_line(line)
        if not parsed:
            return
        invert, dir_only, literal, rule = parsed

        if invert:
            # For exclude rules, we attach the rules to all preceding patterns, so
            # that we can match patterns out of order and check if they were later
            # overridden by an exclude rule
            assert not literal
            rule = cast(Tuple[bool, Pattern[bytes]], rule)
            if not dir_only:
                rules_iter: Iterable[Tuple[Any, List[Tuple[bool, Pattern[bytes]]]]] = itertools.chain(
                    itertools.chain.from_iterable(item.items() for item in self.literals_dir.values()),
                    itertools.chain.from_iterable(item.items() for item in self.literals_file.values()),
                    self.patterns_dir,
                    self.patterns_file)
            else:
                rules_iter = itertools.chain(
                    itertools.chain.from_iterable(item.items() for item in self.literals_dir.values()),
                    self.patterns_dir)

            for rules in rules_iter:
                rules[1].append(rule)
        else:
            if literal:
                rule = cast(Tuple[bytes, ...], rule)
                dir_name: Optional[bytes]
                pattern: bytes
                if len(rule) == 1:
                    dir_name, pattern = None, rule[0]
                else:
                    dir_name, pattern = rule
                self.literals_dir[dir_name][pattern] = []
                if not dir_only:
                    self.literals_file[dir_name][pattern] = []
            else:
                rule = cast(Tuple[bool, Pattern[bytes]], rule)
                self.patterns_dir.append((rule, []))
                if not dir_only:
                    self.patterns_file.append((rule, []))

    def filter(self,
               iterator: Iterable[Tuple[bytes, List[Tuple[bytes, T]], List[Tuple[bytes, T]]]]
               ) -> Iterable[Tuple[bytes, List[Tuple[bytes, T]], List[Tuple[bytes, T]]]]:
        """Yield the items of ``iterator`` with ignored directories and files removed.

        :param iterator: Yields ``(dirpath, dirnames, filenames)`` where the
                         two lists contain ``(name, data)`` tuples.
        :returns: Generator of ``(dirpath, dirnames, filenames)`` with the
                  original ``dirpath``. ``dirnames`` is the same list object
                  that was passed in, pruned in place; ``filenames`` is a new
                  list.
        """
        empty: Dict[Any, Any] = {}
        path_sep = os.path.sep.encode()
        for dirpath, dirnames, filenames in iterator:
            orig_dirpath = dirpath
            # Rules always use "/" as the separator.
            if path_sep != b"/":
                dirpath = dirpath.replace(path_sep, b"/")

            keep_dirs: List[Tuple[bytes, T]] = []
            keep_files: List[Tuple[bytes, T]] = []

            # NOTE(review): directory paths get a trailing "/" while rules
            # compiled through parse_line end in "$", so a path (non-name)
            # pattern such as "a/*b" does not appear able to match a
            # directory - confirm whether that is intended.
            for iter_items, literals, patterns, target, suffix in [
                    (dirnames, self.literals_dir, self.patterns_dir, keep_dirs, b"/"),
                    (filenames, self.literals_file, self.patterns_file, keep_files, b"")]:
                for item in iter_items:
                    name = item[0]
                    if dirpath:
                        path = b"%s/%s" % (dirpath, name) + suffix
                    else:
                        path = name + suffix
                    if path in self.cache:
                        # The cache stores True for ignored paths.
                        if not self.cache[path]:
                            target.append(item)
                        continue
                    # Literal rules either apply in any directory (None) or
                    # in exactly this directory ("." is treated as the root).
                    for rule_dir in [None, dirpath if dirpath != b"." else b""]:
                        if name in literals.get(rule_dir, empty):
                            exclude = literals[rule_dir][name]
                            if not any(rule.match(name if name_only else path)
                                       for name_only, rule in exclude):
                                # Skip this item
                                self.cache[path] = True
                                break
                    else:
                        for (component_only, pattern), exclude in patterns:
                            if component_only:
                                match = pattern.match(name)
                            else:
                                match = pattern.match(path)
                            if match:
                                if not any(rule.match(name if name_only else path)
                                           for name_only, rule in exclude):
                                    # Skip this item
                                    self.cache[path] = True
                                    break
                        else:
                            self.cache[path] = False
                            target.append(item)

            # Prune in place so the producer of the list sees the change.
            dirnames[:] = keep_dirs
            assert not any(b".git" == name for name, _ in dirnames)
            yield orig_dirpath, dirnames, keep_files

    def __call__(self,
                 iterator: Iterable[Tuple[bytes, List[Tuple[bytes, T]], List[Tuple[bytes, T]]]]
                 ) -> Iterable[Tuple[bytes, List[Tuple[bytes, T]], List[Tuple[bytes, T]]]]:
        """Filter ``iterator``; a filter without rules returns it unchanged."""
        if self.trivial:
            return iterator

        return self.filter(iterator)



def has_ignore(dirpath: bytes) -> bool:
    """Return True if ``dirpath`` contains an entry called ``.gitignore``."""
    candidate = os.path.join(dirpath, b".gitignore")
    return os.path.exists(candidate)