tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

gitignore.py (10142B)


      1 import re
      2 import os
      3 import itertools
      4 from collections import defaultdict
      5 from typing import (Any, Dict, Iterable, List, MutableMapping, Optional, Pattern, Tuple, TypeVar,
      6                    Union, cast)
      7 
      8 
# Type variable for the per-entry payload that accompanies each name in the
# (name, payload) tuples handled by PathFilter.filter; the payload is passed
# through untouched.
T = TypeVar('T')

# Matches, anchored at the end of a str, a (possibly empty) run of
# "non-backslash character followed by a whitespace character" pairs.
# NOTE(review): not referenced by any code visible in this module, and it is a
# str pattern while the rest of the module works on bytes -- confirm whether it
# is still needed.
end_space = re.compile(r"([^\\]\s)*$")
     12 
     13 
     14 def fnmatch_translate(pat: bytes) -> Tuple[bool, Pattern[bytes]]:
     15    parts = []
     16    seq: Optional[int] = None
     17    i = 0
     18    any_char = b"[^/]"
     19    if pat[0:1] == b"/":
     20        parts.append(b"^")
     21        pat = pat[1:]
     22    else:
     23        # By default match the entire path up to a /
     24        # but if / doesn't appear in the pattern we will mark is as
     25        # a name pattern and just produce a pattern that matches against
     26        # the filename
     27        parts.append(b"^(?:.*/)?")
     28 
     29    name_pattern = True
     30    if pat[-1:] == b"/":
     31        # If the last character is / match this directory or any subdirectory
     32        pat = pat[:-1]
     33        suffix = b"(?:/|$)"
     34    else:
     35        suffix = b"$"
     36    while i < len(pat):
     37        c = pat[i:i+1]
     38        if c == b"\\":
     39            if i < len(pat) - 1:
     40                i += 1
     41                c = pat[i:i+1]
     42                parts.append(re.escape(c))
     43            else:
     44                raise ValueError
     45        elif seq is not None:
     46            # TODO: this doesn't really handle invalid sequences in the right way
     47            if c == b"]":
     48                seq = None
     49                if parts[-1] == b"[":
     50                    parts = parts[:-1]
     51                elif parts[-1] == b"^" and parts[-2] == b"[":
     52                    raise ValueError
     53                else:
     54                    parts.append(c)
     55            elif c == b"-":
     56                parts.append(c)
     57            elif c == b"[":
     58                raise ValueError
     59            else:
     60                parts.append(re.escape(c))
     61        elif c == b"[":
     62            parts.append(b"[")
     63            if i < len(pat) - 1 and pat[i+1:i+2] in (b"!", b"^"):
     64                parts.append(b"^")
     65                i += 1
     66            seq = i
     67        elif c == b"*":
     68            if i < len(pat) - 1 and pat[i+1:i+2] == b"*":
     69                if i > 0 and pat[i-1:i] != b"/":
     70                    raise ValueError
     71                parts.append(b".*")
     72                i += 1
     73                if i < len(pat) - 1 and pat[i+1:i+2] != b"/":
     74                    raise ValueError
     75            else:
     76                parts.append(any_char + b"*")
     77        elif c == b"?":
     78            parts.append(any_char)
     79        elif c == b"/" and not seq:
     80            name_pattern = False
     81            parts.append(c)
     82        else:
     83            parts.append(re.escape(c))
     84        i += 1
     85 
     86    if name_pattern:
     87        parts[0] = b"^"
     88 
     89    if seq is not None:
     90        raise ValueError
     91    parts.append(suffix)
     92    try:
     93        return name_pattern, re.compile(b"".join(parts))
     94    except Exception:
     95        raise ValueError
     96 
# Matches any rule that contains a glob metacharacter (``*``, ``[`` or ``?``).
# parse_line sends such rules through fnmatch_translate; rules without one
# (and not negated) are kept as literal names instead.
pattern_re = re.compile(br".*[\*\[\?]")
     99 
    100 
    101 def parse_line(line: bytes) -> Optional[Tuple[bool, bool, bool, Union[Tuple[bytes, ...], Tuple[bool, Pattern[bytes]]]]]:
    102    line = line.rstrip()
    103    if not line or line[0:1] == b"#":
    104        return None
    105 
    106    invert = line[0:1] == b"!"
    107    if invert:
    108        line = line[1:]
    109 
    110    dir_only = line[-1:] == b"/"
    111 
    112    if dir_only:
    113        line = line[:-1]
    114 
    115    # Could make a special case for **/foo, but we don't have any patterns like that
    116    if not invert and not pattern_re.match(line):
    117        literal = True
    118        pattern: Union[Tuple[bytes, ...], Tuple[bool, Pattern[bytes]]] = tuple(line.rsplit(b"/", 1))
    119    else:
    120        pattern = fnmatch_translate(line)
    121        literal = False
    122 
    123    return invert, dir_only, literal, pattern
    124 
    125 
    126 class PathFilter:
    127    def __init__(self, root: bytes, extras: Optional[List[bytes]] = None, cache: Optional[MutableMapping[bytes, bool]] = None) -> None:
    128        if root:
    129            ignore_path: Optional[bytes] = os.path.join(root, b".gitignore")
    130        else:
    131            ignore_path = None
    132        if not ignore_path and not extras:
    133            self.trivial = True
    134            return
    135        self.trivial = False
    136 
    137        self.literals_file: Dict[Optional[bytes], Dict[bytes, List[Tuple[bool, Pattern[bytes]]]]] = defaultdict(dict)
    138        self.literals_dir: Dict[Optional[bytes], Dict[bytes, List[Tuple[bool, Pattern[bytes]]]]] = defaultdict(dict)
    139        self.patterns_file: List[Tuple[Tuple[bool, Pattern[bytes]], List[Tuple[bool, Pattern[bytes]]]]] = []
    140        self.patterns_dir: List[Tuple[Tuple[bool, Pattern[bytes]], List[Tuple[bool, Pattern[bytes]]]]] = []
    141 
    142        if cache is None:
    143            cache = {}
    144        self.cache: MutableMapping[bytes, bool] = cache
    145 
    146        if extras is None:
    147            extras = []
    148 
    149        if ignore_path and os.path.exists(ignore_path):
    150            args: Tuple[Optional[bytes], List[bytes]] = (ignore_path, extras)
    151        else:
    152            args = None, extras
    153        self._read_ignore(*args)
    154 
    155    def _read_ignore(self, ignore_path: Optional[bytes], extras: List[bytes]) -> None:
    156        if ignore_path is not None:
    157            with open(ignore_path, "rb") as f:
    158                for line in f:
    159                    self._read_line(line)
    160        for line in extras:
    161            self._read_line(line)
    162 
    163    def _read_line(self, line: bytes) -> None:
    164        parsed = parse_line(line)
    165        if not parsed:
    166            return
    167        invert, dir_only, literal, rule = parsed
    168 
    169        if invert:
    170            # For exclude rules, we attach the rules to all preceeding patterns, so
    171            # that we can match patterns out of order and check if they were later
    172            # overridden by an exclude rule
    173            assert not literal
    174            rule = cast(Tuple[bool, Pattern[bytes]], rule)
    175            if not dir_only:
    176                rules_iter: Iterable[Tuple[Any, List[Tuple[bool, Pattern[bytes]]]]] = itertools.chain(
    177                    itertools.chain(*(item.items() for item in self.literals_dir.values())),
    178                    itertools.chain(*(item.items() for item in self.literals_file.values())),
    179                    self.patterns_dir,
    180                    self.patterns_file)
    181            else:
    182                rules_iter = itertools.chain(
    183                    itertools.chain(*(item.items() for item in self.literals_dir.values())),
    184                    self.patterns_dir)
    185 
    186            for rules in rules_iter:
    187                rules[1].append(rule)
    188        else:
    189            if literal:
    190                rule = cast(Tuple[bytes, ...], rule)
    191                if len(rule) == 1:
    192                    dir_name, pattern = None, rule[0]  # type: Tuple[Optional[bytes], bytes]
    193                else:
    194                    dir_name, pattern = rule
    195                self.literals_dir[dir_name][pattern] = []
    196                if not dir_only:
    197                    self.literals_file[dir_name][pattern] = []
    198            else:
    199                rule = cast(Tuple[bool, Pattern[bytes]], rule)
    200                self.patterns_dir.append((rule, []))
    201                if not dir_only:
    202                    self.patterns_file.append((rule, []))
    203 
    204    def filter(self,
    205               iterator: Iterable[Tuple[bytes, List[Tuple[bytes, T]], List[Tuple[bytes, T]]]]
    206               ) -> Iterable[Tuple[bytes, List[Tuple[bytes, T]], List[Tuple[bytes, T]]]]:
    207        empty: Dict[Any, Any] = {}
    208        for dirpath, dirnames, filenames in iterator:
    209            orig_dirpath = dirpath
    210            path_sep = os.path.sep.encode()
    211            if path_sep != b"/":
    212                dirpath = dirpath.replace(path_sep, b"/")
    213 
    214            keep_dirs: List[Tuple[bytes, T]] = []
    215            keep_files: List[Tuple[bytes, T]] = []
    216 
    217            for iter_items, literals, patterns, target, suffix in [
    218                    (dirnames, self.literals_dir, self.patterns_dir, keep_dirs, b"/"),
    219                    (filenames, self.literals_file, self.patterns_file, keep_files, b"")]:
    220                for item in iter_items:
    221                    name = item[0]
    222                    if dirpath:
    223                        path = b"%s/%s" % (dirpath, name) + suffix
    224                    else:
    225                        path = name + suffix
    226                    if path in self.cache:
    227                        if not self.cache[path]:
    228                            target.append(item)
    229                        continue
    230                    for rule_dir in [None, dirpath if dirpath != b"." else b""]:
    231                        if name in literals.get(rule_dir, empty):
    232                            exclude = literals[rule_dir][name]
    233                            if not any(rule.match(name if name_only else path)
    234                                       for name_only, rule in exclude):
    235                                # Skip this item
    236                                self.cache[path] = True
    237                                break
    238                    else:
    239                        for (component_only, pattern), exclude in patterns:
    240                            if component_only:
    241                                match = pattern.match(name)
    242                            else:
    243                                match = pattern.match(path)
    244                            if match:
    245                                if not any(rule.match(name if name_only else path)
    246                                           for name_only, rule in exclude):
    247                                    # Skip this item
    248                                    self.cache[path] = True
    249                                    break
    250                        else:
    251                            self.cache[path] = False
    252                            target.append(item)
    253 
    254            dirnames[:] = keep_dirs
    255            assert not any(b".git" == name for name, _ in dirnames)
    256            yield orig_dirpath, dirnames, keep_files
    257 
    258    def __call__(self,
    259                 iterator: Iterable[Tuple[bytes, List[Tuple[bytes, T]], List[Tuple[bytes, T]]]]
    260                 ) -> Iterable[Tuple[bytes, List[Tuple[bytes, T]], List[Tuple[bytes, T]]]]:
    261        if self.trivial:
    262            return iterator
    263 
    264        return self.filter(iterator)
    265 
    266 
    267 def has_ignore(dirpath: bytes) -> bool:
    268    return os.path.exists(os.path.join(dirpath, b".gitignore"))