tor

The Tor anonymity network
git clone https://git.dasho.dev/tor.git
Log | Files | Refs | README | LICENSE

includes.py (12292B)


      1 #!/usr/bin/env python
      2 # Copyright 2018 The Tor Project, Inc.  See LICENSE file for licensing info.
      3 
      4 """This script looks through all the directories for files matching *.c or
      5   *.h, and checks their #include directives to make sure that only "permitted"
      6   headers are included.
      7 
      8   Any #include directives with angle brackets (like #include <stdio.h>) are
      9   ignored -- only directives with quotes (like #include "foo.h") are
     10   considered.
     11 
     12   To decide what includes are permitted, this script looks at a .may_include
     13   file in each directory.  This file contains empty lines, #-prefixed
     14   comments, filenames (like "lib/foo/bar.h") and file globs (like lib/*/*.h)
     15   for files that are permitted.
     16 
     17   The script exits with an error if any non-permitted includes are found.
     18   .may_include files that contain "!advisory" are considered advisory.
     19   Advisory .may_include files only result in warnings, rather than errors.
     20 """
     21 
     22 # Future imports for Python 2.7, mandatory in 3.0
     23 from __future__ import division
     24 from __future__ import print_function
     25 from __future__ import unicode_literals
     26 
     27 import fnmatch
     28 import os
     29 import re
     30 import sys
     31 
     32 if sys.version_info[0] <= 2:
     33    def open_file(fname):
     34        return open(fname, 'r')
     35 else:
     36    def open_file(fname):
     37        return open(fname, 'r', encoding='utf-8')
     38 
     39 def warn(msg):
     40    print(msg, file=sys.stderr)
     41 
     42 def fname_is_c(fname):
     43    """
     44    Return true if 'fname' is the name of a file that we should
     45    search for possibly disallowed #include directives.
     46    """
     47    if fname.endswith((".c", ".h")):
     48        bname = os.path.basename(fname)
     49        return not bname.startswith((".", "#"))
     50    else:
     51        return False
     52 
# Matches a quoted #include directive and captures the included path.
# Angle-bracket includes (#include <...>) deliberately do not match,
# per the module docstring.
INCLUDE_PATTERN = re.compile(r'\s*#\s*include\s+"([^"]*)"')
# Name of the per-directory rules file listing permitted includes.
RULES_FNAME = ".may_include"

# Shapes that a "normal" .may_include pattern may take; anything else
# is still accepted but provokes an "Unusual pattern" warning.
ALLOWED_PATTERNS = [
    re.compile(r'^.*\*\.(h|inc)$'),     # glob over headers, e.g. lib/*/*.h
    re.compile(r'^.*/.*\.h$'),          # a specific header, e.g. lib/foo/bar.h
    re.compile(r'^ext/.*\.c$'),         # C files under ext/
    re.compile(r'^orconfig.h$'),        # the generated configuration header
    re.compile(r'^micro-revision.i$'),  # the generated revision snippet
]

# Default top-level directory to scan; main() replaces this with the
# positional `topdir` argument.
TOPDIR = "src"
     65 
     66 def pattern_is_normal(s):
     67    for p in ALLOWED_PATTERNS:
     68        if p.match(s):
     69            return True
     70    return False
     71 
     72 class Error(object):
     73    def __init__(self, location, msg, is_advisory=False):
     74        self.location = location
     75        self.msg = msg
     76        self.is_advisory = is_advisory
     77 
     78    def __str__(self):
     79        return "{} at {}".format(self.msg, self.location)
     80 
     81 class Rules(object):
     82    """ A 'Rules' object is the parsed version of a .may_include file. """
     83    def __init__(self, dirpath):
     84        self.dirpath = dirpath
     85        if dirpath.startswith("src/"):
     86            self.incpath = dirpath[4:]
     87        else:
     88            self.incpath = dirpath
     89        self.patterns = []
     90        self.usedPatterns = set()
     91        self.is_advisory = False
     92 
     93    def addPattern(self, pattern):
     94        if pattern == "!advisory":
     95            self.is_advisory = True
     96            return
     97        if not pattern_is_normal(pattern):
     98            warn("Unusual pattern {} in {}".format(pattern, self.dirpath))
     99        self.patterns.append(pattern)
    100 
    101    def includeOk(self, path):
    102        for pattern in self.patterns:
    103            if fnmatch.fnmatchcase(path, pattern):
    104                self.usedPatterns.add(pattern)
    105                return True
    106        return False
    107 
    108    def applyToLines(self, lines, loc_prefix=""):
    109        lineno = 0
    110        for line in lines:
    111            lineno += 1
    112            m = INCLUDE_PATTERN.match(line)
    113            if m:
    114                include = m.group(1)
    115                if not self.includeOk(include):
    116                    yield Error("{}{}".format(loc_prefix,str(lineno)),
    117                                "Forbidden include of {}".format(include),
    118                                is_advisory=self.is_advisory)
    119 
    120    def applyToFile(self, fname, f):
    121        for error in self.applyToLines(iter(f), "{}:".format(fname)):
    122            yield error
    123 
    124    def noteUnusedRules(self):
    125        for p in self.patterns:
    126            if p not in self.usedPatterns:
    127                warn("Pattern {} in {} was never used.".format(p, self.dirpath))
    128 
    129    def getAllowedDirectories(self):
    130        allowed = []
    131        for p in self.patterns:
    132            m = re.match(r'^(.*)/\*\.(h|inc)$', p)
    133            if m:
    134                allowed.append(m.group(1))
    135                continue
    136            m = re.match(r'^(.*)/[^/]*$', p)
    137            if m:
    138                allowed.append(m.group(1))
    139                continue
    140 
    141        return allowed
    142 
    143 
    144 def normalize_srcdir(fname):
    145    """given the name of a source directory or file, return its name
    146        relative to `src` in a unix-like format.
    147    """
    148    orig = fname
    149    dirname, dirfile = os.path.split(fname)
    150    if re.match(r'.*\.[ch]$', dirfile):
    151        fname = dirname
    152 
    153    # Now we have a directory.
    154    dirname, result = os.path.split(fname)
    155    for _ in range(100):
    156        # prevent excess looping in case I missed a tricky case
    157        dirname, dirpart = os.path.split(dirname)
    158        if dirpart == 'src' or dirname == "":
    159            #print(orig,"=>",result)
    160            return result
    161        result = "{}/{}".format(dirpart,result)
    162 
    163    print("No progress!")
    164    assert False
    165 
# Cache mapping each .may_include path to its parsed Rules object, or
# None when that file does not exist; filled by load_include_rules().
include_rules_cache = {}
    167 
    168 def load_include_rules(fname):
    169    """ Read a rules file from 'fname', and return it as a Rules object.
    170        Return 'None' if fname does not exist.
    171    """
    172    if fname in include_rules_cache:
    173        return include_rules_cache[fname]
    174    if not os.path.exists(fname):
    175        include_rules_cache[fname] = None
    176        return None
    177    result = Rules(os.path.split(fname)[0])
    178    with open_file(fname) as f:
    179        for line in f:
    180            line = line.strip()
    181            if line.startswith("#") or not line:
    182                continue
    183            result.addPattern(line)
    184    include_rules_cache[fname] = result
    185    return result
    186 
    187 def get_all_include_rules():
    188    """Return a list of all the Rules objects we have loaded so far,
    189       sorted by their directory names."""
    190    return [ rules for (fname,rules) in
    191             sorted(include_rules_cache.items())
    192             if rules is not None ]
    193 
    194 def remove_self_edges(graph):
    195    """Takes a directed graph in as an adjacency mapping (a mapping from
    196       node to a list of the nodes to which it connects).
    197 
    198       Remove all edges from a node to itself."""
    199 
    200    for k in list(graph):
    201        graph[k] = [ d for d in graph[k] if d != k ]
    202 
    203 def closure(graph):
    204    """Takes a directed graph in as an adjacency mapping (a mapping from
    205       node to a list of the nodes to which it connects), and completes
    206       its closure.
    207    """
    208    graph = graph.copy()
    209    changed = False
    210    for k in graph.keys():
    211        graph[k] = set(graph[k])
    212    while True:
    213        for k in graph.keys():
    214            sz = len(graph[k])
    215            for v in list(graph[k]):
    216                graph[k].update(graph.get(v, []))
    217            if sz != len(graph[k]):
    218                changed = True
    219 
    220        if not changed:
    221            return graph
    222        changed = False
    223 
    224 def toposort(graph, limit=100):
    225    """Takes a directed graph in as an adjacency mapping (a mapping from
    226       node to a list of the nodes to which it connects).  Tries to
    227       perform a topological sort on the graph, arranging the nodes into
    228       "levels", such that every member of each level is only reachable
    229       by members of later levels.
    230 
    231       Returns a list of the members of each level.
    232 
    233       Modifies the input graph, removing every member that could be
    234       sorted.  If the graph does not become empty, then it contains a
    235       cycle.
    236 
    237       "limit" is the max depth of the graph after which we give up trying
    238       to sort it and conclude we have a cycle.
    239    """
    240    all_levels = []
    241 
    242    n = 0
    243    while graph:
    244        n += 0
    245        cur_level = []
    246        all_levels.append(cur_level)
    247        for k in list(graph):
    248            graph[k] = [ d for d in graph[k] if d in graph ]
    249            if graph[k] == []:
    250                cur_level.append(k)
    251        for k in cur_level:
    252            del graph[k]
    253        n += 1
    254        if n > limit:
    255            break
    256 
    257    return all_levels
    258 
    259 def consider_include_rules(fname, f):
    260    dirpath = os.path.split(fname)[0]
    261    rules_fname = os.path.join(dirpath, RULES_FNAME)
    262    rules = load_include_rules(os.path.join(dirpath, RULES_FNAME))
    263    if rules is None:
    264        return
    265 
    266    for err in rules.applyToFile(fname, f):
    267        yield err
    268 
    269    list_unused = False
    270    log_sorted_levels = False
    271 
    272 def walk_c_files(topdir="src"):
    273    """Run through all .c and .h files under topdir, looking for
    274       include-rule violations. Yield those violations."""
    275 
    276    for dirpath, dirnames, fnames in os.walk(topdir):
    277        for fname in fnames:
    278            if fname_is_c(fname):
    279                fullpath = os.path.join(dirpath,fname)
    280                with open(fullpath) as f:
    281                    for err in consider_include_rules(fullpath, f):
    282                        yield err
    283 
    284 def open_or_stdin(fname):
    285    if fname == '-':
    286        return sys.stdin
    287    else:
    288        return open(fname)
    289 
    290 def check_subsys_file(fname, uses_dirs):
    291    if not uses_dirs:
    292        # We're doing a distcheck build, or for some other reason there are
    293        # no .may_include files.
    294        print("SKIPPING")
    295        return False
    296 
    297    uses_dirs = { normalize_srcdir(k) : { normalize_srcdir(d) for d in v }
    298                  for (k,v) in uses_dirs.items() }
    299    uses_closure = closure(uses_dirs)
    300    ok = True
    301    previous_subsystems = []
    302 
    303    with open_or_stdin(fname) as f:
    304        for line in f:
    305            _, name, fname = line.split()
    306            fname = normalize_srcdir(fname)
    307            for prev in previous_subsystems:
    308                if fname in uses_closure[prev]:
    309                    print("INVERSION: {} uses {}".format(prev,fname))
    310                    ok = False
    311            previous_subsystems.append(fname)
    312    return not ok
    313 
def run_check_includes(topdir, list_unused=False, log_sorted_levels=False,
                      list_advisories=False, check_subsystem_order=None):
    """Top-level driver: report forbidden #includes under `topdir`, then
       run the dependency-graph checks built from the .may_include files.

       Calls sys.exit(1) on any real (non-advisory) violation, on a
       subsystem-order inversion, or on a circular directory dependency;
       otherwise returns None.
    """
    trouble = False

    # Print every violation; advisory ones are shown only when requested
    # and never make the run fail.
    for err in walk_c_files(topdir):
        if err.is_advisory and not list_advisories:
            continue
        print(err, file=sys.stderr)
        if not err.is_advisory:
            trouble = True

    if trouble:
        warn(
    """To change which includes are allowed in a C file, edit the {}
    files in its enclosing directory.""".format(RULES_FNAME))
        sys.exit(1)

    if list_unused:
        for rules in get_all_include_rules():
            rules.noteUnusedRules()

    # Directory-level dependency graph implied by the rules:
    # directory -> directories it is allowed to include from.
    uses_dirs = { }
    for rules in get_all_include_rules():
        uses_dirs[rules.incpath] = rules.getAllowedDirectories()

    remove_self_edges(uses_dirs)

    if check_subsystem_order:
        # check_subsys_file returns True when it finds an ordering problem.
        if check_subsys_file(check_subsystem_order, uses_dirs):
            sys.exit(1)

    # NOTE: toposort consumes `uses_dirs` in place; whatever remains
    # afterwards is part of a dependency cycle (checked below).
    all_levels = toposort(uses_dirs)

    if log_sorted_levels:
        for (n, cur_level) in enumerate(all_levels):
            if cur_level:
                print(n, cur_level)

    if uses_dirs:
        print("There are circular .may_include dependencies in here somewhere:",
              uses_dirs)
        sys.exit(1)
    356 
    357 def main(argv):
    358    import argparse
    359 
    360    progname = argv[0]
    361    parser = argparse.ArgumentParser(prog=progname)
    362    parser.add_argument("--toposort", action="store_true",
    363                        help="Print a topologically sorted list of modules")
    364    parser.add_argument("--list-unused", action="store_true",
    365                        help="List unused lines in .may_include files.")
    366    parser.add_argument("--list-advisories", action="store_true",
    367                        help="List advisories as well as forbidden includes")
    368    parser.add_argument("--check-subsystem-order", action="store",
    369                        help="Check a list of subsystems for ordering")
    370    parser.add_argument("topdir", default="src", nargs="?",
    371                        help="Top-level directory for the tor source")
    372    args = parser.parse_args(argv[1:])
    373 
    374    global TOPDIR
    375    TOPDIR = args.topdir
    376    run_check_includes(topdir=args.topdir,
    377                       log_sorted_levels=args.toposort,
    378                       list_unused=args.list_unused,
    379                       list_advisories=args.list_advisories,
    380                       check_subsystem_order=args.check_subsystem_order)
    381 
# Script entry point: pass the full argv, since main() uses argv[0] as the
# program name for --help output.
if __name__ == '__main__':
    main(sys.argv)