tor

The Tor anonymity network
git clone https://git.dasho.dev/tor.git
Log | Files | Refs | README | LICENSE

practracker.py (11902B)


      1 #!/usr/bin/env python
      2 
      3 """
      4 Best-practices tracker for Tor source code.
      5 
      6 Go through the various .c files and collect metrics about them. If the metrics
      7 violate some of our best practices and they are not found in the optional
      8 exceptions file, then log a problem about them.
      9 
     10 We currently do metrics about file size, function size and number of includes,
     11 for C source files and headers.
     12 
     13 practracker.py should be run with its second argument pointing to the Tor
     14 top-level source directory like this:
     15  $ python3 ./scripts/maint/practracker/practracker.py .
     16 
     17 To regenerate the exceptions file so that it allows all current
     18 problems in the Tor source, use the --regen flag:
 $ python3 ./scripts/maint/practracker/practracker.py --regen .
     20 """
     21 
     22 # Future imports for Python 2.7, mandatory in 3.0
     23 from __future__ import division
     24 from __future__ import print_function
     25 from __future__ import unicode_literals
     26 
     27 import io, os, sys
     28 
     29 import metrics
     30 import util
     31 import problem
     32 import includes
     33 import shutil
     34 
# The filename of the exceptions file (it should be placed in the practracker directory)
EXCEPTIONS_FNAME = "./exceptions.txt"

# Recommended file size
MAX_FILE_SIZE = 3000 # lines
# Recommended function size
MAX_FUNCTION_SIZE = 100 # lines
# Recommended number of #includes
MAX_INCLUDE_COUNT = 50
# Recommended file size for headers
MAX_H_FILE_SIZE = 500
# Recommended include count for headers
MAX_H_INCLUDE_COUNT = 15
# Recommended number of dependency violations
MAX_DEP_VIOLATIONS = 0

# Map from problem type to functions that adjust for tolerance.  Each function
# maps a configured limit to a slightly larger effective limit; main() passes
# this table to ProblemVault.set_tolerances() unless --regen/--strict/
# --list-overbroad/--regen-overbroad is given.
# NOTE(review): presumably values between the limit and the adjusted limit
# warn rather than error — the exact warn/error split lives in the `problem`
# module; confirm there.
TOLERANCE_FNS = {
    'include-count': lambda n: int(n*1.1),
    'function-size': lambda n: int(n*1.1),
    'file-size': lambda n: int(n*1.02),
    'dependency-violation': lambda n: (n+2)
}

#######################################################

# The Tor source code topdir.  None at import time; main() sets it from the
# `topdir` command-line argument before any scanning happens.
TOR_TOPDIR = None

#######################################################
     65 
     66 def open_file(fname):
     67    return io.open(fname, 'r', encoding='utf-8')
     68 
     69 def consider_file_size(fname, f):
     70    """Consider the size of 'f' and yield an FileSizeItem for it.
     71    """
     72    file_size = metrics.get_file_len(f)
     73    yield problem.FileSizeItem(fname, file_size)
     74 
     75 def consider_includes(fname, f):
     76    """Consider the #include count in for 'f' and yield an IncludeCountItem
     77        for it.
     78    """
     79    include_count = metrics.get_include_count(f)
     80 
     81    yield problem.IncludeCountItem(fname, include_count)
     82 
     83 def consider_function_size(fname, f):
     84    """yield a FunctionSizeItem for every function in f.
     85    """
     86 
     87    for name, lines in metrics.get_function_lines(f):
     88        canonical_function_name = "%s:%s()" % (fname, name)
     89        yield problem.FunctionSizeItem(canonical_function_name, lines)
     90 
     91 def consider_include_violations(fname, real_fname, f):
     92    n = 0
     93    for item in includes.consider_include_rules(real_fname, f):
     94        n += 1
     95    if n:
     96        yield problem.DependencyViolationItem(fname, n)
     97 
     98 
     99 #######################################################
    100 
    101 def consider_all_metrics(files_list):
    102    """Consider metrics for all files, and yield a sequence of problem.Item
    103       object for those issues."""
    104    for fname in files_list:
    105        with open_file(fname) as f:
    106            for item in consider_metrics_for_file(fname, f):
    107                yield item
    108 
    109 def consider_metrics_for_file(fname, f):
    110    """
    111       Yield a sequence of problem.Item objects for all of the metrics in
    112       'f'.
    113    """
    114    real_fname = fname
    115    # Strip the useless part of the path
    116    if fname.startswith(TOR_TOPDIR):
    117        fname = fname[len(TOR_TOPDIR):]
    118 
    119    # Get file length
    120    for item in consider_file_size(fname, f):
    121        yield item
    122 
    123    # Consider number of #includes
    124    f.seek(0)
    125    for item in consider_includes(fname, f):
    126        yield item
    127 
    128    # Get function length
    129    f.seek(0)
    130    for item in consider_function_size(fname, f):
    131        yield item
    132 
    133    # Check for "upward" includes
    134    f.seek(0)
    135    for item in consider_include_violations(fname, real_fname, f):
    136        yield item
    137 
    138 HEADER="""\
    139 # Welcome to the exceptions file for Tor's best-practices tracker!
    140 #
    141 # Each line of this file represents a single violation of Tor's best
    142 # practices -- typically, a violation that we had before practracker.py
    143 # first existed.
    144 #
    145 # There are three kinds of problems that we recognize right now:
    146 #   function-size -- a function of more than {MAX_FUNCTION_SIZE} lines.
    147 #   file-size -- a .c file of more than {MAX_FILE_SIZE} lines, or a .h
    148 #      file with more than {MAX_H_FILE_SIZE} lines.
    149 #   include-count -- a .c file with more than {MAX_INCLUDE_COUNT} #includes,
    150 #      or a .h file with more than {MAX_H_INCLUDE_COUNT} #includes.
    151 #   dependency-violation -- a file includes a header that it should
    152 #      not, according to an advisory .may_include file.
    153 #
    154 # Each line below represents a single exception that practracker should
    155 # _ignore_. Each line has four parts:
    156 #  1. The word "problem".
    157 #  2. The kind of problem.
    158 #  3. The location of the problem: either a filename, or a
    159 #     filename:functionname pair.
    160 #  4. The magnitude of the problem to ignore.
    161 #
    162 # So for example, consider this line:
    163 #    problem file-size /src/core/or/connection_or.c 3200
    164 #
    165 # It tells practracker to allow the mentioned file to be up to 3200 lines
    166 # long, even though ordinarily it would warn about any file with more than
    167 # {MAX_FILE_SIZE} lines.
    168 #
    169 # You can either edit this file by hand, or regenerate it completely by
    170 # running `make practracker-regen`.
    171 #
    172 # Remember: It is better to fix the problem than to add a new exception!
    173 
    174 """.format(**globals())
    175 
def main(argv):
    """Command-line entry point.

    Parse 'argv' (the full argument vector, program name included), scan
    the configured source directories under the topdir for metric
    violations, and either report new problems or regenerate the
    exceptions file.

    Exits the process: 0 after a successful --regen or --regen-overbroad,
    1 when mutually exclusive flags are combined, and otherwise the
    number of newly found issues.
    """
    import argparse

    progname = argv[0]
    parser = argparse.ArgumentParser(prog=progname)
    parser.add_argument("--regen", action="store_true",
                        help="Regenerate the exceptions file")
    parser.add_argument("--list-overbroad", action="store_true",
                        help="List over-broad exceptions")
    parser.add_argument("--regen-overbroad", action="store_true",
                        help="Regenerate the exceptions file, "
                             "removing over-broad exceptions.")
    parser.add_argument("--exceptions",
                        help="Override the location for the exceptions file")
    parser.add_argument("--strict", action="store_true",
                        help="Make all warnings into errors")
    parser.add_argument("--terse", action="store_true",
                        help="Do not emit helpful instructions.")
    parser.add_argument("--max-h-file-size", default=MAX_H_FILE_SIZE,
                        help="Maximum lines per .h file")
    parser.add_argument("--max-h-include-count", default=MAX_H_INCLUDE_COUNT,
                        help="Maximum includes per .h file")
    parser.add_argument("--max-file-size", default=MAX_FILE_SIZE,
                        help="Maximum lines per .c file")
    parser.add_argument("--max-include-count", default=MAX_INCLUDE_COUNT,
                        help="Maximum includes per .c file")
    parser.add_argument("--max-function-size", default=MAX_FUNCTION_SIZE,
                        help="Maximum lines per function")
    parser.add_argument("--max-dependency-violations", default=MAX_DEP_VIOLATIONS,
                        help="Maximum number of dependency violations to allow")
    # NOTE(review): with action="append" and a non-empty list default,
    # argparse appends user-supplied --include-dir values to ["src"] rather
    # than replacing it, so "src" is always scanned -- confirm this is the
    # intended behavior.
    parser.add_argument("--include-dir", action="append",
                        default=["src"],
                        help="A directory (under topdir) to search for source")
    parser.add_argument("topdir", default=".", nargs="?",
                        help="Top-level directory for the tor source")
    args = parser.parse_args(argv[1:])

    # Publish the topdir for consider_metrics_for_file(), which strips it
    # from reported paths.
    global TOR_TOPDIR
    TOR_TOPDIR = args.topdir
    if args.exceptions:
        exceptions_file = args.exceptions
    else:
        exceptions_file = os.path.join(TOR_TOPDIR, "scripts/maint/practracker", EXCEPTIONS_FNAME)

    # 0) Configure our thresholds of "what is a problem actually".
    # Flag values come in as strings when given on the command line and as
    # ints when defaulted, so each is normalized with int() here.
    filt = problem.ProblemFilter()
    filt.addThreshold(problem.FileSizeItem("*.c", int(args.max_file_size)))
    filt.addThreshold(problem.IncludeCountItem("*.c", int(args.max_include_count)))
    filt.addThreshold(problem.FileSizeItem("*.h", int(args.max_h_file_size)))
    filt.addThreshold(problem.IncludeCountItem("*.h", int(args.max_h_include_count)))
    filt.addThreshold(problem.FunctionSizeItem("*.c", int(args.max_function_size)))
    filt.addThreshold(problem.DependencyViolationItem("*.c", int(args.max_dependency_violations)))
    filt.addThreshold(problem.DependencyViolationItem("*.h", int(args.max_dependency_violations)))

    # These three modes are mutually exclusive.
    if args.list_overbroad + args.regen + args.regen_overbroad > 1:
        print("Cannot use more than one of --regen, --list-overbroad, and "
              "--regen-overbroad.",
              file=sys.stderr)
        sys.exit(1)

    # 1) Get all the .c files we care about
    files_list = util.get_tor_c_files(TOR_TOPDIR, args.include_dir)

    # 2) Initialize problem vault and load an optional exceptions file so that
    # we don't warn about the past
    if args.regen:
        # Regeneration writes to a temp file first, then moves it over the
        # real exceptions file below; an empty vault means every problem is
        # "new" and gets written out.
        tmpname = exceptions_file + ".tmp"
        tmpfile = open(tmpname, "w")
        problem_file = tmpfile
        problem_file.write(HEADER)
        ProblemVault = problem.ProblemVault()
    else:
        ProblemVault = problem.ProblemVault(exceptions_file)
        problem_file = sys.stdout

    if args.list_overbroad or args.regen_overbroad:
        # If we're looking for overbroad exceptions, don't list problems
        # immediately to the problem file.
        problem_file = util.NullFile()

    # 2.1) Adjust the exceptions so that we warn only about small problems,
    # and produce errors on big ones.
    if not (args.regen or args.list_overbroad or args.regen_overbroad or
            args.strict):
        ProblemVault.set_tolerances(TOLERANCE_FNS)

    # 3) Go through all the files and report problems if they are not exceptions
    found_new_issues = 0
    for item in filt.filter(consider_all_metrics(files_list)):
        status = ProblemVault.register_problem(item)
        if status == problem.STATUS_ERR:
            print(item, file=problem_file)
            found_new_issues += 1
        elif status == problem.STATUS_WARN:
            # warnings always go to stdout.
            print("(warning) {}".format(item))

    if args.regen:
        tmpfile.close()
        shutil.move(tmpname, exceptions_file)
        sys.exit(0)

    if args.regen_overbroad:
        # Rewrite the exceptions file keeping only the exceptions that are
        # not over-broad, via the same temp-file-then-move dance as --regen.
        tmpname = exceptions_file + ".tmp"
        tmpfile = open(tmpname, "w")
        tmpfile.write(HEADER)
        for item in ProblemVault.list_exceptions_without_overbroad():
            print(item, file=tmpfile)
        tmpfile.close()
        shutil.move(tmpname, exceptions_file)
        sys.exit(0)

    # If new issues were found, try to give out some advice to the developer on how to resolve it.
    if found_new_issues and not args.regen and not args.terse:
        new_issues_str = """\
FAILURE: practracker found {} new problem(s) in the code: see warnings above.

Please fix the problems if you can, and update the exceptions file
({}) if you can't.

See doc/HACKING/HelpfulTools.md for more information on using practracker.\

You can disable this message by setting the TOR_DISABLE_PRACTRACKER environment
variable.
""".format(found_new_issues, exceptions_file)
        print(new_issues_str)

    if args.list_overbroad:
        # Sort over-broad exceptions by their key for stable output; 'p' is
        # the worst remaining problem for that exception, or None when the
        # exception matched nothing at all.
        def k_fn(tup):
            return tup[0].key()
        for (ex,p) in sorted(ProblemVault.list_overbroad_exceptions(), key=k_fn):
            if p is None:
                print(ex, "->", 0)
            else:
                print(ex, "->", p.metric_value)


    # The exit status doubles as the count of new issues (0 == clean).
    sys.exit(found_new_issues)
    314 
    315 if __name__ == '__main__':
    316    if os.environ.get("TOR_DISABLE_PRACTRACKER"):
    317        print("TOR_DISABLE_PRACTRACKER is set, skipping practracker tests.",
    318              file=sys.stderr)
    319        sys.exit(0)
    320    main(sys.argv)