practracker.py (11902B)
1 #!/usr/bin/env python 2 3 """ 4 Best-practices tracker for Tor source code. 5 6 Go through the various .c files and collect metrics about them. If the metrics 7 violate some of our best practices and they are not found in the optional 8 exceptions file, then log a problem about them. 9 10 We currently do metrics about file size, function size and number of includes, 11 for C source files and headers. 12 13 practracker.py should be run with its second argument pointing to the Tor 14 top-level source directory like this: 15 $ python3 ./scripts/maint/practracker/practracker.py . 16 17 To regenerate the exceptions file so that it allows all current 18 problems in the Tor source, use the --regen flag: 19 $ python3 --regen ./scripts/maint/practracker/practracker.py . 20 """ 21 22 # Future imports for Python 2.7, mandatory in 3.0 23 from __future__ import division 24 from __future__ import print_function 25 from __future__ import unicode_literals 26 27 import io, os, sys 28 29 import metrics 30 import util 31 import problem 32 import includes 33 import shutil 34 35 # The filename of the exceptions file (it should be placed in the practracker directory) 36 EXCEPTIONS_FNAME = "./exceptions.txt" 37 38 # Recommended file size 39 MAX_FILE_SIZE = 3000 # lines 40 # Recommended function size 41 MAX_FUNCTION_SIZE = 100 # lines 42 # Recommended number of #includes 43 MAX_INCLUDE_COUNT = 50 44 # Recommended file size for headers 45 MAX_H_FILE_SIZE = 500 46 # Recommended include count for headers 47 MAX_H_INCLUDE_COUNT = 15 48 # Recommended number of dependency violations 49 MAX_DEP_VIOLATIONS = 0 50 51 # Map from problem type to functions that adjust for tolerance 52 TOLERANCE_FNS = { 53 'include-count': lambda n: int(n*1.1), 54 'function-size': lambda n: int(n*1.1), 55 'file-size': lambda n: int(n*1.02), 56 'dependency-violation': lambda n: (n+2) 57 } 58 59 ####################################################### 60 61 # The Tor source code topdir 62 TOR_TOPDIR = None 63 64 
#######################################################

def open_file(fname):
    """Open 'fname' read-only as UTF-8 text.

    Uses io.open (rather than builtin open) so the encoding argument also
    works under Python 2.7.
    """
    return io.open(fname, 'r', encoding='utf-8')

def consider_file_size(fname, f):
    """Consider the size of 'f' and yield a FileSizeItem for it.
    """
    file_size = metrics.get_file_len(f)
    yield problem.FileSizeItem(fname, file_size)

def consider_includes(fname, f):
    """Consider the #include count in 'f' and yield an IncludeCountItem
    for it.
    """
    include_count = metrics.get_include_count(f)

    yield problem.IncludeCountItem(fname, include_count)

def consider_function_size(fname, f):
    """Yield a FunctionSizeItem for every function in f.
    """

    for name, lines in metrics.get_function_lines(f):
        # Identify functions as "file:function()" so exceptions can target
        # a single function rather than the whole file.
        canonical_function_name = "%s:%s()" % (fname, name)
        yield problem.FunctionSizeItem(canonical_function_name, lines)

def consider_include_violations(fname, real_fname, f):
    """Count the include-rule violations that the includes module reports for
    'f' (read from 'real_fname' on disk), and yield a single
    DependencyViolationItem carrying that count — or nothing if there are no
    violations.
    """
    n = 0
    for item in includes.consider_include_rules(real_fname, f):
        n += 1
    if n:
        yield problem.DependencyViolationItem(fname, n)


#######################################################

def consider_all_metrics(files_list):
    """Consider metrics for all files, and yield a sequence of problem.Item
    objects for those issues."""
    for fname in files_list:
        with open_file(fname) as f:
            for item in consider_metrics_for_file(fname, f):
                yield item

def consider_metrics_for_file(fname, f):
    """
    Yield a sequence of problem.Item objects for all of the metrics in
    'f'.
    """
    # Keep the on-disk path around: the include-rule check needs to find the
    # .may_include files next to the real file.
    real_fname = fname
    # Strip the useless part of the path
    if fname.startswith(TOR_TOPDIR):
        fname = fname[len(TOR_TOPDIR):]

    # Get file length
    for item in consider_file_size(fname, f):
        yield item

    # Consider number of #includes
    # (each metric pass re-reads the same open file, so rewind first)
    f.seek(0)
    for item in consider_includes(fname, f):
        yield item

    # Get function length
    f.seek(0)
    for item in consider_function_size(fname, f):
        yield item

    # Check for "upward" includes
    f.seek(0)
    for item in consider_include_violations(fname, real_fname, f):
        yield item

# Header emitted at the top of a regenerated exceptions file.  The {NAME}
# placeholders are filled from the module-level constants via
# .format(**globals()) below, so the constant names must stay in sync.
# NOTE(review): the text says "three kinds of problems" but then lists four
# (function-size, file-size, include-count, dependency-violation); fixing it
# would change the generated file's bytes, so it is only flagged here.
HEADER="""\
# Welcome to the exceptions file for Tor's best-practices tracker!
#
# Each line of this file represents a single violation of Tor's best
# practices -- typically, a violation that we had before practracker.py
# first existed.
#
# There are three kinds of problems that we recognize right now:
# function-size -- a function of more than {MAX_FUNCTION_SIZE} lines.
# file-size -- a .c file of more than {MAX_FILE_SIZE} lines, or a .h
# file with more than {MAX_H_FILE_SIZE} lines.
# include-count -- a .c file with more than {MAX_INCLUDE_COUNT} #includes,
# or a .h file with more than {MAX_H_INCLUDE_COUNT} #includes.
# dependency-violation -- a file includes a header that it should
# not, according to an advisory .may_include file.
#
# Each line below represents a single exception that practracker should
# _ignore_. Each line has four parts:
# 1. The word "problem".
# 2. The kind of problem.
# 3. The location of the problem: either a filename, or a
# filename:functionname pair.
# 4. The magnitude of the problem to ignore.
#
# So for example, consider this line:
# problem file-size /src/core/or/connection_or.c 3200
#
# It tells practracker to allow the mentioned file to be up to 3200 lines
# long, even though ordinarily it would warn about any file with more than
# {MAX_FILE_SIZE} lines.
#
# You can either edit this file by hand, or regenerate it completely by
# running `make practracker-regen`.
#
# Remember: It is better to fix the problem than to add a new exception!

""".format(**globals())

def main(argv):
    """Command-line entry point.

    Parse options, scan the Tor tree under the "topdir" argument, and either
    report new best-practice problems, regenerate the exceptions file
    (--regen / --regen-overbroad), or list over-broad exceptions
    (--list-overbroad).  Always terminates via sys.exit(); the exit status is
    the number of newly found issues (0 when clean).
    """
    # Local import: argparse is only used inside main().
    import argparse

    progname = argv[0]
    parser = argparse.ArgumentParser(prog=progname)
    parser.add_argument("--regen", action="store_true",
                        help="Regenerate the exceptions file")
    parser.add_argument("--list-overbroad", action="store_true",
                        help="List over-broad exceptions")
    parser.add_argument("--regen-overbroad", action="store_true",
                        help="Regenerate the exceptions file, "
                        "removing over-broad exceptions.")
    parser.add_argument("--exceptions",
                        help="Override the location for the exceptions file")
    parser.add_argument("--strict", action="store_true",
                        help="Make all warnings into errors")
    parser.add_argument("--terse", action="store_true",
                        help="Do not emit helpful instructions.")
    # NOTE(review): these --max-* options have int defaults but no type=int,
    # so command-line values arrive as strings; the int(...) conversions at
    # the addThreshold() calls below compensate.
    parser.add_argument("--max-h-file-size", default=MAX_H_FILE_SIZE,
                        help="Maximum lines per .h file")
    parser.add_argument("--max-h-include-count", default=MAX_H_INCLUDE_COUNT,
                        help="Maximum includes per .h file")
    parser.add_argument("--max-file-size", default=MAX_FILE_SIZE,
                        help="Maximum lines per .c file")
    parser.add_argument("--max-include-count", default=MAX_INCLUDE_COUNT,
                        help="Maximum includes per .c file")
    parser.add_argument("--max-function-size", default=MAX_FUNCTION_SIZE,
                        help="Maximum lines per function")
    parser.add_argument("--max-dependency-violations", default=MAX_DEP_VIOLATIONS,
                        help="Maximum number of dependency violations to allow")
    parser.add_argument("--include-dir", action="append",
                        default=["src"],
                        help="A directory (under topdir) to search for source")
    parser.add_argument("topdir", default=".", nargs="?",
                        help="Top-level directory for the tor source")
    args = parser.parse_args(argv[1:])

    # consider_metrics_for_file() reads this module global to strip the
    # topdir prefix from reported paths.
    global TOR_TOPDIR
    TOR_TOPDIR = args.topdir
    if args.exceptions:
        exceptions_file = args.exceptions
    else:
        exceptions_file = os.path.join(TOR_TOPDIR, "scripts/maint/practracker", EXCEPTIONS_FNAME)

    # 0) Configure our thresholds of "what is a problem actually"
    filt = problem.ProblemFilter()
    filt.addThreshold(problem.FileSizeItem("*.c", int(args.max_file_size)))
    filt.addThreshold(problem.IncludeCountItem("*.c", int(args.max_include_count)))
    filt.addThreshold(problem.FileSizeItem("*.h", int(args.max_h_file_size)))
    filt.addThreshold(problem.IncludeCountItem("*.h", int(args.max_h_include_count)))
    filt.addThreshold(problem.FunctionSizeItem("*.c", int(args.max_function_size)))
    filt.addThreshold(problem.DependencyViolationItem("*.c", int(args.max_dependency_violations)))
    filt.addThreshold(problem.DependencyViolationItem("*.h", int(args.max_dependency_violations)))

    # The three modes are mutually exclusive; summing the booleans counts how
    # many were requested.
    if args.list_overbroad + args.regen + args.regen_overbroad > 1:
        print("Cannot use more than one of --regen, --list-overbroad, and "
              "--regen-overbroad.",
              file=sys.stderr)
        sys.exit(1)

    # 1) Get all the .c files we care about
    files_list = util.get_tor_c_files(TOR_TOPDIR, args.include_dir)

    # 2) Initialize problem vault and load an optional exceptions file so that
    # we don't warn about the past
    if args.regen:
        # Write to a sibling .tmp file and move it into place when done, so
        # a partial run never clobbers the existing exceptions file.
        # NOTE(review): tmpfile is not closed (and the .tmp file is left
        # behind) if an exception escapes before the close below.
        tmpname = exceptions_file + ".tmp"
        tmpfile = open(tmpname, "w")
        problem_file = tmpfile
        problem_file.write(HEADER)
        ProblemVault = problem.ProblemVault()
    else:
        ProblemVault = problem.ProblemVault(exceptions_file)
        problem_file = sys.stdout

    if args.list_overbroad or args.regen_overbroad:
        # If we're looking for overbroad exceptions, don't list problems
        # immediately to the problem file.
        problem_file = util.NullFile()

    # 2.1) Adjust the exceptions so that we warn only about small problems,
    # and produce errors on big ones.
    if not (args.regen or args.list_overbroad or args.regen_overbroad or
            args.strict):
        ProblemVault.set_tolerances(TOLERANCE_FNS)

    # 3) Go through all the files and report problems if they are not exceptions
    found_new_issues = 0
    for item in filt.filter(consider_all_metrics(files_list)):
        status = ProblemVault.register_problem(item)
        if status == problem.STATUS_ERR:
            print(item, file=problem_file)
            found_new_issues += 1
        elif status == problem.STATUS_WARN:
            # warnings always go to stdout.
            print("(warning) {}".format(item))

    if args.regen:
        tmpfile.close()
        shutil.move(tmpname, exceptions_file)
        sys.exit(0)

    if args.regen_overbroad:
        # Rewrite the exceptions file keeping only the exceptions that were
        # actually needed at their recorded magnitude.
        tmpname = exceptions_file + ".tmp"
        tmpfile = open(tmpname, "w")
        tmpfile.write(HEADER)
        for item in ProblemVault.list_exceptions_without_overbroad():
            print(item, file=tmpfile)
        tmpfile.close()
        shutil.move(tmpname, exceptions_file)
        sys.exit(0)

    # If new issues were found, try to give out some advice to the developer on how to resolve it.
    if found_new_issues and not args.regen and not args.terse:
        new_issues_str = """\
FAILURE: practracker found {} new problem(s) in the code: see warnings above.

Please fix the problems if you can, and update the exceptions file
({}) if you can't.

See doc/HACKING/HelpfulTools.md for more information on using practracker.\

You can disable this message by setting the TOR_DISABLE_PRACTRACKER environment
variable.
""".format(found_new_issues, exceptions_file)
        print(new_issues_str)

    if args.list_overbroad:
        # Sort over-broad exceptions by the exception's own key for stable
        # output.
        def k_fn(tup):
            return tup[0].key()
        for (ex, p) in sorted(ProblemVault.list_overbroad_exceptions(), key=k_fn):
            if p is None:
                # The exception matched no problem at all.
                print(ex, "->", 0)
            else:
                # The exception's magnitude exceeds the worst actual value.
                print(ex, "->", p.metric_value)


    sys.exit(found_new_issues)

if __name__ == '__main__':
    # Escape hatch: let builds skip practracker entirely via the environment.
    if os.environ.get("TOR_DISABLE_PRACTRACKER"):
        print("TOR_DISABLE_PRACTRACKER is set, skipping practracker tests.",
              file=sys.stderr)
        sys.exit(0)
    main(sys.argv)