annotate_ifdef_directives.py (9881B)
#!/usr/bin/env python
# Copyright (c) 2017-2019, The Tor Project, Inc.
# See LICENSE for licensing information

r"""
This script iterates over a list of C files. For each file, it looks at the
#if/#else C macros, and annotates them with comments explaining what they
match.

For example, it replaces this kind of input...

>>> INPUT = '''
... #ifdef HAVE_OCELOT
...   C code here
... #if MIMSY == BOROGROVE
...   block 1
...   block 1
...   block 1
...   block 1
... #else
...   block 2
...   block 2
...   block 2
...   block 2
... #endif
... #endif
... '''

With this kind of output:
>>> EXPECTED_OUTPUT = '''
... #ifdef HAVE_OCELOT
...   C code here
... #if MIMSY == BOROGROVE
...   block 1
...   block 1
...   block 1
...   block 1
... #else /* !(MIMSY == BOROGROVE) */
...   block 2
...   block 2
...   block 2
...   block 2
... #endif /* MIMSY == BOROGROVE */
... #endif /* defined(HAVE_OCELOT) */
... '''

Here's how to use it:
>>> import sys
>>> if sys.version_info.major < 3: from cStringIO import StringIO
>>> if sys.version_info.major >= 3: from io import StringIO

>>> OUTPUT = StringIO()
>>> translate(StringIO(INPUT), OUTPUT)
>>> assert OUTPUT.getvalue() == EXPECTED_OUTPUT

Note that only #else and #endif lines are annotated.  Existing comments
on those lines are removed.
"""

# Future imports for Python 2.7, mandatory in 3.0
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import re

# Any block with fewer than this many lines does not need annotations.
LINE_OBVIOUSNESS_LIMIT = 4

# Maximum line width.  This includes a terminating newline character.
#
# (This is the maximum before encoding, so that if the operating system
# uses multiple characters to encode newline, that's still okay.)
LINE_WIDTH = 80


class Problem(Exception):
    """Raised by translate() when the #if/#else/#endif directives in the
    input do not nest properly."""
    pass


def close_parens_needed(expr):
    """Return the number of right-parentheses that must be appended to
    'expr' to make it balanced.

    This is simply the count of "(" minus the count of ")", so the result
    is negative if 'expr' has more close-parens than open-parens.

    >>> close_parens_needed("1+2")
    0
    >>> close_parens_needed("(1 + 2)")
    0
    >>> close_parens_needed("(1 + 2")
    1
    >>> close_parens_needed("(1 + (2 *")
    2
    >>> close_parens_needed("(1 + (2 * 3) + (4")
    2
    """
    return expr.count("(") - expr.count(")")


def truncate_expression(expr, new_width):
    """Given a parenthesized C expression in 'expr', try to return a new
    expression that is similar to 'expr', but no more than 'new_width'
    characters long.

    Try to return an expression with balanced parentheses.

    If no truncation fits, return a bare "...": note that this can be
    longer than 'new_width' when 'new_width' is less than 3.

    >>> truncate_expression("1+2+3", 8)
    '1+2+3'
    >>> truncate_expression("1+2+3+4+5", 8)
    '1+2+3...'
    >>> truncate_expression("(1+2+3+4)", 8)
    '(1+2...)'
    >>> truncate_expression("(1+(2+3+4))", 8)
    '(1+...)'
    >>> truncate_expression("(((((((((", 8)
    '((...))'
    """
    if len(expr) <= new_width:
        # The expression is already short enough.
        return expr

    ellipsis = "..."

    # Start this at the minimum that we might truncate.
    n_to_remove = len(expr) + len(ellipsis) - new_width

    # Try removing characters, one by one, until we get something where
    # re-balancing the parentheses still fits within the limit.
    while n_to_remove < len(expr):
        truncated = expr[:-n_to_remove] + ellipsis
        # (Multiplying a string by a negative count yields "", so surplus
        # close-parens in the prefix are harmless here.)
        truncated += ")" * close_parens_needed(truncated)
        if len(truncated) <= new_width:
            return truncated
        n_to_remove += 1

    return ellipsis


def commented_line(fmt, argument, maxwidth=LINE_WIDTH):
    # (This is a raw docstring so that our doctests can use \.)
    r"""
    Return fmt%argument, for use as a commented line.  If the line would
    be longer than maxwidth, truncate argument but try to keep its
    parentheses balanced.

    Requires that fmt%"..." will fit into maxwidth characters.

    Requires that fmt ends with a newline.

    >>> commented_line("/* %s */\n", "hello world", 32)
    '/* hello world */\n'
    >>> commented_line("/* %s */\n", "hello world", 15)
    '/* hello... */\n'
    >>> commented_line("#endif /* %s */\n", "((1+2) && defined(FOO))", 32)
    '#endif /* ((1+2) && defi...) */\n'


    The default line limit is 80 characters including the newline:

    >>> long_argument = "long " * 100
    >>> long_line = commented_line("#endif /* %s */\n", long_argument)
    >>> len(long_line)
    80

    >>> long_line[:40]
    '#endif /* long long long long long long '
    >>> long_line[40:]
    'long long long long long long lon... */\n'

    If a line works out to being 80 characters naturally, it isn't truncated,
    and no ellipsis is added.

    >>> medium_argument = "a"*66
    >>> medium_line = commented_line("#endif /* %s */\n", medium_argument)
    >>> len(medium_line)
    80
    >>> "..." in medium_line
    False
    >>> medium_line[:40]
    '#endif /* aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'
    >>> medium_line[40:]
    'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa */\n'


    """
    assert fmt.endswith("\n")
    result = fmt % argument
    if len(result) <= maxwidth:
        return result

    # How long can we let the argument be?  Try filling in the
    # format with an empty argument to find out.
    max_arg_width = maxwidth - len(fmt % "")
    result = fmt % truncate_expression(argument, max_arg_width)
    assert len(result) <= maxwidth
    return result


def negate(expr):
    """Return a negated version of expr; try to avoid double-negation.

    We usually wrap expressions in parentheses and add a "!".
    >>> negate("A && B")
    '!(A && B)'

    But if we recognize the expression as negated, we can restore it.
    >>> negate(negate("A && B"))
    'A && B'

    The same applies for defined(FOO).
    >>> negate("defined(FOO)")
    '!defined(FOO)'
    >>> negate(negate("defined(FOO)"))
    'defined(FOO)'

    Internal parentheses don't confuse us:
    >>> negate("!(FOO) && !(BAR)")
    '!(!(FOO) && !(BAR))'

    """
    expr = expr.strip()
    # See whether we match !(...), with no intervening close-parens.
    m = re.match(r'^!\s*\(([^\)]*)\)$', expr)
    if m:
        return m.group(1)

    # See whether we match !?defined(...), with no intervening close-parens.
    m = re.match(r'^(!?)\s*(defined\([^\)]*\))$', expr)
    if m:
        if m.group(1) == "!":
            prefix = ""
        else:
            prefix = "!"
        return prefix + m.group(2)

    return "!(%s)" % expr


def uncomment(s):
    """
    Remove existing trailing comments from an #else or #endif line.

    Everything from the first "//" or "/*" to the end of 's' is dropped,
    and the remainder is returned with surrounding whitespace stripped.

    >>> uncomment("FOO /* bar */")
    'FOO'
    >>> uncomment("  FOO // bar")
    'FOO'
    """
    s = re.sub(r'//.*', '', s)
    s = re.sub(r'/\*.*', '', s)
    return s.strip()


def translate(f_in, f_out):
    """
    Read a file from f_in, and write its annotated version to f_out.

    f_in is iterated line by line; f_out only needs a write() method.

    Lines that are not #if/#ifdef/#ifndef/#elif/#else/#endif directives,
    and the opening directives themselves, are copied through unchanged.
    An #else or #endif that comes more than LINE_OBVIOUSNESS_LIMIT lines
    after its opening directive is rewritten with a comment naming the
    condition it belongs to; closer ones are copied unchanged.

    Raises Problem if an #else, #elif or #endif has no matching opening
    directive, if anything follows an #else within the same block, or if
    the input ends with an unclosed block.
    """
    # A stack listing our current if/else state.  Each member of the stack
    # is a list of directives.  Each directive is a 3-tuple of
    #    (command, rest, lineno)
    # where "command" is one of if/ifdef/ifndef/else/elif, and where
    # "rest" is an expression in a format suitable for use with #if, and where
    # lineno is the line number where the directive occurred.
    stack = []
    # the stack element corresponding to the top level of the file.
    whole_file = []
    cur_level = whole_file
    lineno = 0
    # Compile once, rather than on every line of input.
    directive_re = re.compile(
        r'\s*#\s*(if|ifdef|ifndef|else|endif|elif)\b\s*(.*)')
    for line in f_in:
        lineno += 1
        m = directive_re.match(line)
        if not m:
            # no directive, so we can just write it out.
            f_out.write(line)
            continue
        command, rest = m.groups()
        if command in ("if", "ifdef", "ifndef"):
            # The #if directive pushes us one level lower on the stack.
            if command == 'ifdef':
                rest = "defined(%s)" % uncomment(rest)
            elif command == 'ifndef':
                rest = "!defined(%s)" % uncomment(rest)
            elif rest.endswith("\\"):
                # The condition continues on a following line; mark what
                # we have as incomplete.
                rest = rest[:-1] + "..."

            rest = uncomment(rest)

            new_level = [(command, rest, lineno)]
            stack.append(cur_level)
            cur_level = new_level
            f_out.write(line)
        elif command in ("else", "elif"):
            # We stay at the same level on the stack.  If we have an #else,
            # we comment it.
            if len(cur_level) == 0 or cur_level[-1][0] == 'else':
                raise Problem("Unexpected #%s on %d" % (command, lineno))
            if (len(cur_level) == 1 and command == 'else' and
                    lineno > cur_level[0][2] + LINE_OBVIOUSNESS_LIMIT):
                # A plain #if/#else with a long first branch: annotate the
                # #else with the negation of the opening condition.
                f_out.write(commented_line("#else /* %s */\n",
                                           negate(cur_level[0][1])))
            else:
                f_out.write(line)
            cur_level.append((command, rest, lineno))
        else:
            # We pop one element on the stack, and comment an endif.
            assert command == 'endif'
            if len(stack) == 0:
                raise Problem("Unmatched #%s on %s" % (command, lineno))
            if lineno <= cur_level[0][2] + LINE_OBVIOUSNESS_LIMIT:
                # Short block: the opening directive is close enough.
                f_out.write(line)
            elif len(cur_level) == 1 or (
                    len(cur_level) == 2 and cur_level[1][0] == 'else'):
                # Plain #if...#endif or #if...#else...#endif.
                f_out.write(commented_line("#endif /* %s */\n",
                                           cur_level[0][1]))
            else:
                # There was at least one #elif; only name the first
                # condition.
                f_out.write(commented_line("#endif /* %s || ... */\n",
                                           cur_level[0][1]))
            cur_level = stack.pop()
    # We should be back at the top level, with nothing left open.
    if stack or cur_level is not whole_file:
        raise Problem("Missing #endif")


if __name__ == '__main__':

    import os
    import sys

    if len(sys.argv) < 2:
        # Without this check, sys.argv[1] below fails with an IndexError.
        print("Usage: %s [--self-test] FILE..." % sys.argv[0],
              file=sys.stderr)
        sys.exit(1)

    if sys.argv[1] == "--self-test":
        import doctest
        doctest.testmod()
        sys.exit(0)

    for fn in sys.argv[1:]:
        # Write to a temporary sibling file, then rename it over the
        # original.  Both files are closed before the rename happens.
        with open(fn, 'r') as input_file:
            with open(fn + "_OUT", 'w') as output_file:
                translate(input_file, output_file)
        os.rename(fn + "_OUT", fn)