codetool.py (5233B)
#!/usr/bin/env python3
# Copyright (c) 2020, The Tor Project, Inc.
# See LICENSE for licensing information.

#
# DO NOT COMMIT OR MERGE CODE THAT IS RUN THROUGH THIS TOOL YET.
#
# WE ARE STILL DISCUSSING OUR DESIRED STYLE AND ITERATING ON IT,
# ALONG WITH THE TOOLS THAT ACHIEVE IT.
# (12 Feb 2020)
#

"""
This program uses a set of pluggable filters to inspect and transform
our C code.
"""

import os
import re
import sys


class Filter:
    """A Filter transforms a string containing a C program.

    The base class is the identity transformation; subclasses override
    transform().
    """
    def __init__(self):
        pass

    def transform(self, s):
        """Return the transformed version of the string 's'."""
        return s


class CompoundFilt(Filter):
    """A CompoundFilt runs another set of filters, in sequence."""
    def __init__(self, items=()):
        super().__init__()
        # Copy the iterable so that add() never mutates the caller's object.
        self._filters = list(items)

    def add(self, filt):
        """Append 'filt' to the sequence.  Returns self, to allow chaining."""
        self._filters.append(filt)
        return self

    def transform(self, s):
        """Feed 's' through every filter, in the order they were added."""
        for f in self._filters:
            s = f.transform(s)

        return s


class SplitError(Exception):
    """Exception: raised if split_comments() can't understand a C file."""


# Matches a (possibly empty) block of code without any comments.
#
# The string and character-literal branches are written in "unrolled" form
# (plain-run, then any number of escape-plus-plain-run) so that an
# unterminated quote fails in linear time rather than backtracking
# exponentially.
#
# A lone '/' is accepted through a lookahead, so the character that follows
# it is NOT consumed: it may be a quote that opens a literal, or there may be
# no following character at all.
_PAT_CODE = re.compile(r'''(?: [^/"']+ |
                               "[^\\"]*(?:\\.[^\\"]*)*" |
                               '[^\\']*(?:\\.[^\\']*)*' |
                               /(?![/*])
                           )*''', re.VERBOSE | re.DOTALL)

# Matches a C99 "//" comment, up to but not including the newline.
_PAT_C99_COMMENT = re.compile(r'//[^\n]*')

# Matches a C "/* */" comment.
_PAT_C_COMMENT = re.compile(r'/\*(?:[^*]|\*+[^*/])*\*+/', re.DOTALL)


def split_comments(s):
    r"""Iterate over the C code in 's', and yield a sequence of (code,
       comment) pairs.  Each pair will contain either a nonempty piece
       of code, a nonempty comment, or both.

       Raises SplitError (from the generator, while iterating) if some
       part of 's' is neither code nor a comment -- for example, an
       unterminated string literal or an unterminated "/*" comment.

       >>> list(split_comments("hello // world\n"))
       [('hello ', '// world'), ('\n', '')]

       >>> list(split_comments("a /* b cd */ efg // hi"))
       [('a ', '/* b cd */'), (' efg ', '// hi')]
    """
    # Scan by offset rather than re-slicing 's', so that each step costs
    # time proportional to what it consumes, not to what remains.
    pos = 0
    end = len(s)

    while True:
        # Find some non-comment code at the current position.  The pattern
        # can match the empty string, so this always succeeds.
        m = _PAT_CODE.match(s, pos)
        code = m.group(0)
        pos = m.end()

        # Now we have a comment, or the end of the string.  Find out which
        # one, and how long it is.
        if s.startswith("//", pos):
            m = _PAT_C99_COMMENT.match(s, pos)
        else:
            m = _PAT_C_COMMENT.match(s, pos)

        # If we got a comment, save it and advance.  Otherwise the comment
        # half of this pair is empty.
        if m:
            comment = m.group(0)
            pos = m.end()
        else:
            comment = ""

        # If we found no code and no comment, we should be at the end of
        # the string...
        if not code and not comment:
            if pos < end:
                # But in case we *aren't* at the end of the string, raise
                # an error.
                raise SplitError(
                    "cannot parse C code at offset {}".format(pos))
            # ... all is well, we're done scanning the code.
            return

        yield (code, comment)


class IgnoreCommentsFilt(Filter):
    """Wrapper: applies another filter to C code only, excluding comments.
    """
    def __init__(self, filt):
        super().__init__()
        self._filt = filt

    def transform(self, s):
        """Transform each code piece of 's' with the wrapped filter; copy
           each comment through unchanged.

           Note that the wrapped filter sees every code piece separately,
           so it cannot match across a comment.
        """
        result = []
        for code, comment in split_comments(s):
            result.append(self._filt.transform(code))
            result.append(comment)
        return "".join(result)


class RegexFilt(Filter):
    """A regex filter applies a regular expression to some C code."""
    def __init__(self, pat, replacement, flags=0):
        super().__init__()
        self._pat = re.compile(pat, flags)
        self._replacement = replacement

    def transform(self, s):
        """Replace every match of the pattern in 's' with the replacement."""
        return self._pat.sub(self._replacement, s)


def revise(fname, filt):
    """Run 'filt' on the contents of the file in 'fname'.  If any
       changes are made, then replace the file with its new contents.
       Otherwise, leave the file alone.

       The new contents are written to a temporary file next to 'fname'
       and then moved over it.  If anything fails, the temporary file is
       removed (best-effort) and the original exception propagates.
    """
    with open(fname, 'r') as f:
        contents = f.read()

    result = filt.transform(contents)
    if result == contents:
        return

    tmpname = "{}_codetool_tmp".format(fname)
    try:
        with open(tmpname, 'w') as f:
            f.write(result)
        # Unlike os.rename(), os.replace() overwrites an existing
        # destination on every platform.
        os.replace(tmpname, fname)
    except BaseException:
        # Best-effort cleanup: the temporary file may never have been
        # created, and a failure here must not mask the real error.
        try:
            os.unlink(tmpname)
        except OSError:
            pass
        raise


##############################
# Filtering rules.
##############################

# Make sure that there is a newline after the first comma in a MOCK_IMPL()
BREAK_MOCK_IMPL = RegexFilt(
    r'^MOCK_IMPL\(([^,]+),\s*(\S+)',
    r'MOCK_IMPL(\1,\n\2',
    re.MULTILINE)

# Make sure there is no newline between } and a loop iteration terminator.
RESTORE_SMARTLIST_END = RegexFilt(
    r'}\s*(SMARTLIST|DIGESTMAP|DIGEST256MAP|STRMAP|MAP)_FOREACH_END\s*\(',
    r'} \1_FOREACH_END (',
    re.MULTILINE)

F = CompoundFilt()
F.add(IgnoreCommentsFilt(CompoundFilt([
    RESTORE_SMARTLIST_END,
    BREAK_MOCK_IMPL])))


def main(argv=None):
    """Run the filter set 'F' over every file named in 'argv'
       (by default, the command-line arguments after the program name).
    """
    if argv is None:
        argv = sys.argv[1:]
    for fname in argv:
        revise(fname, F)


if __name__ == '__main__':
    main()