tor

The Tor anonymity network
git clone https://git.dasho.dev/tor.git
Log | Files | Refs | README | LICENSE

codetool.py (5233B)


      1 #!/usr/bin/env python3
      2 # Copyright (c) 2020, The Tor Project, Inc.
      3 # See LICENSE for licensing information.
      4 
      5 #
      6 # DO NOT COMMIT OR MERGE CODE THAT IS RUN THROUGH THIS TOOL YET.
      7 #
      8 # WE ARE STILL DISCUSSING OUR DESIRED STYLE AND ITERATING ON IT,
      9 # ALONG WITH THE TOOLS THAT ACHIEVE IT.
     10 #     (12 Feb 2020)
     11 #
     12 
     13 """
     14   This program uses a set of pluggable filters to inspect and transform
     15   our C code.
     16 """
     17 
     18 import os
     19 import re
     20 import sys
     21 
     22 class Filter:
     23    """A Filter transforms a string containing a C program."""
     24    def __init__(self):
     25        pass
     26 
     27    def transform(self, s):
     28        return s
     29 
     30 class CompoundFilt(Filter):
     31    """A CompoundFilt runs another set of filters, in sequence."""
     32    def __init__(self, items=()):
     33        super().__init__()
     34        self._filters = list(items)
     35 
     36    def add(self, filt):
     37        self._filters.append(filt)
     38        return self
     39 
     40    def transform(self, s):
     41        for f in self._filters:
     42            s = f.transform(s)
     43 
     44        return s
     45 
     46 class SplitError(Exception):
     47    """Exception: raised if split_comments() can't understand a C file."""
     48    pass
     49 
     50 def split_comments(s):
     51    r"""Iterate over the C code in 's', and yield a sequence of (code,
     52       comment) pairs.  Each pair will contain either a nonempty piece
     53       of code, a nonempty comment, or both.
     54 
     55       >>> list(split_comments("hello // world\n"))
     56       [('hello ', '// world'), ('\n', '')]
     57 
     58       >>> list(split_comments("a /* b cd */ efg // hi"))
     59       [('a ', '/* b cd */'), (' efg ', '// hi')]
     60    """
     61 
     62    # Matches a block of code without any comments.
     63    PAT_CODE = re.compile(r'''^(?: [^/"']+ |
     64                                   "(?:[^\\"]+|\\.)*" |
     65                                   '(?:[^\\']+|\\.)*' |
     66                                   /[^/*]
     67                               )*''', re.VERBOSE|re.DOTALL)
     68 
     69    # Matches a C99 "//" comment.
     70    PAT_C99_COMMENT = re.compile(r'^//.*$', re.MULTILINE)
     71 
     72    # Matches a C "/*  */" comment.
     73    PAT_C_COMMENT = re.compile(r'^/\*(?:[^*]|\*+[^*/])*\*+/', re.DOTALL)
     74 
     75    while True:
     76        # Find some non-comment code at the start of the string.
     77        m = PAT_CODE.match(s)
     78 
     79        # If we found some code here, save it and advance the string.
     80        # Otherwise set 'code' to "".
     81        if m:
     82            code = m.group(0)
     83            s = s[m.end():]
     84        else:
     85            code = ""
     86 
     87        # Now we have a comment, or the end of the string.  Find out which
     88        # one, and how long it is.
     89        if s.startswith("//"):
     90            m = PAT_C99_COMMENT.match(s)
     91        else:
     92            m = PAT_C_COMMENT.match(s)
     93 
     94        # If we got a comment, save it and advance the string.  Otherwise
     95        # set 'comment' to "".
     96        if m:
     97            comment = m.group(0)
     98            s = s[m.end():]
     99        else:
    100            comment = ""
    101 
    102        # If we found no code and no comment, we should be at the end of
    103        # the string...
    104        if code == "" and comment == "":
    105            if s:
    106                # But in case we *aren't* at the end of the string, raise
    107                # an error.
    108                raise SplitError()
    109            # ... all is well, we're done scanning the code.
    110            return
    111 
    112        yield (code, comment)
    113 
    114 class IgnoreCommentsFilt(Filter):
    115    """Wrapper: applies another filter to C code only, excluding comments.
    116    """
    117    def __init__(self, filt):
    118        super().__init__()
    119        self._filt = filt
    120 
    121    def transform(self, s):
    122        result = []
    123        for code, comment in split_comments(s):
    124            result.append(self._filt.transform(code))
    125            result.append(comment)
    126        return "".join(result)
    127 
    128 
    129 class RegexFilt(Filter):
    130    """A regex filter applies a regular expression to some C code."""
    131    def __init__(self, pat, replacement, flags=0):
    132        super().__init__()
    133        self._pat = re.compile(pat, flags)
    134        self._replacement = replacement
    135 
    136    def transform(self, s):
    137        s, _ = self._pat.subn(self._replacement, s)
    138        return s
    139 
    140 def revise(fname, filt):
    141    """Run 'filt' on the contents of the file in 'fname'.  If any
    142       changes are made, then replace the file with its new contents.
    143       Otherwise, leave the file alone.
    144    """
    145    contents = open(fname, 'r').read()
    146    result = filt.transform(contents)
    147    if result == contents:
    148        return
    149 
    150    tmpname = "{}_codetool_tmp".format(fname)
    151    try:
    152        with open(tmpname, 'w') as f:
    153            f.write(result)
    154            os.rename(tmpname, fname)
    155    except:
    156        os.unlink(tmpname)
    157        raise
    158 
    159 ##############################
    160 # Filtering rules.
    161 ##############################
    162 
    163 # Make sure that there is a newline after the first comma in a MOCK_IMPL()
    164 BREAK_MOCK_IMPL = RegexFilt(
    165    r'^MOCK_IMPL\(([^,]+),\s*(\S+)',
    166    r'MOCK_IMPL(\1,\n\2',
    167    re.MULTILINE)
    168 
    169 # Make sure there is no newline between } and a loop iteration terminator.
    170 RESTORE_SMARTLIST_END = RegexFilt(
    171    r'}\s*(SMARTLIST|DIGESTMAP|DIGEST256MAP|STRMAP|MAP)_FOREACH_END\s*\(',
    172    r'} \1_FOREACH_END (',
    173    re.MULTILINE)
    174 
    175 F = CompoundFilt()
    176 F.add(IgnoreCommentsFilt(CompoundFilt([
    177    RESTORE_SMARTLIST_END,
    178    BREAK_MOCK_IMPL])))
    179 
    180 if __name__ == '__main__':
    181    for fname in sys.argv[1:]:
    182        revise(fname, F)