tor

The Tor anonymity network
git clone https://git.dasho.dev/tor.git
Log | Files | Refs | README | LICENSE

annotate_ifdef_directives.py (9881B)


      1 #!/usr/bin/env python
      2 # Copyright (c) 2017-2019, The Tor Project, Inc.
      3 # See LICENSE for licensing information
      4 
      5 r"""
      6 This script iterates over a list of C files. For each file, it looks at the
      7 #if/#else C macros, and annotates them with comments explaining what they
      8 match.
      9 
     10 For example, it replaces this kind of input...
     11 
     12 >>> INPUT = '''
     13 ... #ifdef HAVE_OCELOT
     14 ...   C code here
     15 ... #if MIMSY == BOROGROVE
     16 ...   block 1
     17 ...   block 1
     18 ...   block 1
     19 ...   block 1
     20 ... #else
     21 ...   block 2
     22 ...   block 2
     23 ...   block 2
     24 ...   block 2
     25 ... #endif
     26 ... #endif
     27 ... '''
     28 
     29 With this kind of output:
     30 >>> EXPECTED_OUTPUT = '''
     31 ... #ifdef HAVE_OCELOT
     32 ...   C code here
     33 ... #if MIMSY == BOROGROVE
     34 ...   block 1
     35 ...   block 1
     36 ...   block 1
     37 ...   block 1
     38 ... #else /* !(MIMSY == BOROGROVE) */
     39 ...   block 2
     40 ...   block 2
     41 ...   block 2
     42 ...   block 2
     43 ... #endif /* MIMSY == BOROGROVE */
     44 ... #endif /* defined(HAVE_OCELOT) */
     45 ... '''
     46 
     47 Here's how to use it:
     48 >>> import sys
     49 >>> if sys.version_info.major < 3: from cStringIO import StringIO
     50 >>> if sys.version_info.major >= 3: from io import StringIO
     51 
     52 >>> OUTPUT = StringIO()
     53 >>> translate(StringIO(INPUT), OUTPUT)
     54 >>> assert OUTPUT.getvalue() == EXPECTED_OUTPUT
     55 
     56 Note that only #else and #endif lines are annotated.  Existing comments
     57 on those lines are removed.
     58 """
     59 
     60 # Future imports for Python 2.7, mandatory in 3.0
     61 from __future__ import division
     62 from __future__ import print_function
     63 from __future__ import unicode_literals
     64 
     65 import re
     66 
# Any block with fewer than this many lines does not need annotations:
# an #else or #endif that appears no more than this many lines after its
# opening #if is written out unchanged.
LINE_OBVIOUSNESS_LIMIT = 4

# Maximum line width.  This includes a terminating newline character.
#
# (This is the maximum before encoding, so that if the operating system
# uses multiple characters to encode newline, that's still okay.)
LINE_WIDTH=80
     75 
     76 class Problem(Exception):
     77    pass
     78 
     79 def close_parens_needed(expr):
     80    """Return the number of left-parentheses needed to make 'expr'
     81       balanced.
     82 
     83    >>> close_parens_needed("1+2")
     84    0
     85    >>> close_parens_needed("(1 + 2)")
     86    0
     87    >>> close_parens_needed("(1 + 2")
     88    1
     89    >>> close_parens_needed("(1 + (2 *")
     90    2
     91    >>> close_parens_needed("(1 + (2 * 3) + (4")
     92    2
     93    """
     94    return expr.count("(") - expr.count(")")
     95 
     96 def truncate_expression(expr, new_width):
     97    """Given a parenthesized C expression in 'expr', try to return a new
     98       expression that is similar to 'expr', but no more than 'new_width'
     99       characters long.
    100 
    101       Try to return an expression with balanced parentheses.
    102 
    103    >>> truncate_expression("1+2+3", 8)
    104    '1+2+3'
    105    >>> truncate_expression("1+2+3+4+5", 8)
    106    '1+2+3...'
    107    >>> truncate_expression("(1+2+3+4)", 8)
    108    '(1+2...)'
    109    >>> truncate_expression("(1+(2+3+4))", 8)
    110    '(1+...)'
    111    >>> truncate_expression("(((((((((", 8)
    112    '((...))'
    113    """
    114    if len(expr) <= new_width:
    115        # The expression is already short enough.
    116        return expr
    117 
    118    ellipsis = "..."
    119 
    120    # Start this at the minimum that we might truncate.
    121    n_to_remove = len(expr) + len(ellipsis) - new_width
    122 
    123    # Try removing characters, one by one, until we get something where
    124    # re-balancing the parentheses still fits within the limit.
    125    while n_to_remove < len(expr):
    126        truncated = expr[:-n_to_remove] + ellipsis
    127        truncated += ")" * close_parens_needed(truncated)
    128        if len(truncated) <= new_width:
    129            return truncated
    130        n_to_remove += 1
    131 
    132    return ellipsis
    133 
    134 def commented_line(fmt, argument, maxwidth=LINE_WIDTH):
    135    # (This is a raw docstring so that our doctests can use \.)
    136    r"""
    137    Return fmt%argument, for use as a commented line.  If the line would
    138    be longer than maxwidth, truncate argument but try to keep its
    139    parentheses balanced.
    140 
    141    Requires that fmt%"..." will fit into maxwidth characters.
    142 
    143    Requires that fmt ends with a newline.
    144 
    145    >>> commented_line("/* %s */\n", "hello world", 32)
    146    '/* hello world */\n'
    147    >>> commented_line("/* %s */\n", "hello world", 15)
    148    '/* hello... */\n'
    149    >>> commented_line("#endif /* %s */\n", "((1+2) && defined(FOO))", 32)
    150    '#endif /* ((1+2) && defi...) */\n'
    151 
    152 
    153    The default line limit is 80 characters including the newline:
    154 
    155    >>> long_argument = "long " * 100
    156    >>> long_line = commented_line("#endif /* %s */\n", long_argument)
    157    >>> len(long_line)
    158    80
    159 
    160    >>> long_line[:40]
    161    '#endif /* long long long long long long '
    162    >>> long_line[40:]
    163    'long long long long long long lon... */\n'
    164 
    165    If a line works out to being 80 characters naturally, it isn't truncated,
    166    and no ellipsis is added.
    167 
    168    >>> medium_argument = "a"*66
    169    >>> medium_line = commented_line("#endif /* %s */\n", medium_argument)
    170    >>> len(medium_line)
    171    80
    172    >>> "..." in medium_line
    173    False
    174    >>> medium_line[:40]
    175    '#endif /* aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'
    176    >>> medium_line[40:]
    177    'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa */\n'
    178 
    179 
    180    """
    181    assert fmt.endswith("\n")
    182    result = fmt % argument
    183    if len(result) <= maxwidth:
    184        return result
    185    else:
    186        # How long can we let the argument be?  Try filling in the
    187        # format with an empty argument to find out.
    188        max_arg_width = maxwidth - len(fmt % "")
    189        result = fmt % truncate_expression(argument, max_arg_width)
    190        assert len(result) <= maxwidth
    191        return result
    192 
    193 def negate(expr):
    194    """Return a negated version of expr; try to avoid double-negation.
    195 
    196    We usually wrap expressions in parentheses and add a "!".
    197    >>> negate("A && B")
    198    '!(A && B)'
    199 
    200    But if we recognize the expression as negated, we can restore it.
    201    >>> negate(negate("A && B"))
    202    'A && B'
    203 
    204    The same applies for defined(FOO).
    205    >>> negate("defined(FOO)")
    206    '!defined(FOO)'
    207    >>> negate(negate("defined(FOO)"))
    208    'defined(FOO)'
    209 
    210    Internal parentheses don't confuse us:
    211    >>> negate("!(FOO) && !(BAR)")
    212    '!(!(FOO) && !(BAR))'
    213 
    214    """
    215    expr = expr.strip()
    216    # See whether we match !(...), with no intervening close-parens.
    217    m = re.match(r'^!\s*\(([^\)]*)\)$', expr)
    218    if m:
    219        return m.group(1)
    220 
    221 
    222    # See whether we match !?defined(...), with no intervening close-parens.
    223    m = re.match(r'^(!?)\s*(defined\([^\)]*\))$', expr)
    224    if m:
    225        if m.group(1) == "!":
    226            prefix = ""
    227        else:
    228            prefix = "!"
    229        return prefix + m.group(2)
    230 
    231    return "!(%s)" % expr
    232 
    233 def uncomment(s):
    234    """
    235    Remove existing trailing comments from an #else or #endif line.
    236    """
    237    s = re.sub(r'//.*','',s)
    238    s = re.sub(r'/\*.*','',s)
    239    return s.strip()
    240 
    241 def translate(f_in, f_out):
    242    """
    243    Read a file from f_in, and write its annotated version to f_out.
    244    """
    245    # A stack listing our current if/else state.  Each member of the stack
    246    # is a list of directives.  Each directive is a 3-tuple of
    247    #    (command, rest, lineno)
    248    # where "command" is one of if/ifdef/ifndef/else/elif, and where
    249    # "rest" is an expression in a format suitable for use with #if, and where
    250    # lineno is the line number where the directive occurred.
    251    stack = []
    252    # the stack element corresponding to the top level of the file.
    253    whole_file = []
    254    cur_level = whole_file
    255    lineno = 0
    256    for line in f_in:
    257        lineno += 1
    258        m = re.match(r'\s*#\s*(if|ifdef|ifndef|else|endif|elif)\b\s*(.*)',
    259                     line)
    260        if not m:
    261            # no directive, so we can just write it out.
    262            f_out.write(line)
    263            continue
    264        command,rest = m.groups()
    265        if command in ("if", "ifdef", "ifndef"):
    266            # The #if directive pushes us one level lower on the stack.
    267            if command == 'ifdef':
    268                rest = "defined(%s)"%uncomment(rest)
    269            elif command == 'ifndef':
    270                rest = "!defined(%s)"%uncomment(rest)
    271            elif rest.endswith("\\"):
    272                rest = rest[:-1]+"..."
    273 
    274            rest = uncomment(rest)
    275 
    276            new_level = [ (command, rest, lineno) ]
    277            stack.append(cur_level)
    278            cur_level = new_level
    279            f_out.write(line)
    280        elif command in ("else", "elif"):
    281            # We stay at the same level on the stack.  If we have an #else,
    282            # we comment it.
    283            if len(cur_level) == 0 or cur_level[-1][0] == 'else':
    284                raise Problem("Unexpected #%s on %d"% (command,lineno))
    285            if (len(cur_level) == 1 and command == 'else' and
    286                lineno > cur_level[0][2] + LINE_OBVIOUSNESS_LIMIT):
    287                f_out.write(commented_line("#else /* %s */\n",
    288                                           negate(cur_level[0][1])))
    289            else:
    290                f_out.write(line)
    291            cur_level.append((command, rest, lineno))
    292        else:
    293            # We pop one element on the stack, and comment an endif.
    294            assert command == 'endif'
    295            if len(stack) == 0:
    296                raise Problem("Unmatched #%s on %s"% (command,lineno))
    297            if lineno <= cur_level[0][2] + LINE_OBVIOUSNESS_LIMIT:
    298                f_out.write(line)
    299            elif len(cur_level) == 1 or (
    300                    len(cur_level) == 2 and cur_level[1][0] == 'else'):
    301                f_out.write(commented_line("#endif /* %s */\n",
    302                                           cur_level[0][1]))
    303            else:
    304                f_out.write(commented_line("#endif /* %s || ... */\n",
    305                                           cur_level[0][1]))
    306            cur_level = stack.pop()
    307    if len(stack) or cur_level != whole_file:
    308        raise Problem("Missing #endif")
    309 
    310 if __name__ == '__main__':
    311 
    312    import sys,os
    313 
    314    if sys.argv[1] == "--self-test":
    315        import doctest
    316        doctest.testmod()
    317        sys.exit(0)
    318 
    319    for fn in sys.argv[1:]:
    320        with open(fn+"_OUT", 'w') as output_file:
    321            translate(open(fn, 'r'), output_file)
    322        os.rename(fn+"_OUT", fn)