tor

The Tor anonymity network
git clone https://git.dasho.dev/tor.git
Log | Files | Refs | README | LICENSE

problem.py (10118B)


      1 """
      2 In this file we define a ProblemVault class where we store all the
      3 exceptions and all the problems we find with the code.
      4 
      5 The ProblemVault is capable of registering problems and also figuring out if a
      6 problem is worse than a registered exception so that it only warns when things
      7 get worse.
      8 """
      9 
     10 # Future imports for Python 2.7, mandatory in 3.0
     11 from __future__ import division
     12 from __future__ import print_function
     13 from __future__ import unicode_literals
     14 
     15 import os.path
     16 import re
     17 import sys
     18 
     19 STATUS_ERR = 2
     20 STATUS_WARN = 1
     21 STATUS_OK = 0
     22 
     23 class ProblemVault(object):
     24    """
     25    Singleton where we store the various new problems we
     26    found in the code, and also the old problems we read from the exception
     27    file.
     28    """
     29    def __init__(self, exception_fname=None):
     30        # Exception dictionary: { problem.key() : Problem object }
     31        self.exceptions = {}
     32        # Exception list: list of Problem objects, in the order added.
     33        self.exception_list = []
     34        # Exception dictionary: maps key to the problem it was used to
     35        # suppress.
     36        self.used_exception_for = {}
     37 
     38        if exception_fname == None:
     39            return
     40 
     41        try:
     42            with open(exception_fname, 'r') as exception_f:
     43                self.register_exceptions(exception_f)
     44        except IOError:
     45            print("No exception file provided", file=sys.stderr)
     46 
     47    def register_exceptions(self, exception_file):
     48        # Register exceptions
     49        for lineno, line in enumerate(exception_file, 1):
     50            try:
     51                problem = get_old_problem_from_exception_str(line)
     52            except ValueError as v:
     53                print("Exception file line {} not recognized: {}"
     54                      .format(lineno,v),
     55                      file=sys.stderr)
     56                continue
     57 
     58            if problem is None:
     59                continue
     60 
     61            # Fail if we see dup exceptions. There is really no reason to have dup exceptions.
     62            if problem.key() in self.exceptions:
     63                print("Duplicate exceptions lines found in exception file:\n\t{}\n\t{}\nAborting...".format(problem, self.exceptions[problem.key()]),
     64                      file=sys.stderr)
     65                sys.exit(1)
     66 
     67            self.exceptions[problem.key()] = problem
     68            self.exception_list.append(problem)
     69            #print "Registering exception: %s" % problem
     70 
     71    def register_problem(self, problem):
     72        """
     73        Register this problem to the problem value. Return true if it was a new
     74        problem or it worsens an already existing problem.  A true
     75        value may be STATUS_ERR to indicate a hard violation, or STATUS_WARN
     76        to indicate a warning.
     77        """
     78        # This is a new problem, print it
     79        if problem.key() not in self.exceptions:
     80            return STATUS_ERR
     81 
     82        # If it's an old problem, we don't warn if the situation got better
     83        # (e.g. we went from 4k LoC to 3k LoC), but we do warn if the
     84        # situation worsened (e.g. we went from 60 includes to 80).
     85        status = problem.is_worse_than(self.exceptions[problem.key()])
     86 
     87        # Remember that we used this exception, so that we can later
     88        # determine whether the exception was overbroad.
     89        self.used_exception_for[problem.key()] = problem
     90 
     91        return status
     92 
     93    def list_overbroad_exceptions(self):
     94        """Return an iterator of tuples containing (ex,prob) where ex is an
     95           exceptions in this vault that are stricter than it needs to be, and
     96           prob is the worst problem (if any) that it covered.
     97        """
     98        for k in self.exceptions:
     99            e = self.exceptions[k]
    100            p = self.used_exception_for.get(k)
    101            if p is None or e.is_worse_than(p):
    102                yield (e, p)
    103 
    104    def list_exceptions_without_overbroad(self):
    105        """Return an iterator of new problems, such that overbroad
    106           exceptions are replaced with minimally broad versions, or removed.
    107        """
    108        for e in self.exception_list:
    109            p = self.used_exception_for.get(e.key())
    110            if p is None:
    111                # This exception wasn't needed at all.
    112                continue
    113            if e.is_worse_than(p):
    114                # The exception is worse than the problem we found.
    115                # Yield the problem as the new exception value.
    116                yield p
    117            else:
    118                # The problem is as bad as the exception, or worse.
    119                # Yield the exception.
    120                yield e
    121 
    122    def set_tolerances(self, fns):
    123        """Adjust the tolerances for the exceptions in this vault.  Takes
    124           a map of problem type to a function that adjusts the permitted
    125           function to its new maximum value."""
    126        for k in self.exceptions:
    127            ex = self.exceptions[k]
    128            fn = fns.get(ex.problem_type)
    129            if fn is not None:
    130                ex.metric_value = fn(ex.metric_value)
    131 
    132 class ProblemFilter(object):
    133    def __init__(self):
    134        self.thresholds = dict()
    135 
    136    def addThreshold(self, item):
    137        self.thresholds[(item.get_type(),item.get_file_type())] = item
    138 
    139    def matches(self, item):
    140        key = (item.get_type(), item.get_file_type())
    141        filt = self.thresholds.get(key, None)
    142        if filt is None:
    143            return False
    144        return item.is_worse_than(filt)
    145 
    146    def filter(self, sequence):
    147        for item in iter(sequence):
    148            if self.matches(item):
    149                yield item
    150 
    151 class Item(object):
    152    """
    153    A generic measurement about some aspect of our source code. See
    154    the subclasses below for the specific problems we are trying to tackle.
    155    """
    156    def __init__(self, problem_type, problem_location, metric_value):
    157        self.problem_location = problem_location
    158        self.metric_value = int(metric_value)
    159        self.warning_threshold = self.metric_value
    160        self.problem_type = problem_type
    161 
    162    def is_worse_than(self, other_problem):
    163        """Return STATUS_ERR if this is a worse problem than other_problem.
    164           Return STATUS_WARN if it is a little worse, but falls within the
    165           warning threshold.  Return STATUS_OK if this problem is not
    166           at all worse than other_problem.
    167        """
    168        if self.metric_value > other_problem.metric_value:
    169            return STATUS_ERR
    170        elif self.metric_value > other_problem.warning_threshold:
    171            return STATUS_WARN
    172        else:
    173            return STATUS_OK
    174 
    175    def key(self):
    176        """Generate a unique key that describes this problem that can be used as a dictionary key"""
    177        # Item location is a filesystem path, so we need to normalize this
    178        # across platforms otherwise same paths are not gonna match.
    179        canonical_location = os.path.normcase(self.problem_location)
    180        return "%s:%s" % (canonical_location, self.problem_type)
    181 
    182    def __str__(self):
    183        return "problem %s %s %s" % (self.problem_type, self.problem_location, self.metric_value)
    184 
    185    def get_type(self):
    186        return self.problem_type
    187 
    188    def get_file_type(self):
    189        if self.problem_location.endswith(".h"):
    190            return "*.h"
    191        else:
    192            return "*.c"
    193 
    194 class FileSizeItem(Item):
    195    """
    196    Denotes a problem with the size of a .c file.
    197 
    198    The 'problem_location' is the filesystem path of the .c file, and the
    199    'metric_value' is the number of lines in the .c file.
    200    """
    201    def __init__(self, problem_location, metric_value):
    202        super(FileSizeItem, self).__init__("file-size", problem_location, metric_value)
    203 
    204 class IncludeCountItem(Item):
    205    """
    206    Denotes a problem with the number of #includes in a .c file.
    207 
    208    The 'problem_location' is the filesystem path of the .c file, and the
    209    'metric_value' is the number of #includes in the .c file.
    210    """
    211    def __init__(self, problem_location, metric_value):
    212        super(IncludeCountItem, self).__init__("include-count", problem_location, metric_value)
    213 
    214 class FunctionSizeItem(Item):
    215    """
    216    Denotes a problem with a size of a function in a .c file.
    217 
    218    The 'problem_location' is "<path>:<function>()" where <path> is the
    219    filesystem path of the .c file and <function> is the name of the offending
    220    function.
    221 
    222    The 'metric_value' is the size of the offending function in lines.
    223    """
    224    def __init__(self, problem_location, metric_value):
    225        super(FunctionSizeItem, self).__init__("function-size", problem_location, metric_value)
    226 
    227 class DependencyViolationItem(Item):
    228    """
    229    Denotes a dependency violation in a .c or .h file.  A dependency violation
    230    occurs when a file includes a file from some module that is not listed
    231    in its .may_include file.
    232 
    233    The 'problem_location' is the file that contains the problem.
    234 
    235    The 'metric_value' is the number of forbidden includes.
    236    """
    237    def __init__(self, problem_location, metric_value):
    238        super(DependencyViolationItem, self).__init__("dependency-violation",
    239                                                      problem_location,
    240                                                      metric_value)
    241 
    242 comment_re = re.compile(r'#.*$')
    243 
    244 def get_old_problem_from_exception_str(exception_str):
    245    orig_str = exception_str
    246    exception_str = comment_re.sub("", exception_str)
    247    fields = exception_str.split()
    248    if len(fields) == 0:
    249        # empty line or comment
    250        return None
    251    elif len(fields) == 4:
    252        # valid line
    253        _, problem_type, problem_location, metric_value = fields
    254    else:
    255        raise ValueError("Misformatted line {!r}".format(orig_str))
    256 
    257    if problem_type == "file-size":
    258        return FileSizeItem(problem_location, metric_value)
    259    elif problem_type == "include-count":
    260        return IncludeCountItem(problem_location, metric_value)
    261    elif problem_type == "function-size":
    262        return FunctionSizeItem(problem_location, metric_value)
    263    elif problem_type == "dependency-violation":
    264        return DependencyViolationItem(problem_location, metric_value)
    265    else:
    266        raise ValueError("Unknown exception type {!r}".format(orig_str))