tor

The Tor anonymity network
git clone https://git.dasho.dev/tor.git
Log | Files | Refs | README | LICENSE

format_changelog.py (16521B)


      1 #!/usr/bin/env python
      2 # Copyright (c) 2014-2019, The Tor Project, Inc.
      3 # See LICENSE for licensing information
      4 #
      5 # This script reformats a section of the changelog to wrap everything to
      6 # the right width and put blank lines in the right places.  Eventually,
      7 # it might include a linter.
      8 #
      9 # To run it, pipe a section of the changelog (starting with "Changes
     10 # in Tor 0.x.y.z-alpha") through the script.
     11 
     12 # Future imports for Python 2.7, mandatory in 3.0
     13 from __future__ import division
     14 from __future__ import print_function
     15 from __future__ import unicode_literals
     16 
     17 import os
     18 import re
     19 import sys
     20 import optparse
     21 
     22 # ==============================
     23 # Oh, look!  It's a cruddy approximation to Knuth's elegant text wrapping
     24 # algorithm, with totally ad hoc parameters!
     25 #
     26 # We're trying to minimize (see the *_PENALTY and *_EXPONENT constants below):
     27 #    The total of the cubes of ragged space on underflowed intermediate lines,
     28 #  PLUS
     29 #    OVERFLOW_PENALTY * the fourth power of overflowed characters
     30 #  PLUS
     31 #    The ragged space on the last line, plus ORPHAN_PENALTY if it is a single word,
     32 #  PLUS
     33 #    OPENPAREN_PENALTY for each line that starts with (
     34 #
     35 # We use an obvious dynamic programming algorithm to sorta approximate this.
     36 # It's not coded right or optimally, but it's fast enough for changelogs
     37 #
     38 # (Code found in an old directory of mine, lightly cleaned. -NM)
     39 
     40 NO_HYPHENATE=set("""
     41 pf-divert
     42 tor-resolve
     43 tor-gencert
     44 """.split())
     45 
     46 LASTLINE_UNDERFLOW_EXPONENT = 1
     47 LASTLINE_UNDERFLOW_PENALTY = 1
     48 
     49 UNDERFLOW_EXPONENT = 3
     50 UNDERFLOW_PENALTY = 1
     51 
     52 OVERFLOW_EXPONENT = 4
     53 OVERFLOW_PENALTY = 2000
     54 
     55 ORPHAN_PENALTY = 10000
     56 
     57 OPENPAREN_PENALTY = 200
     58 
     59 def generate_wrapping(words, divisions):
     60    lines = []
     61    last = 0
     62    for i in divisions:
     63        w = words[last:i]
     64        last = i
     65        line = " ".join(w).replace("\xff ","-").replace("\xff","-")
     66        lines.append(line.strip())
     67    return lines
     68 
     69 def wrapping_quality(words, divisions, width1, width2):
     70    total = 0.0
     71 
     72    lines = generate_wrapping(words, divisions)
     73    for line in lines:
     74        length = len(line)
     75        if line is lines[0]:
     76            width = width1
     77        else:
     78            width = width2
     79 
     80        if line[0:1] == '(':
     81            total += OPENPAREN_PENALTY
     82 
     83        if length > width:
     84            total += OVERFLOW_PENALTY * (
     85                (length - width) ** OVERFLOW_EXPONENT )
     86        else:
     87            if line is lines[-1]:
     88                e,p = (LASTLINE_UNDERFLOW_EXPONENT, LASTLINE_UNDERFLOW_PENALTY)
     89                if " " not in line:
     90                    total += ORPHAN_PENALTY
     91            else:
     92                e,p = (UNDERFLOW_EXPONENT, UNDERFLOW_PENALTY)
     93 
     94            total += p * ((width - length) ** e)
     95 
     96    return total
     97 
     98 def wrap_graf(words, prefix_len1=0, prefix_len2=0, width=72):
     99    wrapping_after = [ (0,), ]
    100 
    101    w1 = width - prefix_len1
    102    w2 = width - prefix_len2
    103 
    104    for i in range(1, len(words)+1):
    105        best_so_far = None
    106        best_score = 1e300
    107        for j in range(i):
    108            t = wrapping_after[j]
    109            t1 = t[:-1] + (i,)
    110            t2 = t + (i,)
    111            wq1 = wrapping_quality(words, t1, w1, w2)
    112            wq2 = wrapping_quality(words, t2, w1, w2)
    113 
    114            if wq1 < best_score:
    115                best_so_far = t1
    116                best_score = wq1
    117            if wq2 < best_score:
    118                best_so_far = t2
    119                best_score = wq2
    120        wrapping_after.append( best_so_far )
    121 
    122    lines = generate_wrapping(words, wrapping_after[-1])
    123 
    124    return lines
    125 
    126 def hyphenatable(word):
    127    if "--" in word:
    128        return False
    129 
    130    if re.match(r'^[^\d\-]\D*-', word):
    131        stripped = re.sub(r'^\W+','',word)
    132        stripped = re.sub(r'\W+$','',word)
    133        return stripped not in NO_HYPHENATE
    134    else:
    135        return False
    136 
    137 def split_paragraph(s):
    138    "Split paragraph into words; tuned for Tor."
    139 
    140    r = []
    141    for word in s.split():
    142        if hyphenatable(word):
    143            while "-" in word:
    144                a,word = word.split("-",1)
    145                r.append(a+"\xff")
    146        r.append(word)
    147    return r
    148 
    149 def fill(text, width, initial_indent, subsequent_indent):
    150    words = split_paragraph(text)
    151    lines = wrap_graf(words, len(initial_indent), len(subsequent_indent),
    152                      width)
    153    res = [ initial_indent, lines[0], "\n" ]
    154    for line in lines[1:]:
    155        res.append(subsequent_indent)
    156        res.append(line)
    157        res.append("\n")
    158    return "".join(res)
    159 
    160 # ==============================
    161 
    162 
    163 TP_MAINHEAD = 0
    164 TP_HEADTEXT = 1
    165 TP_BLANK = 2
    166 TP_SECHEAD = 3
    167 TP_ITEMFIRST = 4
    168 TP_ITEMBODY = 5
    169 TP_END = 6
    170 TP_PREHEAD = 7
    171 
    172 def head_parser(line):
    173    if re.match(r'^Changes in', line):
    174        return TP_MAINHEAD
    175    elif re.match(r'^[A-Za-z]', line):
    176        return TP_PREHEAD
    177    elif re.match(r'^  o ', line):
    178        return TP_SECHEAD
    179    elif re.match(r'^\s*$', line):
    180        return TP_BLANK
    181    else:
    182        return TP_HEADTEXT
    183 
    184 def body_parser(line):
    185    if re.match(r'^  o ', line):
    186        return TP_SECHEAD
    187    elif re.match(r'^    -',line):
    188        return TP_ITEMFIRST
    189    elif re.match(r'^      \S', line):
    190        return TP_ITEMBODY
    191    elif re.match(r'^\s*$', line):
    192        return TP_BLANK
    193    elif re.match(r'^Changes in', line):
    194        return TP_END
    195    elif re.match(r'^\s+\S', line):
    196        return TP_HEADTEXT
    197    else:
    198        print("Weird line %r"%line, file=sys.stderr)
    199 
    200 def clean_head(head):
    201    return head
    202 
    203 def head_score(s):
    204    m = re.match(r'^ +o (.*)', s)
    205    if not m:
    206        print("Can't score %r"%s, file=sys.stderr)
    207        return 99999
    208    lw = m.group(1).lower()
    209    if lw.startswith("security") and "feature" not in lw:
    210        score = -300
    211    elif lw.startswith("deprecated version"):
    212        score = -200
    213    elif lw.startswith("directory auth"):
    214        score = -150
    215    elif (('new' in lw and 'requirement' in lw) or
    216          ('new' in lw and 'dependenc' in lw) or
    217          ('build' in lw and 'requirement' in lw) or
    218          ('removed' in lw and 'platform' in lw)):
    219        score = -100
    220    elif lw.startswith("major feature"):
    221        score = 00
    222    elif lw.startswith("major bug"):
    223        score = 50
    224    elif lw.startswith("major"):
    225        score = 70
    226    elif lw.startswith("minor feature"):
    227        score = 200
    228    elif lw.startswith("minor bug"):
    229        score = 250
    230    elif lw.startswith("minor"):
    231        score = 270
    232    else:
    233        score = 1000
    234 
    235    if 'secur' in lw:
    236        score -= 2
    237 
    238    if "(other)" in lw:
    239        score += 2
    240 
    241    if '(' not in lw:
    242        score -= 1
    243 
    244    return score
    245 
    246 class ChangeLog(object):
    247    def __init__(self, wrapText=True, blogOrder=True, drupalBreak=False):
    248        self.prehead = []
    249        self.mainhead = None
    250        self.headtext = []
    251        self.curgraf = None
    252        self.sections = []
    253        self.cursection = None
    254        self.lineno = 0
    255        self.wrapText = wrapText
    256        self.blogOrder = blogOrder
    257        self.drupalBreak = drupalBreak
    258 
    259    def addLine(self, tp, line):
    260        self.lineno += 1
    261 
    262        if tp == TP_MAINHEAD:
    263            assert not self.mainhead
    264            self.mainhead = line
    265 
    266        elif tp == TP_PREHEAD:
    267            self.prehead.append(line)
    268 
    269        elif tp == TP_HEADTEXT:
    270            if self.curgraf is None:
    271                self.curgraf = []
    272                self.headtext.append(self.curgraf)
    273            self.curgraf.append(line)
    274 
    275        elif tp == TP_BLANK:
    276            self.curgraf = None
    277 
    278        elif tp == TP_SECHEAD:
    279            self.cursection = [ self.lineno, line, [] ]
    280            self.sections.append(self.cursection)
    281 
    282        elif tp == TP_ITEMFIRST:
    283            item = ( self.lineno, [ [line] ])
    284            self.curgraf = item[1][0]
    285            self.cursection[2].append(item)
    286 
    287        elif tp == TP_ITEMBODY:
    288            if self.curgraf is None:
    289                self.curgraf = []
    290                self.cursection[2][-1][1].append(self.curgraf)
    291            self.curgraf.append(line)
    292 
    293        else:
    294            assert False  # This should be unreachable.
    295 
    296    def lint_head(self, line, head):
    297        m = re.match(r'^ *o ([^\(]+)((?:\([^\)]+\))?):', head)
    298        if not m:
    299            print("Weird header format on line %s"%line, file=sys.stderr)
    300 
    301    def lint_item(self, line, grafs, head_type):
    302        pass
    303 
    304    def lint(self):
    305        self.head_lines = {}
    306        for sec_line, sec_head, items in self.sections:
    307            head_type = self.lint_head(sec_line, sec_head)
    308            for item_line, grafs in items:
    309                self.lint_item(item_line, grafs, head_type)
    310 
    311    def dumpGraf(self,par,indent1,indent2=-1):
    312        if not self.wrapText:
    313            for line in par:
    314                print(line)
    315            return
    316 
    317        if indent2 == -1:
    318            indent2 = indent1
    319        text = " ".join(re.sub(r'\s+', ' ', line.strip()) for line in par)
    320 
    321        sys.stdout.write(fill(text,
    322                              width=72,
    323                              initial_indent=" "*indent1,
    324                              subsequent_indent=" "*indent2))
    325 
    326    def dumpPreheader(self, graf):
    327        self.dumpGraf(graf, 0)
    328        print()
    329 
    330    def dumpMainhead(self, head):
    331        print(head)
    332 
    333    def dumpHeadGraf(self, graf):
    334        self.dumpGraf(graf, 2)
    335        print()
    336 
    337    def dumpSectionHeader(self, header):
    338        print(header)
    339 
    340    def dumpStartOfSections(self):
    341        pass
    342 
    343    def dumpEndOfSections(self):
    344        pass
    345 
    346    def dumpEndOfSection(self):
    347        print()
    348 
    349    def dumpEndOfChangelog(self):
    350        print()
    351 
    352    def dumpDrupalBreak(self):
    353        pass
    354 
    355    def dumpItem(self, grafs):
    356        self.dumpGraf(grafs[0],4,6)
    357        for par in grafs[1:]:
    358            print()
    359            self.dumpGraf(par,6,6)
    360 
    361    def collateAndSortSections(self):
    362        heads = []
    363        sectionsByHead = { }
    364        for _, head, items in self.sections:
    365            head = clean_head(head)
    366            try:
    367                s = sectionsByHead[head]
    368            except KeyError:
    369                s = sectionsByHead[head] = []
    370                heads.append( (head_score(head), head.lower(), head, s) )
    371 
    372            s.extend(items)
    373 
    374        heads.sort()
    375        self.sections = [ (0, head, items) for _1,_2,head,items in heads ]
    376 
    377    def dump(self):
    378        if self.prehead:
    379            self.dumpPreheader(self.prehead)
    380 
    381        if not self.blogOrder:
    382            self.dumpMainhead(self.mainhead)
    383 
    384        for par in self.headtext:
    385            self.dumpHeadGraf(par)
    386 
    387        if self.blogOrder:
    388            self.dumpMainhead(self.mainhead)
    389 
    390        drupalBreakAfter = None
    391        if self.drupalBreak and len(self.sections) > 4:
    392            drupalBreakAfter = self.sections[1][2]
    393 
    394        self.dumpStartOfSections()
    395        for _,head,items in self.sections:
    396            if not head.endswith(':'):
    397                print("adding : to %r"%head, file=sys.stderr)
    398                head = head + ":"
    399            self.dumpSectionHeader(head)
    400            for _,grafs in items:
    401                self.dumpItem(grafs)
    402            self.dumpEndOfSection()
    403            if items is drupalBreakAfter:
    404                self.dumpDrupalBreak()
    405        self.dumpEndOfSections()
    406        self.dumpEndOfChangelog()
    407 
    408 # Map from issue prefix to pair of (visible prefix, url prefix)
    409 ISSUE_PREFIX_MAP = {
    410    "" : ( "", "tpo/core/tor" ),
    411    "tor#" : ( "", "tpo/core/tor" ),
    412    "chutney#" : ( "chutney#", "tpo/core/chutney" ),
    413    "torspec#" : ( "torspec#", "tpo/core/torspec" ),
    414    "trunnel#" : ( "trunnel#", "tpo/core/trunnel" ),
    415    "torsocks#" : ( "torsocks#", "tpo/core/torsocks"),
    416 }
    417 
    418 # Let's turn bugs to html.
    419 BUG_PAT = re.compile(r'(bug|ticket|issue|feature)\s+([\w/]+#)?(\d{4,6})', re.I)
    420 def bug_html(m):
    421    kind = m.group(1)
    422    prefix = m.group(2) or ""
    423    bugno = m.group(3)
    424    try:
    425        disp_prefix, url_prefix = ISSUE_PREFIX_MAP[prefix]
    426    except KeyError:
    427        print("Can't figure out URL for {}{}".format(prefix,bugno),
    428              file=sys.stderr)
    429        return "{} {}{}".format(kind, prefix, bugno)
    430 
    431    return "{} <a href='https://bugs.torproject.org/{}/{}'>{}{}</a>".format(
    432        kind, url_prefix, bugno, disp_prefix, bugno)
    433 
    434 class HTMLChangeLog(ChangeLog):
    435    def __init__(self, *args, **kwargs):
    436        ChangeLog.__init__(self, *args, **kwargs)
    437 
    438    def htmlText(self, graf):
    439        output = []
    440        for line in graf:
    441            line = line.rstrip().replace("&","&amp;")
    442            line = line.rstrip().replace("<","&lt;").replace(">","&gt;")
    443            output.append(line.strip())
    444        output = " ".join(output)
    445        output = BUG_PAT.sub(bug_html, output)
    446        sys.stdout.write(output)
    447 
    448    def htmlPar(self, graf):
    449        sys.stdout.write("<p>")
    450        self.htmlText(graf)
    451        sys.stdout.write("</p>\n")
    452 
    453    def dumpPreheader(self, graf):
    454        self.htmlPar(graf)
    455 
    456    def dumpMainhead(self, head):
    457        sys.stdout.write("<h2>%s</h2>"%head)
    458 
    459    def dumpHeadGraf(self, graf):
    460        self.htmlPar(graf)
    461 
    462    def dumpSectionHeader(self, header):
    463        header = header.replace(" o ", "", 1).lstrip()
    464        sys.stdout.write("  <li>%s\n"%header)
    465        sys.stdout.write("  <ul>\n")
    466 
    467    def dumpEndOfSection(self):
    468        sys.stdout.write("  </ul>\n\n")
    469 
    470    def dumpEndOfChangelog(self):
    471        pass
    472 
    473    def dumpStartOfSections(self):
    474        print("<ul>\n")
    475 
    476    def dumpEndOfSections(self):
    477        print("</ul>\n")
    478 
    479    def dumpDrupalBreak(self):
    480        print("\n</ul>\n")
    481        print("<p>&nbsp;</p>")
    482        print("\n<!--break-->\n\n")
    483        print("<ul>")
    484 
    485    def dumpItem(self, grafs):
    486        grafs[0][0] = grafs[0][0].replace(" - ", "", 1).lstrip()
    487        sys.stdout.write("  <li>")
    488        if len(grafs) > 1:
    489            for par in grafs:
    490                self.htmlPar(par)
    491        else:
    492            self.htmlText(grafs[0])
    493        print()
    494 
    495 op = optparse.OptionParser(usage="usage: %prog [options] [filename]")
    496 op.add_option('-W', '--no-wrap', action='store_false',
    497              dest='wrapText', default=True,
    498              help='Do not re-wrap paragraphs')
    499 op.add_option('-S', '--no-sort', action='store_false',
    500              dest='sort', default=True,
    501              help='Do not sort or collate sections')
    502 op.add_option('-o', '--output', dest='output',
    503              default='-', metavar='FILE', help="write output to FILE")
    504 op.add_option('-H', '--html', action='store_true',
    505              dest='html', default=False,
    506              help="generate an HTML fragment")
    507 op.add_option('-1', '--first', action='store_true',
    508              dest='firstOnly', default=False,
    509              help="write only the first section")
    510 op.add_option('-b', '--blog-header', action='store_true',
    511              dest='blogOrder', default=False,
    512              help="Write the header in blog order")
    513 op.add_option('-B', '--blog', action='store_true',
    514              dest='blogFormat', default=False,
    515              help="Set all other options as appropriate for a blog post")
    516 op.add_option('--inplace', action='store_true',
    517              dest='inplace', default=False,
    518              help="Alter the ChangeLog in place")
    519 op.add_option('--drupal-break', action='store_true',
    520              dest='drupalBreak', default=False,
    521              help='Insert a drupal-friendly <!--break--> as needed')
    522 
    523 options,args = op.parse_args()
    524 
    525 if options.blogFormat:
    526    options.blogOrder = True
    527    options.html = True
    528    options.sort = False
    529    options.wrapText = False
    530    options.firstOnly = True
    531    options.drupalBreak = True
    532 
    533 if len(args) > 1:
    534    op.error("Too many arguments")
    535 elif len(args) == 0:
    536    fname = 'ChangeLog'
    537 else:
    538    fname = args[0]
    539 
    540 if options.inplace:
    541    assert options.output == '-'
    542    options.output = fname
    543 
    544 if fname != '-':
    545    sys.stdin = open(fname, 'r')
    546 
    547 nextline = None
    548 
    549 if options.html:
    550    ChangeLogClass = HTMLChangeLog
    551 else:
    552    ChangeLogClass = ChangeLog
    553 
    554 CL = ChangeLogClass(wrapText=options.wrapText,
    555                    blogOrder=options.blogOrder,
    556                    drupalBreak=options.drupalBreak)
    557 parser = head_parser
    558 
    559 for line in sys.stdin:
    560    line = line.rstrip()
    561    tp = parser(line)
    562 
    563    if tp == TP_SECHEAD:
    564        parser = body_parser
    565    elif tp == TP_END:
    566        nextline = line
    567        break
    568 
    569    CL.addLine(tp,line)
    570 
    571 CL.lint()
    572 
    573 if options.output != '-':
    574    fname_new = options.output+".new"
    575    fname_out = options.output
    576    sys.stdout = open(fname_new, 'w')
    577 else:
    578    fname_new = fname_out = None
    579 
    580 if options.sort:
    581    CL.collateAndSortSections()
    582 
    583 CL.dump()
    584 
    585 if options.firstOnly:
    586    sys.exit(0)
    587 
    588 if nextline is not None:
    589    print(nextline)
    590 
    591 for line in sys.stdin:
    592    sys.stdout.write(line)
    593 
    594 if fname_new is not None:
    595    os.rename(fname_new, fname_out)