format_changelog.py (16521B)
1 #!/usr/bin/env python 2 # Copyright (c) 2014-2019, The Tor Project, Inc. 3 # See LICENSE for licensing information 4 # 5 # This script reformats a section of the changelog to wrap everything to 6 # the right width and put blank lines in the right places. Eventually, 7 # it might include a linter. 8 # 9 # To run it, pipe a section of the changelog (starting with "Changes 10 # in Tor 0.x.y.z-alpha" through the script.) 11 12 # Future imports for Python 2.7, mandatory in 3.0 13 from __future__ import division 14 from __future__ import print_function 15 from __future__ import unicode_literals 16 17 import os 18 import re 19 import sys 20 import optparse 21 22 # ============================== 23 # Oh, look! It's a cruddy approximation to Knuth's elegant text wrapping 24 # algorithm, with totally ad hoc parameters! 25 # 26 # We're trying to minimize: 27 # The total of the cubes of ragged space on underflowed intermediate lines, 28 # PLUS 29 # 100 * the fourth power of overflowed characters 30 # PLUS 31 # .1 * a bit more than the cube of ragged space on the last line. 32 # PLUS 33 # OPENPAREN_PENALTY for each line that starts with ( 34 # 35 # We use an obvious dynamic programming algorithm to sorta approximate this. 36 # It's not coded right or optimally, but it's fast enough for changelogs 37 # 38 # (Code found in an old directory of mine, lightly cleaned. -NM) 39 40 NO_HYPHENATE=set(""" 41 pf-divert 42 tor-resolve 43 tor-gencert 44 """.split()) 45 46 LASTLINE_UNDERFLOW_EXPONENT = 1 47 LASTLINE_UNDERFLOW_PENALTY = 1 48 49 UNDERFLOW_EXPONENT = 3 50 UNDERFLOW_PENALTY = 1 51 52 OVERFLOW_EXPONENT = 4 53 OVERFLOW_PENALTY = 2000 54 55 ORPHAN_PENALTY = 10000 56 57 OPENPAREN_PENALTY = 200 58 59 def generate_wrapping(words, divisions): 60 lines = [] 61 last = 0 62 for i in divisions: 63 w = words[last:i] 64 last = i 65 line = " ".join(w).replace("\xff ","-").replace("\xff","-") 66 lines.append(line.strip()) 67 return lines 68 69 def wrapping_quality(words, divisions, width1, width2): 70 total = 0.0 71 72 lines = generate_wrapping(words, divisions) 73 for line in lines: 74 length = len(line) 75 if line is lines[0]: 76 width = width1 77 else: 78 width = width2 79 80 if line[0:1] == '(': 81 total += OPENPAREN_PENALTY 82 83 if length > width: 84 total += OVERFLOW_PENALTY * ( 85 (length - width) ** OVERFLOW_EXPONENT ) 86 else: 87 if line is lines[-1]: 88 e,p = (LASTLINE_UNDERFLOW_EXPONENT, LASTLINE_UNDERFLOW_PENALTY) 89 if " " not in line: 90 total += ORPHAN_PENALTY 91 else: 92 e,p = (UNDERFLOW_EXPONENT, UNDERFLOW_PENALTY) 93 94 total += p * ((width - length) ** e) 95 96 return total 97 98 def wrap_graf(words, prefix_len1=0, prefix_len2=0, width=72): 99 wrapping_after = [ (0,), ] 100 101 w1 = width - prefix_len1 102 w2 = width - prefix_len2 103 104 for i in range(1, len(words)+1): 105 best_so_far = None 106 best_score = 1e300 107 for j in range(i): 108 t = wrapping_after[j] 109 t1 = t[:-1] + (i,) 110 t2 = t + (i,) 111 wq1 = wrapping_quality(words, t1, w1, w2) 112 wq2 = wrapping_quality(words, t2, w1, w2) 113 114 if wq1 < best_score: 115 best_so_far = t1 116 best_score = wq1 117 if wq2 < best_score: 118 best_so_far = t2 119 best_score = wq2 120 wrapping_after.append( best_so_far ) 121 122 lines = generate_wrapping(words, wrapping_after[-1]) 123 124 return lines 125 126 def hyphenatable(word): 127 if "--" in word: 128 return False 129 130 if re.match(r'^[^\d\-]\D*-', word): 131 stripped = re.sub(r'^\W+','',word) 132 stripped = re.sub(r'\W+$','',word) 133 return stripped not in NO_HYPHENATE 134 else: 135 return False 136 137 def split_paragraph(s): 138 "Split paragraph into words; tuned for Tor." 139 140 r = [] 141 for word in s.split(): 142 if hyphenatable(word): 143 while "-" in word: 144 a,word = word.split("-",1) 145 r.append(a+"\xff") 146 r.append(word) 147 return r 148 149 def fill(text, width, initial_indent, subsequent_indent): 150 words = split_paragraph(text) 151 lines = wrap_graf(words, len(initial_indent), len(subsequent_indent), 152 width) 153 res = [ initial_indent, lines[0], "\n" ] 154 for line in lines[1:]: 155 res.append(subsequent_indent) 156 res.append(line) 157 res.append("\n") 158 return "".join(res) 159 160 # ============================== 161 162 163 TP_MAINHEAD = 0 164 TP_HEADTEXT = 1 165 TP_BLANK = 2 166 TP_SECHEAD = 3 167 TP_ITEMFIRST = 4 168 TP_ITEMBODY = 5 169 TP_END = 6 170 TP_PREHEAD = 7 171 172 def head_parser(line): 173 if re.match(r'^Changes in', line): 174 return TP_MAINHEAD 175 elif re.match(r'^[A-Za-z]', line): 176 return TP_PREHEAD 177 elif re.match(r'^ o ', line): 178 return TP_SECHEAD 179 elif re.match(r'^\s*$', line): 180 return TP_BLANK 181 else: 182 return TP_HEADTEXT 183 184 def body_parser(line): 185 if re.match(r'^ o ', line): 186 return TP_SECHEAD 187 elif re.match(r'^ -',line): 188 return TP_ITEMFIRST 189 elif re.match(r'^ \S', line): 190 return TP_ITEMBODY 191 elif re.match(r'^\s*$', line): 192 return TP_BLANK 193 elif re.match(r'^Changes in', line): 194 return TP_END 195 elif re.match(r'^\s+\S', line): 196 return TP_HEADTEXT 197 else: 198 print("Weird line %r"%line, file=sys.stderr) 199 200 def clean_head(head): 201 return head 202 203 def head_score(s): 204 m = re.match(r'^ +o (.*)', s) 205 if not m: 206 print("Can't score %r"%s, file=sys.stderr) 207 return 99999 208 lw = m.group(1).lower() 209 if lw.startswith("security") and "feature" not in lw: 210 score = -300 211 elif lw.startswith("deprecated version"): 212 score = -200 213 elif lw.startswith("directory auth"): 214 score = -150 215 elif (('new' in lw and 'requirement' in lw) or 216 ('new' in lw and 'dependenc' in lw) or 217 ('build' in lw and 'requirement' in lw) or 218 ('removed' in lw and 'platform' in lw)): 219 score = -100 220 elif lw.startswith("major feature"): 221 score = 00 222 elif lw.startswith("major bug"): 223 score = 50 224 elif lw.startswith("major"): 225 score = 70 226 elif lw.startswith("minor feature"): 227 score = 200 228 elif lw.startswith("minor bug"): 229 score = 250 230 elif lw.startswith("minor"): 231 score = 270 232 else: 233 score = 1000 234 235 if 'secur' in lw: 236 score -= 2 237 238 if "(other)" in lw: 239 score += 2 240 241 if '(' not in lw: 242 score -= 1 243 244 return score 245 246 class ChangeLog(object): 247 def __init__(self, wrapText=True, blogOrder=True, drupalBreak=False): 248 self.prehead = [] 249 self.mainhead = None 250 self.headtext = [] 251 self.curgraf = None 252 self.sections = [] 253 self.cursection = None 254 self.lineno = 0 255 self.wrapText = wrapText 256 self.blogOrder = blogOrder 257 self.drupalBreak = drupalBreak 258 259 def addLine(self, tp, line): 260 self.lineno += 1 261 262 if tp == TP_MAINHEAD: 263 assert not self.mainhead 264 self.mainhead = line 265 266 elif tp == TP_PREHEAD: 267 self.prehead.append(line) 268 269 elif tp == TP_HEADTEXT: 270 if self.curgraf is None: 271 self.curgraf = [] 272 self.headtext.append(self.curgraf) 273 self.curgraf.append(line) 274 275 elif tp == TP_BLANK: 276 self.curgraf = None 277 278 elif tp == TP_SECHEAD: 279 self.cursection = [ self.lineno, line, [] ] 280 self.sections.append(self.cursection) 281 282 elif tp == TP_ITEMFIRST: 283 item = ( self.lineno, [ [line] ]) 284 self.curgraf = item[1][0] 285 self.cursection[2].append(item) 286 287 elif tp == TP_ITEMBODY: 288 if self.curgraf is None: 289 self.curgraf = [] 290 self.cursection[2][-1][1].append(self.curgraf) 291 self.curgraf.append(line) 292 293 else: 294 assert False # This should be unreachable. 295 296 def lint_head(self, line, head): 297 m = re.match(r'^ *o ([^\(]+)((?:\([^\)]+\))?):', head) 298 if not m: 299 print("Weird header format on line %s"%line, file=sys.stderr) 300 301 def lint_item(self, line, grafs, head_type): 302 pass 303 304 def lint(self): 305 self.head_lines = {} 306 for sec_line, sec_head, items in self.sections: 307 head_type = self.lint_head(sec_line, sec_head) 308 for item_line, grafs in items: 309 self.lint_item(item_line, grafs, head_type) 310 311 def dumpGraf(self,par,indent1,indent2=-1): 312 if not self.wrapText: 313 for line in par: 314 print(line) 315 return 316 317 if indent2 == -1: 318 indent2 = indent1 319 text = " ".join(re.sub(r'\s+', ' ', line.strip()) for line in par) 320 321 sys.stdout.write(fill(text, 322 width=72, 323 initial_indent=" "*indent1, 324 subsequent_indent=" "*indent2)) 325 326 def dumpPreheader(self, graf): 327 self.dumpGraf(graf, 0) 328 print() 329 330 def dumpMainhead(self, head): 331 print(head) 332 333 def dumpHeadGraf(self, graf): 334 self.dumpGraf(graf, 2) 335 print() 336 337 def dumpSectionHeader(self, header): 338 print(header) 339 340 def dumpStartOfSections(self): 341 pass 342 343 def dumpEndOfSections(self): 344 pass 345 346 def dumpEndOfSection(self): 347 print() 348 349 def dumpEndOfChangelog(self): 350 print() 351 352 def dumpDrupalBreak(self): 353 pass 354 355 def dumpItem(self, grafs): 356 self.dumpGraf(grafs[0],4,6) 357 for par in grafs[1:]: 358 print() 359 self.dumpGraf(par,6,6) 360 361 def collateAndSortSections(self): 362 heads = [] 363 sectionsByHead = { } 364 for _, head, items in self.sections: 365 head = clean_head(head) 366 try: 367 s = sectionsByHead[head] 368 except KeyError: 369 s = sectionsByHead[head] = [] 370 heads.append( (head_score(head), head.lower(), head, s) ) 371 372 s.extend(items) 373 374 heads.sort() 375 self.sections = [ (0, head, items) for _1,_2,head,items in heads ] 376 377 def dump(self): 378 if self.prehead: 379 self.dumpPreheader(self.prehead) 380 381 if not self.blogOrder: 382 self.dumpMainhead(self.mainhead) 383 384 for par in self.headtext: 385 self.dumpHeadGraf(par) 386 387 if self.blogOrder: 388 self.dumpMainhead(self.mainhead) 389 390 drupalBreakAfter = None 391 if self.drupalBreak and len(self.sections) > 4: 392 drupalBreakAfter = self.sections[1][2] 393 394 self.dumpStartOfSections() 395 for _,head,items in self.sections: 396 if not head.endswith(':'): 397 print("adding : to %r"%head, file=sys.stderr) 398 head = head + ":" 399 self.dumpSectionHeader(head) 400 for _,grafs in items: 401 self.dumpItem(grafs) 402 self.dumpEndOfSection() 403 if items is drupalBreakAfter: 404 self.dumpDrupalBreak() 405 self.dumpEndOfSections() 406 self.dumpEndOfChangelog() 407 408 # Map from issue prefix to pair of (visible prefix, url prefix) 409 ISSUE_PREFIX_MAP = { 410 "" : ( "", "tpo/core/tor" ), 411 "tor#" : ( "", "tpo/core/tor" ), 412 "chutney#" : ( "chutney#", "tpo/core/chutney" ), 413 "torspec#" : ( "torspec#", "tpo/core/torspec" ), 414 "trunnel#" : ( "trunnel#", "tpo/core/trunnel" ), 415 "torsocks#" : ( "torsocks#", "tpo/core/torsocks"), 416 } 417 418 # Let's turn bugs to html. 419 BUG_PAT = re.compile(r'(bug|ticket|issue|feature)\s+([\w/]+#)?(\d{4,6})', re.I) 420 def bug_html(m): 421 kind = m.group(1) 422 prefix = m.group(2) or "" 423 bugno = m.group(3) 424 try: 425 disp_prefix, url_prefix = ISSUE_PREFIX_MAP[prefix] 426 except KeyError: 427 print("Can't figure out URL for {}{}".format(prefix,bugno), 428 file=sys.stderr) 429 return "{} {}{}".format(kind, prefix, bugno) 430 431 return "{} <a href='https://bugs.torproject.org/{}/{}'>{}{}</a>".format( 432 kind, url_prefix, bugno, disp_prefix, bugno) 433 434 class HTMLChangeLog(ChangeLog): 435 def __init__(self, *args, **kwargs): 436 ChangeLog.__init__(self, *args, **kwargs) 437 438 def htmlText(self, graf): 439 output = [] 440 for line in graf: 441 line = line.rstrip().replace("&","&") 442 line = line.rstrip().replace("<","<").replace(">",">") 443 output.append(line.strip()) 444 output = " ".join(output) 445 output = BUG_PAT.sub(bug_html, output) 446 sys.stdout.write(output) 447 448 def htmlPar(self, graf): 449 sys.stdout.write("<p>") 450 self.htmlText(graf) 451 sys.stdout.write("</p>\n") 452 453 def dumpPreheader(self, graf): 454 self.htmlPar(graf) 455 456 def dumpMainhead(self, head): 457 sys.stdout.write("<h2>%s</h2>"%head) 458 459 def dumpHeadGraf(self, graf): 460 self.htmlPar(graf) 461 462 def dumpSectionHeader(self, header): 463 header = header.replace(" o ", "", 1).lstrip() 464 sys.stdout.write(" <li>%s\n"%header) 465 sys.stdout.write(" <ul>\n") 466 467 def dumpEndOfSection(self): 468 sys.stdout.write(" </ul>\n\n") 469 470 def dumpEndOfChangelog(self): 471 pass 472 473 def dumpStartOfSections(self): 474 print("<ul>\n") 475 476 def dumpEndOfSections(self): 477 print("</ul>\n") 478 479 def dumpDrupalBreak(self): 480 print("\n</ul>\n") 481 print("<p> </p>") 482 print("\n<!--break-->\n\n") 483 print("<ul>") 484 485 def dumpItem(self, grafs): 486 grafs[0][0] = grafs[0][0].replace(" - ", "", 1).lstrip() 487 sys.stdout.write(" <li>") 488 if len(grafs) > 1: 489 for par in grafs: 490 self.htmlPar(par) 491 else: 492 self.htmlText(grafs[0]) 493 print() 494 495 op = optparse.OptionParser(usage="usage: %prog [options] [filename]") 496 op.add_option('-W', '--no-wrap', action='store_false', 497 dest='wrapText', default=True, 498 help='Do not re-wrap paragraphs') 499 op.add_option('-S', '--no-sort', action='store_false', 500 dest='sort', default=True, 501 help='Do not sort or collate sections') 502 op.add_option('-o', '--output', dest='output', 503 default='-', metavar='FILE', help="write output to FILE") 504 op.add_option('-H', '--html', action='store_true', 505 dest='html', default=False, 506 help="generate an HTML fragment") 507 op.add_option('-1', '--first', action='store_true', 508 dest='firstOnly', default=False, 509 help="write only the first section") 510 op.add_option('-b', '--blog-header', action='store_true', 511 dest='blogOrder', default=False, 512 help="Write the header in blog order") 513 op.add_option('-B', '--blog', action='store_true', 514 dest='blogFormat', default=False, 515 help="Set all other options as appropriate for a blog post") 516 op.add_option('--inplace', action='store_true', 517 dest='inplace', default=False, 518 help="Alter the ChangeLog in place") 519 op.add_option('--drupal-break', action='store_true', 520 dest='drupalBreak', default=False, 521 help='Insert a drupal-friendly <!--break--> as needed') 522 523 options,args = op.parse_args() 524 525 if options.blogFormat: 526 options.blogOrder = True 527 options.html = True 528 options.sort = False 529 options.wrapText = False 530 options.firstOnly = True 531 options.drupalBreak = True 532 533 if len(args) > 1: 534 op.error("Too many arguments") 535 elif len(args) == 0: 536 fname = 'ChangeLog' 537 else: 538 fname = args[0] 539 540 if options.inplace: 541 assert options.output == '-' 542 options.output = fname 543 544 if fname != '-': 545 sys.stdin = open(fname, 'r') 546 547 nextline = None 548 549 if options.html: 550 ChangeLogClass = HTMLChangeLog 551 else: 552 ChangeLogClass = ChangeLog 553 554 CL = ChangeLogClass(wrapText=options.wrapText, 555 blogOrder=options.blogOrder, 556 drupalBreak=options.drupalBreak) 557 parser = head_parser 558 559 for line in sys.stdin: 560 line = line.rstrip() 561 tp = parser(line) 562 563 if tp == TP_SECHEAD: 564 parser = body_parser 565 elif tp == TP_END: 566 nextline = line 567 break 568 569 CL.addLine(tp,line) 570 571 CL.lint() 572 573 if options.output != '-': 574 fname_new = options.output+".new" 575 fname_out = options.output 576 sys.stdout = open(fname_new, 'w') 577 else: 578 fname_new = fname_out = None 579 580 if options.sort: 581 CL.collateAndSortSections() 582 583 CL.dump() 584 585 if options.firstOnly: 586 sys.exit(0) 587 588 if nextline is not None: 589 print(nextline) 590 591 for line in sys.stdin: 592 sys.stdout.write(line) 593 594 if fname_new is not None: 595 os.rename(fname_new, fname_out)