benchmark.py (4219B)
#!/usr/bin/env python

"""Benchmark zlib ``wbits`` / ``memLevel`` settings on a corpus of messages.

Usage::

    benchmark.py corpus   # fetch commits from the GitHub API into corpus.pkl
    benchmark.py run      # compress corpus.pkl with every setting and report

"""

import collections
import getpass
import json
import pickle
import subprocess
import sys
import time
import zlib


CORPUS_FILE = "corpus.pkl"

REPEAT = 10

WB, ML = 12, 5  # defaults used as a reference

# Parameter grid explored by the benchmark (rows and columns of every table).
_WBITS = range(9, 16)
_MEM_LEVELS = range(1, 10)

_COMMIT_URL = (
    "https://api.github.com/repos/python-websockets/websockets/git/commits/{sha}"
)


def _corpus():
    """Fetch every commit reachable from HEAD through the GitHub API.

    Prompts for an OAuth token, resolves ``HEAD`` with ``git rev-parse`` in
    the current working directory, then walks the ``parents`` links
    breadth-first, fetching each commit with ``curl`` and sleeping one second
    between requests.

    Returns:
        A list of raw API responses (``bytes``), one per commit, in the order
        in which they were fetched.

    Raises:
        subprocess.CalledProcessError: if ``git`` or ``curl`` exits with a
            non-zero status.
        KeyError: if a response has no ``"parents"`` entry.
    """
    oauth_token = getpass.getpass("OAuth Token? ")
    auth_header = f"Authorization: token {oauth_token}"

    # Commands are passed as argument lists rather than shell strings so the
    # token can never be interpreted by a shell.
    head = subprocess.check_output(["git", "rev-parse", "HEAD"]).decode().strip()

    commits = []
    todo = collections.deque([head])
    queued = {head}  # every sha that has been scheduled, fetched or not

    while todo:
        sha = todo.popleft()
        commit = subprocess.check_output(
            ["curl", "-H", auth_header, _COMMIT_URL.format(sha=sha)]
        )
        commits.append(commit)
        for parent in json.loads(commit)["parents"]:
            parent_sha = parent["sha"]
            if parent_sha not in queued:
                queued.add(parent_sha)
                todo.append(parent_sha)
        time.sleep(1)  # rate throttling

    return commits


def corpus():
    """Download the corpus and pickle it to ``CORPUS_FILE``."""
    data = _corpus()
    with open(CORPUS_FILE, "wb") as handle:
        pickle.dump(data, handle)


def _print_table(title, cell):
    """Print one tab-separated table with a row per wbits and a column per memLevel.

    Args:
        title: Heading printed between two rules of ``=``.
        cell: Callable ``cell(wbits, mem_level)`` returning the text of a cell.
    """
    rule = "=" * 79
    print(rule)
    print(title)
    print(rule)
    print("\t".join(["wb \\ ml"] + [str(mem_level) for mem_level in _MEM_LEVELS]))
    for wbits in _WBITS:
        print(
            "\t".join(
                [str(wbits)] + [cell(wbits, mem_level) for mem_level in _MEM_LEVELS]
            )
        )
    print(rule)
    print()


def _run(data):
    """Compress *data* with every wbits / memLevel pair and print four tables.

    For each setting a single raw-deflate compressor is created and the whole
    corpus is fed through it ``REPEAT`` times, one sync-flushed message per
    item. The tables report the compression ratio, the time per pass, and the
    size and time relative to the reference setting ``WB`` / ``ML``.

    Args:
        data: Iterable of ``bytes`` or ``str`` messages; ``str`` items are
            encoded as UTF-8.

    Raises:
        ValueError: if the corpus contains no bytes at all.
    """
    # Encode once, outside the timed loop, so that sizes are measured in bytes
    # and text encoding is not counted as compression time.
    payloads = [
        item.encode("utf-8") if isinstance(item, str) else item for item in data
    ]
    corpus_size = sum(len(payload) for payload in payloads)
    if corpus_size == 0:
        raise ValueError("corpus is empty: nothing to compress")
    # The compressed size below is accumulated over REPEAT passes, so the raw
    # size must cover the same number of passes for the ratio to be meaningful.
    raw_size = REPEAT * corpus_size

    size = {}
    duration = {}

    for wbits in _WBITS:
        size[wbits] = {}
        duration[wbits] = {}

        for mem_level in _MEM_LEVELS:
            encoder = zlib.compressobj(wbits=-wbits, memLevel=mem_level)
            encoded = []

            t0 = time.perf_counter()

            for _ in range(REPEAT):
                for payload in payloads:
                    # Taken from PerMessageDeflate.encode
                    frame = encoder.compress(payload) + encoder.flush(
                        zlib.Z_SYNC_FLUSH
                    )
                    if frame.endswith(b"\x00\x00\xff\xff"):
                        frame = frame[:-4]
                    encoded.append(frame)

            t1 = time.perf_counter()

            size[wbits][mem_level] = sum(len(frame) for frame in encoded)
            duration[wbits][mem_level] = (t1 - t0) / REPEAT

    _print_table(
        "Compression ratio",
        lambda wb, ml: f"{100 * (1 - size[wb][ml] / raw_size):.1f}%",
    )
    _print_table(
        "CPU time",
        lambda wb, ml: f"{1000 * duration[wb][ml]:.1f}ms",
    )
    _print_table(
        f"Size vs. {WB} \\ {ML}",
        lambda wb, ml: f"{100 * (size[wb][ml] / size[WB][ML] - 1):.1f}%",
    )
    _print_table(
        f"Time vs. {WB} \\ {ML}",
        lambda wb, ml: f"{100 * (duration[wb][ml] / duration[WB][ML] - 1):.1f}%",
    )


def run():
    """Load the pickled corpus from ``CORPUS_FILE`` and benchmark it."""
    # NOTE: pickle.load executes arbitrary code from the file. Only use it on
    # a corpus generated locally by the ``corpus`` command, never on a file
    # obtained from an untrusted source.
    with open(CORPUS_FILE, "rb") as handle:
        data = pickle.load(handle)
    _run(data)


def _main(argv):
    """Dispatch ``argv[1]`` to a command, or print usage if it is missing or unknown."""
    # Explicit table: only the documented commands can be invoked.
    commands = {"corpus": corpus, "run": run}
    try:
        command = commands[argv[1]]
    except (KeyError, IndexError):
        print(f"Usage: {argv[0]} [corpus|run]")
    else:
        command()


if __name__ == "__main__":
    _main(sys.argv)