bench_html.py (1570B)
"""pyperf benchmarks for html5lib parsing and serialization.

Each benchmark is run once per tree builder listed in ``TREEBUILDERS``.
An optional positional command-line argument (one of ``BENCHMARKS``)
restricts the run to a single benchmark family.
"""
import io
import os
import sys

import pyperf

# Put the parent directory first on sys.path so that the ``html5lib`` found
# there is imported in preference to any installed copy.
sys.path[0:0] = [os.path.join(os.path.dirname(__file__), "..")]
import html5lib  # noqa: E402


def bench_parse(fh, treebuilder):
    """Parse the document in *fh* once with the given tree builder.

    :arg fh: seekable binary file-like object holding the HTML source; it is
        rewound before parsing so the same object can be reused across calls
    :arg treebuilder: tree builder name passed through to ``html5lib.parse``
    """
    fh.seek(0)
    html5lib.parse(fh, treebuilder=treebuilder, useChardet=False)


def bench_serialize(loops, fh, treebuilder):
    """Time *loops* serializations of the document read from *fh*.

    The document is parsed once, outside the timed region, so that only
    ``html5lib.serialize`` is measured.

    :arg loops: number of serializations to perform inside the timed region
    :arg fh: seekable binary file-like object holding the HTML source
    :arg treebuilder: tree builder name used both to parse and to serialize
    :returns: elapsed time for the whole loop, as the difference between two
        ``pyperf.perf_counter()`` readings
    """
    fh.seek(0)
    doc = html5lib.parse(fh, treebuilder=treebuilder, useChardet=False)

    # Build the range before starting the clock so its construction is not
    # included in the measurement.
    range_it = range(loops)
    t0 = pyperf.perf_counter()

    # The loop index is unused; ``_`` avoids rebinding the ``loops`` parameter.
    for _ in range_it:
        html5lib.serialize(doc, tree=treebuilder, encoding="ascii", inject_meta_charset=False)

    return pyperf.perf_counter() - t0


BENCHMARKS = ["parse", "serialize"]

# Tree builders exercised by every benchmark family.
TREEBUILDERS = ("etree", "dom", "lxml")


def add_cmdline_args(cmd, args):
    """Append the selected benchmark name, if any, to the command list *cmd*.

    Passed to ``pyperf.Runner`` as its ``add_cmdline_args`` hook.

    :arg cmd: list of command-line arguments, extended in place
    :arg args: parsed arguments; only ``args.benchmark`` is read
    """
    if args.benchmark:
        cmd.append(args.benchmark)


if __name__ == "__main__":
    runner = pyperf.Runner(add_cmdline_args=add_cmdline_args)
    runner.metadata["description"] = "Run benchmarks based on Anolis"
    runner.argparser.add_argument("benchmark", nargs="?", choices=BENCHMARKS)

    args = runner.parse_args()
    if args.benchmark:
        benchmarks = (args.benchmark,)
    else:
        benchmarks = BENCHMARKS

    # Read the sample document once and keep it in memory; each benchmark
    # function rewinds this buffer itself.
    with open(os.path.join(os.path.dirname(__file__), "data", "html.html"), "rb") as fh:
        source = io.BytesIO(fh.read())

    if "parse" in benchmarks:
        for tb in TREEBUILDERS:
            runner.bench_func("html_parse_%s" % tb, bench_parse, source, tb)

    if "serialize" in benchmarks:
        for tb in TREEBUILDERS:
            runner.bench_time_func("html_serialize_%s" % tb, bench_serialize, source, tb)