tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

bench_html.py (1570B)


      1 import io
      2 import os
      3 import sys
      4 
      5 import pyperf
      6 
      7 sys.path[0:0] = [os.path.join(os.path.dirname(__file__), "..")]
      8 import html5lib  # noqa: E402
      9 
     10 
     11 def bench_parse(fh, treebuilder):
     12    fh.seek(0)
     13    html5lib.parse(fh, treebuilder=treebuilder, useChardet=False)
     14 
     15 
     16 def bench_serialize(loops, fh, treebuilder):
     17    fh.seek(0)
     18    doc = html5lib.parse(fh, treebuilder=treebuilder, useChardet=False)
     19 
     20    range_it = range(loops)
     21    t0 = pyperf.perf_counter()
     22 
     23    for loops in range_it:
     24        html5lib.serialize(doc, tree=treebuilder, encoding="ascii", inject_meta_charset=False)
     25 
     26    return pyperf.perf_counter() - t0
     27 
     28 
# Names of the benchmark groups this script can run. Used as the allowed
# values of the optional "benchmark" command-line argument, and as the set
# of groups to run when that argument is omitted.
BENCHMARKS = ["parse", "serialize"]
     30 
     31 
     32 def add_cmdline_args(cmd, args):
     33    if args.benchmark:
     34        cmd.append(args.benchmark)
     35 
     36 
     37 if __name__ == "__main__":
     38    runner = pyperf.Runner(add_cmdline_args=add_cmdline_args)
     39    runner.metadata["description"] = "Run benchmarks based on Anolis"
     40    runner.argparser.add_argument("benchmark", nargs="?", choices=BENCHMARKS)
     41 
     42    args = runner.parse_args()
     43    if args.benchmark:
     44        benchmarks = (args.benchmark,)
     45    else:
     46        benchmarks = BENCHMARKS
     47 
     48    with open(os.path.join(os.path.dirname(__file__), "data", "html.html"), "rb") as fh:
     49        source = io.BytesIO(fh.read())
     50 
     51    if "parse" in benchmarks:
     52        for tb in ("etree", "dom", "lxml"):
     53            runner.bench_func("html_parse_%s" % tb, bench_parse, source, tb)
     54 
     55    if "serialize" in benchmarks:
     56        for tb in ("etree", "dom", "lxml"):
     57            runner.bench_time_func("html_serialize_%s" % tb, bench_serialize, source, tb)