tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

update_html5lib_tests.py (6997B)


      1 import glob
      2 import hashlib
      3 import itertools
      4 import json
      5 import os
      6 import re
      7 import shutil
      8 import site
      9 import subprocess
     10 import sys
     11 import tempfile
     12 import urllib
     13 from importlib import reload
     14 
     15 
     16 import genshi
     17 from genshi.template import MarkupTemplate
     18 
     19 
# Path, relative to the repository root, of the directory that get_paths()
# returns as the output location for the generated test files.
TESTS_PATH = "html/syntax/parsing/"
     21 
     22 def get_paths():
     23    script_path = os.path.dirname(os.path.abspath(__file__))
     24    repo_base = get_repo_base(script_path)
     25    tests_path = os.path.join(repo_base, TESTS_PATH)
     26    return script_path, tests_path
     27 
     28 
     29 def get_repo_base(path):
     30    while path:
     31        if os.path.exists(os.path.join(path, ".git")):
     32            return path
     33        else:
     34            path = os.path.dirname(path)
     35 
     36 
     37 def get_expected(data):
     38    data = "#document\n" + data
     39    return data
     40 
     41 
     42 def get_hash(data, container=None):
     43    if container == None:
     44        container = ""
     45    return hashlib.sha1(b"#container%s#data%s"%(container.encode("utf8"),
     46                                               data.encode("utf8"))).hexdigest()
     47 
     48 
     49 class Html5libInstall:
     50    def __init__(self, rev=None, tests_rev=None):
     51        self.html5lib_dir = None
     52        self.rev = rev
     53        self.tests_rev = tests_rev
     54 
     55    def __enter__(self):
     56        self.html5lib_dir = tempfile.TemporaryDirectory()
     57        html5lib_path = self.html5lib_dir.__enter__()
     58        html5lib_python_path = os.path.join(html5lib_path, "html5lib")
     59        html5lib_tests_path = os.path.join(
     60            html5lib_python_path, "html5lib", "tests", "testdata"
     61        )
     62 
     63        subprocess.check_call(
     64            [
     65                "git",
     66                "clone",
     67                "--no-checkout",
     68                "https://github.com/html5lib/html5lib-python.git",
     69                "html5lib",
     70            ],
     71            cwd=html5lib_path,
     72        )
     73 
     74        rev = self.rev if self.rev is not None else "origin/master"
     75        subprocess.check_call(
     76            ["git", "checkout", rev], cwd=html5lib_python_path
     77        )
     78 
     79        subprocess.check_call(
     80            [
     81                "git",
     82                "submodule",
     83                "update",
     84                "--init",
     85                "--recursive",
     86            ],
     87            cwd=html5lib_python_path,
     88        )
     89 
     90        subprocess.check_call(["pip", "install", "-e", "html5lib"], cwd=html5lib_path)
     91        reload(site)
     92 
     93        tests_rev = self.tests_rev if self.tests_rev is not None else "origin/master"
     94        subprocess.check_call(["git", "checkout", tests_rev], cwd=html5lib_tests_path)
     95 
     96    def __exit__(self, *args, **kwargs):
     97        subprocess.call(["pip", "uninstall", "-y", "html5lib"], cwd=self.html5lib_dir.name)
     98        self.html5lib_dir.__exit__(*args, **kwargs)
     99        self.html5lib_dir = None
    100 
    101 
    102 def make_tests(script_dir, out_dir, input_file_name, test_data):
    103    tests = []
    104    innerHTML_tests = []
    105    ids_seen = {}
    106    print(input_file_name)
    107    for test in test_data:
    108        if "script-off" in test:
    109            continue
    110        is_innerHTML = "document-fragment" in test
    111        data = test["data"]
    112        container = test["document-fragment"] if is_innerHTML else None
    113        assert test["document"], test
    114        expected = get_expected(test["document"])
    115        test_list = innerHTML_tests if is_innerHTML else tests
    116        test_id = get_hash(data, container)
    117        if test_id in ids_seen:
    118            print("WARNING: id %s seen multiple times in file %s this time for test (%s, %s) before for test %s, skipping"%(test_id, input_file_name, container, data, ids_seen[test_id]))
    119            continue
    120        ids_seen[test_id] = (container, data)
    121        test_list.append({'string_uri_encoded_input':"\"%s\""%urllib.parse.quote(data.encode("utf8")),
    122                          'input':data,
    123                          'expected':expected,
    124                          'string_escaped_expected':json.dumps(urllib.parse.quote(expected.encode("utf8"))),
    125                          'id':test_id,
    126                          'container':container
    127                          })
    128    path_normal = None
    129    if tests:
    130        path_normal = write_test_file(script_dir, out_dir,
    131                                      tests, "html5lib_%s"%input_file_name,
    132                                      "html5lib_test.xml")
    133    path_innerHTML = None
    134    if innerHTML_tests:
    135        path_innerHTML = write_test_file(script_dir, out_dir,
    136                                         innerHTML_tests, "html5lib_innerHTML_%s"%input_file_name,
    137                                         "html5lib_test_fragment.xml")
    138 
    139    return path_normal, path_innerHTML
    140 
    141 def write_test_file(script_dir, out_dir, tests, file_name, template_file_name):
    142    file_name = os.path.join(out_dir, file_name + ".html")
    143    short_name = os.path.basename(file_name)
    144 
    145    with open(os.path.join(script_dir, template_file_name), "r") as f:
    146        template = MarkupTemplate(f)
    147 
    148    stream = template.generate(file_name=short_name, tests=tests)
    149 
    150    with open(file_name, "w") as f:
    151        f.write(str(stream.render('html', doctype='html5',
    152                              encoding="utf8"), "utf-8"))
    153    return file_name
    154 
    155 def escape_js_string(in_data):
    156    return in_data.encode("utf8").encode("string-escape")
    157 
    158 def serialize_filenames(test_filenames):
    159    return "[" + ",\n".join("\"%s\""%item for item in test_filenames) + "]"
    160 
    161 def main():
    162    script_dir, out_dir = get_paths()
    163 
    164    test_files = []
    165    inner_html_files = []
    166    with open(os.path.join(script_dir, "html5lib_python_revision"), "r") as f:
    167        html5lib_rev = f.read().strip()
    168 
    169    with open(os.path.join(script_dir, "html5lib_tests_revision"), "r") as f:
    170        html5lib_tests_rev = f.read().strip()
    171 
    172    with Html5libInstall(html5lib_rev, html5lib_tests_rev):
    173        from html5lib.tests import support
    174 
    175        if len(sys.argv) > 2:
    176            test_iterator = zip(
    177                itertools.repeat(False),
    178                sorted(os.path.abspath(item) for item in
    179                       glob.glob(os.path.join(sys.argv[2], "*.dat"))))
    180        else:
    181            test_iterator = itertools.chain(
    182                zip(itertools.repeat(False),
    183                               sorted(support.get_data_files("tree-construction"))),
    184                zip(itertools.repeat(True),
    185                               sorted(support.get_data_files(
    186                            os.path.join("tree-construction", "scripted")))))
    187 
    188        for (scripted, test_file) in test_iterator:
    189            input_file_name = os.path.splitext(os.path.basename(test_file))[0]
    190            if scripted:
    191                input_file_name = "scripted_" + input_file_name
    192            test_data = support.TestData(test_file)
    193            test_filename, inner_html_file_name = make_tests(script_dir, out_dir,
    194                                                             input_file_name, test_data)
    195            if test_filename is not None:
    196                test_files.append(test_filename)
    197            if inner_html_file_name is not None:
    198                inner_html_files.append(inner_html_file_name)
    199 
# Run the generator only when executed as a script, not when imported.
if __name__ == "__main__":
    main()