tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

sourcefile.py (42434B)


      1 import hashlib
      2 import re
      3 import os
      4 from collections import deque
      5 from fnmatch import fnmatch
      6 from io import BytesIO
      7 from typing import (Any, BinaryIO, Callable, Deque, Dict, Iterable, List,
      8                    Optional, Pattern, Set, Text, Tuple, TypedDict, Union)
      9 from urllib.parse import parse_qs, urlparse, urljoin
     10 
     11 try:
     12    from xml.etree import cElementTree as ElementTree
     13 except ImportError:
     14    from xml.etree import ElementTree as ElementTree  # type: ignore
     15 
     16 import html5lib
     17 
     18 from . import XMLParser
     19 from .item import (ConformanceCheckerTest,
     20                   CrashTest,
     21                   ManifestItem,
     22                   ManualTest,
     23                   PrintRefTest,
     24                   RefTest,
     25                   SpecItem,
     26                   SupportFile,
     27                   TestharnessTest,
     28                   VisualTest,
     29                   WebDriverSpecTest)
     30 from .utils import cached_property
     31 
     32 # Cannot do `from ..metadata.webfeatures.schema import WEB_FEATURES_YML_FILENAME`
     33 # because relative import beyond toplevel throws *ImportError*!
     34 from metadata.webfeatures.schema import WEB_FEATURES_YML_FILENAME  # type: ignore
     35 
# Glob that a file name must match (via fnmatch) to count as a WebDriver spec test.
wd_pattern = "*.py"
# Matches a ``// META: key=value`` line; group 1 is the key, group 2 the value.
js_meta_re = re.compile(br"//\s*META:\s*(\w*)=(.*)$")
# Matches a ``# META: key=value`` line; group 1 is the key, group 2 the value.
python_meta_re = re.compile(br"#\s*META:\s*(\w*)=(.*)$")

# Matches "ref" or "notref", optionally followed by digits, as a component
# delimited by "-"/"_" or by the start/end of the searched name.
reference_file_re = re.compile(r'(^|[\-_])(not)?ref[0-9]*([\-_]|$)')

# All of html5lib's space characters joined into a single string.
space_chars: Text = "".join(html5lib.constants.spaceCharacters)  # type: ignore[attr-defined]
     43 
     44 
     45 def replace_end(s: Text, old: Text, new: Text) -> Text:
     46    """
     47    Given a string `s` that ends with `old`, replace that occurrence of `old`
     48    with `new`.
     49    """
     50    assert s.endswith(old)
     51    return s[:-len(old)] + new
     52 
     53 
     54 def read_script_metadata(f: BinaryIO, regexp: Pattern[bytes]) -> Iterable[Tuple[Text, Text]]:
     55    """
     56    Yields any metadata (pairs of strings) from the file-like object `f`,
     57    as specified according to a supplied regexp.
     58 
     59    `regexp` - Regexp containing two groups containing the metadata name and
     60               value.
     61    """
     62    for line in f:
     63        assert isinstance(line, bytes), line
     64        m = regexp.match(line)
     65        if not m:
     66            break
     67 
     68        yield (m.groups()[0].decode("utf8"), m.groups()[1].decode("utf8"))
     69 
     70 
class VariantData(TypedDict, total=False):
    """Settings attached to one entry of `_any_variants`; every key is optional."""
    # Filename suffix for the variant; `global_suffixes` falls back to
    # ".any.<variant>.html" when it is absent.
    suffix: str
    # NOTE(review): presumably marks variants that must be served over https;
    # no consumer of this key is visible in this part of the file.
    force_https: bool
    # Names of the variants a shorthand keyword expands to (see `get_any_variants`).
    longhand: Set[str]
     75 
     76 
# Table of the variant keywords understood by `get_any_variants`. A keyword
# with a "longhand" entry is shorthand for the listed variants; any other
# keyword stands for itself. "suffix" overrides the default
# ".any.<variant>.html" suffix computed in `global_suffixes`.
_any_variants: Dict[Text, VariantData] = {
    "window": {"suffix": ".any.html"},
    "window-module": {},
    "serviceworker": {"force_https": True},
    "serviceworker-module": {"force_https": True},
    "sharedworker": {},
    "sharedworker-module": {},
    "dedicatedworker": {"suffix": ".any.worker.html"},
    "dedicatedworker-module": {"suffix": ".any.worker-module.html"},
    "worker": {"longhand": {"dedicatedworker", "sharedworker", "serviceworker"}},
    "worker-module": {},
    "shadowrealm-in-window": {},
    "shadowrealm-in-shadowrealm": {},
    "shadowrealm-in-dedicatedworker": {},
    "shadowrealm-in-sharedworker": {},
    "shadowrealm-in-serviceworker": {
        "force_https": True,
        "suffix": ".https.any.shadowrealm-in-serviceworker.html",
    },
    "shadowrealm-in-audioworklet": {
        "force_https": True,
        "suffix": ".https.any.shadowrealm-in-audioworklet.html",
    },
    "shadowrealm": {"longhand": {
        "shadowrealm-in-window",
        "shadowrealm-in-shadowrealm",
        "shadowrealm-in-dedicatedworker",
        "shadowrealm-in-sharedworker",
        "shadowrealm-in-serviceworker",
        "shadowrealm-in-audioworklet",
    }},
    "jsshell": {"suffix": ".any.js"},
}
    110 
    111 
    112 def get_any_variants(item: Text) -> Set[Text]:
    113    """
    114    Returns a set of variants (strings) defined by the given keyword.
    115    """
    116    assert isinstance(item, str), item
    117 
    118    variant = _any_variants.get(item, None)
    119    if variant is None:
    120        return set()
    121 
    122    return variant.get("longhand", {item})
    123 
    124 
    125 def get_default_any_variants() -> Set[Text]:
    126    """
    127    Returns a set of variants (strings) that will be used by default.
    128    """
    129    return set({"window", "dedicatedworker"})
    130 
    131 
    132 def parse_variants(value: Text) -> Set[Text]:
    133    """
    134    Returns a set of variants (strings) defined by a comma-separated value.
    135    """
    136    assert isinstance(value, str), value
    137 
    138    if value == "":
    139        return get_default_any_variants()
    140 
    141    globals = set()
    142    for item in value.split(","):
    143        item = item.strip()
    144        globals |= get_any_variants(item)
    145    return globals
    146 
    147 
    148 def global_suffixes(value: Text) -> Set[Tuple[Text, bool]]:
    149    """
    150    Yields tuples of the relevant filename suffix (a string) and whether the
    151    variant is intended to run in a JS shell, for the variants defined by the
    152    given comma-separated value.
    153    """
    154    assert isinstance(value, str), value
    155 
    156    rv = set()
    157 
    158    global_types = parse_variants(value)
    159    for global_type in global_types:
    160        variant = _any_variants[global_type]
    161        suffix = variant.get("suffix", ".any.%s.html" % global_type)
    162        rv.add((suffix, global_type == "jsshell"))
    163 
    164    return rv
    165 
    166 
    167 def global_variant_url(url: Text, suffix: Text) -> Text:
    168    """
    169    Returns a url created from the given url and suffix (all strings).
    170    """
    171    url = url.replace(".any.", ".")
    172    # If the url must be loaded over https, ensure that it will have
    173    # the form .https.any.js
    174    if ".https." in url and suffix.startswith(".https."):
    175        url = url.replace(".https.", ".")
    176    elif ".h2." in url and suffix.startswith(".h2."):
    177        url = url.replace(".h2.", ".")
    178    return replace_end(url, ".js", suffix)
    179 
    180 
    181 def _parse_html(f: BinaryIO) -> ElementTree.Element:
    182    return html5lib.parse(f, treebuilder="etree", useChardet=False)
    183 
    184 def _parse_xml(f: BinaryIO) -> ElementTree.Element:
    185    try:
    186        # raises ValueError with an unsupported encoding,
    187        # ParseError when there's an undefined entity
    188        return ElementTree.parse(f).getroot()
    189    except (ValueError, ElementTree.ParseError):
    190        f.seek(0)
    191        return ElementTree.parse(f, XMLParser.XMLParser()).getroot()  # type: ignore
    192 
    193 
    194 class SourceFile:
    # Maps a markup type (as returned by `markup_type`) to the function used
    # to parse a file of that type.
    parsers: Dict[Text, Callable[[BinaryIO], ElementTree.Element]] = {"html":_parse_html,
                                                                      "xhtml":_parse_xml,
                                                                      "svg":_parse_xml}

    # A path whose first component is one of these is in a non-test directory.
    root_dir_non_test = {"common"}

    # A path with any component equal to one of these is in a non-test directory.
    dir_non_test = {"resources",
                    "support",
                    "tools"}

    # A path that starts with one of these component sequences is in a
    # non-test directory.
    dir_path_non_test: Set[Tuple[Text, ...]] = {("css21", "archive"),
                                                ("css", "CSS2", "archive"),
                                                ("css", "common")}
    208 
    209    def __init__(self, tests_root: Text,
    210                 rel_path: Text,
    211                 url_base: Text,
    212                 hash: Optional[Text] = None,
    213                 contents: Optional[bytes] = None) -> None:
    214        """Object representing a file in a source tree.
    215 
    216        :param tests_root: Path to the root of the source tree
    217        :param rel_path_str: File path relative to tests_root
    218        :param url_base: Base URL used when converting file paths to urls
    219        :param contents: Byte array of the contents of the file or ``None``.
    220        """
    221 
    222        assert not os.path.isabs(rel_path), rel_path
    223        if os.name == "nt":
    224            # do slash normalization on Windows
    225            rel_path = rel_path.replace("/", "\\")
    226 
    227        dir_path, filename = os.path.split(rel_path)
    228        name, ext = os.path.splitext(filename)
    229 
    230        type_flag = None
    231        if "-" in name:
    232            type_meta = name.rsplit("-", 1)[1].split(".")
    233            type_flag = type_meta[0]
    234            meta_flags = type_meta[1:]
    235        else:
    236            meta_flags = name.split(".")[1:]
    237 
    238        self.tests_root: Text = tests_root
    239        self.rel_path: Text = rel_path
    240        self.dir_path: Text = dir_path
    241        self.filename: Text = filename
    242        self.name: Text = name
    243        self.ext: Text = ext
    244        self.type_flag: Optional[Text] = type_flag
    245        self.meta_flags: Union[List[bytes], List[Text]] = meta_flags
    246        self.url_base = url_base
    247        self.contents = contents
    248        self.items_cache: Optional[Tuple[Text, List[ManifestItem]]] = None
    249        self._hash = hash
    250 
    251    def __getstate__(self) -> Dict[str, Any]:
    252        # Remove computed properties if we pickle this class
    253        rv = self.__dict__.copy()
    254 
    255        if "__cached_properties__" in rv:
    256            cached_properties = rv["__cached_properties__"]
    257            rv = {key:value for key, value in rv.items() if key not in cached_properties}
    258            del rv["__cached_properties__"]
    259        return rv
    260 
    261    def name_prefix(self, prefix: Text) -> bool:
    262        """Check if the filename starts with a given prefix
    263 
    264        :param prefix: The prefix to check"""
    265        return self.name.startswith(prefix)
    266 
    267    def is_dir(self) -> bool:
    268        """Return whether this file represents a directory."""
    269        if self.contents is not None:
    270            return False
    271 
    272        return os.path.isdir(self.rel_path)
    273 
    274    def open(self) -> BinaryIO:
    275        """
    276        Return either
    277        * the contents specified in the constructor, if any;
    278        * a File object opened for reading the file contents.
    279        """
    280        if self.contents is not None:
    281            file_obj: BinaryIO = BytesIO(self.contents)
    282        else:
    283            file_obj = open(self.path, 'rb')
    284        return file_obj
    285 
    286    @cached_property
    287    def rel_path_parts(self) -> Tuple[Text, ...]:
    288        return tuple(self.rel_path.split(os.path.sep))
    289 
    290    @cached_property
    291    def path(self) -> Text:
    292        return os.path.join(self.tests_root, self.rel_path)
    293 
    294    @cached_property
    295    def rel_url(self) -> Text:
    296        assert not os.path.isabs(self.rel_path), self.rel_path
    297        return self.rel_path.replace(os.sep, "/")
    298 
    299    @cached_property
    300    def url(self) -> Text:
    301        return urljoin(self.url_base, self.rel_url)
    302 
    303    @cached_property
    304    def hash(self) -> Text:
    305        if not self._hash:
    306            with self.open() as f:
    307                content = f.read()
    308 
    309            data = b"".join((b"blob ", b"%d" % len(content), b"\0", content))
    310            self._hash = str(hashlib.sha1(data).hexdigest())
    311 
    312        return self._hash
    313 
    314    def in_non_test_dir(self) -> bool:
    315        if self.dir_path == "":
    316            return True
    317 
    318        parts = self.rel_path_parts
    319 
    320        if (parts[0] in self.root_dir_non_test or
    321            any(item in self.dir_non_test for item in parts) or
    322            any(parts[:len(path)] == path for path in self.dir_path_non_test)):
    323            return True
    324        return False
    325 
    326    def in_conformance_checker_dir(self) -> bool:
    327        return self.rel_path_parts[0] == "conformance-checkers"
    328 
    329    @property
    330    def name_is_non_test(self) -> bool:
    331        """Check if the file name matches the conditions for the file to
    332        be a non-test file"""
    333        return (self.is_dir() or
    334                self.name_prefix("MANIFEST") or
    335                self.filename == "META.yml" or
    336                self.filename == WEB_FEATURES_YML_FILENAME or
    337                self.filename.startswith(".") or
    338                self.filename.endswith(".headers") or
    339                self.filename.endswith(".ini") or
    340                self.in_non_test_dir())
    341 
    342    @property
    343    def name_is_conformance(self) -> bool:
    344        return (self.in_conformance_checker_dir() and
    345                self.type_flag in ("is-valid", "no-valid"))
    346 
    347    @property
    348    def name_is_conformance_support(self) -> bool:
    349        return self.in_conformance_checker_dir()
    350 
    351    @property
    352    def name_is_manual(self) -> bool:
    353        """Check if the file name matches the conditions for the file to
    354        be a manual test file"""
    355        return self.type_flag == "manual"
    356 
    357    @property
    358    def name_is_visual(self) -> bool:
    359        """Check if the file name matches the conditions for the file to
    360        be a visual test file"""
    361        return self.type_flag == "visual"
    362 
    363    @property
    364    def name_is_multi_global(self) -> bool:
    365        """Check if the file name matches the conditions for the file to
    366        be a multi-global js test file"""
    367        return "any" in self.meta_flags and self.ext == ".js"
    368 
    369    @property
    370    def name_is_worker(self) -> bool:
    371        """Check if the file name matches the conditions for the file to
    372        be a worker js test file"""
    373        return "worker" in self.meta_flags and self.ext == ".js"
    374 
    375    @property
    376    def name_is_window(self) -> bool:
    377        """Check if the file name matches the conditions for the file to
    378        be a window js test file"""
    379        return "window" in self.meta_flags and self.ext == ".js"
    380 
    381    @property
    382    def name_is_extension(self) -> bool:
    383        """Check if the file name matches the conditions for the file to
    384        be a extension js test file"""
    385        return "extension" in self.meta_flags and self.ext == ".js"
    386 
    387    @property
    388    def name_is_webdriver(self) -> bool:
    389        """Check if the file name matches the conditions for the file to
    390        be a webdriver spec test file"""
    391        # wdspec tests are in subdirectories of /webdriver excluding __init__.py
    392        # files.
    393        rel_path_parts = self.rel_path_parts
    394        return (((rel_path_parts[0] == "webdriver" and len(rel_path_parts) > 1) or
    395                 (rel_path_parts[:2] == ("infrastructure", "webdriver") and
    396                  len(rel_path_parts) > 2)) and
    397                self.filename not in ("__init__.py", "conftest.py") and
    398                fnmatch(self.filename, wd_pattern))
    399 
    400    @property
    401    def name_is_reference(self) -> bool:
    402        """Check if the file name matches the conditions for the file to
    403        be a reference file (not a reftest)"""
    404        return "/reference/" in self.url or bool(reference_file_re.search(self.name))
    405 
    406    @property
    407    def name_is_crashtest(self) -> bool:
    408        return (self.markup_type is not None and
    409                (self.type_flag == "crash" or "crashtests" in self.dir_path.split(os.path.sep)))
    410 
    411    @property
    412    def name_is_tentative(self) -> bool:
    413        """Check if the file name matches the conditions for the file to be a
    414        tentative file.
    415 
    416        See https://web-platform-tests.org/writing-tests/file-names.html#test-features"""
    417        return "tentative" in self.meta_flags or "tentative" in self.dir_path.split(os.path.sep)
    418 
    419    @property
    420    def name_is_print_reftest(self) -> bool:
    421        return (self.markup_type is not None and
    422                (self.type_flag == "print" or "print" in self.dir_path.split(os.path.sep)))
    423 
    424    @property
    425    def markup_type(self) -> Optional[Text]:
    426        """Return the type of markup contained in a file, based on its extension,
    427        or None if it doesn't contain markup"""
    428        ext = self.ext
    429 
    430        if not ext:
    431            return None
    432        if ext[0] == ".":
    433            ext = ext[1:]
    434        if ext in ["html", "htm"]:
    435            return "html"
    436        if ext in ["xhtml", "xht", "xml"]:
    437            return "xhtml"
    438        if ext == "svg":
    439            return "svg"
    440        return None
    441 
    442    @cached_property
    443    def root(self) -> Optional[ElementTree.Element]:
    444        """Return an ElementTree Element for the root node of the file if it contains
    445        markup, or None if it does not"""
    446        if not self.markup_type:
    447            return None
    448 
    449        parser = self.parsers[self.markup_type]
    450 
    451        with self.open() as f:
    452            try:
    453                tree = parser(f)
    454            except Exception:
    455                return None
    456 
    457        return tree
    458 
    459    @cached_property
    460    def timeout_nodes(self) -> List[ElementTree.Element]:
    461        """List of ElementTree Elements corresponding to nodes in a test that
    462        specify timeouts"""
    463        assert self.root is not None
    464        return self.root.findall(".//{http://www.w3.org/1999/xhtml}meta[@name='timeout']")
    465 
    466    @cached_property
    467    def pac_nodes(self) -> List[ElementTree.Element]:
    468        """List of ElementTree Elements corresponding to nodes in a test that
    469        specify PAC (proxy auto-config)"""
    470        assert self.root is not None
    471        return self.root.findall(".//{http://www.w3.org/1999/xhtml}meta[@name='pac']")
    472 
    473    @cached_property
    474    def script_metadata(self) -> Optional[List[Tuple[Text, Text]]]:
    475        if self.name_is_worker or self.name_is_multi_global or self.name_is_window or self.name_is_extension:
    476            regexp = js_meta_re
    477        elif self.name_is_webdriver:
    478            regexp = python_meta_re
    479        else:
    480            return None
    481 
    482        with self.open() as f:
    483            return list(read_script_metadata(f, regexp))
    484 
    485    @cached_property
    486    def timeout(self) -> Optional[Text]:
    487        """The timeout of a test or reference file. "long" if the file has an extended timeout
    488        or None otherwise"""
    489        if self.script_metadata:
    490            if any(m == ("timeout", "long") for m in self.script_metadata):
    491                return "long"
    492 
    493        if self.root is None:
    494            return None
    495 
    496        if self.timeout_nodes:
    497            timeout_str: Optional[Text] = self.timeout_nodes[0].attrib.get("content", None)
    498            if timeout_str and timeout_str.lower() == "long":
    499                return "long"
    500 
    501        return None
    502 
    503    @cached_property
    504    def pac(self) -> Optional[Text]:
    505        """The PAC (proxy config) of a test or reference file. A URL or null"""
    506        if self.script_metadata:
    507            for (meta, content) in self.script_metadata:
    508                if meta == 'pac':
    509                    return content
    510 
    511        if self.root is None:
    512            return None
    513 
    514        if self.pac_nodes:
    515            return self.pac_nodes[0].attrib.get("content", None)
    516 
    517        return None
    518 
    519    @cached_property
    520    def viewport_nodes(self) -> List[ElementTree.Element]:
    521        """List of ElementTree Elements corresponding to nodes in a test that
    522        specify viewport sizes"""
    523        assert self.root is not None
    524        return self.root.findall(".//{http://www.w3.org/1999/xhtml}meta[@name='viewport-size']")
    525 
    526    @cached_property
    527    def viewport_size(self) -> Optional[Text]:
    528        """The viewport size of a test or reference file"""
    529        if self.root is None:
    530            return None
    531 
    532        if not self.viewport_nodes:
    533            return None
    534 
    535        return self.viewport_nodes[0].attrib.get("content", None)
    536 
    537    @cached_property
    538    def dpi_nodes(self) -> List[ElementTree.Element]:
    539        """List of ElementTree Elements corresponding to nodes in a test that
    540        specify device pixel ratios"""
    541        assert self.root is not None
    542        return self.root.findall(".//{http://www.w3.org/1999/xhtml}meta[@name='device-pixel-ratio']")
    543 
    544    @cached_property
    545    def dpi(self) -> Optional[Text]:
    546        """The device pixel ratio of a test or reference file"""
    547        if self.root is None:
    548            return None
    549 
    550        if not self.dpi_nodes:
    551            return None
    552 
    553        return self.dpi_nodes[0].attrib.get("content", None)
    554 
    555    def parse_ref_keyed_meta(self, node: ElementTree.Element) -> Tuple[Optional[Tuple[Text, Text, Text]], Text]:
    556        item: Text = node.attrib.get("content", "")
    557 
    558        parts = item.rsplit(":", 1)
    559        if len(parts) == 1:
    560            key: Optional[Tuple[Text, Text, Text]] = None
    561            value = parts[0]
    562        else:
    563            key_part = urljoin(self.url, parts[0])
    564            reftype = None
    565            for ref in self.references:  # type: Tuple[Text, Text]
    566                if ref[0] == key_part:
    567                    reftype = ref[1]
    568                    break
    569            if reftype not in ("==", "!="):
    570                raise ValueError("Key %s doesn't correspond to a reference" % key_part)
    571            key = (self.url, key_part, reftype)
    572            value = parts[1]
    573 
    574        return key, value
    575 
    576 
    577    @cached_property
    578    def fuzzy_nodes(self) -> List[ElementTree.Element]:
    579        """List of ElementTree Elements corresponding to nodes in a test that
    580        specify reftest fuzziness"""
    581        assert self.root is not None
    582        return self.root.findall(".//{http://www.w3.org/1999/xhtml}meta[@name='fuzzy']")
    583 
    584 
    585    @cached_property
    586    def fuzzy(self) -> Dict[Optional[Tuple[Text, Text, Text]], List[List[int]]]:
    587        rv: Dict[Optional[Tuple[Text, Text, Text]], List[List[int]]] = {}
    588        if self.root is None:
    589            return rv
    590 
    591        if not self.fuzzy_nodes:
    592            return rv
    593 
    594        args = ["maxDifference", "totalPixels"]
    595 
    596        for node in self.fuzzy_nodes:
    597            key, value = self.parse_ref_keyed_meta(node)
    598            ranges = value.split(";")
    599            if len(ranges) != 2:
    600                raise ValueError("Malformed fuzzy value %s" % value)
    601            arg_values: Dict[Text, List[int]] = {}
    602            positional_args: Deque[List[int]] = deque()
    603            for range_str_value in ranges:  # type: Text
    604                name: Optional[Text] = None
    605                if "=" in range_str_value:
    606                    name, range_str_value = (part.strip()
    607                                             for part in range_str_value.split("=", 1))
    608                    if name not in args:
    609                        raise ValueError("%s is not a valid fuzzy property" % name)
    610                    if arg_values.get(name):
    611                        raise ValueError("Got multiple values for argument %s" % name)
    612                if "-" in range_str_value:
    613                    range_min, range_max = range_str_value.split("-")
    614                else:
    615                    range_min = range_str_value
    616                    range_max = range_str_value
    617                try:
    618                    range_value = [int(x.strip()) for x in (range_min, range_max)]
    619                except ValueError:
    620                    raise ValueError("Fuzzy value %s must be a range of integers" %
    621                                     range_str_value)
    622                if name is None:
    623                    positional_args.append(range_value)
    624                else:
    625                    arg_values[name] = range_value
    626            rv[key] = []
    627            for arg_name in args:
    628                if arg_values.get(arg_name):
    629                    arg_value = arg_values.pop(arg_name)
    630                else:
    631                    arg_value = positional_args.popleft()
    632                rv[key].append(arg_value)
    633            assert len(arg_values) == 0 and len(positional_args) == 0
    634        return rv
    635 
    636    @cached_property
    637    def page_ranges_nodes(self) -> List[ElementTree.Element]:
    638        """List of ElementTree Elements corresponding to nodes in a test that
    639        specify print-reftest """
    640        assert self.root is not None
    641        return self.root.findall(".//{http://www.w3.org/1999/xhtml}meta[@name='reftest-pages']")
    642 
    643    @cached_property
    644    def page_ranges(self) -> Dict[Text, List[List[Optional[int]]]]:
    645        """List of ElementTree Elements corresponding to nodes in a test that
    646        specify print-reftest page ranges"""
    647        rv: Dict[Text, List[List[Optional[int]]]] = {}
    648        for node in self.page_ranges_nodes:
    649            key_data, value = self.parse_ref_keyed_meta(node)
    650            # Just key by url
    651            if key_data is None:
    652                key = self.url
    653            else:
    654                key = key_data[1]
    655            if key in rv:
    656                raise ValueError("Duplicate page-ranges value")
    657            rv[key] = []
    658            for range_str in value.split(","):
    659                range_str = range_str.strip()
    660                if "-" in range_str:
    661                    range_parts_str = [item.strip() for item in range_str.split("-")]
    662                    try:
    663                        range_parts = [int(item) if item else None for item in range_parts_str]
    664                    except ValueError:
    665                        raise ValueError("Malformed page-range value %s" % range_str)
    666                    if any(item == 0 for item in range_parts):
    667                        raise ValueError("Malformed page-range value %s" % range_str)
    668                else:
    669                    try:
    670                        range_parts = [int(range_str)]
    671                    except ValueError:
    672                        raise ValueError("Malformed page-range value %s" % range_str)
    673                rv[key].append(range_parts)
    674        return rv
    675 
    676    @cached_property
    677    def testharness_nodes(self) -> List[ElementTree.Element]:
    678        """List of ElementTree Elements corresponding to nodes representing a
    679        testharness.js script"""
    680        assert self.root is not None
    681        return self.root.findall(".//{http://www.w3.org/1999/xhtml}script[@src='/resources/testharness.js']")
    682 
    683    @cached_property
    684    def content_is_testharness(self) -> Optional[bool]:
    685        """Boolean indicating whether the file content represents a
    686        testharness.js test"""
    687        if self.root is None:
    688            return None
    689        return bool(self.testharness_nodes)
    690 
    691    @cached_property
    692    def variant_nodes(self) -> List[ElementTree.Element]:
    693        """List of ElementTree Elements corresponding to nodes representing a
    694        test variant"""
    695        assert self.root is not None
    696        return self.root.findall(".//{http://www.w3.org/1999/xhtml}meta[@name='variant']")
    697 
    698    @cached_property
    699    def test_variants(self) -> List[Text]:
    700        rv: List[Text] = []
    701        if self.ext == ".js":
    702            script_metadata = self.script_metadata
    703            assert script_metadata is not None
    704            for (key, value) in script_metadata:
    705                if key == "variant":
    706                    rv.append(value)
    707        else:
    708            for element in self.variant_nodes:
    709                if "content" in element.attrib:
    710                    variant: Text = element.attrib["content"]
    711                    rv.append(variant)
    712 
    713        for variant in rv:
    714            if variant != "":
    715                if variant[0] not in ("#", "?"):
    716                    raise ValueError("Non-empty variant must start with either a ? or a #")
    717                if len(variant) == 1 or (variant[0] == "?" and variant[1] == "#"):
    718                    raise ValueError("Variants must not have empty fragment or query " +
    719                                     "(omit the empty part instead)")
    720 
    721        if not rv:
    722            rv = [""]
    723 
    724        return rv
    725 
    @cached_property
    def testdriver_nodes(self) -> List[ElementTree.Element]:
        """List of <script> elements whose ``src`` is ``/resources/testdriver.js``,
        with or without a query string."""
        assert self.root is not None
        testdriver_path = '/resources/testdriver.js'
        found: List[ElementTree.Element] = []
        # `xml.etree.ElementTree.findall` only supports a subset of XPath, so
        # the `src` attribute is checked here instead of in the expression.
        for script in self.root.findall(".//{http://www.w3.org/1999/xhtml}script"):
            src = script.attrib.get('src', "")
            if src == testdriver_path or src.startswith(testdriver_path + '?'):
                found.append(script)
        return found
    739 
    @cached_property
    def has_testdriver(self) -> Optional[bool]:
        """Whether the document contains a testdriver.js <script> node.

        ``None`` when there is no parsed document (``self.root`` is ``None``).
        """
        return None if self.root is None else bool(self.testdriver_nodes)
    747 
    def ___get_testdriver_include_path(self) -> Optional[str]:
        """Return the URL used to include testdriver.js, or ``None``.

        ``script`` entries in the script metadata are checked first; the
        first one that points at ``/resources/testdriver.js`` (optionally
        followed by a query string) wins. Otherwise the ``src`` of the first
        testdriver <script> node in the parsed document is returned.

        :returns: the include URL with surrounding whitespace removed when it
            comes from script metadata, the raw ``src`` attribute when it
            comes from markup, or ``None`` when testdriver.js is not included
            or there is no parsed document.
        """
        testdriver_path = '/resources/testdriver.js'

        if self.script_metadata:
            for (meta, content) in self.script_metadata:
                if meta.strip() != 'script':
                    continue
                # Strip before comparing: the metadata value may carry
                # surrounding whitespace (for example a trailing "\r"), which
                # would otherwise defeat the match.
                include = content.strip()
                if include == testdriver_path or include.startswith(testdriver_path + '?'):
                    return include

        if self.root is None:
            return None

        for node in self.testdriver_nodes:
            if "src" in node.attrib:
                return node.attrib.get("src")

        return None
    764 
    @cached_property
    def testdriver_features(self) -> Optional[List[Text]]:
        """
        Values of the ``feature`` query parameter on the testdriver.js include
        URL, or ``None`` when testdriver.js is not included or no feature is
        requested.
        """
        include_url = self.___get_testdriver_include_path()
        if include_url is None:
            return None

        # Only the query string of the include URL matters here.
        requested = parse_qs(urlparse(include_url).query).get('feature', [])
        return requested if requested else None
    787 
    @cached_property
    def reftest_nodes(self) -> List[ElementTree.Element]:
        """List of ElementTree Elements corresponding to reftest <link>
        nodes: all ``rel=match`` links followed by all ``rel=mismatch``
        links. Empty when there is no parsed document."""
        root = self.root
        if root is None:
            return []

        link_query = ".//{http://www.w3.org/1999/xhtml}link[@rel='%s']"
        nodes: List[ElementTree.Element] = []
        for relation in ("match", "mismatch"):
            nodes.extend(root.findall(link_query % relation))
        return nodes
    798 
    @cached_property
    def references(self) -> List[Tuple[Text, Text]]:
        """List of (ref_url, relation) tuples for the reftest references
        declared in the file. ``relation`` is ``"=="`` for a match link and
        ``"!="`` for a mismatch link; ``ref_url`` is resolved against
        ``self.url``."""
        relation_for = {"match": "==", "mismatch": "!="}
        return [
            (urljoin(self.url, link.attrib["href"].strip(space_chars)),
             relation_for[link.attrib["rel"]])
            for link in self.reftest_nodes
            # Links without an href cannot name a reference.
            if "href" in link.attrib
        ]
    811 
    @cached_property
    def content_is_ref_node(self) -> bool:
        """Boolean indicating whether the file is a non-leaf node in a reftest
        graph, i.e. whether it declares at least one reference through
        <link rel=[mis]match>."""
        return len(self.references) > 0
    817 
    @cached_property
    def css_flag_nodes(self) -> List[ElementTree.Element]:
        """List of ElementTree Elements for <meta name=flags> nodes; empty
        when there is no parsed document."""
        root = self.root
        return [] if root is None else root.findall(
            ".//{http://www.w3.org/1999/xhtml}meta[@name='flags']")
    825 
    @cached_property
    def css_flags(self) -> Set[Text]:
        """Set of whitespace-separated flags collected from the ``content``
        attribute of every flag <meta> node"""
        return {
            flag
            for node in self.css_flag_nodes
            if "content" in node.attrib
            for flag in node.attrib["content"].split()
        }
    835 
    @cached_property
    def content_is_css_manual(self) -> Optional[bool]:
        """Boolean indicating whether the file content represents a
        CSS WG-style manual test, i.e. whether it carries at least one of the
        flags listed below. ``None`` when there is no parsed document."""
        if self.root is None:
            return None
        manual_flags = {"animated", "font", "history", "interact", "paged", "speech", "userstyle"}
        # Manual as soon as any declared flag is one of the manual-only flags.
        return not self.css_flags.isdisjoint(manual_flags)
    844 
    @cached_property
    def spec_link_nodes(self) -> List[ElementTree.Element]:
        """List of ElementTree Elements for <link rel=help> nodes, which are
        used to point to specs; empty when there is no parsed document."""
        root = self.root
        return [] if root is None else root.findall(
            ".//{http://www.w3.org/1999/xhtml}link[@rel='help']")
    852 
    @cached_property
    def spec_links(self) -> Set[Text]:
        """Set of ``href`` values (surrounding space characters removed) of
        the spec <link> nodes in the file"""
        return {
            link.attrib["href"].strip(space_chars)
            for link in self.spec_link_nodes
            if "href" in link.attrib
        }
    861 
    @cached_property
    def content_is_css_visual(self) -> Optional[bool]:
        """Boolean indicating whether the file content represents a
        CSS WG-style visual test: a markup file (by extension) that declares
        at least one spec link. ``None`` when there is no parsed document."""
        if self.root is None:
            return None
        markup_extensions = {'.xht', '.html', '.xhtml', '.htm', '.xml', '.svg'}
        if self.ext not in markup_extensions:
            return False
        return bool(self.spec_links)
    870 
    @property
    def type(self) -> Text:
        """Item type of the file.

        Uses ``possible_types`` when it yields exactly one candidate and
        otherwise falls back to the type computed by ``manifest_items``.
        """
        candidates = self.possible_types
        if len(candidates) != 1:
            item_type, _ = self.manifest_items()
            return item_type

        (only_candidate,) = candidates
        return only_candidate
    879 
    @property
    def possible_types(self) -> Set[Text]:
        """Determines the set of possible types without reading the file.

        A cached ``manifest_items`` result pins the type. Otherwise the
        filename-based checks are tried in order and the first one that
        holds decides; files that match none of them get a set of
        candidates.
        """
        cached = self.items_cache
        if cached:
            return {cached[0]}

        # Ordered (name check, item class) pairs: the first check that holds
        # decides the type on its own.
        name_rules = (
            ("name_is_non_test", SupportFile),
            ("name_is_manual", ManualTest),
            ("name_is_conformance", ConformanceCheckerTest),
            ("name_is_conformance_support", SupportFile),
            ("name_is_webdriver", WebDriverSpecTest),
            ("name_is_visual", VisualTest),
            ("name_is_crashtest", CrashTest),
            ("name_is_print_reftest", PrintRefTest),
            ("name_is_multi_global", TestharnessTest),
            ("name_is_worker", TestharnessTest),
            ("name_is_window", TestharnessTest),
            ("name_is_extension", TestharnessTest),
        )
        for check, item_cls in name_rules:
            if getattr(self, check):
                return {item_cls.item_type}

        # Without a markup type the file can only be a support file.
        if self.markup_type is None:
            return {SupportFile.item_type}

        candidates = {TestharnessTest.item_type,
                      RefTest.item_type,
                      SupportFile.item_type}
        if not self.name_is_reference:
            # Manual and visual tests are only possible for files that are
            # not named like references.
            candidates |= {ManualTest.item_type, VisualTest.item_type}
        return candidates
    936 
    def manifest_items(self) -> Tuple[Text, List[ManifestItem]]:
        """Return the ``(item_type, items)`` pair describing this file.

        There is typically one manifest item per test, but in the case of
        reftests a node may have corresponding manifest items without being a
        test itself.

        The file is classified by name first (the ``name_is_*`` checks, in
        the same order as ``possible_types``) and only then by content. The
        result is stored in ``self.items_cache`` and returned directly by
        later calls.

        :raises ValueError: if the file is named as a print reftest but
            declares no references.
        """

        # Reuse a previously computed result.
        if self.items_cache:
            return self.items_cache

        # Note whether the parsed tree was already cached on the instance
        # before this call; if it was not, the cached properties are dropped
        # again once the items have been computed (see the end of the method).
        drop_cached = "root" not in self.__dict__

        if self.name_is_non_test:
            rv: Tuple[Text, List[ManifestItem]] = ("support", [
                SupportFile(
                    self.tests_root,
                    self.rel_path
                )])

        elif self.name_is_manual:
            rv = ManualTest.item_type, [
                ManualTest(
                    self.tests_root,
                    self.rel_path,
                    self.url_base,
                    self.rel_url
                )]

        elif self.name_is_conformance:
            rv = ConformanceCheckerTest.item_type, [
                ConformanceCheckerTest(
                    self.tests_root,
                    self.rel_path,
                    self.url_base,
                    self.rel_url
                )]

        elif self.name_is_conformance_support:
            rv = "support", [
                SupportFile(
                    self.tests_root,
                    self.rel_path
                )]

        elif self.name_is_webdriver:
            rv = WebDriverSpecTest.item_type, [
                WebDriverSpecTest(
                    self.tests_root,
                    self.rel_path,
                    self.url_base,
                    self.rel_url,
                    timeout=self.timeout
                )]

        elif self.name_is_visual:
            rv = VisualTest.item_type, [
                VisualTest(
                    self.tests_root,
                    self.rel_path,
                    self.url_base,
                    self.rel_url
                )]

        elif self.name_is_crashtest:
            rv = CrashTest.item_type, [
                CrashTest(
                    self.tests_root,
                    self.rel_path,
                    self.url_base,
                    self.rel_url,
                    testdriver=self.has_testdriver,
                )]

        elif self.name_is_print_reftest:
            references = self.references
            if not references:
                raise ValueError("%s detected as print reftest but doesn't have any refs" %
                                 self.path)
            rv = PrintRefTest.item_type, [
                PrintRefTest(
                    self.tests_root,
                    self.rel_path,
                    self.url_base,
                    self.rel_url,
                    references=references,
                    timeout=self.timeout,
                    viewport_size=self.viewport_size,
                    fuzzy=self.fuzzy,
                    page_ranges=self.page_ranges,
                    testdriver=self.has_testdriver,
                )]

        elif self.name_is_multi_global:
            script_metadata = self.script_metadata
            assert script_metadata is not None
            # Only the first "global" metadata entry is honoured; without one
            # the empty string is passed to global_suffixes().
            global_spec = next(
                (value for (key, value) in script_metadata if key == "global"),
                "")

            # One test per (global suffix, variant) combination.
            tests: List[ManifestItem] = [
                TestharnessTest(
                    self.tests_root,
                    self.rel_path,
                    self.url_base,
                    global_variant_url(self.rel_url, suffix) + variant,
                    timeout=self.timeout,
                    pac=self.pac,
                    testdriver_features=self.testdriver_features,
                    jsshell=jsshell,
                    script_metadata=self.script_metadata
                )
                for (suffix, jsshell) in sorted(global_suffixes(global_spec))
                for variant in self.test_variants
            ]
            rv = TestharnessTest.item_type, tests

        elif self.name_is_worker:
            test_url = replace_end(self.rel_url, ".worker.js", ".worker.html")
            tests = [
                TestharnessTest(
                    self.tests_root,
                    self.rel_path,
                    self.url_base,
                    test_url + variant,
                    timeout=self.timeout,
                    pac=self.pac,
                    testdriver_features=self.testdriver_features,
                    script_metadata=self.script_metadata
                )
                for variant in self.test_variants
            ]
            rv = TestharnessTest.item_type, tests

        elif self.name_is_window:
            test_url = replace_end(self.rel_url, ".window.js", ".window.html")
            tests = [
                TestharnessTest(
                    self.tests_root,
                    self.rel_path,
                    self.url_base,
                    test_url + variant,
                    timeout=self.timeout,
                    pac=self.pac,
                    testdriver_features=self.testdriver_features,
                    script_metadata=self.script_metadata
                )
                for variant in self.test_variants
            ]
            rv = TestharnessTest.item_type, tests

        elif self.name_is_extension:
            test_url = replace_end(self.rel_url, ".extension.js", ".extension.html")
            tests = [
                TestharnessTest(
                    self.tests_root,
                    self.rel_path,
                    self.url_base,
                    test_url + variant,
                    timeout=self.timeout,
                    pac=self.pac,
                    script_metadata=self.script_metadata
                )
                for variant in self.test_variants
            ]
            rv = TestharnessTest.item_type, tests

        # From here on the classification depends on the file's content.
        elif self.content_is_css_manual and not self.name_is_reference:
            rv = ManualTest.item_type, [
                ManualTest(
                    self.tests_root,
                    self.rel_path,
                    self.url_base,
                    self.rel_url
                )]

        elif self.content_is_testharness:
            testdriver = self.has_testdriver
            # One test per variant.
            tests = [
                TestharnessTest(
                    self.tests_root,
                    self.rel_path,
                    self.url_base,
                    self.rel_url + variant,
                    timeout=self.timeout,
                    pac=self.pac,
                    testdriver_features=self.testdriver_features,
                    testdriver=testdriver,
                    script_metadata=self.script_metadata
                )
                for variant in self.test_variants
            ]
            rv = TestharnessTest.item_type, tests

        elif self.content_is_ref_node:
            references = self.references
            # One reftest per variant; the variant suffix is appended to the
            # test URL and to every reference URL alike.
            tests = [
                RefTest(
                    self.tests_root,
                    self.rel_path,
                    self.url_base,
                    self.rel_url + variant,
                    references=[
                        (ref_url + variant, relation)
                        for (ref_url, relation) in references
                    ],
                    timeout=self.timeout,
                    viewport_size=self.viewport_size,
                    dpi=self.dpi,
                    fuzzy=self.fuzzy,
                    testdriver=self.has_testdriver,
                )
                for variant in self.test_variants
            ]
            rv = RefTest.item_type, tests

        elif self.content_is_css_visual and not self.name_is_reference:
            rv = VisualTest.item_type, [
                VisualTest(
                    self.tests_root,
                    self.rel_path,
                    self.url_base,
                    self.rel_url
                )]

        else:
            rv = "support", [
                SupportFile(
                    self.tests_root,
                    self.rel_path
                )]

        # Sanity checks: the chosen type must be one that possible_types
        # allows, and no item may appear twice.
        assert rv[0] in self.possible_types
        assert len(rv[1]) == len(set(rv[1]))

        self.items_cache = rv

        # Discard every attribute listed in "__cached_properties__"
        # (presumably maintained by the cached_property decorator; confirm in
        # .utils) when the tree had not been cached before this call.
        if drop_cached and "__cached_properties__" in self.__dict__:
            cached_properties = self.__dict__["__cached_properties__"]
            for prop in cached_properties:
                if prop in self.__dict__:
                    del self.__dict__[prop]
            del self.__dict__["__cached_properties__"]

        return rv
   1178 
   1179    def manifest_spec_items(self) -> Optional[Tuple[Text, List[ManifestItem]]]:
   1180        specs = list(self.spec_links)
   1181        if not specs:
   1182            return None
   1183        rv: Tuple[Text, List[ManifestItem]] = (SpecItem.item_type, [
   1184            SpecItem(
   1185                self.tests_root,
   1186                self.rel_path,
   1187                specs
   1188            )])
   1189        return rv