tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

make-schemas.py (17249B)


      1 #!/usr/bin/env python3
      2 # This Source Code Form is subject to the terms of the Mozilla Public
      3 # License, v. 2.0. If a copy of the MPL was not distributed with this
      4 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
      5 
      6 """Firefox Messaging System Messaging Experiment schema generator
      7 
      8 The Firefox Messaging System handles several types of messages. This program
      9 patches and combines those schemas into a single schema
     10 (MessagingExperiment.schema.json) which is used to validate messaging
     11 experiments coming from Nimbus.
     12 
     13 Definitions from FxMSCommon.schema.json are bundled into this schema. This
     14 allows all of the FxMS schemas to reference common definitions, e.g.
     15 `localizableText` for translatable strings, via referencing the common schema.
     16 The bundled schema will be re-written so that the references now point at the
     17 top-level, generated schema.
     18 
     19 Additionally, all self-references in each messaging schema will be rewritten
     20 into absolute references, referencing each sub-schema's `$id`. This is required
     21 due to the JSONSchema validation library used by Experimenter not fully
     22 supporting self-references and bundled schema.
     23 """
     24 
     25 import sys
     26 from argparse import ArgumentParser
     27 from itertools import chain
     28 from pathlib import Path
     29 from typing import Any, NamedTuple, Union
     30 from urllib.parse import urlparse
     31 
     32 import jsonschema
     33 from mozfile import json
     34 
     35 
     36 class SchemaDefinition(NamedTuple):
     37    """A definition of a schema that is to be bundled."""
     38 
     39    #: The $id of the generated schema.
     40    schema_id: str
     41 
     42    #: The path of the generated schema.
     43    schema_path: Path
     44 
     45    #: The message types that will be bundled into the schema.
     46    message_types: dict[str, Path]
     47 
     48    #: What common definitions to bundle into the schema.
     49    #:
     50    #: If `True`, all definitions will be bundled.
     51    #: If `False`, no definitons will be bundled.
     52    #: If a list, only the named definitions will be bundled.
     53    bundle_common: Union[bool, list[str]]
     54 
     55    #: The testing corpus for the schema.
     56    test_corpus: dict[str, Path]
     57 
     58 
     59 SCHEMA_DIR = Path("..", "templates")
     60 
     61 SCHEMAS = [
     62    SchemaDefinition(
     63        schema_id="chrome://browser/content/asrouter/schemas/MessagingExperiment.schema.json",
     64        schema_path=Path("MessagingExperiment.schema.json"),
     65        message_types={
     66            "BookmarksBarButton": (
     67                SCHEMA_DIR / "OnboardingMessage" / "BookmarksBarButton.schema.json"
     68            ),
     69            "CFRUrlbarChiclet": (
     70                SCHEMA_DIR / "CFR" / "templates" / "CFRUrlbarChiclet.schema.json"
     71            ),
     72            "ExtensionDoorhanger": (
     73                SCHEMA_DIR / "CFR" / "templates" / "ExtensionDoorhanger.schema.json"
     74            ),
     75            "InfoBar": SCHEMA_DIR / "CFR" / "templates" / "InfoBar.schema.json",
     76            "MenuMessage": (
     77                SCHEMA_DIR / "OnboardingMessage" / "MenuMessage.schema.json"
     78            ),
     79            "NewtabPromoMessage": (
     80                SCHEMA_DIR / "PBNewtab" / "NewtabPromoMessage.schema.json"
     81            ),
     82            "NewtabMessage": (
     83                SCHEMA_DIR / "OnboardingMessage" / "NewtabMessage.schema.json"
     84            ),
     85            "Spotlight": SCHEMA_DIR / "OnboardingMessage" / "Spotlight.schema.json",
     86            "ToastNotification": (
     87                SCHEMA_DIR / "ToastNotification" / "ToastNotification.schema.json"
     88            ),
     89            "ToolbarBadgeMessage": (
     90                SCHEMA_DIR / "OnboardingMessage" / "ToolbarBadgeMessage.schema.json"
     91            ),
     92            "UpdateAction": (
     93                SCHEMA_DIR / "OnboardingMessage" / "UpdateAction.schema.json"
     94            ),
     95        },
     96        bundle_common=True,
     97        test_corpus={
     98            "ReachExperiments": Path("corpus", "ReachExperiments.messages.json"),
     99            # These are generated via extract-test-corpus.js
    100            "CFRMessageProvider": Path("corpus", "CFRMessageProvider.messages.json"),
    101            "OnboardingMessageProvider": Path(
    102                "corpus", "OnboardingMessageProvider.messages.json"
    103            ),
    104            "PanelTestProvider": Path("corpus", "PanelTestProvider.messages.json"),
    105        },
    106    ),
    107    SchemaDefinition(
    108        schema_id=(
    109            "chrome://browser/content/asrouter/schemas/"
    110            "BackgroundTaskMessagingExperiment.schema.json"
    111        ),
    112        schema_path=Path("BackgroundTaskMessagingExperiment.schema.json"),
    113        message_types={
    114            "ToastNotification": (
    115                SCHEMA_DIR / "ToastNotification" / "ToastNotification.schema.json"
    116            ),
    117        },
    118        bundle_common=True,
    119        # These are generated via extract-test-corpus.js
    120        test_corpus={
    121            # Just the "toast_notification" messages.
    122            "PanelTestProvider": Path(
    123                "corpus", "PanelTestProvider_toast_notification.messages.json"
    124            ),
    125        },
    126    ),
    127 ]
    128 
    129 COMMON_SCHEMA_NAME = "FxMSCommon.schema.json"
    130 COMMON_SCHEMA_PATH = Path(COMMON_SCHEMA_NAME)
    131 
    132 
    133 class NestedRefResolver(jsonschema.RefResolver):
    134    """A custom ref resolver that handles bundled schema.
    135 
    136    This is the resolver used by Experimenter.
    137    """
    138 
    139    def __init__(self, schema):
    140        super().__init__(base_uri=None, referrer=None)
    141 
    142        if "$id" in schema:
    143            self.store[schema["$id"]] = schema
    144 
    145        if "$defs" in schema:
    146            for dfn in schema["$defs"].values():
    147                if "$id" in dfn:
    148                    self.store[dfn["$id"]] = dfn
    149 
    150 
    151 def read_schema(path):
    152    """Read a schema from disk and parse it as JSON."""
    153    with path.open("r") as f:
    154        return json.load(f)
    155 
    156 
    157 def extract_template_values(template):
    158    """Extract the possible template values (either via JSON Schema enum or const)."""
    159    enum = template.get("enum")
    160    if enum:
    161        return enum
    162 
    163    const = template.get("const")
    164    if const:
    165        return [const]
    166 
    167 
    168 def patch_schema(schema, bundled_id, schema_id=None):
    169    """Patch the given schema.
    170 
    171    The JSON schema validator that Experimenter uses
    172    (https://pypi.org/project/jsonschema/) does not support relative references,
    173    nor does it support bundled schemas. We rewrite the schema so that all
    174    relative refs are transformed into absolute refs via the schema's `$id`.
    175 
    176    Additionally, we merge in the contents of FxMSCommon.schema.json, so all
    177    refs relative to that schema will be transformed to become relative to this
    178    schema.
    179 
    180    See-also: https://github.com/python-jsonschema/jsonschema/issues/313
    181    """
    182    if schema_id is None:
    183        schema_id = schema["$id"]
    184 
    185    def patch_impl(schema):
    186        ref = schema.get("$ref")
    187 
    188        if ref:
    189            uri = urlparse(ref)
    190            if (
    191                uri.scheme == ""
    192                and uri.netloc == ""
    193                and uri.path == ""
    194                and uri.fragment != ""
    195            ):
    196                schema["$ref"] = f"{schema_id}#{uri.fragment}"
    197            elif (uri.scheme, uri.path) == ("file", f"/{COMMON_SCHEMA_NAME}"):
    198                schema["$ref"] = f"{bundled_id}#{uri.fragment}"
    199 
    200        # If `schema` is object-like, inspect each of its indivual properties
    201        # and patch them.
    202        properties = schema.get("properties")
    203        if properties:
    204            for prop in properties.keys():
    205                patch_impl(properties[prop])
    206 
    207        # If `schema` is array-like, inspect each of its items and patch them.
    208        items = schema.get("items")
    209        if items:
    210            patch_impl(items)
    211 
    212        # Patch each `if`, `then`, `else`, and `not` sub-schema that is present.
    213        for key in ("if", "then", "else", "not"):
    214            if key in schema:
    215                patch_impl(schema[key])
    216 
    217        # Patch the items of each `oneOf`, `allOf`, and `anyOf` sub-schema that
    218        # is present.
    219        for key in ("oneOf", "allOf", "anyOf"):
    220            subschema = schema.get(key)
    221            if subschema:
    222                for i, alternate in enumerate(subschema):
    223                    patch_impl(alternate)
    224 
    225    # Patch the top-level type defined in the schema.
    226    patch_impl(schema)
    227 
    228    # Patch each named definition in the schema.
    229    for key in ("$defs", "definitions"):
    230        defns = schema.get(key)
    231        if defns:
    232            for defn_name, defn_value in defns.items():
    233                patch_impl(defn_value)
    234 
    235    return schema
    236 
    237 
    238 def bundle_schema(schema_def: SchemaDefinition):
    239    """Create a bundled schema based on the schema definition."""
    240    # Patch each message type schema to resolve all self-references to be
    241    # absolute and rewrite # references to FxMSCommon.schema.json to be relative
    242    # to the new schema (because we are about to bundle its definitions).
    243    defs = {
    244        name: patch_schema(read_schema(path), bundled_id=schema_def.schema_id)
    245        for name, path in schema_def.message_types.items()
    246    }
    247 
    248    # Bundle the definitions from FxMSCommon.schema.json into this schema.
    249    if schema_def.bundle_common:
    250 
    251        def dfn_filter(name):
    252            if schema_def.bundle_common is True:
    253                return True
    254 
    255            return name in schema_def.bundle_common
    256 
    257        common_schema = patch_schema(
    258            read_schema(COMMON_SCHEMA_PATH),
    259            bundled_id=schema_def.schema_id,
    260            schema_id=schema_def.schema_id,
    261        )
    262 
    263        # patch_schema mutates the given schema, so we read a new copy in for
    264        # each bundle operation.
    265        defs.update({
    266            name: dfn
    267            for name, dfn in common_schema["$defs"].items()
    268            if dfn_filter(name)
    269        })
    270 
    271    # Ensure all bundled schemas have an $id so that $refs inside the
    272    # bundled schema work correctly (i.e, they will reference the subschema
    273    # and not the bundle).
    274    for name in schema_def.message_types.keys():
    275        subschema = defs[name]
    276        if "$id" not in subschema:
    277            raise ValueError(f"Schema {name} is missing an $id")
    278 
    279        props = subschema["properties"]
    280        if "template" not in props:
    281            raise ValueError(f"Schema {name} is missing a template")
    282 
    283        template = props["template"]
    284        if "enum" not in template and "const" not in template:
    285            raise ValueError(f"Schema {name} should have const or enum template")
    286 
    287    templates = {
    288        name: extract_template_values(defs[name]["properties"]["template"])
    289        for name in schema_def.message_types.keys()
    290    }
    291 
    292    # Ensure that each schema has a unique set of template values.
    293    for a in templates.keys():
    294        a_keys = set(templates[a])
    295 
    296        for b in templates.keys():
    297            if a == b:
    298                continue
    299 
    300            b_keys = set(templates[b])
    301            intersection = a_keys.intersection(b_keys)
    302 
    303            if len(intersection):
    304                raise ValueError(
    305                    f"Schema {a} and {b} have overlapping template values: "
    306                    f"{', '.join(intersection)}"
    307                )
    308 
    309    all_templates = list(chain.from_iterable(templates.values()))
    310 
    311    # Enforce that one of the templates must match (so that one of the if
    312    # branches will match).
    313    defs["Message"]["properties"]["template"]["enum"] = all_templates
    314    defs["TemplatedMessage"] = {
    315        "description": "An FxMS message of one of a variety of types.",
    316        "type": "object",
    317        "allOf": [
    318            # Ensure each message has all the fields defined in the base
    319            # Message type.
    320            #
    321            # This is slightly redundant because each message should
    322            # already inherit from this message type, but it is easier
    323            # to add this requirement here than to verify that each
    324            # message's schema is properly inheriting.
    325            {"$ref": f"{schema_def.schema_id}#/$defs/Message"},
    326            # For each message type, create a subschema that says if the
    327            # template field matches a value for a message type defined
    328            # in MESSAGE_TYPES, then the message must also match the
    329            # schema for that message type.
    330            #
    331            # This is done using `allOf: [{ if, then }]` instead of `oneOf: []`
    332            # because it provides better error messages. Using `if-then`
    333            # will only show validation errors for the sub-schema that
    334            # matches template, whereas using `oneOf` will show
    335            # validation errors for *all* sub-schemas, which makes
    336            # debugging messages much harder.
    337            *(
    338                {
    339                    "if": {
    340                        "type": "object",
    341                        "properties": {
    342                            "template": {
    343                                "type": "string",
    344                                "enum": templates[message_type],
    345                            },
    346                        },
    347                        "required": ["template"],
    348                    },
    349                    "then": {"$ref": f"{schema_def.schema_id}#/$defs/{message_type}"},
    350                }
    351                for message_type in schema_def.message_types
    352            ),
    353        ],
    354    }
    355    defs["MultiMessage"] = {
    356        "description": "An object containing an array of messages.",
    357        "type": "object",
    358        "properties": {
    359            "template": {"type": "string", "const": "multi"},
    360            "messages": {
    361                "type": "array",
    362                "description": "An array of messages.",
    363                "items": {"$ref": f"{schema_def.schema_id}#/$defs/TemplatedMessage"},
    364            },
    365        },
    366        "required": ["template", "messages"],
    367    }
    368 
    369    # Generate the combined schema.
    370    return {
    371        "$schema": "https://json-schema.org/draft/2019-09/schema",
    372        "$id": schema_def.schema_id,
    373        "title": "Messaging Experiment",
    374        "description": "A Firefox Messaging System message.",
    375        # A message must be one of:
    376        # - An object that contains id, template, and content fields
    377        # - An object that contains none of the above fields (empty message)
    378        # - An array of messages like the above
    379        "if": {
    380            "type": "object",
    381            "properties": {"template": {"const": "multi"}},
    382            "required": ["template"],
    383        },
    384        "then": {
    385            "$ref": f"{schema_def.schema_id}#/$defs/MultiMessage",
    386        },
    387        "else": {
    388            "$ref": f"{schema_def.schema_id}#/$defs/TemplatedMessage",
    389        },
    390        "$defs": defs,
    391    }
    392 
    393 
    394 def check_diff(schema_def: SchemaDefinition, schema: dict[str, Any]):
    395    """Check the generated schema matches the on-disk schema."""
    396    print(f"  Checking {schema_def.schema_path} for differences...")
    397 
    398    with schema_def.schema_path.open("r") as f:
    399        on_disk = json.load(f)
    400 
    401    if on_disk != schema:
    402        print(f"{schema_def.schema_path} does not match generated schema:")
    403        print("Generated schema:")
    404        json.dump(schema, sys.stdout, indent=2)
    405        print("\n\nOn Disk schema:")
    406        json.dump(on_disk, sys.stdout, indent=2)
    407        print("\n\n")
    408 
    409        raise ValueError("Schemas do not match!")
    410 
    411 
    412 def validate_corpus(schema_def: SchemaDefinition, schema: dict[str, Any]):
    413    """Check that the schema validates.
    414 
    415    This uses the same validation configuration that is used in Experimenter.
    416    """
    417    print("  Validating messages with Experimenter JSON Schema validator...")
    418 
    419    resolver = NestedRefResolver(schema)
    420 
    421    for provider, provider_path in schema_def.test_corpus.items():
    422        print(f"    Validating messages from {provider}:")
    423 
    424        try:
    425            with provider_path.open("r") as f:
    426                messages = json.load(f)
    427        except FileNotFoundError as e:
    428            if not provider_path.parent.exists():
    429                new_exc = Exception(
    430                    f"Could not find {provider_path}: Did you run "
    431                    "`mach xpcshell extract-test-corpus.js` ?"
    432                )
    433                raise new_exc from e
    434 
    435            raise e
    436 
    437        for i, message in enumerate(messages):
    438            template = message.get("template", "(no template)")
    439            msg_id = message.get("id", f"index {i}")
    440 
    441            print(
    442                f"      Validating {msg_id} {template} message with {schema_def.schema_path}..."
    443            )
    444            jsonschema.validate(instance=message, schema=schema, resolver=resolver)
    445 
    446        print()
    447 
    448 
    449 def main(check=False):
    450    """Generate Nimbus feature schemas for Firefox Messaging System."""
    451    for schema_def in SCHEMAS:
    452        print(f"Generating {schema_def.schema_path} ...")
    453        schema = bundle_schema(schema_def)
    454 
    455        if check:
    456            print(f"Checking {schema_def.schema_path} ...")
    457            check_diff(schema_def, schema)
    458            validate_corpus(schema_def, schema)
    459        else:
    460            with schema_def.schema_path.open("wb") as f:
    461                print(f"Writing {schema_def.schema_path} ...")
    462                f.write(json.dumps(schema, indent=2).encode("utf-8"))
    463                f.write(b"\n")
    464 
    465 
    466 if __name__ == "__main__":
    467    parser = ArgumentParser(description=main.__doc__)
    468    parser.add_argument(
    469        "--check",
    470        action="store_true",
    471        help="Check that the generated schemas have not changed and run validation tests.",
    472        default=False,
    473    )
    474    args = parser.parse_args()
    475 
    476    main(args.check)