make-schemas.py (17249B)
#!/usr/bin/env python3
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

"""Firefox Messaging System Messaging Experiment schema generator

The Firefox Messaging System handles several types of messages. This program
patches and combines those schemas into a single schema
(MessagingExperiment.schema.json) which is used to validate messaging
experiments coming from Nimbus.

Definitions from FxMSCommon.schema.json are bundled into this schema. This
allows all of the FxMS schemas to reference common definitions, e.g.
`localizableText` for translatable strings, via referencing the common schema.
The bundled schema will be re-written so that the references now point at the
top-level, generated schema.

Additionally, all self-references in each messaging schema will be rewritten
into absolute references, referencing each sub-schema's `$id`. This is required
due to the JSONSchema validation library used by Experimenter not fully
supporting self-references and bundled schema.
"""

import sys
from argparse import ArgumentParser
from itertools import chain, combinations
from pathlib import Path
from typing import Any, NamedTuple, Union
from urllib.parse import urlparse

import jsonschema
from mozfile import json


class SchemaDefinition(NamedTuple):
    """A definition of a schema that is to be bundled."""

    #: The $id of the generated schema.
    schema_id: str

    #: The path of the generated schema.
    schema_path: Path

    #: The message types that will be bundled into the schema.
    message_types: dict[str, Path]

    #: What common definitions to bundle into the schema.
    #:
    #: If `True`, all definitions will be bundled.
    #: If `False`, no definitions will be bundled.
    #: If a list, only the named definitions will be bundled.
    bundle_common: Union[bool, list[str]]

    #: The testing corpus for the schema.
    test_corpus: dict[str, Path]


SCHEMA_DIR = Path("..", "templates")

SCHEMAS = [
    SchemaDefinition(
        schema_id="chrome://browser/content/asrouter/schemas/MessagingExperiment.schema.json",
        schema_path=Path("MessagingExperiment.schema.json"),
        message_types={
            "BookmarksBarButton": (
                SCHEMA_DIR / "OnboardingMessage" / "BookmarksBarButton.schema.json"
            ),
            "CFRUrlbarChiclet": (
                SCHEMA_DIR / "CFR" / "templates" / "CFRUrlbarChiclet.schema.json"
            ),
            "ExtensionDoorhanger": (
                SCHEMA_DIR / "CFR" / "templates" / "ExtensionDoorhanger.schema.json"
            ),
            "InfoBar": SCHEMA_DIR / "CFR" / "templates" / "InfoBar.schema.json",
            "MenuMessage": (
                SCHEMA_DIR / "OnboardingMessage" / "MenuMessage.schema.json"
            ),
            "NewtabPromoMessage": (
                SCHEMA_DIR / "PBNewtab" / "NewtabPromoMessage.schema.json"
            ),
            "NewtabMessage": (
                SCHEMA_DIR / "OnboardingMessage" / "NewtabMessage.schema.json"
            ),
            "Spotlight": SCHEMA_DIR / "OnboardingMessage" / "Spotlight.schema.json",
            "ToastNotification": (
                SCHEMA_DIR / "ToastNotification" / "ToastNotification.schema.json"
            ),
            "ToolbarBadgeMessage": (
                SCHEMA_DIR / "OnboardingMessage" / "ToolbarBadgeMessage.schema.json"
            ),
            "UpdateAction": (
                SCHEMA_DIR / "OnboardingMessage" / "UpdateAction.schema.json"
            ),
        },
        bundle_common=True,
        test_corpus={
            "ReachExperiments": Path("corpus", "ReachExperiments.messages.json"),
            # These are generated via extract-test-corpus.js
            "CFRMessageProvider": Path("corpus", "CFRMessageProvider.messages.json"),
            "OnboardingMessageProvider": Path(
                "corpus", "OnboardingMessageProvider.messages.json"
            ),
            "PanelTestProvider": Path("corpus", "PanelTestProvider.messages.json"),
        },
    ),
    SchemaDefinition(
        schema_id=(
            "chrome://browser/content/asrouter/schemas/"
            "BackgroundTaskMessagingExperiment.schema.json"
        ),
        schema_path=Path("BackgroundTaskMessagingExperiment.schema.json"),
        message_types={
            "ToastNotification": (
                SCHEMA_DIR / "ToastNotification" / "ToastNotification.schema.json"
            ),
        },
        bundle_common=True,
        # These are generated via extract-test-corpus.js
        test_corpus={
            # Just the "toast_notification" messages.
            "PanelTestProvider": Path(
                "corpus", "PanelTestProvider_toast_notification.messages.json"
            ),
        },
    ),
]

COMMON_SCHEMA_NAME = "FxMSCommon.schema.json"
COMMON_SCHEMA_PATH = Path(COMMON_SCHEMA_NAME)


class NestedRefResolver(jsonschema.RefResolver):
    """A custom ref resolver that handles bundled schema.

    This is the resolver used by Experimenter.

    The top-level schema and each entry of its `$defs` that carries an `$id`
    are registered in the resolver's store, keyed by that `$id`, so absolute
    `$ref`s to bundled sub-schemas can be resolved.
    """

    def __init__(self, schema):
        super().__init__(base_uri=None, referrer=None)

        if "$id" in schema:
            self.store[schema["$id"]] = schema

        if "$defs" in schema:
            for dfn in schema["$defs"].values():
                if "$id" in dfn:
                    self.store[dfn["$id"]] = dfn


def read_schema(path):
    """Read a schema from disk and parse it as JSON.

    The file is always decoded as UTF-8 (matching how `main` writes the
    generated schemas) rather than with the platform's locale encoding.
    """
    with path.open("r", encoding="utf-8") as f:
        return json.load(f)


def extract_template_values(template):
    """Extract the possible template values (either via JSON Schema enum or const).

    Returns the (non-empty) `enum` list if there is one, otherwise a
    single-element list holding the `const` value, otherwise an empty list.
    """
    enum = template.get("enum")
    if enum:
        return enum

    # Test for presence rather than truthiness so that a falsy const is still
    # reported as a template value.
    if "const" in template:
        return [template["const"]]

    return []


def patch_schema(schema, bundled_id, schema_id=None):
    """Patch the given schema.

    The JSON schema validator that Experimenter uses
    (https://pypi.org/project/jsonschema/) does not support relative references,
    nor does it support bundled schemas. We rewrite the schema so that all
    relative refs are transformed into absolute refs via the schema's `$id`.

    Additionally, we merge in the contents of FxMSCommon.schema.json, so all
    refs relative to that schema will be transformed to become relative to this
    schema.

    The schema is patched in place and also returned. `bundled_id` is the `$id`
    that references to FxMSCommon.schema.json are rewritten against;
    `schema_id` is the `$id` that fragment-only references are rewritten
    against and defaults to the schema's own `$id`.

    See-also: https://github.com/python-jsonschema/jsonschema/issues/313
    """
    if schema_id is None:
        schema_id = schema["$id"]

    def patch_impl(schema):
        # Boolean sub-schemas (`true` / `false`) have nothing to patch.
        if not isinstance(schema, dict):
            return

        ref = schema.get("$ref")

        if ref:
            uri = urlparse(ref)
            if (
                uri.scheme == ""
                and uri.netloc == ""
                and uri.path == ""
                and uri.fragment != ""
            ):
                # A fragment-only reference: make it absolute.
                schema["$ref"] = f"{schema_id}#{uri.fragment}"
            elif (uri.scheme, uri.path) == ("file", f"/{COMMON_SCHEMA_NAME}"):
                # A reference into the common schema: point it at the bundle.
                schema["$ref"] = f"{bundled_id}#{uri.fragment}"

        # If `schema` is object-like, inspect each of its individual properties
        # and patch them.
        properties = schema.get("properties")
        if properties:
            for prop in properties.values():
                patch_impl(prop)

        # If `schema` is array-like, inspect each of its items and patch them.
        # `items` may be either a single schema or an array of schemas.
        items = schema.get("items")
        if isinstance(items, list):
            for item in items:
                patch_impl(item)
        elif items:
            patch_impl(items)

        # Patch each `if`, `then`, `else`, and `not` sub-schema that is present.
        for key in ("if", "then", "else", "not"):
            if key in schema:
                patch_impl(schema[key])

        # Patch the items of each `oneOf`, `allOf`, and `anyOf` sub-schema that
        # is present.
        for key in ("oneOf", "allOf", "anyOf"):
            for alternate in schema.get(key) or ():
                patch_impl(alternate)

    # Patch the top-level type defined in the schema.
    patch_impl(schema)

    # Patch each named definition in the schema.
    for key in ("$defs", "definitions"):
        defns = schema.get(key)
        if defns:
            for defn in defns.values():
                patch_impl(defn)

    return schema


def bundle_schema(schema_def: SchemaDefinition):
    """Create a bundled schema based on the schema definition.

    Raises ValueError if a message type schema lacks an `$id` or an
    `enum`/`const` template, if two message types share a template value, or
    if the common `Message` definition was not bundled.
    """
    # Patch each message type schema to resolve all self-references to be
    # absolute and rewrite references to FxMSCommon.schema.json to be relative
    # to the new schema (because we are about to bundle its definitions).
    defs = {
        name: patch_schema(read_schema(path), bundled_id=schema_def.schema_id)
        for name, path in schema_def.message_types.items()
    }

    # Bundle the definitions from FxMSCommon.schema.json into this schema.
    if schema_def.bundle_common:

        def dfn_filter(name):
            if schema_def.bundle_common is True:
                return True

            return name in schema_def.bundle_common

        # patch_schema mutates the given schema, so we read a new copy in for
        # each bundle operation.
        common_schema = patch_schema(
            read_schema(COMMON_SCHEMA_PATH),
            bundled_id=schema_def.schema_id,
            schema_id=schema_def.schema_id,
        )

        defs.update({
            name: dfn
            for name, dfn in common_schema["$defs"].items()
            if dfn_filter(name)
        })

    # Ensure all bundled schemas have an $id so that $refs inside the
    # bundled schema work correctly (i.e, they will reference the subschema
    # and not the bundle).
    for name in schema_def.message_types:
        subschema = defs[name]
        if "$id" not in subschema:
            raise ValueError(f"Schema {name} is missing an $id")

        props = subschema.get("properties", {})
        if "template" not in props:
            raise ValueError(f"Schema {name} is missing a template")

        template = props["template"]
        if "enum" not in template and "const" not in template:
            raise ValueError(f"Schema {name} should have const or enum template")

    templates = {
        name: extract_template_values(defs[name]["properties"]["template"])
        for name in schema_def.message_types
    }

    # Ensure that each schema has a unique set of template values. Each
    # unordered pair only needs to be compared once.
    for (a, a_values), (b, b_values) in combinations(templates.items(), 2):
        intersection = set(a_values).intersection(b_values)

        if intersection:
            # Sort so that the error message is deterministic.
            raise ValueError(
                f"Schema {a} and {b} have overlapping template values: "
                f"{', '.join(sorted(intersection))}"
            )

    all_templates = list(chain.from_iterable(templates.values()))

    # The generated schema references the common `Message` definition, so it
    # must have been bundled above.
    if "Message" not in defs:
        raise ValueError(
            f"The Message definition from {COMMON_SCHEMA_NAME} must be bundled"
        )

    # Enforce that one of the templates must match (so that one of the if
    # branches will match).
    defs["Message"]["properties"]["template"]["enum"] = all_templates
    defs["TemplatedMessage"] = {
        "description": "An FxMS message of one of a variety of types.",
        "type": "object",
        "allOf": [
            # Ensure each message has all the fields defined in the base
            # Message type.
            #
            # This is slightly redundant because each message should
            # already inherit from this message type, but it is easier
            # to add this requirement here than to verify that each
            # message's schema is properly inheriting.
            {"$ref": f"{schema_def.schema_id}#/$defs/Message"},
            # For each message type, create a subschema that says if the
            # template field matches a value for a message type defined
            # in MESSAGE_TYPES, then the message must also match the
            # schema for that message type.
            #
            # This is done using `allOf: [{ if, then }]` instead of `oneOf: []`
            # because it provides better error messages. Using `if-then`
            # will only show validation errors for the sub-schema that
            # matches template, whereas using `oneOf` will show
            # validation errors for *all* sub-schemas, which makes
            # debugging messages much harder.
            *(
                {
                    "if": {
                        "type": "object",
                        "properties": {
                            "template": {
                                "type": "string",
                                "enum": templates[message_type],
                            },
                        },
                        "required": ["template"],
                    },
                    "then": {"$ref": f"{schema_def.schema_id}#/$defs/{message_type}"},
                }
                for message_type in schema_def.message_types
            ),
        ],
    }
    defs["MultiMessage"] = {
        "description": "An object containing an array of messages.",
        "type": "object",
        "properties": {
            "template": {"type": "string", "const": "multi"},
            "messages": {
                "type": "array",
                "description": "An array of messages.",
                "items": {"$ref": f"{schema_def.schema_id}#/$defs/TemplatedMessage"},
            },
        },
        "required": ["template", "messages"],
    }

    # Generate the combined schema.
    return {
        "$schema": "https://json-schema.org/draft/2019-09/schema",
        "$id": schema_def.schema_id,
        "title": "Messaging Experiment",
        "description": "A Firefox Messaging System message.",
        # A message must be one of:
        # - An object that contains id, template, and content fields
        # - An object that contains none of the above fields (empty message)
        # - An array of messages like the above
        "if": {
            "type": "object",
            "properties": {"template": {"const": "multi"}},
            "required": ["template"],
        },
        "then": {
            "$ref": f"{schema_def.schema_id}#/$defs/MultiMessage",
        },
        "else": {
            "$ref": f"{schema_def.schema_id}#/$defs/TemplatedMessage",
        },
        "$defs": defs,
    }


def check_diff(schema_def: SchemaDefinition, schema: dict[str, Any]):
    """Check the generated schema matches the on-disk schema.

    Prints both schemas and raises ValueError if they differ.
    """
    print(f" Checking {schema_def.schema_path} for differences...")

    with schema_def.schema_path.open("r", encoding="utf-8") as f:
        on_disk = json.load(f)

    if on_disk != schema:
        print(f"{schema_def.schema_path} does not match generated schema:")
        print("Generated schema:")
        json.dump(schema, sys.stdout, indent=2)
        print("\n\nOn Disk schema:")
        json.dump(on_disk, sys.stdout, indent=2)
        print("\n\n")

        raise ValueError("Schemas do not match!")


def validate_corpus(schema_def: SchemaDefinition, schema: dict[str, Any]):
    """Check that the schema validates.

    This uses the same validation configuration that is used in Experimenter.
    Every message of every test corpus file of `schema_def` is validated
    against `schema`; the first validation failure propagates to the caller.
    """
    print(" Validating messages with Experimenter JSON Schema validator...")

    resolver = NestedRefResolver(schema)

    for provider, provider_path in schema_def.test_corpus.items():
        print(f" Validating messages from {provider}:")

        try:
            with provider_path.open("r", encoding="utf-8") as f:
                messages = json.load(f)
        except FileNotFoundError as e:
            # If the whole corpus directory is absent, the corpus has most
            # likely not been generated yet, so point at the tool that does so.
            if not provider_path.parent.exists():
                new_exc = Exception(
                    f"Could not find {provider_path}: Did you run "
                    "`mach xpcshell extract-test-corpus.js` ?"
                )
                raise new_exc from e

            raise

        for i, message in enumerate(messages):
            template = message.get("template", "(no template)")
            msg_id = message.get("id", f"index {i}")

            print(
                f" Validating {msg_id} {template} message with {schema_def.schema_path}..."
            )
            jsonschema.validate(instance=message, schema=schema, resolver=resolver)

        print()


def main(check=False):
    """Generate Nimbus feature schemas for Firefox Messaging System."""
    for schema_def in SCHEMAS:
        print(f"Generating {schema_def.schema_path} ...")
        schema = bundle_schema(schema_def)

        if check:
            print(f"Checking {schema_def.schema_path} ...")
            check_diff(schema_def, schema)
            validate_corpus(schema_def, schema)
        else:
            with schema_def.schema_path.open("wb") as f:
                print(f"Writing {schema_def.schema_path} ...")
                f.write(json.dumps(schema, indent=2).encode("utf-8"))
                f.write(b"\n")


if __name__ == "__main__":
    parser = ArgumentParser(description=main.__doc__)
    parser.add_argument(
        "--check",
        action="store_true",
        help="Check that the generated schemas have not changed and run validation tests.",
        default=False,
    )
    args = parser.parse_args()

    main(args.check)