tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

generate.py (19032B)


      1 #!/usr/bin/env python3
      2 
      3 # Usage: python3 generate.py
      4 #
      5 # This will remove all existing .html files in the generated directories and generate new tests.
      6 
      7 
      8 # Notes on potential confusion with the 3 string substitution features in different layers:
      9 #
     10 # - In Python strings when calling .format(): {something} or {}
     11 #   To get a literal {} use {{}}.
     12 #   The template_* variables below are the ones that will use .format().
     13 #   https://docs.python.org/3/library/string.html#formatstrings
     14 # - JS template literals: ${something}
     15 #   https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Template_literals
     16 # - wptserve server-side substitution when generating a response: {{GET[something]}}
     17 #   https://web-platform-tests.org/writing-tests/server-pipes.html#sub
     18 
     19 import os, shutil
     20 
     21 target_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + "/generated"
     22 
     23 delay = u'1500'  # Lower value makes the test complete faster, but also higher risk of flaky results
     24 
     25 # Test data
     26 
     27 tentative_tests = [
     28    # title,
     29    # encoding,
     30    # template_testcase_markup,
     31    # template_nonspeculative_testcase_markup (if different from template_testcase_markup),
     32    # expect_load,
     33    # test_nonspeculative
     34    (
     35      u'script-src',
     36      u'utf-8',
     37      u'<script src="{}"></script>',
     38      None,
     39      u'true',
     40      u'true'
     41    ),
     42    (
     43      u'meta-charset-script-src',
     44      None,
     45      u'<meta charset=windows-1254><script src="{}"></script>',
     46      u'<!-- no meta charset --><script src="{}"></script>',
     47      u'true',
     48      u'true'
     49    ),
     50    (
     51      # This test is only valid on "mobile" where meta viewport has an effect
     52      u'meta-viewport-link-stylesheet-media',
     53      u'utf-8',
     54      u'<meta name=viewport content="width=400, initial-scale=1"><link rel=stylesheet href="{}" media="(min-width: 401px)">',
     55      None,
     56      u'false',
     57      u'true'
     58    ),
     59    (
     60      u'meta-csp-img-src-none',
     61      u'utf-8',
     62      u'<meta http-equiv="Content-Security-Policy" content="script-src \'self\' \'unsafe-inline\'; img-src \'none\'"><img src="{}">',
     63      None,
     64      u'false',
     65      u'true'
     66    ),
     67    (
     68      u'meta-csp-img-src-asterisk',
     69      u'utf-8',
     70      u'<meta http-equiv="Content-Security-Policy" content="script-src \'self\' \'unsafe-inline\'; img-src *"><img src="{}">',
     71      None,
     72      u'true',
     73      u'true'
     74    ),
     75    (
     76      u'meta-referrer-no-referrer-img-src',
     77      u'utf-8',
     78      u'<meta name=referrer content=no-referrer><img src="{}">',
     79      None,
     80      u'true',
     81      u'true'
     82    ),
     83    (
     84      u'base-href-script-src',
     85      u'utf-8',
     86      u'<base href=//{{{{domains[www1]}}}}:{{{{ports[http][0]}}}}><script src="{}"></script>',
     87      None,
     88      u'true',
     89      u'true'
     90    ),
     91    (
     92      u'script-src-unsupported-type',
     93      u'utf-8',
     94      u'<script src="{}" type=text/plain></script>',
     95      None,
     96      u'false',
     97      u'true'
     98    ),
     99    (
    100      u'script-src-type-application-ecmascript',
    101      u'utf-8',
    102      u'<script src="{}" type=application/ecmascript></script>',
    103      None,
    104      u'true',
    105      u'true'
    106    ),
    107    (
    108      u'script-src-nomodule',
    109      u'utf-8',
    110      u'<script src="{}" nomodule></script>',
    111      None,
    112      u'false',
    113      u'true'
    114    ),
    115    (
    116      u'script-src-module',
    117      u'utf-8',
    118      u'<script src="{}" type=module></script>',
    119      None,
    120      u'true',
    121      u'true'
    122    ),
    123    (
    124      u'script-src-async',
    125      u'utf-8',
    126      u'<script src="{}" async></script>',
    127      None,
    128      u'true',
    129      u'true'
    130    ),
    131    (
    132      u'script-src-defer',
    133      u'utf-8',
    134      u'<script src="{}" defer></script>',
    135      None,
    136      u'true',
    137      u'true'
    138    ),
    139    (
    140      u'script-src-crossorigin',
    141      u'utf-8',
    142      u'<script src="{}" crossorigin></script>',
    143      None,
    144      u'true',
    145      u'true'
    146    ),
    147    (
    148      u'script-src-integrity',
    149      u'utf-8',
    150      u'<script src="{}" integrity="sha384-OLBgp1GsljhM2TJ+sbHjaiH9txEUvgdDTAzHv2P24donTt6/529l+9Ua0vFImLlb"></script>',
    151      None,
    152      u'true',
    153      u'true'
    154    ),
    155    (
    156      u'script-src-referrerpolicy-no-referrer',
    157      u'utf-8',
    158      u'<script src="{}" referrerpolicy=no-referrer></script>',
    159      None,
    160      u'true',
    161      u'true'
    162    ),
    163    (
    164      u'template-script-src',
    165      u'utf-8',
    166      u'<template><script src="{}"></script></template>',
    167      None,
    168      u'false',
    169      u'true'
    170    ),
    171    (
    172      u'template-link-stylesheet',
    173      u'utf-8',
    174      u'<template><link rel=stylesheet href="{}"></template>',
    175      None,
    176      u'false',
    177      u'true'
    178    ),
    179    (
    180      u'template-img-src',
    181      u'utf-8',
    182      u'<template><img src="{}"></template>',
    183      None,
    184      u'false',
    185      u'true'
    186    ),
    187    (
    188      u'template-shadowrootmode-script-src',
    189      u'utf-8',
    190      u'<div><template shadowrootmode="closed"><script src="{}"></script></template></div>',
    191      None,
    192      u'true',
    193      u'true'
    194    ),
    195    (
    196      u'template-shadowrootmode-link-stylesheet',
    197      u'utf-8',
    198      u'<div><template shadowrootmode="closed"><link rel=stylesheet href="{}"></template></div>',
    199      None,
    200      u'true',
    201      u'true'
    202    ),
    203    (
    204      u'template-shadowrootmode-img-src',
    205      u'utf-8',
    206      u'<div><template shadowrootmode="closed"><img src="{}"></template></div>',
    207      None,
    208      u'true',
    209      u'true'
    210    ),
    211    (
    212      u'nested-template-shadowrootmode-1',
    213      u'utf-8',
    214      u'<template><div><template shadowrootmode="closed"><script src="{}"></script></template></div></template>',
    215      None,
    216      u'false',
    217      u'true'
    218    ),
    219    (
    220      u'nested-template-shadowrootmode-2',
    221      u'utf-8',
    222      u'<div><template shadowrootmode="closed"><template><script src="{}"></script></template></template></div>',
    223      None,
    224      u'false',
    225      u'true'
    226    ),
    227    (
    228      u'link-no-rel',
    229      u'utf-8',
    230      u'<link href="{}">',
    231      None,
    232      u'false',
    233      u'true'
    234    ),
    235    (
    236      u'link-rel-stylesheet',
    237      u'utf-8',
    238      u'<link rel=stylesheet href="{}">',
    239      None,
    240      u'true',
    241      u'true'
    242    ),
    243    (
    244      u'link-rel-alternate-stylesheet',
    245      u'utf-8',
    246      u'<link rel="alternate stylesheet" href="{}">',
    247      None,
    248      u'false',
    249      u'true'
    250    ),
    251    (
    252      u'link-rel-stylesheet-disabled',
    253      u'utf-8',
    254      u'<link rel="stylesheet" href="{}" disabled>',
    255      None,
    256      u'false',
    257      u'true'
    258    ),
    259    (
    260      u'link-rel-stylesheet-nomatch-media',
    261      u'utf-8',
    262      u'<link rel=stylesheet href="{}" media="not all">',
    263      None,
    264      u'false',
    265      u'true'
    266    ),
    267    (
    268      u'link-rel-stylesheet-unsupported-type',
    269      u'utf-8',
    270      u'<link rel=stylesheet href="{}" type=text/plain>',
    271      None,
    272      u'false',
    273      u'true'
    274    ),
    275    (
    276      u'link-rel-stylesheet-type-text-css',
    277      u'utf-8',
    278      u'<link rel=stylesheet href="{}" type=text/css>',
    279      None,
    280      u'true',
    281      u'true'
    282    ),
    283    (
    284      u'link-rel-stylesheet-crossorigin',
    285      u'utf-8',
    286      u'<link rel=stylesheet href="{}" crossorigin>',
    287      None,
    288      u'true',
    289      u'true'
    290    ),
    291    (
    292      u'link-rel-stylesheet-integrity',
    293      u'utf-8',
    294      u'<link rel=stylesheet href="{}" integrity="sha384-OLBgp1GsljhM2TJ+sbHjaiH9txEUvgdDTAzHv2P24donTt6/529l+9Ua0vFImLlb">',
    295      None,
    296      u'true',
    297      u'true'
    298    ),
    299    (
    300      u'link-rel-stylesheet-referrerpolicy-no-referrer',
    301      u'utf-8',
    302      u'<link rel=stylesheet href="{}" referrerpolicy=no-referrer>',
    303      None,
    304      u'true',
    305      u'true'
    306    ),
    307    (
    308      u'link-rel-preload-as-style',
    309      u'utf-8',
    310      u'<link rel=preload as=style href="{}">',
    311      None,
    312      u'true',
    313      u'true'
    314    ),
    315    (
    316      u'link-rel-preload-as-font-crossorigin',
    317      u'utf-8',
    318      u'<link rel=preload as=font href="{}" crossorigin>',
    319      None,
    320      u'true',
    321      u'true'
    322    ),
    323    (
    324      u'link-rel-preload-as-script',
    325      u'utf-8',
    326      u'<link rel=preload as=script href="{}">',
    327      None,
    328      u'true',
    329      u'true'
    330    ),
    331    (
    332      u'link-rel-preload-as-image',
    333      u'utf-8',
    334      u'<link rel=preload as=image href="{}">',
    335      None,
    336      u'true',
    337      u'true'
    338    ),
    339    (
    340      u'img-src',
    341      u'utf-8',
    342      u'<img src="{}">',
    343      None,
    344      u'true',
    345      u'true'
    346    ),
    347    (
    348      u'img-data-src',
    349      u'utf-8',
    350      u'<img data-src="{}">',
    351      None,
    352      u'false',
    353      u'true'
    354    ),
    355    (
    356      # <image> is turned into <img> in the tree builder
    357      u'image-src',
    358      u'utf-8',
    359      u'<image src="{}">',
    360      None,
    361      u'true',
    362      u'true'
    363    ),
    364    (
    365      u'img-srcset',
    366      u'utf-8',
    367      u'<img srcset="{}">',
    368      None,
    369      u'true',
    370      u'true'
    371    ),
    372    (
    373      u'img-src-crossorigin',
    374      u'utf-8',
    375      u'<img src="{}" crossorigin>',
    376      None,
    377      u'true',
    378      u'true'
    379    ),
    380    (
    381      u'img-src-referrerpolicy-no-referrer',
    382      u'utf-8',
    383      u'<img src="{}" referrerpolicy=no-referrer>',
    384      None,
    385      u'true',
    386      u'true'
    387    ),
    388    (
    389      u'img-src-loading-lazy',
    390      u'utf-8',
    391      u'<img src="{}" loading=lazy>',
    392      None,
    393      u'false',
    394      u'false'
    395    ),
    396    (
    397      u'picture-source-unsupported-type',
    398      u'utf-8',
    399      u'<picture><source srcset="{}" type=text/plain><img></picture>',
    400      None,
    401      u'false',
    402      u'true'
    403    ),
    404    (
    405      u'picture-source-nomatch-media',
    406      u'utf-8',
    407      u'<picture><source srcset="{}" media="not all"><img></picture>',
    408      None,
    409      u'false',
    410      u'true'
    411    ),
    412    (
    413      u'picture-source-no-img',
    414      u'utf-8',
    415      u'<picture><source srcset="{}"></picture>',
    416      None,
    417      u'false',
    418      u'true'
    419    ),
    420    (
    421      u'picture-source-br-img',
    422      u'utf-8',
    423      u'<picture><source srcset="{}"><br><img></picture>',
    424      None,
    425      u'true',
    426      u'true'
    427    ),
    428    (
    429      u'video-poster',
    430      u'utf-8',
    431      u'<video poster="{}"></video>',
    432      None,
    433      u'true',
    434      u'true'
    435    ),
    436    (
    437      u'xmp-script-src',
    438      u'utf-8',
    439      u'<xmp><script src="{}"></script></xmp>',
    440      None,
    441      u'false',
    442      u'true'
    443    ),
    444    (
    445      # MathML doesn't have script
    446      u'math-script-src',
    447      u'utf-8',
    448      u'<math><script src="{}"></script></math>',
    449      None,
    450      u'false',
    451      u'true'
    452    ),
    453    (
    454      u'math-font-script-src',
    455      u'utf-8',
    456      u'<math><font><script src="{}"></script></font></math>',
    457      None,
    458      u'false',
    459      u'true'
    460    ),
    461    (
    462      # This breaks out of foreign content, so the script is an HTML script
    463      # https://html.spec.whatwg.org/multipage/#parsing-main-inforeign
    464      u'math-font-face-script-src',
    465      u'utf-8',
    466      u'<math><font face><script src="{}"></script></font></math>',
    467      None,
    468      u'true',
    469      u'true'
    470    ),
    471    (
    472      u'svg-script-href',
    473      u'utf-8',
    474      u'<svg><script href="{}"></script></svg>',
    475      None,
    476      u'true',
    477      u'true'
    478    ),
    479    (
    480      u'svg-script-xlinkhref',
    481      u'utf-8',
    482      u'<svg><script xlink:href="{}"></script></svg>',
    483      None,
    484      u'true',
    485      u'true'
    486    ),
    487    (
    488      # SVG script element doesn't have a src attribute
    489      u'svg-script-src',
    490      u'utf-8',
    491      u'<svg><script src="{}"></script></svg>',
    492      None,
    493      u'false',
    494      u'true'
    495    ),
    496    (
    497      u'svg-image-href',
    498      u'utf-8',
    499      u'<svg><image href="{}"></image></svg>',
    500      None,
    501      u'true',
    502      u'true'
    503    ),
    504    (
    505      u'svg-image-xlinkhref',
    506      u'utf-8',
    507      u'<svg><image xlink:href="{}"></image></svg>',
    508      None,
    509      u'true',
    510      u'true'
    511    ),
    512    (
    513      # SVG image element doesn't have a src attribute
    514      u'svg-image-src',
    515      u'utf-8',
    516      u'<svg><image src="{}"></image></svg>',
    517      None,
    518      u'false',
    519      u'true'
    520    ),
    521 ]
    522 
    523 tests = [
    524    # title,
    525    # encoding,
    526    # template_testcase_markup, template_nonspeculative_testcase_markup (None if same as template_testcase_markup),
    527    # expect_load,
    528    # test_nonspeculative
    529 ]
    530 
    531 # Templates
    532 
    533 preamble = u"""<!DOCTYPE html>
    534 <!-- DO NOT EDIT. This file has been generated. Source:
    535     /html/syntax/speculative-parsing/tools/generate.py
    536 -->"""
    537 
    538 no_meta_charset = u"""<!-- no meta charset -->
    539 <!-- (padding to exceed 1024 bytes processed by the character encoding scanner)                  -->
    540 <!--                                                                                             -->
    541 <!--                                                                                             -->
    542 <!--                                                                                             -->
    543 <!--                                                                                             -->
    544 <!--                                                                                             -->
    545 <!--                                                                                             -->
    546 <!--                                                                                             -->
    547 <!--                                                                                             -->
    548 <!--                                                                                             -->"""
    549 
    550 # Notes on `encodingcheck` in the URL below
    551 #
    552 # - &Gbreve; is the HTML character reference for U+011E LATIN CAPITAL LETTER G WITH BREVE
    553 # - In windows-1254, this character is encoded as 0xD0.
    554 #   When used in the query part of a URL, it gets percent-encoded as %D0.
    555 # - In windows-1252 (usually the fallback encoding), that character can't be encoded, so is instead
    556 #   represented as &#286; percent-encoded, so %26%23286%3B.
    557 #   https://url.spec.whatwg.org/#query-state
    558 #   https://url.spec.whatwg.org/#code-point-percent-encode-after-encoding
    559 # - In utf-8, it's percent-encoded as utf-8: %C4%9E
    560 # - stash.py will store this value as "param-encodingcheck"
    561 
    562 url_wptserve_sub = u"/html/syntax/speculative-parsing/resources/stash.py?action=put&amp;uuid={{GET[uuid]}}&amp;encodingcheck=&Gbreve;"
    563 url_js_sub = u"/html/syntax/speculative-parsing/resources/stash.py?action=put&amp;uuid=${uuid}&amp;encodingcheck=&Gbreve;"
    564 
    565 
    566 # Non-speculative (normal) case to compare results with
    567 
    568 template_nonspeculative = u"""{preamble}
    569 {encoding_decl}
    570 <title>Speculative parsing, non-speculative (helper file): {title}</title>
    571 <!-- non-speculative case -->
    572 {nonspeculative_testcase_markup}
    573 <!-- block the load event for a bit: -->
    574 <script src="/common/slow.py?delay={delay}"></script>
    575 """
    576 
    577 # Scenario: page load
    578 
    579 template_pageload_toplevel = u"""{preamble}
    580 {encoding_decl}
    581 <title>Speculative parsing, page load: {title}</title>
    582 <script src=/resources/testharness.js></script>
    583 <script src=/resources/testharnessreport.js></script>
    584 <script src=/common/utils.js></script>
    585 <script src=/html/syntax/speculative-parsing/resources/speculative-parsing-util.js></script>
    586 <body>
    587 <script>
    588  setup({{single_test: true}});
    589  const uuid = token();
    590  const iframe = document.createElement('iframe');
    591  iframe.src = `resources/{title}-framed.sub.html?uuid=${{uuid}}`;
    592  document.body.appendChild(iframe);
    593  expect_fetched_onload(uuid, {expect_load})
    594    .then(compare_with_nonspeculative(uuid, '{title}', {test_nonspeculative}))
    595    .then(done);
    596 </script>
    597 """
    598 
    599 template_pageload_framed = u"""{preamble}
    600 {encoding_decl}
    601 <title>Speculative parsing, page load (helper file): {title}</title>
    602 <script src="/common/slow.py?delay={delay}"></script>
    603 <script>
    604  document.write('<plaintext>');
    605 </script>
    606 <!-- speculative case -->
    607 {testcase_markup}
    608 """
    609 
    610 # Scenario: document.write()
    611 
    612 template_docwrite = u"""{preamble}
    613 {encoding_decl}
    614 <title>Speculative parsing, document.write(): {title}</title>
    615 <script src=/resources/testharness.js></script>
    616 <script src=/resources/testharnessreport.js></script>
    617 <script src=/common/utils.js></script>
    618 <script src=/html/syntax/speculative-parsing/resources/speculative-parsing-util.js></script>
    619 <script>
    620  setup({{single_test: true}});
    621  const uuid = token();
    622  expect_fetched_onload(uuid, {expect_load})
    623    .then(compare_with_nonspeculative(uuid, '{title}', {test_nonspeculative}))
    624    .then(done);
    625  document.write(`
    626    <script src="/common/slow.py?delay={delay}"><\\/script>
    627    <script>
    628     document.write('<plaintext>');
    629    <\\/script>
    630    <\\!-- speculative case in document.write -->
    631    {testcase_markup}
    632  `);
    633 </script>
    634 """
    635 
    636 # Scenario: <link rel=prerender> - TODO(zcorpan)
    637 
    638 template_prerender_toplevel = u"""{preamble}
    639 {encoding_decl}
    640 <title>Speculative parsing, prerender: {title}</title>
    641 ...
    642 """
    643 
    644 template_prerender_linked = u"""{preamble}
    645 {encoding_decl}
    646 <title>Speculative parsing, prerender (helper file): {title}</title>
    647 ...
    648 """
    649 
    650 # Generate tests
    651 
    652 # wipe target_dir of HTML files
    653 if os.path.isdir(target_dir):
    654  for root, dirs, files in os.walk(target_dir):
    655    for name in files:
    656      if name.endswith('.html'):
    657        path = os.path.join(root, name)
    658        if os.path.isfile(path):
    659          os.remove(path)
    660 
    661 def write_file(path, content):
    662    path = os.path.join(target_dir, path)
    663    os.makedirs(os.path.dirname(path), exist_ok=True)
    664    file = open(os.path.join(target_dir, path), 'w')
    665    file.write(content)
    666    file.close()
    667 
    668 def generate_tests(testcase, tentative):
    669    title, encoding, template_testcase_markup, template_nonspeculative_testcase_markup, expect_load, test_nonspeculative = testcase
    670    if template_nonspeculative_testcase_markup == None:
    671        template_nonspeculative_testcase_markup = template_testcase_markup
    672    ext = u""
    673    if tentative:
    674        ext = u".tentative"
    675 
    676    if encoding == None:
    677        encoding_decl = no_meta_charset
    678    else:
    679        encoding_decl = f"<meta charset={encoding}>"
    680 
    681    html_testcase_markup = template_testcase_markup.format(url_wptserve_sub)
    682    html_nonspeculative_testcase_markup = template_nonspeculative_testcase_markup.format(url_wptserve_sub)
    683    js_testcase_markup = template_testcase_markup.format(url_js_sub).replace(u"</script>", u"<\\/script>").replace(u"<meta charset", u"<meta\\ charset")
    684 
    685    if test_nonspeculative == u'true':
    686        nonspeculative = template_nonspeculative.format(preamble=preamble, encoding_decl=encoding_decl, title=title, nonspeculative_testcase_markup=html_nonspeculative_testcase_markup, delay=delay)
    687        write_file(f"resources/{title}-nonspeculative.sub.html", nonspeculative)
    688 
    689    pageload_toplevel = template_pageload_toplevel.format(preamble=preamble, encoding_decl=encoding_decl, title=title, expect_load=expect_load, test_nonspeculative=test_nonspeculative)
    690    write_file(f"page-load/{title}{ext}.html", pageload_toplevel)
    691    pageload_framed = template_pageload_framed.format(preamble=preamble, encoding_decl=encoding_decl, title=title, testcase_markup=html_testcase_markup, delay=delay)
    692    write_file(f"page-load/resources/{title}-framed.sub.html", pageload_framed)
    693 
    694    docwrite = template_docwrite.format(preamble=preamble, encoding_decl=encoding_decl, title=title, expect_load=expect_load, testcase_markup=js_testcase_markup, test_nonspeculative=test_nonspeculative, delay=delay)
    695    write_file(f"document-write/{title}{ext}.sub.html", docwrite)
    696 
    697 for testcase in tests:
    698    generate_tests(testcase, False)
    699 
    700 for testcase in tentative_tests:
    701    generate_tests(testcase, True)