tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

generate-segment-break-transformation-rules-tests.py (4812B)


      1 #!/usr/bin/env python
      2 # - * - coding: UTF-8 - * -
      3 
      4 """
      5 This script generates tests segment-break-transformation-rules-001 ~ 049 which
      6 cover all possible combinations of characters at two sides of segment breaks.
      7 More specifically, there are seven types of characters involved in these rules:
      8 
      9 1. East Asian Full-width (F)
     10 2. East Asian Half-width (H)
     11 3. East Asian Wide (W) except Hangul
     12 4. East Asian Narrow (Na)
     13 5. East Asian Ambiguous (A)
     14 6. Not East Asian (Neutral)
     15 7. Hangul
     16 
     17 So there are 49 different combinations. It outputs a list of all
     18 tests it generated in the format of Mozilla reftest.list to the stdout.
     19 """
     20 
     21 TEST_FILE = 'segment-break-transformation-rules-{:03}.html'
     22 TEST_TEMPLATE = '''<!DOCTYPE html>
     23 <meta charset="utf-8">
     24 <title>CSS Reftest Test: Segment Break Transformation Rules</title>
     25 <link rel="author" title="Chun-Min (Jeremy) Chen" href="mailto:jeremychen@mozilla.com">
     26 <link rel="author" title="Mozilla" href="https://www.mozilla.org">
     27 <link rel="help" href="https://drafts.csswg.org/css-text-3/#line-break-transform">
     28 <meta name="assert" content="'segment-break-transformation-rules: with {prev}/{next} in front/back of the semgment break.">
     29 <link rel="stylesheet" type="text/css" href="/fonts/ahem.css">
     30 <link rel="match" href="segment-break-transformation-rules-{index:03}-ref.html">
     31 <style> p {{ font-family: ahem; }} </style>
     32 <div>Pass if there is {expect} white space between the two strings below.
     33 <p>{prevchar}&#x000a;{nextchar}</p>
     34 </div>
     35 '''
     36 
     37 REF_FILE = 'segment-break-transformation-rules-{:03}-ref.html'
     38 REF_TEMPLATE_REMOVE = '''<!DOCTYPE html>
     39 <meta charset="utf-8">
     40 <title>CSS Reftest Reference: Segment Break Transformation Rules</title>
     41 <link rel="author" title="Chun-Min (Jeremy) Chen" href="mailto:jeremychen@mozilla.com">
     42 <link rel="author" title="Mozilla" href="https://www.mozilla.org">
     43 <link rel="stylesheet" type="text/css" href="/fonts/ahem.css">
     44 <style> p {{ font-family: ahem; }} </style>
     45 <div>Pass if there is NO white space between the two strings below.
     46 <p>{0}{1}</p>
     47 </div>
     48 '''
     49 REF_TEMPLATE_KEEP = '''<!DOCTYPE html>
     50 <meta charset="utf-8">
     51 <title>CSS Reftest Reference: Segment Break Transformation Rules</title>
     52 <link rel="author" title="Chun-Min (Jeremy) Chen" href="mailto:jeremychen@mozilla.com">
     53 <link rel="author" title="Mozilla" href="https://www.mozilla.org">
     54 <link rel="stylesheet" type="text/css" href="/fonts/ahem.css">
     55 <style> p {{ font-family: ahem; }} </style>
     56 <div>Pass if there is ONE white space between the two strings below.
     57 <p>{0}{2}{1}</p>
     58 </div>
     59 '''
     60 
     61 CHAR_SET = [
     62        ('East Asian Full-width (F)',         'FULLWIDTH'),
     63        ('East Asian Half-width (H)',         'テスト'),
     64        ('East Asian Wide (W) except Hangul', '測試'),
     65        ('East Asian Narrow (Na)',            'narrow'),
     66        ('East Asian Ambiguous (A)',          '■'),
     67        ('Not East Asian (Neutral)',          'آزمون'),
     68        ('Hangul',                            '테스트'),
     69        ]
     70 
     71 def write_file(filename, content):
     72    with open(filename, 'wb') as f:
     73        f.write(content.encode('UTF-8'))
     74 
     75 print("# START tests from {}".format(__file__))
     76 global idx
     77 idx = 0
     78 for i, (prevtype, prevchars) in enumerate(CHAR_SET):
     79    for j, (nextype, nextchars) in enumerate(CHAR_SET):
     80        idx += 1
     81        reffilename = REF_FILE.format(idx)
     82        testfilename = TEST_FILE.format(idx)
     83        # According to CSS Text 3 - 4.1.2. Segment Break Transformation Rules,
     84        # if the East Asian Width property of both the character before and
     85        # after the segment break is F, W, or H (not A), and neither side is
     86        # Hangul, then the segment break is removed. Otherwise, the segment
     87        # break is converted to a space (U+0020).
     88        if i < 3 and j < 3:
     89            write_file(reffilename,
     90                       REF_TEMPLATE_REMOVE.format(prevchars, nextchars))
     91            write_file(testfilename,
     92                       TEST_TEMPLATE.format(index=idx, prev=prevtype,
     93                                            next=nextype,
     94                                            prevchar=prevchars,
     95                                            nextchar=nextchars,
     96                                            expect='NO'))
     97        else:
     98            write_file(reffilename,
     99                       REF_TEMPLATE_KEEP.format(prevchars, nextchars, '&#x0020;'))
    100            write_file(testfilename,
    101                       TEST_TEMPLATE.format(index=idx, prev=prevtype,
    102                                            next=nextype,
    103                                            prevchar=prevchars,
    104                                            nextchar=nextchars,
    105                                            expect='ONE'))
    106        print("== {} {}".format(testfilename, reffilename))
    107 print("# END tests from {}".format(__file__))