tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

filtration_test.py (12824B)


      1 # Copyright (C) 2018 and later: Unicode, Inc. and others.
      2 # License & terms of use: http://www.unicode.org/copyright.html
      3 
      4 import io as pyio
      5 import json
      6 import os
      7 import unittest
      8 
      9 from .. import InFile
     10 from ..comment_stripper import CommentStripper
     11 from ..filtration import Filter
     12 
     13 EXAMPLE_FILE_STEMS = [
     14    "af_NA",
     15    "af_VARIANT",
     16    "af_ZA_VARIANT",
     17    "af_ZA",
     18    "af",
     19    "ar",
     20    "ar_SA",
     21    "ars",
     22    "bs_BA",
     23    "bs_Cyrl_BA",
     24    "bs_Cyrl",
     25    "bs_Latn_BA",
     26    "bs_Latn",
     27    "bs",
     28    "en_001",
     29    "en_150",
     30    "en_DE",
     31    "en_GB",
     32    "en_US",
     33    "root",
     34    "sr_BA",
     35    "sr_CS",
     36    "sr_Cyrl_BA",
     37    "sr_Cyrl_CS",
     38    "sr_Cyrl_ME",
     39    "sr_Cyrl",
     40    "sr_Latn_BA",
     41    "sr_Latn_CS",
     42    "sr_Latn_ME_VARIANT",
     43    "sr_Latn_ME",
     44    "sr_Latn",
     45    "sr_ME",
     46    "sr",
     47    "vai_Latn_LR",
     48    "vai_Latn",
     49    "vai_LR",
     50    "vai_Vaii_LR",
     51    "vai_Vaii",
     52    "vai",
     53    "yue",
     54    "zh_CN",
     55    "zh_Hans_CN",
     56    "zh_Hans_HK",
     57    "zh_Hans_MO",
     58    "zh_Hans_SG",
     59    "zh_Hans",
     60    "zh_Hant_HK",
     61    "zh_Hant_MO",
     62    "zh_Hant_TW",
     63    "zh_Hant",
     64    "zh_HK",
     65    "zh_MO",
     66    "zh_SG",
     67    "zh_TW",
     68    "zh"
     69 ]
     70 
     71 
     72 class TestIO(object):
     73    def __init__(self):
     74        pass
     75 
     76    def read_locale_deps(self, tree):
     77        if tree not in ("brkitr", "locales", "rbnf"):
     78            return None
     79        with pyio.open(os.path.join(
     80                os.path.dirname(__file__),
     81                "sample_data",
     82                tree,
     83                "LOCALE_DEPS.json"
     84                ), "r", encoding="utf-8-sig") as f:
     85            return json.load(CommentStripper(f))
     86 
     87 
     88 class FiltrationTest(unittest.TestCase):
     89 
     90    def test_exclude(self):
     91        self._check_filter(Filter.create_from_json({
     92            "filterType": "exclude"
     93        }, TestIO()), [
     94        ])
     95 
     96    def test_default_whitelist(self):
     97        self._check_filter(Filter.create_from_json({
     98            "whitelist": [
     99                "ars",
    100                "zh_Hans"
    101            ]
    102        }, TestIO()), [
    103            "ars",
    104            "zh_Hans"
    105        ])
    106 
    107    def test_default_blacklist(self):
    108        expected_matches = set(EXAMPLE_FILE_STEMS)
    109        expected_matches.remove("ars")
    110        expected_matches.remove("zh_Hans")
    111        self._check_filter(Filter.create_from_json({
    112            "blacklist": [
    113                "ars",
    114                "zh_Hans"
    115            ]
    116        }, TestIO()), expected_matches)
    117 
    118    def test_language_whitelist(self):
    119        self._check_filter(Filter.create_from_json({
    120            "filterType": "language",
    121            "whitelist": [
    122                "af",
    123                "bs"
    124            ]
    125        }, TestIO()), [
    126            "root",
    127            "af_NA",
    128            "af_VARIANT",
    129            "af_ZA_VARIANT",
    130            "af_ZA",
    131            "af",
    132            "bs_BA",
    133            "bs_Cyrl_BA",
    134            "bs_Cyrl",
    135            "bs_Latn_BA",
    136            "bs_Latn",
    137            "bs"
    138        ])
    139 
    140    def test_language_blacklist(self):
    141        expected_matches = set(EXAMPLE_FILE_STEMS)
    142        expected_matches.remove("af_NA")
    143        expected_matches.remove("af_VARIANT")
    144        expected_matches.remove("af_ZA_VARIANT")
    145        expected_matches.remove("af_ZA")
    146        expected_matches.remove("af")
    147        self._check_filter(Filter.create_from_json({
    148            "filterType": "language",
    149            "blacklist": [
    150                "af"
    151            ]
    152        }, TestIO()), expected_matches)
    153 
    154    def test_regex_whitelist(self):
    155        self._check_filter(Filter.create_from_json({
    156            "filterType": "regex",
    157            "whitelist": [
    158                r"^ar.*$",
    159                r"^zh$"
    160            ]
    161        }, TestIO()), [
    162            "ar",
    163            "ar_SA",
    164            "ars",
    165            "zh"
    166        ])
    167 
    168    def test_regex_blacklist(self):
    169        expected_matches = set(EXAMPLE_FILE_STEMS)
    170        expected_matches.remove("ar")
    171        expected_matches.remove("ar_SA")
    172        expected_matches.remove("ars")
    173        expected_matches.remove("zh")
    174        self._check_filter(Filter.create_from_json({
    175            "filterType": "regex",
    176            "blacklist": [
    177                r"^ar.*$",
    178                r"^zh$"
    179            ]
    180        }, TestIO()), expected_matches)
    181 
    182    def test_locale_basic(self):
    183        self._check_filter(Filter.create_from_json({
    184            "filterType": "locale",
    185            "whitelist": [
    186                # Default scripts:
    187                # sr => Cyrl
    188                # vai => Vaii
    189                # zh => Hans
    190                "bs_BA", # is an alias to bs_Latn_BA
    191                "en_DE",
    192                "sr", # Language with no script
    193                "vai_Latn", # Language with non-default script
    194                "zh_Hans" # Language with default script
    195            ]
    196        }, TestIO()), [
    197            "root",
    198            # bs: should include the full dependency tree of bs_BA
    199            "bs_BA",
    200            "bs_Latn_BA",
    201            "bs_Latn",
    202            "bs",
    203            # en: should include the full dependency tree of en_DE
    204            "en",
    205            "en_DE",
    206            "en_150",
    207            "en_001",
    208            # sr: include Cyrl, the default, but not Latn.
    209            "sr",
    210            "sr_BA",
    211            "sr_CS",
    212            "sr_Cyrl",
    213            "sr_Cyrl_BA",
    214            "sr_Cyrl_CS",
    215            "sr_Cyrl_ME",
    216            # vai: include Latn but NOT Vaii.
    217            "vai_Latn",
    218            "vai_Latn_LR",
    219            # zh: include Hans but NOT Hant.
    220            "zh",
    221            "zh_CN",
    222            "zh_SG",
    223            "zh_Hans",
    224            "zh_Hans_CN",
    225            "zh_Hans_HK",
    226            "zh_Hans_MO",
    227            "zh_Hans_SG"
    228        ])
    229 
    230    def test_locale_no_children(self):
    231        self._check_filter(Filter.create_from_json({
    232            "filterType": "locale",
    233            "includeChildren": False,
    234            "whitelist": [
    235                # See comments in test_locale_basic.
    236                "bs_BA",
    237                "en_DE",
    238                "sr",
    239                "vai_Latn",
    240                "zh_Hans"
    241            ]
    242        }, TestIO()), [
    243            "root",
    244            "bs_BA",
    245            "bs_Latn_BA",
    246            "bs_Latn",
    247            "bs",
    248            "en",
    249            "en_DE",
    250            "en_150",
    251            "en_001",
    252            "sr",
    253            "vai_Latn",
    254            "zh",
    255            "zh_Hans",
    256        ])
    257 
    258    def test_locale_include_scripts(self):
    259        self._check_filter(Filter.create_from_json({
    260            "filterType": "locale",
    261            "includeScripts": True,
    262            "whitelist": [
    263                # See comments in test_locale_basic.
    264                "bs_BA",
    265                "en_DE",
    266                "sr",
    267                "vai_Latn",
    268                "zh_Hans"
    269            ]
    270        }, TestIO()), [
    271            "root",
    272            # bs: includeScripts only works for language-only (without region)
    273            "bs_BA",
    274            "bs_Latn_BA",
    275            "bs_Latn",
    276            "bs",
    277            # en: should include the full dependency tree of en_DE
    278            "en",
    279            "en_DE",
    280            "en_150",
    281            "en_001",
    282            # sr: include Latn, since no particular script was requested.
    283            "sr_BA",
    284            "sr_CS",
    285            "sr_Cyrl_BA",
    286            "sr_Cyrl_CS",
    287            "sr_Cyrl_ME",
    288            "sr_Cyrl",
    289            "sr_Latn_BA",
    290            "sr_Latn_CS",
    291            "sr_Latn_ME_VARIANT",
    292            "sr_Latn_ME",
    293            "sr_Latn",
    294            "sr_ME",
    295            "sr",
    296            # vai: do NOT include Vaii; the script was explicitly requested.
    297            "vai_Latn_LR",
    298            "vai_Latn",
    299            # zh: do NOT include Hant; the script was explicitly requested.
    300            "zh_CN",
    301            "zh_SG",
    302            "zh_Hans_CN",
    303            "zh_Hans_HK",
    304            "zh_Hans_MO",
    305            "zh_Hans_SG",
    306            "zh_Hans",
    307            "zh"
    308        ])
    309 
    310    def test_locale_no_children_include_scripts(self):
    311        self._check_filter(Filter.create_from_json({
    312            "filterType": "locale",
    313            "includeChildren": False,
    314            "includeScripts": True,
    315            "whitelist": [
    316                # See comments in test_locale_basic.
    317                "bs_BA",
    318                "en_DE",
    319                "sr",
    320                "vai_Latn",
    321                "zh_Hans"
    322            ]
    323        }, TestIO()), [
    324            "root",
    325            # bs: includeScripts only works for language-only (without region)
    326            "bs_BA",
    327            "bs_Latn_BA",
    328            "bs_Latn",
    329            "bs",
    330            # en: should include the full dependency tree of en_DE
    331            "en",
    332            "en_DE",
    333            "en_150",
    334            "en_001",
    335            # sr: include Cyrl and Latn but no other children
    336            "sr",
    337            "sr_Cyrl",
    338            "sr_Latn",
    339            # vai: include only the requested script
    340            "vai_Latn",
    341            # zh: include only the requested script
    342            "zh",
    343            "zh_Hans",
    344        ])
    345 
    346    def test_union(self):
    347        self._check_filter(Filter.create_from_json({
    348            "filterType": "union",
    349            "unionOf": [
    350                {
    351                    "whitelist": [
    352                        "ars",
    353                        "zh_Hans"
    354                    ]
    355                },
    356                {
    357                    "filterType": "regex",
    358                    "whitelist": [
    359                        r"^bs.*$",
    360                        r"^zh$"
    361                    ]
    362                }
    363            ]
    364        }, TestIO()), [
    365            "ars",
    366            "zh_Hans",
    367            "bs_BA",
    368            "bs_Cyrl_BA",
    369            "bs_Cyrl",
    370            "bs_Latn_BA",
    371            "bs_Latn",
    372            "bs",
    373            "zh"
    374        ])
    375 
    376    def test_intersection(self):
    377        self._check_filter(Filter.create_from_json({
    378            "filterType": "intersection",
    379            "intersectionOf": [
    380                {
    381                    "filterType": "locale",
    382                    "includelist": [
    383                        "en"
    384                    ]
    385                },
    386                {
    387                    "excludelist": [
    388                        "en_DE"
    389                    ]
    390                }
    391            ]
    392        }, TestIO()), [
    393            "en_001",
    394            "en_150",
    395            # en_DE is excluded (but, its parents are still there)
    396            "en_GB",
    397            "en_US",
    398            "root",
    399        ])
    400 
    401    def test_complement(self):
    402        self._check_filter(Filter.create_from_json({
    403            "filterType": "complement",
    404            "complementOf": {
    405                "filterType": "locale",
    406                "includeScripts": True,
    407                "includelist": [
    408                    "en",
    409                    "sr",
    410                    "zh",
    411                ]
    412            },
    413        }, TestIO()), [
    414            # Everything *not* a parent or child of en, sr, or zh is included.
    415            # Since root is a parent of all languages, it is not in the complement.
    416            "af_NA",
    417            "af_VARIANT",
    418            "af_ZA_VARIANT",
    419            "af_ZA",
    420            "af",
    421            "ar",
    422            "ar_SA",
    423            "ars",
    424            "bs_BA",
    425            "bs_Cyrl_BA",
    426            "bs_Cyrl",
    427            "bs_Latn_BA",
    428            "bs_Latn",
    429            "bs",
    430            "vai_Latn_LR",
    431            "vai_Latn",
    432            "vai_LR",
    433            "vai_Vaii_LR",
    434            "vai_Vaii",
    435            "vai",
    436            "yue",
    437        ])
    438 
    439    def test_hk_deps_normal(self):
    440        self._check_filter(Filter.create_from_json({
    441            "filterType": "locale",
    442            "whitelist": [
    443                "zh_HK"
    444            ]
    445        }, TestIO()), [
    446            "root",
    447            "zh_Hant",
    448            "zh_Hant_HK",
    449            "zh_HK",
    450        ])
    451 
    452    def test_hk_deps_rbnf(self):
    453        self._check_filter(Filter.create_from_json({
    454            "filterType": "locale",
    455            "whitelist": [
    456                "zh_HK"
    457            ]
    458        }, TestIO()), [
    459            "root",
    460            "yue",
    461            "zh_Hant_HK",
    462            "zh_HK",
    463        ], "rbnf")
    464 
    465    def test_no_alias_parent_structure(self):
    466        self._check_filter(Filter.create_from_json({
    467            "filterType": "locale",
    468            "whitelist": [
    469                "zh_HK"
    470            ]
    471        }, TestIO()), [
    472            "root",
    473            "zh_HK",
    474            "zh",
    475        ], "brkitr")
    476 
    477    def _check_filter(self, filter, expected_matches, tree="locales"):
    478        for file_stem in EXAMPLE_FILE_STEMS:
    479            is_match = filter.match(InFile("%s/%s.txt" % (tree, file_stem)))
    480            expected_match = file_stem in expected_matches
    481            self.assertEqual(is_match, expected_match, file_stem)
    482 
    483 # Export the test for the runner
    484 suite = unittest.defaultTestLoader.loadTestsFromTestCase(FiltrationTest)