tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

GenerateReservedWords.py (5573B)


      1 # This Source Code Form is subject to the terms of the Mozilla Public
      2 # License, v. 2.0. If a copy of the MPL was not distributed with this
      3 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
      4 
      5 import sys
      6 
      7 import ReservedWordReader
      8 
      9 
     10 def line(opt, s):
     11    opt["output"].write("{}{}\n".format("    " * opt["indent_level"], s))
     12 
     13 
     14 def indent(opt):
     15    opt["indent_level"] += 1
     16 
     17 
     18 def dedent(opt):
     19    opt["indent_level"] -= 1
     20 
     21 
     22 def span_and_count_at(reserved_word_list, column):
     23    assert len(reserved_word_list) != 0
     24 
     25    chars_dict = {}
     26    for index, word in reserved_word_list:
     27        chars_dict[ord(word[column])] = True
     28 
     29    chars = sorted(chars_dict.keys())
     30    return chars[-1] - chars[0] + 1, len(chars)
     31 
     32 
     33 def optimal_switch_column(opt, reserved_word_list, columns, unprocessed_columns):
     34    assert len(reserved_word_list) != 0
     35    assert unprocessed_columns != 0
     36 
     37    min_count = 0
     38    min_span = 0
     39    min_count_index = 0
     40    min_span_index = 0
     41 
     42    for index in range(0, unprocessed_columns):
     43        span, count = span_and_count_at(reserved_word_list, columns[index])
     44        assert span != 0
     45 
     46        if span == 1:
     47            assert count == 1
     48            return 1, True
     49 
     50        assert count != 1
     51        if index == 0 or min_span > span:
     52            min_span = span
     53            min_span_index = index
     54 
     55        if index == 0 or min_count > count:
     56            min_count = count
     57            min_count_index = index
     58 
     59    if min_count <= opt["use_if_threshold"]:
     60        return min_count_index, True
     61 
     62    return min_span_index, False
     63 
     64 
     65 def split_list_per_column(reserved_word_list, column):
     66    assert len(reserved_word_list) != 0
     67 
     68    column_dict = {}
     69    for item in reserved_word_list:
     70        index, word = item
     71        per_column = column_dict.setdefault(word[column], [])
     72        per_column.append(item)
     73 
     74    return sorted(column_dict.items())
     75 
     76 
     77 def generate_letter_switch(opt, unprocessed_columns, reserved_word_list, columns=None):
     78    assert len(reserved_word_list) != 0
     79 
     80    if not columns:
     81        columns = range(0, unprocessed_columns)
     82 
     83    if len(reserved_word_list) == 1:
     84        index, word = reserved_word_list[0]
     85 
     86        if unprocessed_columns == 0:
     87            line(opt, f"JSRW_GOT_MATCH({index}) /* {word} */")
     88            return
     89 
     90        if unprocessed_columns > opt["char_tail_test_threshold"]:
     91            line(opt, f"JSRW_TEST_GUESS({index}) /* {word} */")
     92            return
     93 
     94        conds = []
     95        for column in columns[0:unprocessed_columns]:
     96            quoted = repr(word[column])
     97            conds.append(f"JSRW_AT({column})=={quoted}")
     98 
     99        line(opt, "if ({}) {{".format(" && ".join(conds)))
    100 
    101        indent(opt)
    102        line(opt, f"JSRW_GOT_MATCH({index}) /* {word} */")
    103        dedent(opt)
    104 
    105        line(opt, "}")
    106        line(opt, "JSRW_NO_MATCH()")
    107        return
    108 
    109    assert unprocessed_columns != 0
    110 
    111    optimal_column_index, use_if = optimal_switch_column(
    112        opt, reserved_word_list, columns, unprocessed_columns
    113    )
    114    optimal_column = columns[optimal_column_index]
    115 
    116    # Make a copy to avoid breaking passed list.
    117    columns = list(columns)
    118    columns[optimal_column_index] = columns[unprocessed_columns - 1]
    119 
    120    list_per_column = split_list_per_column(reserved_word_list, optimal_column)
    121 
    122    if not use_if:
    123        line(opt, f"switch (JSRW_AT({optimal_column})) {{")
    124 
    125    for char, reserved_word_list_per_column in list_per_column:
    126        quoted = repr(char)
    127        if use_if:
    128            line(opt, f"if (JSRW_AT({optimal_column}) == {quoted}) {{")
    129        else:
    130            line(opt, f"  case {quoted}:")
    131 
    132        indent(opt)
    133        generate_letter_switch(
    134            opt, unprocessed_columns - 1, reserved_word_list_per_column, columns
    135        )
    136        dedent(opt)
    137 
    138        if use_if:
    139            line(opt, "}")
    140 
    141    if not use_if:
    142        line(opt, "}")
    143 
    144    line(opt, "JSRW_NO_MATCH()")
    145 
    146 
    147 def split_list_per_length(reserved_word_list):
    148    assert len(reserved_word_list) != 0
    149 
    150    length_dict = {}
    151    for item in reserved_word_list:
    152        index, word = item
    153        per_length = length_dict.setdefault(len(word), [])
    154        per_length.append(item)
    155 
    156    return sorted(length_dict.items())
    157 
    158 
    159 def generate_switch(opt, reserved_word_list):
    160    assert len(reserved_word_list) != 0
    161 
    162    line(opt, "/*")
    163    line(
    164        opt,
    165        f" * Generating switch for the list of {len(reserved_word_list)} entries:",
    166    )
    167    for index, word in reserved_word_list:
    168        line(opt, f" * {word}")
    169    line(opt, " */")
    170 
    171    list_per_length = split_list_per_length(reserved_word_list)
    172 
    173    use_if = False
    174    if len(list_per_length) < opt["use_if_threshold"]:
    175        use_if = True
    176 
    177    if not use_if:
    178        line(opt, "switch (JSRW_LENGTH()) {")
    179 
    180    for length, reserved_word_list_per_length in list_per_length:
    181        if use_if:
    182            line(opt, f"if (JSRW_LENGTH() == {length}) {{")
    183        else:
    184            line(opt, f"  case {length}:")
    185 
    186        indent(opt)
    187        generate_letter_switch(opt, length, reserved_word_list_per_length)
    188        dedent(opt)
    189 
    190        if use_if:
    191            line(opt, "}")
    192 
    193    if not use_if:
    194        line(opt, "}")
    195    line(opt, "JSRW_NO_MATCH()")
    196 
    197 
    198 def main(output, reserved_words_h, *args):
    199    reserved_word_list = ReservedWordReader.read_reserved_word_list(
    200        reserved_words_h, *args
    201    )
    202 
    203    opt = {
    204        "indent_level": 1,
    205        "use_if_threshold": 3,
    206        "char_tail_test_threshold": 4,
    207        "output": output,
    208    }
    209    generate_switch(opt, reserved_word_list)
    210 
    211 
    212 if __name__ == "__main__":
    213    main(sys.stdout, *sys.argv[1:])