tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

gen-emoji-table.py (2447B)


      1 #!/usr/bin/env python3
      2 
      3 """usage: ./gen-emoji-table.py emoji-data.txt emoji-test.txt
      4 
      5 Input file:
      6 * https://www.unicode.org/Public/UCD/latest/ucd/emoji/emoji-data.txt
      7 * https://www.unicode.org/Public/emoji/latest/emoji-test.txt
      8 """
      9 
     10 import sys
     11 from collections import OrderedDict
     12 import packTab
     13 
     14 if len (sys.argv) != 3:
     15 sys.exit (__doc__)
     16 
     17 f = open(sys.argv[1])
     18 header = [f.readline () for _ in range(10)]
     19 
     20 ranges = OrderedDict()
     21 for line in f.readlines():
     22 line = line.strip()
     23 if not line or line[0] == '#':
     24 	continue
     25 rang, typ = [s.strip() for s in line.split('#')[0].split(';')[:2]]
     26 
     27 rang = [int(s, 16) for s in rang.split('..')]
     28 if len(rang) > 1:
     29 	start, end = rang
     30 else:
     31 	start = end = rang[0]
     32 
     33 if typ not in ranges:
     34 	ranges[typ] = []
     35 if ranges[typ] and ranges[typ][-1][1] == start - 1:
     36 	ranges[typ][-1] = (ranges[typ][-1][0], end)
     37 else:
     38 	ranges[typ].append((start, end))
     39 
     40 
     41 
     42 print ("/* == Start of generated table == */")
     43 print ("/*")
     44 print (" * The following tables are generated by running:")
     45 print (" *")
     46 print (" *   ./gen-emoji-table.py emoji-data.txt")
     47 print (" *")
     48 print (" * on file with this header:")
     49 print (" *")
     50 for l in header:
     51 print (" * %s" % (l.strip()))
     52 print (" */")
     53 print ()
     54 print ("#ifndef HB_UNICODE_EMOJI_TABLE_HH")
     55 print ("#define HB_UNICODE_EMOJI_TABLE_HH")
     56 print ()
     57 print ('#include "hb-unicode.hh"')
     58 print ()
     59 
     60 for typ, s in ranges.items():
     61 if typ != "Extended_Pictographic": continue
     62 
     63 arr = dict()
     64 for start,end in s:
     65 	for i in range(start, end + 1):
     66 		arr[i] = 1
     67 
     68 sol = packTab.pack_table(arr, 0, compression=9)
     69 code = packTab.Code('_hb_emoji')
     70 sol.genCode(code, 'is_'+typ)
     71 code.print_c()
     72 print()
     73 
     74 print ()
     75 print ("#endif /* HB_UNICODE_EMOJI_TABLE_HH */")
     76 print ()
     77 print ("/* == End of generated table == */")
     78 
     79 
     80 # Generate test file.
     81 sequences = []
     82 with open(sys.argv[2]) as f:
     83    for line in f.readlines():
     84        if "#" in line:
     85            line = line[:line.index("#")]
     86        if ";" in line:
     87            line = line[:line.index(";")]
     88        line = line.strip()
     89        line = line.split(" ")
     90        if len(line) < 2:
     91            continue
     92        sequences.append(line)
     93 
     94 with open("../test/shape/data/in-house/tests/emoji-clusters.tests", "w") as f:
     95    for sequence in sequences:
     96        f.write("../fonts/AdobeBlank2.ttf;--no-glyph-names --no-positions --font-funcs=ot")
     97        f.write(";" + ",".join(sequence))
     98        f.write(";[" + "|".join("1=0" for c in sequence) + "]\n")