gen-emoji-table.py (2447B)
#!/usr/bin/env python3

"""usage: ./gen-emoji-table.py emoji-data.txt emoji-test.txt

Input file:
* https://www.unicode.org/Public/UCD/latest/ucd/emoji/emoji-data.txt
* https://www.unicode.org/Public/emoji/latest/emoji-test.txt
"""

import sys
from collections import OrderedDict
import packTab

# Exactly two input files are required; otherwise print the usage text
# (the module docstring) and exit with a failure status.
if len (sys.argv) != 3:
    sys.exit (__doc__)

# Maps each property name found in emoji-data.txt to a list of inclusive
# (start, end) code point ranges, in file order.
ranges = OrderedDict()

# Unicode data files are published as UTF-8, so decode explicitly instead of
# relying on the locale's default encoding.  The `with` block also guarantees
# the file is closed once parsing is done.
with open(sys.argv[1], encoding="utf-8") as f:
    # The first ten lines of the data file are echoed into the generated
    # output so that it records which version of the data it came from.
    header = [f.readline () for _ in range(10)]

    for line in f:
        line = line.strip()
        if not line or line.startswith('#'):
            continue

        # Data lines look like "<range> ; <property> # comment": drop the
        # comment and keep the first two ';'-separated fields.
        rang, typ = [s.strip() for s in line.split('#')[0].split(';')[:2]]

        # A range is either a single code point or "start..end", in hex.
        rang = [int(s, 16) for s in rang.split('..')]
        if len(rang) > 1:
            start, end = rang
        else:
            start = end = rang[0]

        spans = ranges.setdefault(typ, [])
        if spans and spans[-1][1] == start - 1:
            # Directly adjacent to the previous range of this property:
            # extend that range instead of adding a new one.
            spans[-1] = (spans[-1][0], end)
        else:
            spans.append((start, end))


print ("/* == Start of generated table == */")
print ("/*")
print (" * The following tables are generated by running:")
print (" *")
print (" *   ./gen-emoji-table.py emoji-data.txt")
print (" *")
print (" * on file with this header:")
print (" *")
for header_line in header:
    print (" * %s" % (header_line.strip()))
print (" */")
print ()
print ("#ifndef HB_UNICODE_EMOJI_TABLE_HH")
print ("#define HB_UNICODE_EMOJI_TABLE_HH")
print ()
print ('#include "hb-unicode.hh"')
print ()

for typ, spans in ranges.items():
    # Only the Extended_Pictographic property gets a lookup table.
    if typ != "Extended_Pictographic":
        continue

    # Expand the ranges into a {code point: 1} mapping for packTab.
    arr = {
        cp: 1
        for start, end in spans
        for cp in range(start, end + 1)
    }

    # NOTE(review): the second argument (0) is presumably the value for code
    # points missing from `arr` -- confirm against the packTab documentation.
    sol = packTab.pack_table(arr, 0, compression=9)
    code = packTab.Code('_hb_emoji')
    sol.genCode(code, 'is_'+typ)
    code.print_c()
    print()

print ()
print ("#endif /* HB_UNICODE_EMOJI_TABLE_HH */")
print ()
print ("/* == End of generated table == */")


# Generate test file.
# Collect every multi-code-point sequence listed in emoji-test.txt.
sequences = []
# The Unicode data file is published as UTF-8; decode it explicitly so that
# parsing does not depend on the platform's locale encoding.
with open(sys.argv[2], encoding="utf-8") as f:
    for line in f:
        # Keep only the leading code point field: drop the trailing '#'
        # comment first, then everything from the first ';' onwards.
        line = line.split("#", 1)[0]
        line = line.split(";", 1)[0]
        codepoints = line.strip().split(" ")
        # Blank lines, comment-only lines and single code points are skipped;
        # only sequences of two or more code points become test cases.
        if len(codepoints) < 2:
            continue
        sequences.append(codepoints)

# Write one test line per sequence: font, options, input code points and the
# expected output, separated by ';'.
with open("../test/shape/data/in-house/tests/emoji-clusters.tests", "w") as f:
    for sequence in sequences:
        f.write("../fonts/AdobeBlank2.ttf;--no-glyph-names --no-positions --font-funcs=ot")
        f.write(";" + ",".join(sequence))
        # One "1=0" entry per input code point.
        # NOTE(review): presumably "glyph 1 in cluster 0", i.e. the whole
        # sequence is expected to form a single cluster -- confirm against
        # the shaping test format.
        f.write(";[" + "|".join(["1=0"] * len(sequence)) + "]\n")