gen-arabic-joining-list.py (2402B)
#!/usr/bin/env python3

"""usage: ./gen-arabic-joining-list.py ArabicShaping.txt Scripts.txt

Input files:
* https://unicode.org/Public/UCD/latest/ucd/ArabicShaping.txt
* https://unicode.org/Public/UCD/latest/ucd/Scripts.txt
"""

import os.path
import sys

# Run of '#' characters searched for in ArabicShaping.txt; every line up to
# and including the first one that contains it is skipped before parsing.
SEPARATOR = '##################'


def read (f):
    """Parse `code ; value` lines from *f* into a dict.

    Everything after a '#' on a line is discarded, and lines without a ';'
    are skipped.  The first field is either a single hexadecimal code point
    or an inclusive `start..end` range; the second field is the value stored
    for every code point covered.

    Returns a dict mapping int code points to the value string.
    Raises ValueError if a code point is not valid hexadecimal.
    """
    mapping = {}
    for line in f:

        # Drop the trailing comment, if any.
        j = line.find ('#')
        if j >= 0:
            line = line[:j]

        fields = [x.strip () for x in line.split (';')]
        if len (fields) == 1:
            continue

        uu = fields[0].split ('..')
        start = int (uu[0], 16)
        end = start if len (uu) == 1 else int (uu[1], 16)

        value = fields[1]
        for u in range (start, end + 1):
            mapping[u] = value

    return mapping


def read_joining_uu (f):
    """Return the sorted code points in *f* whose third field is not T or U.

    Lines starting with '#' and lines without a ';' are skipped.  The first
    field is parsed as a single hexadecimal code point (no ranges).

    Raises IndexError if a data line has fewer than three fields and
    ValueError if the code point is not valid hexadecimal.
    """
    values = set ()
    for line in f:

        if line.startswith ('#'):
            continue

        fields = [x.strip () for x in line.split (';')]
        if len (fields) == 1:
            continue
        if fields[2] in {'T', 'U'}:
            continue

        values.add (int (fields[0], 16))

    return sorted (values)


def print_has_arabic_joining (scripts, joining_uu):
    """Print the C function `has_arabic_joining` to stdout.

    *scripts* maps code points to script names and *joining_uu* is an
    iterable of code points.  One `case HB_SCRIPT_<NAME>:` label is printed
    for each distinct script of those code points, in sorted order, with
    'Common' and 'Inherited' left out.

    Raises KeyError if a code point in *joining_uu* is missing from *scripts*.
    """
    # NOTE(review): the reviewed copy of this file had runs of whitespace
    # collapsed, so the literals printed by this script are reproduced as
    # seen there; if the real file indents the emitted C more deeply,
    # restore that spacing here and in main ().
    print ("static bool")
    print ("has_arabic_joining (hb_script_t script)")
    print ("{")
    print (" /* List of scripts that have data in arabic-table. */")
    print (" switch ((int) script)")
    print (" {")

    names = {scripts[u] for u in joining_uu}
    for script in sorted (names - {'Common', 'Inherited'}):
        print (" case HB_SCRIPT_{}:".format (script.upper ()))

    print (" return true;")
    print ()
    print (" default:")
    print (" return false;")
    print (" }")
    print ("}")
    print ()


def skip_to_separator (f):
    """Consume lines of *f* up to and including the first SEPARATOR line.

    Returns True if such a line was found, False if end of file was reached
    first.  Stopping at end of file is what keeps a file without the
    separator from spinning forever.
    """
    for line in f:
        if SEPARATOR in line:
            return True
    return False


def main (argv=None):
    """Generate the joining-list header on stdout from the two input files.

    *argv* defaults to sys.argv and must hold the program name followed by
    the paths of ArabicShaping.txt and Scripts.txt, in that order.  Exits
    with the usage text on a wrong argument count, and with an error message
    if the first file has no separator line.
    """
    if argv is None:
        argv = sys.argv
    if len (argv) != 3:
        sys.exit (__doc__)

    # Parse everything first so both files are closed before any output is
    # written and a parse failure does not leave a half-written header.
    with open (argv[1], encoding='utf-8') as shaping, \
         open (argv[2], encoding='utf-8') as scripts_file:
        # The first two lines of each input are echoed in the output banner.
        headers = [[f.readline (), f.readline ()] for f in (shaping, scripts_file)]
        if not skip_to_separator (shaping):
            sys.exit ("{}: no '{}' separator line found".format (argv[1], SEPARATOR))
        scripts = read (scripts_file)
        joining_uu = read_joining_uu (shaping)

    print ("/* == Start of generated function == */")
    print ("/*")
    print (" * The following function is generated by running:")
    print (" *")
    print (" * ./gen-arabic-joining-list.py ArabicShaping.txt Scripts.txt")
    print (" *")
    print (" * on files with these headers:")
    print (" *")
    for h in headers:
        for l in h:
            print (" * %s" % (l.strip ()))
    print (" */")
    print ()
    print ("#ifndef HB_OT_SHAPER_ARABIC_JOINING_LIST_HH")
    print ("#define HB_OT_SHAPER_ARABIC_JOINING_LIST_HH")
    print ()

    print_has_arabic_joining (scripts, joining_uu)

    print ()
    print ("#endif /* HB_OT_SHAPER_ARABIC_JOINING_LIST_HH */")
    print ()
    print ("/* == End of generated function == */")


if __name__ == '__main__':
    main ()