ThaiLogical_Latin.txt (6784B)
# © 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html
# Generated using tools/cldr/cldr-to-icu/
#
# File: ThaiLogical_Latin.txt
# Generated from CLDR
#
# NOTE(review): per the header above this file is generated from CLDR, so
# lasting changes presumably belong in the CLDR source rather than here.

# Thai-Latin
# This set of rules follows ISO 11940
# see http://homepage.mac.com/sirbinks/pdf/Thai.r2.pdf
# except that the standard does not mention an implicit vowel, so we use o\u0323
#
# The transcription is fairly ugly, so we ought to also do the UNGEGN version
# see: http://www.eki.ee/wgrs/rom1_th.pdf
# and probably make that the main variant.
#
# Note: this is an internal file. The NFD/NFC is handled externally, in the index
# The insertion of spaces between words, the reversal of the vowels
# and the conversion of space to semicolon are done *outside* of these rules.
# So as far as these rules are concerned, the vowels are in logical order!
#
# Rule notation used throughout this file:
#   X → Y      applies in the Thai-to-Latin (forward) direction only
#   X ← Y      applies in the Latin-to-Thai (backward) direction only
#   X ↔ Y      applies in both directions
#   Y } C      Y is matched only when followed by context C; C is not consumed
#   | in the replacement marks where the cursor is left, so text after it
#              is processed again by the rules
#   ( ... ) / $1  capture a segment of the match and re-emit it

# insert implicit vowel (and remove it going the other way)
# COMMENTED out: the implicit vowel positions cannot be predicted algorithmically
#$consonant = [ก-ฮ];
#$vowel = [ะ-\u0E3Aเ-ไ\u0E47];
#{ ( $consonant ) } [^$vowel \uE000] → | $1 \uE000 ;
#\uE000 → o\u0323 ;
# ← o\u0323 ;

# Characters whose canonical combining class is neither 0 nor Above / Below.
# $notAbove is used by the "backward case" rules to skip over marks that
# normalization may have placed between a base letter and its accent above.
# $notBelow is not referenced by any active rule in this file; it appears only
# in the commented-out pseudo-definition of $notHAccent below.
$notAbove = [^\p{ccc=0}\p{ccc=Above}] ;
$notBelow = [^\p{ccc=0}\p{ccc=Below}] ;

# Consonants
# Warning: the 'h's need to be handled carefully!
# What we really want to say is the following, but we can't
# $notHAccent = !($notAbove* \u0304 | $notBelow* \u0323) ;
# Since the only accents we care about that could cause problems are free-standing accents below, we use instead:
$freeStandingBelow = [\u0325 ];
$hAccent = [ \u0304 \u0323];
# $notHAccent0: the next character is neither a free-standing below accent nor an h accent.
# $notHAccent1: a free-standing below accent followed by a character that is not an h accent.
$notHAccent0 = [^$freeStandingBelow$hAccent];
$notHAccent1 = $freeStandingBelow [^$hAccent];

ห → h\u0304 ; # THAI CHARACTER HO HIP
# Backward: h + (marks that are not above) + macron. The skipped marks ($1) are
# re-emitted after the Thai letter with the cursor before them, so they are
# converted by later passes.
ห | $1 ← h ($notAbove*) \u0304; # backward case, account for reordering
ฮ ↔ h\u0323 ; # THAI CHARACTER HO NOKHUK

# Aspirated digraphs are listed before their plain consonant so that, e.g.,
# "kh" variants are tried before bare "k".
ข ↔ k\u0304h ; # THAI CHARACTER KHO KHAI
ฃ ↔ k\u0323\u0304h ; # THAI CHARACTER KHO KHUAT
ฅ ↔ kʹh ; # THAI CHARACTER KHO KHON
ฆ ↔ k\u0323h ; # THAI CHARACTER KHO RAKHANG
# Plain "kh" is accepted backward only when the h is not itself carrying an
# h accent (which would make it HO HIP / HO NOKHUK after a bare k).
ค ← kh } $notHAccent1 ; # THAI CHARACTER KHO KHWAI
ค ↔ kh } $notHAccent0 ; # THAI CHARACTER KHO KHWAI
ก ↔ k ; # THAI CHARACTER KO KAI

ภ ↔ p\u0323h ; # THAI CHARACTER PHO SAMPHAO
ผ ↔ p\u0304h ; # THAI CHARACTER PHO PHUNG
พ ← ph } $notHAccent1 ; # THAI CHARACTER PHO PHAN
พ ↔ ph } $notHAccent0 ; # THAI CHARACTER PHO PHAN
ป ↔ p ; # THAI CHARACTER PO PLA

ฉ ↔ c\u0304h ; # THAI CHARACTER CHO CHING
ฌ ↔ c\u0323h ; # THAI CHARACTER CHO CHOE
ช ← ch } $notHAccent1 ; # THAI CHARACTER CHO CHANG
ช ↔ ch } $notHAccent0 ; # THAI CHARACTER CHO CHANG
จ ↔ c ; # THAI CHARACTER CHO CHAN

ฐ ↔ t\u0323\u0304h ; # THAI CHARACTER THO THAN
ฑ ↔ t\u0331h ; # THAI CHARACTER THO NANGMONTHO
ฒ ↔ tʹh ; # THAI CHARACTER THO PHUTHAO
ถ ↔ t\u0304h ; # THAI CHARACTER THO THUNG
ธ ↔ t\u0323h ; # THAI CHARACTER THO THONG
ท ← th } $notHAccent1 ; # THAI CHARACTER THO THAHAN
ท ↔ th } $notHAccent0 ; # THAI CHARACTER THO THAHAN
#Note: TO PATAK deviates from ISO since t-dotunder + h would be ambiguous. So it uses vertical tick.
ฏ ↔ t\u0329 ; # THAI CHARACTER TO PATAK
ต ↔ t ; # THAI CHARACTER TO TAO

# since there is no singleton g (generated), don't worry about that.
ง ↔ ng ; # THAI CHARACTER NGO NGU
ณ ↔ n\u0323 ; # THAI CHARACTER NO NEN
น ↔ n ; # THAI CHARACTER NO NU
ญ ↔ y\u0323 ; # THAI CHARACTER YO YING
ฎ ↔ d\u0323 ; # THAI CHARACTER DO CHADA
ด ↔ d ; # THAI CHARACTER DO DEK
บ ↔ b ; # THAI CHARACTER BO BAIMAI
ฝ ↔ f\u0304 ; # THAI CHARACTER FO FA
ฝ | $1 ← f ($notAbove*) \u0304; # backward case, account for reordering
ม ↔ m ; # THAI CHARACTER MO MA
ย ↔ y ; # THAI CHARACTER YO YAK
ร ↔ r ; # THAI CHARACTER RO RUA
ฤ ↔ v ; # THAI CHARACTER RU
ฦ ↔ ł ; # THAI CHARACTER LU
ว ↔ w ; # THAI CHARACTER WO WAEN
ศ ↔ s\u0323\u0304 ; # THAI CHARACTER SO SALA***
ศ | $1 ← s \u0323 ($notAbove*) \u0304; # backward case, account for reordering
ษ ↔ s\u0304ʹ ; # THAI CHARACTER SO RUSI
ส → s\u0304 ; # THAI CHARACTER SO SUA***
ส | $1 ← s ($notAbove*) \u0304; # backward case, account for reordering
ฬ ↔ l\u0323 ; # THAI CHARACTER LO CHULA
ล ↔ l ; # THAI CHARACTER LO LING
ฟ ↔ f ; # THAI CHARACTER FO FAN
อ ↔ x ; # THAI CHARACTER O ANG
ซ ↔ s ; # THAI CHARACTER SO SO

# vowels
\u0E31 ↔ a\u0323 ; # THAI CHARACTER MAI HAN-AKAT
า → a\u0304 ; # THAI CHARACTER SARA AA
า | $1 ← a ($notAbove*) \u0304; # backward case, account for reordering
# We deviate from ISO for SARA AM for disambiguation
ำ → a \u0309; # THAI CHARACTER SARA AM
ำ | $1 ← a ($notAbove*) \u0309 ; # backward case, account for reordering
ะ ↔ a ; # THAI CHARACTER SARA A
\u0E35 ↔ i\u0304 ; # THAI CHARACTER SARA II
\u0E35 | $1 ← i ($notAbove*) \u0304 ; # backward case, account for reordering
\u0E37 ↔ u\u0323\u0304 ; # THAI CHARACTER SARA UEE
\u0E37 | $1 ← u \u0323 ($notAbove*) \u0304 ; # backward case, account for reordering
\u0E36 ↔ u\u0323 ; # THAI CHARACTER SARA UE
\u0E39 ↔ u\u0304 ; # THAI CHARACTER SARA UU
\u0E39 | $1 ← u ($notAbove*) \u0304 ; # backward case, account for reordering
\u0E38 ↔ u ; # THAI CHARACTER SARA U
ฯ ↔ ‡ ; # THAI CHARACTER PAIYANNOI
# Disabled rule: XXX is a placeholder, no Latin mapping is defined for BAHT.
# ฿ ↔ XXX ; # THAI CURRENCY SYMBOL BAHT
เ ↔ e ; # THAI CHARACTER SARA E
แ ↔ æ ; # THAI CHARACTER SARA AE
โ ↔ o ; # THAI CHARACTER SARA O
ใ ↔ ı ; # THAI CHARACTER SARA AI MAIMUAN
ไ ↔ i\u0323 ; # THAI CHARACTER SARA AI MAIMALAI
ๅ ↔ ɨ ; # THAI CHARACTER LAKKHANGYAO

# Tone marks and other signs map to bare combining marks on the Latin side.
\u0E47 ↔ \u0306 ; # THAI CHARACTER MAITAIKHU
\u0E48 ↔ \u0300 ; # THAI CHARACTER MAI EK
\u0E49 ↔ \u0302 ; # THAI CHARACTER MAI THO
\u0E4A ↔ \u0301 ; # THAI CHARACTER MAI TRI
\u0E4B ↔ \u030C ; # THAI CHARACTER MAI CHATTAWA
\u0E4C ↔ \u0312 ; # THAI CHARACTER THANTHAKHAT
\u0E4E ↔ '~' ; # THAI CHARACTER YAMAKKAN
# We deviate from ISO for disambiguation
\u0E4D ↔ \u030A ; # THAI CHARACTER NIKHAHIT
๏ ↔ '§' ; # THAI CHARACTER FONGMAN

# Thai digits map one-to-one to ASCII digits.
๐ ↔ 0 ; # THAI DIGIT ZERO
๑ ↔ 1 ; # THAI DIGIT ONE
๒ ↔ 2 ; # THAI DIGIT TWO
๓ ↔ 3 ; # THAI DIGIT THREE
๔ ↔ 4 ; # THAI DIGIT FOUR
๕ ↔ 5 ; # THAI DIGIT FIVE
๖ ↔ 6 ; # THAI DIGIT SIX
๗ ↔ 7 ; # THAI DIGIT SEVEN
๘ ↔ 8 ; # THAI DIGIT EIGHT
๙ ↔ 9 ; # THAI DIGIT NINE
๚ ↔ '||' ; # THAI CHARACTER ANGKHANKHU
๛ ↔ » ; # THAI CHARACTER KHOMUT
ๆ ↔ « ; # THAI CHARACTER MAIYAMOK

# moved down to make shorter first
#Note: PHINTHU deviates from ISO since underring causes canonical problems. So it uses spacing tick below.
\u0E3A ↔ ˌ ; # THAI CHARACTER PHINTHU
\u0E34 ↔ i ; # THAI CHARACTER SARA I

# fallbacks
# Backward-only: Latin letters with no Thai mapping of their own are rewritten
# to a mapped letter; the leading | leaves the cursor before the replacement so
# it is then converted by the rules above.
| k ← g ;
| k ← h ;
| c ← j ;
| k ← q ;
| s ← z ;

# Parenthesized transform: applies only in the backward (Latin-to-Thai)
# direction, lower-casing the Latin input.
# NOTE(review): as the last :: line it should run first in the backward pass - confirm against the ICU rule docs.
:: (lower);