my_my_FONIPA.txt (10255B)
# © 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html
# Generated using tools/cldr/cldr-to-icu/
#
# File: my_my_FONIPA.txt
# Generated from CLDR
#

# Pronunciation rules for Burmese.
#
# The following rules are lexical and heuristic: lexical in the sense
# that they generate phoneme strings which may further undergo
# post-lexical phonological processes, in particular voicing, to
# result in actual surface forms; heuristic in the sense that they try
# to resolve ambiguities, especially around reduced vowels, in a
# systematic way that may be incorrect in many situations. Vowel
# reduction depends on many factors, such as morphemic structure,
# which are not available here.
#
# Layout of this file: Definitions, Preprocessing, Rhymes, Medials,
# Initials, Postprocessing. The rules are split into consecutive passes
# by `::Null;` statements, and within a pass earlier rules take
# precedence over later ones, so neither the order of the sections nor
# the order of the rules inside a section may be changed casually.
#
# Definitions
#
# Dependent vowel signs
$vs_AA = \u102B;
$vs_aa = \u102C;
$vs_i = \u102D;
$vs_ii = \u102E;
$vs_u = \u102F;
$vs_uu = \u1030;
$vs_e = \u1031;
$vs_ai = \u1032;
# Various signs
$anusvara = \u1036;
$visarga = \u1038;
$virama = \u1039;
$asat = \u103A;
# Dependent (medial) consonant signs
$med_y = \u103B;
$med_r = \u103C;
$med_w = \u103D;
$med_h = \u103E;
# Independent letters and letter-like punctuation symbols
$independent = [\u1000-\u102A \u103F \u104C-\u104F \u1050-\u1055];
# Output diacritics used to mark tone on the generated vowels:
# combining tilde below (creaky), acute (high), grave (low).
$creaky = \u0330;
$high = \u0301;
$low = \u0300;
# NOTE: no rule visible in this file references $coda.
$coda = [$creaky $high $low ɴ ʔ ə]; # TODO: remove if unused
#
# Preprocessing
#
::NFC;
# Replace U+102B TALL AA with U+102C AA. Their pronunciation is identical.
$vs_AA → $vs_aa;
# Unstack kinzi (င\u103A plus U+1039 VIRAMA) into plain င\u103A.
# Open question: what would happen if the syllable ending in kinzi had
# non-low tone?
င\u103A $virama → င\u103A;
# Unstack everything else, i.e. replace U+1039 VIRAMA with U+103A ASAT.
$virama → $asat;
# Unstack U+103F GREAT SA.
ဿ → သ\u103Aသ;
# Insert a syllable boundary marker /./ before every independent letter.
# The empty span between `{` and `}` is the insertion point. The context
# requires that the preceding character is not already `.` (the `$` in
# the set is presumably ICU's text-boundary anchor, so nothing is
# inserted at the very start), and that the letter, after any U+1037 or
# medial signs, is not followed by U+103A ASAT; i.e. letters that are
# marked as codas do not start a new syllable.
::Null;
[^.$] { } $independent ([\u1037\u103B-\u103E])* [^\u103A] → \.;
# Insert default inherent vowel: /a\u0330/ at the end, /ə/ everywhere else.
# A consonant (plus optional medials) that is directly followed by the
# end of the text or by the syllable marker has no written vowel.
::Null;
([\u1000-\u1021\u103F] [\u103B-\u103E]*) } [$] → $1 a $creaky;
([\u1000-\u1021\u103F] [\u103B-\u103E]*) } \. → $1 ə;
# Allow for additional coda consonants.
#
# This only covers a few of the cases in which full coda consonants
# can appear in loanwords. The general situation is somewhat rare and
# is more easily dealt with in a formalism that can impose structural
# constraints on syllables more easily.
#
# The rule deletes the ASAT of a letter that directly follows an
# ASAT-marked coda (optionally with VISARGA in between), so the second
# letter no longer matches the ASAT-bearing rhyme rules below.
::Null;
$asat ($visarga)? [\u1000-\u102A] { $asat → ;
# Deal with ၎င\u103Aး early, before the generic င\u103Aး rhyme rule can
# consume part of it.
၎င\u103Aး → lə\.ɡa $high ʊ\u032Fɴ;
#
# Rhymes
#
# Each rule rewrites a vowel sign and/or ASAT-marked final consonant,
# together with an optional tone mark (U+1037 DOT BELOW or U+1038
# VISARGA), into an IPA rhyme. Within each group the variants carrying an
# explicit tone mark are listed before the unmarked variant, and longer
# vowel-sign sequences before their prefixes, so that the more specific
# rule is tried first.
::Null;
က\u103A → ɛʔ;
ဂ\u103A → ɛʔ; # in မဂ\u1039ဂဇင\u103Aး ~ မဂ\u103Aဂဇင\u103Aး /mɛʔ.ɡə.zɪ\u0301ɴ/
င\u1037\u103A → ɪ $creaky ɴ;
င\u103Aး → ɪ $high ɴ;
င\u103A → ɪ $low ɴ;
စ\u103A → ɪʔ; # maybe sometimes /eɪ\u032Fʔ/
ဉ\u1037\u103A → ɪ $creaky ɴ;
ဉ\u103Aး → ɪ $high ɴ;
ဉ\u103A → ɪ $low ɴ;
ည\u1037\u103A → ɛ $creaky;
ည\u103Aး → ɛ $high;
ည\u103A → ɛ $low;
ဏ\u1037\u103A → a $creaky ɴ;
ဏ\u103Aး → a $high ɴ;
ဏ\u103A → a $low ɴ;
တ\u103A → aʔ;
န\u1037\u103A → a $creaky ɴ;
န\u103Aး → a $high ɴ;
န\u103A → a $low ɴ;
ပ\u103A → aʔ;
မ\u1037\u103A → a $creaky ɴ;
မ\u103Aး → a $high ɴ;
မ\u103A → a $low ɴ;
ယ\u1037\u103A → ɛ $creaky;
ယ\u103Aး → ɛ $high;
ယ\u103A → ɛ $low;
သ\u103A → aʔ;
# Rhymes beginning with vowel sign AA.
$vs_aa ဉ\u1037\u103A → ɪ $creaky ɴ;
$vs_aa ဉ\u103Aး → ɪ $high ɴ;
$vs_aa ဉ\u103A → ɪ $low ɴ;
$vs_aa တ\u103A → aʔ;
$vs_aa ဏ\u1037\u103A → a $creaky ɴ;
$vs_aa ဏ\u103Aး → a $high ɴ;
$vs_aa ဏ\u103A → a $low ɴ;
$vs_aa န\u1037\u103A → a $creaky ɴ;
$vs_aa န\u103Aး → a $high ɴ;
$vs_aa န\u103A → a $low ɴ;
$vs_aa ပ\u103A → aʔ; # in ကလာပ\u103Aစည\u103Aး /kə.laʔ.sɛ\u0301/ (club cell)
$vs_aa ယ\u1037\u103A → ɛ $creaky;
$vs_aa ယ\u103Aး → ɛ $high;
$vs_aa ယ\u103A → ɛ $low;
$vs_aa \u1037 → a $creaky; # redundant creaky tone
$vs_aa း → a $high;
$vs_aa → a $low;
# Rhymes beginning with vowel sign I (including the I+U combination).
$vs_i က\u103A → eɪ\u032Fʔ;
$vs_i စ\u103A → eɪ\u032Fʔ;
$vs_i တ\u103A → eɪ\u032Fʔ;
$vs_i န\u1037\u103A → e $creaky ɪ\u032Fɴ;
$vs_i န\u103Aး → e $high ɪ\u032Fɴ;
$vs_i န\u103A → e $low ɪ\u032Fɴ;
$vs_i ပ\u103A → eɪ\u032Fʔ;
$vs_i မ\u1037\u103A → e $creaky ɪ\u032Fɴ;
$vs_i မ\u103Aး → e $high ɪ\u032Fɴ;
$vs_i မ\u103A → e $low ɪ\u032Fɴ;
$vs_i $vs_u က\u103A → aɪ\u032Fʔ;
$vs_i $vs_u င\u1037\u103A → a $creaky ɪ\u032Fɴ;
$vs_i $vs_u င\u103Aး → a $high ɪ\u032Fɴ;
$vs_i $vs_u င\u103A → a $low ɪ\u032Fɴ;
$vs_i $vs_u ဏ\u1037\u103A → a $creaky ɪ\u032Fɴ;
$vs_i $vs_u ဏ\u103Aး → a $high ɪ\u032Fɴ;
$vs_i $vs_u ဏ\u103A → a $low ɪ\u032Fɴ;
$vs_i $vs_u ယ\u1037\u103A → o $creaky;
$vs_i $vs_u ယ\u103Aး → o $high;
$vs_i $vs_u ယ\u103A → o $low; # in က\u102D\u102Fယ\u103A /kò/
$vs_i $vs_u \u1037 → o $creaky;
$vs_i $vs_u း → o $high;
$vs_i $vs_u → o $low;
$vs_i $anusvara \u1037 → e $creaky ɪ\u032Fɴ;
$vs_i $anusvara း → e $high ɪ\u032Fɴ;
$vs_i $anusvara → e $low ɪ\u032Fɴ;
$vs_i → i $creaky;
# Rhymes beginning with vowel sign II.
$vs_ii \u1037 → i $creaky; # this does not usually occur
$vs_ii း → i $high;
$vs_ii → i $low;
# Rhymes beginning with vowel sign U.
$vs_u က\u103A → oʊ\u032Fʔ;
$vs_u ဂ\u103A → oʊ\u032Fʔ;
$vs_u ဏ\u1037\u103A → o $creaky ʊ\u032Fɴ;
$vs_u ဏ\u103Aး → o $high ʊ\u032Fɴ;
$vs_u ဏ\u103A → o $low ʊ\u032Fɴ;
$vs_u တ\u103A → oʊ\u032Fʔ;
$vs_u န\u1037\u103A → o $creaky ʊ\u032Fɴ;
$vs_u န\u103Aး → o $high ʊ\u032Fɴ;
$vs_u န\u103A → o $low ʊ\u032Fɴ;
$vs_u ပ\u103A → oʊ\u032Fʔ;
$vs_u မ\u1037\u103A → o $creaky ʊ\u032Fɴ;
$vs_u မ\u103Aး → o $high ʊ\u032Fɴ;
$vs_u မ\u103A → o $low ʊ\u032Fɴ;
$vs_u $anusvara \u1037 → o $creaky ʊ\u032Fɴ;
$vs_u $anusvara း → o $high ʊ\u032Fɴ;
$vs_u $anusvara → o $low ʊ\u032Fɴ;
$vs_u → u $creaky;
# Rhymes beginning with vowel sign UU.
$vs_uu \u1037 → u $creaky; # this does not usually occur
$vs_uu း → u $high;
$vs_uu → u $low;
# Rhymes beginning with vowel sign E (including the E+AA combination).
# Note that bare E+AA yields high tone, while E+AA+ASAT yields low tone.
$vs_e တ\u103A → ɪʔ;
$vs_e $vs_aa က\u103A → aʊ\u032Fʔ;
$vs_e $vs_aa င\u1037\u103A → a $creaky ʊ\u032Fɴ;
$vs_e $vs_aa င\u103Aး → a $high ʊ\u032Fɴ;
$vs_e $vs_aa င\u103A → a $low ʊ\u032Fɴ;
$vs_e $vs_aa \u1037 → ɔ $creaky;
$vs_e $vs_aa း → ɔ $high; # redundant high tone; this does not usually occur
$vs_e $vs_aa \u103A → ɔ $low;
$vs_e $vs_aa → ɔ $high;
$vs_e \u1037 → e $creaky;
$vs_e း → e $high;
$vs_e → e $low;
# Rhymes beginning with vowel sign AI; the unmarked form is high tone.
$vs_ai \u1037 → ɛ $creaky;
$vs_ai း → ɛ $high; # redundant high tone; this does not usually occur
$vs_ai → ɛ $high;
# Bare ANUSVARA (no preceding vowel sign consumed above).
$anusvara \u1037 → a $creaky ɴ;
$anusvara း → a $high ɴ;
$anusvara → a $low ɴ;
# MEDIAL WA followed directly by an ASAT-marked final acts as the vowel
# of the rhyme and is consumed here.
$med_w တ\u103A → ʊʔ;
$med_w န\u1037\u103A → ʊ $creaky ɴ;
$med_w န\u103Aး → ʊ $high ɴ;
$med_w န\u103A → ʊ $low ɴ;
$med_w ပ\u103A → ʊʔ;
$med_w မ\u1037\u103A → ʊ $creaky ɴ;
$med_w မ\u103Aး → ʊ $high ɴ;
$med_w မ\u103A → ʊ $low ɴ;
#
# Medials
#
::Null;
# Palatalization of the velar stops before MEDIAL YA and MEDIAL RA:
# velar + /j/ ==> modern palatals.
ကျ → t\u0361ɕ;
ချ → t\u0361ɕʰ;
ဂျ → d\u0361ʑ;
ဃျ → d\u0361ʑ;
ကြ → t\u0361ɕ;
ခြ → t\u0361ɕʰ;
ဂြ → d\u0361ʑ;
ဃြ → d\u0361ʑ;
# Remove redundant MEDIAL YA and MEDIAL RA after initial YA.
ယ { [$med_y $med_r] → ;
# Reorder the medials so that U+103E SIGN MEDIAL HA comes before any
# other medials.
# First, push U+103E MEDIAL HA before U+103D MEDIAL WA.
\u103D \u103E → \u103E \u103D;
::Null;
# Now MEDIAL WA comes last.
# Produce the palatal ʃ from (SA|LA)+YA+HA.
သျ\u103E → ʃ;
လျ\u103E → ʃ;
# Second, push U+103E MEDIAL HA before U+103C MEDIAL RA.
\u103C \u103E → \u103E \u103C;
::Null;
# Finally, push U+103E MEDIAL HA before U+103B MEDIAL YA.
\u103B \u103E → \u103E \u103B;
::Null;
# Consume MEDIAL HA and apply devoicing.
# After the reordering above, MEDIAL HA directly follows its base letter.
င\u103E → ŋ\u030A;
ဉ\u103E → ɲ\u0325;
ည\u103E → ɲ\u0325;
ဏ\u103E → n\u0325;
န\u103E → n\u0325;
မ\u103E → m\u0325;
ယ\u103E → ʃ;
ရ\u103E → ʃ;
လ\u103E → l\u0325;
ဝ\u103E → w\u0325;
ဠ\u103E → l\u0325;
# Drop any remaining U+103E MEDIAL HA.
\u103E → ;
# Simplify medial cluster /jw/ to /w/, i.e. drop U+103B MEDIAL YA and
# U+103C MEDIAL RA before U+103D MEDIAL WA. # TODO: revisit this
\u103B } \u103D → ;
\u103C } \u103D → ;
# Any medials still present are spelled out as glides.
\u103B → j;
\u103C → j;
\u103D → w;
#
# Initials
#
# Consonant letters that were not consumed by the rhyme or medial rules
# above are mapped one-to-one to their onset phonemes.
# Velars
က → k;
ခ → kʰ;
ဂ → ɡ;
ဃ → ɡ;
င → ŋ;
# Historic palatals
စ → s;
ဆ → sʰ;
ဇ → z;
ဈ → z;
ဉ → ɲ;
ည → ɲ;
# Alveolars
ဋ → t;
ဌ → tʰ;
ဍ → d;
ဎ → d;
ဏ → n;
# Historic dentals ==> alveolars
တ → t;
ထ → tʰ;
ဒ → d;
ဓ → d;
န → n;
# Labials
ပ → p;
ဖ → pʰ;
ဗ → b;
ဘ → b;
မ → m;
# Other letters
ယ → j;
ရ → j; # historic /r/
# The ASAT-marked form must be listed before plain LA so it is tried first.
လ\u103A → ; # final, typically not pronounced in native words
လ → l;
ဝ → w;
သ → θ; # historic /s/ ==> modern dental
ဟ → h;
ဠ → l;
အ → ʔ;
# Independent vowels
# Each is rendered as glottal stop + vowel; tone-marked variants come
# before the bare letter so they are tried first.
ဣ\u1037 → ʔḭ; # redundant creaky tone; this does not usually occur
ဣး → ʔí; # this does not usually occur
ဣ → ʔḭ;
ဤ\u1037 → ʔḭ; # this does not usually occur
ဤး → ʔí; # this does not usually occur
ဤ → ʔì;
ဥ\u1037 → ʔṵ; # redundant creaky tone; this does not usually occur
ဥး → ʔú; # this does not usually occur
ဥ → ʔṵ;
ဦ\u1037 → ʔṵ; # this does not usually occur
ဦး → ʔú;
ဦ → ʔù;
ဧ\u1037 → ʔḛ; # this does not usually occur
ဧး → ʔé;
ဧ → ʔè;
ဩ\u1037 → ʔɔ\u0330; # this does not usually occur
ဩး → ʔɔ\u0301; # redundant high tone; this does not usually occur
ဩ → ʔɔ\u0301;
ဪ\u1037 → ʔɔ\u0330; # this does not usually occur
ဪး → ʔɔ\u0301; # this does not usually occur
ဪ → ʔɔ\u0300;
# Various signs
၌ → n\u0325aɪ\u032Fʔ;
၍ → jwḛ;
# ၎င\u103Aး was handled earlier.
၏ → ʔḭ;
#
# Postprocessing
#
# Delete any remaining U+103A ASAT.
$asat → ;
# Delete zero-width space, non-joiner, joiner.
[\u200B-\u200D] → ;
# Recompose the output (base letters plus combining tone marks).
::NFC;