tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

my_my_FONIPA.txt (10255B)


      1 # © 2016 and later: Unicode, Inc. and others.
      2 # License & terms of use: http://www.unicode.org/copyright.html
      3 # Generated using tools/cldr/cldr-to-icu/
      4 #
      5 # File: my_my_FONIPA.txt
      6 # Generated from CLDR
      7 #
      8 
      9 # Pronunciation rules for Burmese.
     10 #
     11 # The following rules are lexical and heuristic: lexical in the sense
     12 # that they generate phoneme strings which may further undergo
     13 # post-lexical phonological processes, in particular voicing, to
     14 # result in actual surface forms; heuristic in the sense that they try
     15 # to resolve ambiguities, especially around reduced vowels, in a
     16 # systematic way that may be incorrect in many situations. Vowel
     17 # reduction depends on many factors, such as morphemic structure,
     18 # which are not available here.
     19 #
     20 # Definitions
     21 #
     22 # Dependent vowel signs (U+102B..U+1032)
     23 $vs_AA = \u102B;
     24 $vs_aa = \u102C;
     25 $vs_i = \u102D;
     26 $vs_ii = \u102E;
     27 $vs_u = \u102F;
     28 $vs_uu = \u1030;
     29 $vs_e = \u1031;
     30 $vs_ai = \u1032;
     31 # Various signs (U+1037 DOT BELOW has no variable; the rules spell it as \u1037)
     32 $anusvara = \u1036;
     33 $visarga = \u1038;
     34 $virama = \u1039;
     35 $asat = \u103A;
     36 # Dependent (medial) consonant signs
     37 $med_y = \u103B;
     38 $med_r = \u103C;
     39 $med_w = \u103D;
     40 $med_h = \u103E;
     41 # Independent letters and letter-like punctuation symbols
     42 $independent = [\u1000-\u102A \u103F \u104C-\u104F \u1050-\u1055];
     43 $creaky = \u0330;  # COMBINING TILDE BELOW, written after a vowel for creaky tone
     44 $high = \u0301;  # COMBINING ACUTE ACCENT, written after a vowel for high tone
     45 $low = \u0300;  # COMBINING GRAVE ACCENT, written after a vowel for low tone
     46 $coda = [$creaky $high $low ɴ ʔ ə];  # TODO: remove if unused; no rule visible in this file references $coda
     47 #
     48 # Preprocessing
     49 #
     50 ::NFC;  # normalize the input to NFC before any rule runs
     51 # Replace U+102B TALL AA with U+102C AA. Their pronunciation is identical.
     52 $vs_AA → $vs_aa;
     53 # Unstack kinzi (င\u103A plus U+1039 VIRAMA) into plain င\u103A.
     54 # Open question: what should happen if the syllable ending in kinzi has a non-low tone?
     55 င\u103A $virama → င\u103A;
     56 # Unstack everything else, i.e. replace U+1039 VIRAMA with U+103A ASAT.
     57 $virama → $asat;
     58 # Unstack U+103F GREAT SA into SA + U+103A ASAT followed by a plain SA.
     59 ဿ → သ\u103Aသ;
     60 # Insert a syllable boundary marker /./ before every independent letter (with any U+1037 DOT BELOW or medial signs) that is not followed by U+103A ASAT.
     61 ::Null;  # start a new pass: the rules below run on the output of the rules above
     62 [^.$] { } $independent ([\u1037\u103B-\u103E])* [^\u103A] → \.;
     63 # Insert default inherent vowel after a consonant (plus any medials): /a\u0330/ at the end of the text, /ə/ before a syllable boundary marker /./.
     64 ::Null;  # start a new pass so the boundary markers inserted above are visible here
     65 ([\u1000-\u1021\u103F] [\u103B-\u103E]*) } [$] → $1 a $creaky;
     66 ([\u1000-\u1021\u103F] [\u103B-\u103E]*) } \.  → $1 ə;
     67 # Allow for additional coda consonants.
     68 # The rule deletes the U+103A ASAT of a letter that directly follows another ASAT (and optional VISARGA).
     69 # This only covers a few of the cases in which full coda consonants
     70 # can appear in loanwords. The general situation is somewhat rare and
     71 # is better dealt with in a formalism that can impose structural
     72 # constraints on syllables.
     73 ::Null;
     74 $asat ($visarga)? [\u1000-\u102A] { $asat → ;
     75 # Deal with ၎င\u103Aး early, before the rhyme rules can rewrite its င\u103Aး.
     76 ၎င\u103Aး → lə\.ɡa $high ʊ\u032Fɴ;
     77 #
     78 # Rhymes: vowel signs and ASAT-marked final letters become IPA vowel + tone diacritic + coda.
     79 # For each rhyme, the rules with a tone mark (U+1037 DOT BELOW or U+1038 VISARGA) precede the unmarked rule.
     80 ::Null;
     81 က\u103A → ɛʔ;
     82 ဂ\u103A → ɛʔ;  # in မဂ\u1039ဂဇင\u103Aး ~ မဂ\u103Aဂဇင\u103Aး /mɛʔ.ɡə.zɪ\u0301ɴ/
     83 င\u1037\u103A → ɪ $creaky ɴ;
     84 င\u103Aး → ɪ $high ɴ;
     85 င\u103A → ɪ $low ɴ;
     86 စ\u103A → ɪʔ;  # maybe sometimes /eɪ\u032Fʔ/
     87 ဉ\u1037\u103A → ɪ $creaky ɴ;
     88 ဉ\u103Aး → ɪ $high ɴ;
     89 ဉ\u103A → ɪ $low ɴ;
     90 ည\u1037\u103A → ɛ $creaky;
     91 ည\u103Aး → ɛ $high;
     92 ည\u103A → ɛ $low;
     93 ဏ\u1037\u103A → a $creaky ɴ;
     94 ဏ\u103Aး → a $high ɴ;
     95 ဏ\u103A → a $low ɴ;
     96 တ\u103A → aʔ;
     97 န\u1037\u103A → a $creaky ɴ;
     98 န\u103Aး → a $high ɴ;
     99 န\u103A → a $low ɴ;
    100 ပ\u103A → aʔ;
    101 မ\u1037\u103A → a $creaky ɴ;
    102 မ\u103Aး → a $high ɴ;
    103 မ\u103A → a $low ɴ;
    104 ယ\u1037\u103A → ɛ $creaky;
    105 ယ\u103Aး → ɛ $high;
    106 ယ\u103A → ɛ $low;
    107 သ\u103A → aʔ;
    108 $vs_aa ဉ\u1037\u103A → ɪ $creaky ɴ;
    109 $vs_aa ဉ\u103Aး → ɪ $high ɴ;
    110 $vs_aa ဉ\u103A → ɪ $low ɴ;
    111 $vs_aa တ\u103A → aʔ;
    112 $vs_aa ဏ\u1037\u103A → a $creaky ɴ;
    113 $vs_aa ဏ\u103Aး → a $high ɴ;
    114 $vs_aa ဏ\u103A → a $low ɴ;
    115 $vs_aa န\u1037\u103A → a $creaky ɴ;
    116 $vs_aa န\u103Aး → a $high ɴ;
    117 $vs_aa န\u103A → a $low ɴ;
    118 $vs_aa ပ\u103A → aʔ;  # in ကလာပ\u103Aစည\u103Aး /kə.laʔ.sɛ\u0301/ (club cell)
    119 $vs_aa ယ\u1037\u103A → ɛ $creaky;
    120 $vs_aa ယ\u103Aး → ɛ $high;
    121 $vs_aa ယ\u103A → ɛ $low;
    122 $vs_aa \u1037 → a $creaky;  # redundant creaky tone
    123 $vs_aa း → a $high;
    124 $vs_aa → a $low;  # no tone mark and no final: low tone
    125 $vs_i က\u103A → eɪ\u032Fʔ;
    126 $vs_i စ\u103A → eɪ\u032Fʔ;
    127 $vs_i တ\u103A → eɪ\u032Fʔ;
    128 $vs_i န\u1037\u103A → e $creaky ɪ\u032Fɴ;
    129 $vs_i န\u103Aး → e $high ɪ\u032Fɴ;
    130 $vs_i န\u103A → e $low ɪ\u032Fɴ;
    131 $vs_i ပ\u103A → eɪ\u032Fʔ;
    132 $vs_i မ\u1037\u103A → e $creaky ɪ\u032Fɴ;
    133 $vs_i မ\u103Aး → e $high ɪ\u032Fɴ;
    134 $vs_i မ\u103A → e $low ɪ\u032Fɴ;
    135 $vs_i $vs_u က\u103A → aɪ\u032Fʔ;
    136 $vs_i $vs_u င\u1037\u103A → a $creaky ɪ\u032Fɴ;
    137 $vs_i $vs_u င\u103Aး → a $high ɪ\u032Fɴ;
    138 $vs_i $vs_u င\u103A → a $low ɪ\u032Fɴ;
    139 $vs_i $vs_u ဏ\u1037\u103A → a $creaky ɪ\u032Fɴ;
    140 $vs_i $vs_u ဏ\u103Aး → a $high ɪ\u032Fɴ;
    141 $vs_i $vs_u ဏ\u103A → a $low ɪ\u032Fɴ;
    142 $vs_i $vs_u ယ\u1037\u103A → o $creaky;
    143 $vs_i $vs_u ယ\u103Aး → o $high;
    144 $vs_i $vs_u ယ\u103A → o $low;  # in က\u102D\u102Fယ\u103A /kò/
    145 $vs_i $vs_u \u1037 → o $creaky;
    146 $vs_i $vs_u း → o $high;
    147 $vs_i $vs_u → o $low;  # no tone mark and no final: low tone
    148 $vs_i $anusvara \u1037 → e $creaky ɪ\u032Fɴ;
    149 $vs_i $anusvara း → e $high ɪ\u032Fɴ;
    150 $vs_i $anusvara → e $low ɪ\u032Fɴ;
    151 $vs_i → i $creaky;  # bare U+102D vowel sign: creaky tone
    152 $vs_ii \u1037 → i $creaky;  # this does not usually occur
    153 $vs_ii း → i $high;
    154 $vs_ii → i $low;  # bare U+102E vowel sign: low tone
    155 $vs_u က\u103A → oʊ\u032Fʔ;
    156 $vs_u ဂ\u103A → oʊ\u032Fʔ;
    157 $vs_u ဏ\u1037\u103A → o $creaky ʊ\u032Fɴ;
    158 $vs_u ဏ\u103Aး → o $high ʊ\u032Fɴ;
    159 $vs_u ဏ\u103A → o $low ʊ\u032Fɴ;
    160 $vs_u တ\u103A → oʊ\u032Fʔ;
    161 $vs_u န\u1037\u103A → o $creaky ʊ\u032Fɴ;
    162 $vs_u န\u103Aး → o $high ʊ\u032Fɴ;
    163 $vs_u န\u103A → o $low ʊ\u032Fɴ;
    164 $vs_u ပ\u103A → oʊ\u032Fʔ;
    165 $vs_u မ\u1037\u103A → o $creaky ʊ\u032Fɴ;
    166 $vs_u မ\u103Aး → o $high ʊ\u032Fɴ;
    167 $vs_u မ\u103A → o $low ʊ\u032Fɴ;
    168 $vs_u $anusvara \u1037 → o $creaky ʊ\u032Fɴ;
    169 $vs_u $anusvara း → o $high ʊ\u032Fɴ;
    170 $vs_u $anusvara → o $low ʊ\u032Fɴ;
    171 $vs_u → u $creaky;  # bare U+102F vowel sign: creaky tone
    172 $vs_uu \u1037 → u $creaky;  # this does not usually occur
    173 $vs_uu း → u $high;
    174 $vs_uu → u $low;  # bare U+1030 vowel sign: low tone
    175 $vs_e တ\u103A → ɪʔ;
    176 $vs_e $vs_aa က\u103A → aʊ\u032Fʔ;
    177 $vs_e $vs_aa င\u1037\u103A → a $creaky ʊ\u032Fɴ;
    178 $vs_e $vs_aa င\u103Aး → a $high ʊ\u032Fɴ;
    179 $vs_e $vs_aa င\u103A → a $low ʊ\u032Fɴ;
    180 $vs_e $vs_aa \u1037 → ɔ $creaky;
    181 $vs_e $vs_aa း → ɔ $high;  # redundant high tone; this does not usually occur
    182 $vs_e $vs_aa \u103A → ɔ $low;  # ASAT directly on the vowel marks low tone here, not a coda
    183 $vs_e $vs_aa → ɔ $high;  # no tone mark: high tone
    184 $vs_e \u1037 → e $creaky;
    185 $vs_e း → e $high;
    186 $vs_e → e $low;
    187 $vs_ai \u1037 → ɛ $creaky;
    188 $vs_ai း → ɛ $high;  # redundant high tone; this does not usually occur
    189 $vs_ai → ɛ $high;  # no tone mark: high tone
    190 $anusvara \u1037 → a $creaky ɴ;
    191 $anusvara း → a $high ɴ;
    192 $anusvara → a $low ɴ;
    193 $med_w တ\u103A → ʊʔ;  # MEDIAL WA directly before an ASAT-marked final is consumed as the vowel /ʊ/
    194 $med_w န\u1037\u103A → ʊ $creaky ɴ;
    195 $med_w န\u103Aး → ʊ $high ɴ;
    196 $med_w န\u103A → ʊ $low ɴ;
    197 $med_w ပ\u103A → ʊʔ;
    198 $med_w မ\u1037\u103A → ʊ $creaky ɴ;
    199 $med_w မ\u103Aး → ʊ $high ɴ;
    200 $med_w မ\u103A → ʊ $low ɴ;
    201 #
    202 # Medials
    203 #
    204 ::Null;
    205 # Palatalization of the velar stops before MEDIAL YA and MEDIAL RA:
    206 # velar + /j/ ==> modern palatals.
    207 ကျ → t\u0361ɕ;
    208 ချ → t\u0361ɕʰ;
    209 ဂျ → d\u0361ʑ;
    210 ဃျ → d\u0361ʑ;
    211 ကြ → t\u0361ɕ;
    212 ခြ → t\u0361ɕʰ;
    213 ဂြ → d\u0361ʑ;
    214 ဃြ → d\u0361ʑ;
    215 # Remove redundant MEDIAL YA and MEDIAL RA after initial YA.
    216 ယ { [$med_y $med_r] → ;
    217 # Reorder the medials so that U+103E MEDIAL HA comes before any
    218 # other medials. This is done in three steps, each in its own pass.
    219 # First, push U+103E MEDIAL HA before U+103D MEDIAL WA.
    220 \u103D \u103E → \u103E \u103D;
    221 ::Null;
    222 # Now MEDIAL WA comes last.
    223 # Produce the palatal ʃ from (SA|LA)+YA+HA, while HA still follows YA.
    224 သျ\u103E → ʃ;
    225 လျ\u103E → ʃ;
    226 # Second, push U+103E MEDIAL HA before U+103C MEDIAL RA.
    227 \u103C \u103E → \u103E \u103C;
    228 ::Null;
    229 # Finally, push U+103E MEDIAL HA before U+103B MEDIAL YA.
    230 \u103B \u103E → \u103E \u103B;
    231 ::Null;
    232 # Consume MEDIAL HA and apply devoicing to the preceding sonorant (YA and RA become ʃ).
    233 င\u103E → ŋ\u030A;
    234 ဉ\u103E → ɲ\u0325;
    235 ည\u103E → ɲ\u0325;
    236 ဏ\u103E → n\u0325;
    237 န\u103E → n\u0325;
    238 မ\u103E → m\u0325;
    239 ယ\u103E → ʃ;
    240 ရ\u103E → ʃ;
    241 လ\u103E → l\u0325;
    242 ဝ\u103E → w\u0325;
    243 ဠ\u103E → l\u0325;
    244 # Drop any remaining U+103E MEDIAL HA.
    245 \u103E → ;
    246 # Simplify medial cluster /jw/ to /w/, i.e. drop U+103B MEDIAL YA and
    247 # U+103C MEDIAL RA before U+103D MEDIAL WA.  # TODO: revisit this
    248 \u103B } \u103D → ;
    249 \u103C } \u103D → ;
    250 \u103B → j;  # remaining MEDIAL YA
    251 \u103C → j;  # remaining MEDIAL RA, transcribed the same as MEDIAL YA
    252 \u103D → w;  # remaining MEDIAL WA
    253 #
    254 # Initials: every consonant letter still present is mapped to its IPA onset.
    255 #
    256 # Velars
    257 က → k;
    258 ခ → kʰ;
    259 ဂ → ɡ;
    260 ဃ → ɡ;
    261 င → ŋ;
    262 # Historic palatals
    263 စ → s;
    264 ဆ → sʰ;
    265 ဇ → z;
    266 ဈ → z;
    267 ဉ → ɲ;
    268 ည → ɲ;
    269 # Alveolars
    270 ဋ → t;
    271 ဌ → tʰ;
    272 ဍ → d;
    273 ဎ → d;
    274 ဏ → n;
    275 # Historic dentals ==> alveolars
    276 တ → t;
    277 ထ → tʰ;
    278 ဒ → d;
    279 ဓ → d;
    280 န → n;
    281 # Labials
    282 ပ → p;
    283 ဖ → pʰ;
    284 ဗ → b;
    285 ဘ → b;
    286 မ → m;
    287 # Other letters
    288 ယ → j;
    289 ရ → j;  # historic /r/
    290 လ\u103A → ;  # final LA, typically not pronounced in native words; listed before the plain LA rule so it is tried first
    291 လ → l;
    292 ဝ → w;
    293 သ → θ;  # historic /s/ ==> modern dental
    294 ဟ → h;
    295 ဠ → l;
    296 အ → ʔ;
    297 # Independent vowels (each is transcribed with an initial glottal stop /ʔ/)
    298 ဣ\u1037 → ʔḭ;  # redundant creaky tone; this does not usually occur
    299 ဣး → ʔí;  # this does not usually occur
    300 ဣ → ʔḭ;
    301 ဤ\u1037 → ʔḭ;  # this does not usually occur
    302 ဤး → ʔí;  # this does not usually occur
    303 ဤ → ʔì;
    304 ဥ\u1037 → ʔṵ;  # redundant creaky tone; this does not usually occur
    305 ဥး → ʔú;  # this does not usually occur
    306 ဥ → ʔṵ;
    307 ဦ\u1037 → ʔṵ;  # this does not usually occur
    308 ဦး → ʔú;
    309 ဦ → ʔù;
    310 ဧ\u1037 → ʔḛ;  # this does not usually occur
    311 ဧး → ʔé;
    312 ဧ → ʔè;
    313 ဩ\u1037 → ʔɔ\u0330;  # this does not usually occur
    314 ဩး → ʔɔ\u0301;  # redundant high tone; this does not usually occur
    315 ဩ → ʔɔ\u0301;
    316 ဪ\u1037 → ʔɔ\u0330;  # this does not usually occur
    317 ဪး → ʔɔ\u0301;  # this does not usually occur
    318 ဪ → ʔɔ\u0300;
    319 # Various signs (letter-like symbols with a fixed pronunciation)
    320 ၌ → n\u0325aɪ\u032Fʔ;
    321 ၍ → jwḛ;
    322 # ၎င\u103Aး was handled earlier, before the rhyme rules.
    323 ၏ → ʔḭ;
    324 #
    325 # Postprocessing
    326 #
    327 # Delete any remaining U+103A ASAT.
    328 $asat → ;
    329 # Delete U+200B ZERO WIDTH SPACE, U+200C ZERO WIDTH NON-JOINER and U+200D ZERO WIDTH JOINER.
    330 [\u200B-\u200D] → ;
    331 ::NFC;  # normalize the generated IPA output to NFC