tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

sat_Olck_sat_FONIPA.txt (4659B)


      1 # © 2016 and later: Unicode, Inc. and others.
      2 # License & terms of use: http://www.unicode.org/copyright.html
      3 # Generated using tools/cldr/cldr-to-icu/
      4 #
      5 # File: sat_Olck_sat_FONIPA.txt
      6 # Generated from CLDR
      7 #
      8 
      9 # Santali (Ol Chiki) → Santali (International Phonetic Alphabet)
     10 # Output
     11 # ------
     12 # m mː n nː ɳ ɳː ɲ ɲː ŋ ŋː
     13 # p pʰ pʼ b bʰ t tʰ tʼ d dʰ ʈ ʈʰ ɖ ɖʰ c cʰ cʼ k kʰ kʼ ɡ ʔ
     14 # s sː h
     15 # d\u0361ʒ
     16 # ɽ r
     17 # l lː
     18 # w wː w\u0303 w\u0303ː
     19 #
     20 # i iː ĩ ĩː u uː ũ ũː
     21 # e eː ẽ ẽː ə əː ə\u0303 ə\u0303ː o oː õ õː
     22 # ɛ ɛː ɛ\u0303 ɛ\u0303ː ɔ ɔː ɔ\u0303 ɔ\u0303ː
     23 # a aː ã ãː
     24 # References
     25 # ----------
     26 # [1] Michael Everson: Final proposal to encode the Ol Chiki script
     27 #     in the UCS.  ISO/IEC JTC1/SC2/WG2 Working Group Document N2984R,
     28 #     September 21, 2005.  http://std.dkuug.dk/jtc1/sc2/wg2/docs/n2984.pdf
     29 #
     30 # [2] George L. Campbell: Compendium of the World's Languages.
     31 #     Volume 2: Ladakhi to Zuni. ISBN 0-415-20297-3.  Taylor & Francis, 2000.
     32 #     Pages 1454 to 1458.
     33 # Notes
     34 # -----
     35 # According to [1] (page 3), ᱽ can only follow the four ejective
     36 # consonants ᱵ /pʼ/, ᱡ /cʼ/, ᱫ /tʼ/, and ᱜ /kʼ/; these become
     37 # ᱵᱽ /b/, ᱫᱽ /d/, ᱡᱽ /d\u0361ʒ/, and ᱜᱽ /ɡ/.  In online texts, however,
     38 # we have occasionally encountered ᱽ following non-ejective plosives,
     39 # for example after ᱯ /p/. These might possibly be typos.  Our rules
     40 # try to be resilient and handle ᱯᱽ as /b/.
     41 #
     42 # According to [1] (page 2), U+1C7C PHAARKAA follows the four “glottal”
     43 # consonants ᱵ /pʼ/, ᱡ /cʼ/, ᱫ /tʼ/, and ᱜ /kʼ/ (these are actually
     44 # ejective, not glottal).  In online texts, however, we have frequently
     45 # encountered ᱼ following non-ejective consonants.
     46 $inword = [[:L:][:M:]];  # any letter or mark; used as preceding context, i.e. "not word-initial"
     47 # Some online texts use a decomposed form of U+1C7A MU-GAAHLAA TTUDDAG.
     48 ᱹᱸ → ᱺ ;  # either order of the two marks is composed to ᱺ
     49 ᱸᱹ → ᱺ ;
     50 ::null();  # pass boundary: the rules below see the output of the rules above
     51 # To simplify the rules below, enforce a uniform ordering of marks.
     52 ᱻᱹ → ᱹᱻ ;  # vowel marks ᱹ ᱸ ᱺ are moved in front of ᱻ ...
     53 ᱻᱸ → ᱸᱻ ;
     54 ᱻᱺ → ᱺᱻ ;
     55 ᱼᱹ → ᱹᱼ ;  # ... and in front of ᱼ
     56 ᱼᱸ → ᱸᱼ ;
     57 ᱼᱺ → ᱺᱼ ;
     58 ::null();
     59 # Some online texts use U+1C7C PHAARKAA instead of U+1C7B RELAA for indicating
     60 # long phonemes, presumably because the graphemes look similar in some fonts.
     61 # Since the other phaarkaa rules only apply after plosives and ejectives (which
     62 # cannot be lengthened), we rewrite phaarkaa to relaa after vowels, nasals, ᱞ and ᱶ.
     63 [ᱚᱟᱤᱩᱮᱳᱶᱢᱝᱞᱱ] [ᱹᱸᱺ]* {ᱼ} → ᱻ ;  # only the ᱼ inside {} is replaced; the rest is context
     64 ::null();
     65 ᱚᱹᱻ → ɔː ;  # longer sequences are listed first so they win over the bare letter
     66 ᱚᱹ → ɔ ;  # ᱹ leaves the quality of ᱚ unchanged
     67 ᱚᱸᱻ → ɔ\u0303ː ;
     68 ᱚᱸ → ɔ\u0303 ;
     69 ᱚᱺᱻ → ɔ\u0303ː ;  # ᱺ gives the same result as ᱸ for this vowel
     70 ᱚᱺ → ɔ\u0303 ;
     71 ᱚᱻ → ɔː ;  # ᱻ (relaa) marks length
     72 ᱚ → ɔ ;
     73 ᱛᱼ → t ;
     74 ᱛᱷ → tʰ ;
     75 ᱛᱽ → d ;  # resilience: ᱽ after a non-ejective plosive is read as voicing
     76 # ᱛ /t/ is not one of the four ejectives, so it is never voiced word-internally.
     77 ᱛ → t ;
     78 ᱜᱼ → kʼ ;  # with ᱼ the ejective reading is kept even inside a word
     79 ᱜᱷ → kʰ ;  # NOTE(review): voiceless here, whereas ᱵᱷ → bʰ is voiced; confirm intended
     80 ᱜᱽ → ɡ ;  # ᱽ voices the ejective
     81 $inword {ᱜ} → ɡ ;  # bare ᱜ preceded by a letter or mark is voiced
     82 ᱜ → kʼ ;  # fallback: ejective when not preceded by a letter or mark
     83 ᱝᱻ → ŋː ;
     84 ᱝ → ŋ ;
     85 ᱞᱻ → lː ;
     86 ᱞ → l ;
     87 ᱟᱹᱻ → əː ;  # ᱹ changes ᱟ from /a/ to /ə/
     88 ᱟᱹ → ə ;
     89 ᱟᱸᱻ → ãː ;  # ᱸ nasalizes without changing the quality
     90 ᱟᱸ → ã ;
     91 ᱟᱺᱻ → ə\u0303ː ;  # ᱺ combines both effects: nasalized /ə/
     92 ᱟᱺ → ə\u0303 ;
     93 ᱟᱻ → aː ;
     94 ᱟ → a ;
     95 ᱠᱼ → k ;
     96 ᱠᱷ → kʰ ;
     97 ᱠᱽ → ɡ ;  # resilience: ᱽ after a non-ejective plosive is read as voicing
     98 ᱠ → k ;
     99 ᱡᱼ → cʼ ;
    100 ᱡᱷ → cʰ ;  # NOTE(review): voiceless here, whereas ᱵᱷ → bʰ is voiced; confirm intended
    101 ᱡᱽ →  d\u0361ʒ ;
    102 $inword {ᱡ} →  d\u0361ʒ ;  # bare ᱡ preceded by a letter or mark is voiced
    103 ᱡ → cʼ ;
    104 ᱢᱻ → mː ;
    105 ᱢ → m ;
    106 # According to [1], ᱣ is sometimes /v/ and sometimes /w/.
    107 # TODO: Find out if there is a rule for this.
    108 ᱣᱸ → w\u0303 ;
    109 ᱣ → w ;  # NOTE(review): the Output header lists wː, but there is no ᱣᱻ rule; confirm
    110 ᱤᱹᱻ → iː ;  # ᱹ leaves the quality of ᱤ unchanged
    111 ᱤᱹ → i ;
    112 ᱤᱸᱻ → ĩː ;
    113 ᱤᱸ → ĩ ;
    114 ᱤᱺᱻ → ĩː ;  # ᱺ gives the same result as ᱸ for this vowel
    115 ᱤᱺ → ĩ ;
    116 ᱤᱻ → iː ;
    117 ᱤ → i ;
    118 ᱥᱻ → sː ;
    119 ᱥ → s ;
    120 # According to [1], ᱦ is sometimes /h/ and sometimes /ʔ/.
    121 # TODO: Find out if there is a rule for this.
    122 ᱦ → h ;  # /ʔ/ from the Output header is not produced by any rule in this file
    123 ᱧᱻ → ɲː ;
    124 ᱧ → ɲ ;
    125 ᱨᱻ → r ;  # ᱻ after ᱨ is consumed without lengthening (no rː in the Output header)
    126 ᱨ → r ;
    127 ᱩᱹᱻ → uː ;  # ᱹ leaves the quality of ᱩ unchanged
    128 ᱩᱹ → u ;
    129 ᱩᱸᱻ → ũː ;
    130 ᱩᱸ → ũ ;
    131 ᱩᱺᱻ → ũː ;
    132 ᱩᱺ → ũ ;
    133 ᱩᱻ → uː ;
    134 ᱩ → u ;
    135 ᱪᱼ → c ;
    136 ᱪᱷ → cʰ ;
    137 ᱪᱽ →  d\u0361ʒ ;  # resilience: ᱽ after a non-ejective plosive is read as voicing
    138 ᱪ → c ;
    139 ᱫᱼ → tʼ ;
    140 ᱫᱷ → dʰ ;  # voiced aspirate, as listed in the Output header (parallel to ᱵᱷ → bʰ)
    141 ᱫᱽ → d ;  # ᱽ voices the ejective
    142 $inword {ᱫ} → d ;  # bare ᱫ preceded by a letter or mark is voiced
    143 ᱫ → tʼ ;  # fallback: ejective when not preceded by a letter or mark
    144 ᱬᱻ → ɳː ;
    145 ᱬ → ɳ ;
    146 # TODO: ᱵᱷᱭᱨᱚᱵ → bʰhrɔb seems unlikely; would be good to verify.
    147 ᱭ → h ;  # NOTE(review): same output as ᱦ → h; confirm this is the intended phoneme for ᱭ
    148 ᱮᱹᱻ → ɛː ;  # ᱹ changes ᱮ from /e/ to /ɛ/
    149 ᱮᱹ → ɛ ;
    150 ᱮᱺᱻ → ɛ\u0303ː ;  # ᱺ combines both effects: nasalized /ɛ/
    151 ᱮᱺ → ɛ\u0303 ;
    152 ᱮᱸᱻ → ẽː ;  # ᱸ nasalizes without changing the quality
    153 ᱮᱸ → ẽ ;
    154 ᱮᱻ → eː ;
    155 ᱮ → e ;
    156 ᱯᱼ → p ;
    157 ᱯᱷ → pʰ ;
    158 ᱯᱽ → b ;  # resilience: ᱽ after a non-ejective plosive is read as voicing
    159 ᱯ → p ;
    160 ᱰᱷ → ɖʰ ;
    161 ᱰ → ɖ ;
    162 ᱱᱻ → nː ;
    163 ᱱ → n ;
    164 ᱲᱻ → ɽ ;  # ᱻ after ᱲ is consumed without lengthening (no ɽː in the Output header)
    165 ᱲ → ɽ ;
    166 ᱳᱸᱻ → õː ;  # only ᱸ and ᱻ are handled for ᱳ; there are no ᱹ or ᱺ rules for it
    167 ᱳᱸ → õ ;
    168 ᱳᱻ → oː ;
    169 ᱳ → o ;
    170 ᱴᱼ → ʈ ;
    171 ᱴᱷ → ʈʰ ;
    172 ᱴᱽ → ɖ ;  # resilience: ᱽ after a non-ejective plosive is read as voicing
    173 ᱴ → ʈ ;
    174 ᱵᱼ → pʼ ;  # with ᱼ the ejective reading is kept even inside a word
    175 ᱵᱷ → bʰ ;
    176 ᱵᱽ → b ;  # ᱽ voices the ejective
    177 $inword {ᱵ} → b ;  # bare ᱵ preceded by a letter or mark is voiced
    178 ᱵ → pʼ ;  # fallback: ejective when not preceded by a letter or mark
    179 ᱶᱻ → w\u0303ː ;
    180 ᱶ → w\u0303 ;