tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

si_si_FONIPA.txt (4343B)


      1 # © 2016 and later: Unicode, Inc. and others.
      2 # License & terms of use: http://www.unicode.org/copyright.html
      3 # Generated using tools/cldr/cldr-to-icu/
      4 #
      5 # File: si_si_FONIPA.txt
      6 # Generated from CLDR
      7 #
      8 
      9 # Sinhala pronunciation rules
     10 #
     11 # Output
     12 #     k ɡ ŋ ᵑɡ c ɟ ɲ ʈ ɖ ⁿɖ t d n ⁿd p b m ᵐb j r l w ʃ s z h f
     13 #     ə əː a aː æ æː i iː u uː e eː o oː
     14 #
     15 # References
     16 # [1] Asanka Wasala, Ruvan Weerasinghe, and Kumudu Gamage:
     17 #     Sinhala Grapheme-to-Phoneme Conversion and Rules for Schwa Epenthesis.
     18 #     Proceedings of the COLING/ACL 2006 Main Conference Poster Sessions,
     19 #     pages 890–897. http://www.aclweb.org/anthology/P06-2114
     20 # Simplify ya + yansaya (ය followed by virama, ZWJ, ය) to plain ya after a consonant, virama and optional ZWJ.
     21 [\u0D9A-\u0DC6] \u0DCA (\u200D)? { ය\u0DCA‍ය → ය;
     22 # Delete ZWNJ and ZWJ to simplify further processing.
     23 \u200C → ;  # ZWNJ
     24 \u200D → ;  # ZWJ
     25 # Insert a schwa after every consonant that is not followed by a dependent vowel
     26 # or virama (i.e. not followed by U+0DCA..U+0DDF, U+0DF2 or U+0DF3).
     27 ::Null;  # start a new pass over the text
     28 ([\u0D9A-\u0DC6]) } [^\u0DCA-\u0DDF \u0DF2\u0DF3] → $1 ə;  # $1 is the matched consonant, kept as-is
     29 # Pronunciation rules proper: map each Sinhala letter, sign and vowel sign to IPA.
     30 ::Null;  # start a new pass over the text
     31 # fප is an alternative spelling of ෆ.
     32 # This occurs e.g. in ඩේව\u0DD2ඩ\u0DCA කොපර\u0DCAfප\u0DD3ල\u0DCAඩ\u0DCA (David Copperfield)
     33 # [see http://bradshawofthefuture.blogspot.com/2013/02/f.html].
     34 [Ff]ප → f;
     35 # zස is seemingly the only way to unambiguously indicate a voiced /z/ sound.
     36 # This occurs in e.g. ඇල\u0DCAzසය\u0DD2ම' රෝගය (Alzheimer's disease)
     37 # [see https://si.wikipedia.org/wiki/ඇල\u0DCAzසය\u0DD2ම%27_රෝගය]
     38 # or in zස\u0DD3බ\u0DCA‍රා (zebra) [see https://si.wikipedia.org/wiki/‍zස\u0DD3බ\u0DCA‍රා].
     39 [Zz]ස → z;
     40 ං → ŋ;
     41 o → ŋ;  # the letter "o" is a common substitution for anusvaraya (ං)
     42 ඃ ([\u0D9A-\u0DC6]) → | $1 \u0DCA $1;  # before a consonant: emit consonant + virama + consonant, cursor left before the output; TODO: check which consonants geminate
     43 ඃ → h;  # ඃ in any other position
     44 අ → a;
     45 ආ → aː;
     46 ඇ → æ;
     47 ඈ → æː;
     48 ඉ → i;
     49 ඊ → iː;
     50 උ → u;
     51 ඌ → uː;
     52 ඍ → ri;  # NOTE(review): the dependent sign ෘ maps to ru and long ඎ to ruː; confirm ri is intended
     53 ඎ → ruː;
     54 ඏ → ilu;
     55 ඐ → iluː;
     56 එ → e;
     57 ඒ → eː;
     58 ඓ → aj;
     59 ඔ → o;
     60 ඕ → oː;
     61 ඖ → aw;  # TODO: check if this is correct
     62 ක → k;
     63 ඛ → k;  # same output as ක
     64 ග → ɡ;
     65 ඝ → ɡ;  # same output as ග
     66 ඞ → ŋ;
     67 ඟ → ᵑɡ;
     68 ච → c;
     69 ඡ → c;  # same output as ච
     70 ජ → ɟ;
     71 ඣ → ɟ;  # same output as ජ
     72 ඤ → ɲ;
     73 ඥ → kɲ;  # TODO: double-check
     74 ඦ → ɟ;  # NOTE(review): ඟ ඬ ඳ ඹ keep prenasalization (ᵑɡ ⁿɖ ⁿd ᵐb); confirm plain ɟ is intended here
     75 ට → ʈ;
     76 ඨ → ʈ;  # same output as ට
     77 ඩ → ɖ;
     78 ඪ → ɖ;  # same output as ඩ
     79 ණ → n;  # same output as න
     80 ඬ → ⁿɖ;
     81 ත → t;
     82 ථ → t;  # same output as ත
     83 ද → d;
     84 ධ → d;  # same output as ද
     85 න → n;
     86 ඳ → ⁿd;
     87 ප → p;
     88 ඵ → p;  # same output as ප
     89 බ → b;
     90 භ → b;  # same output as බ
     91 ම → m;
     92 ඹ → ᵐb;
     93 ය → j;
     94 ර → r;
     95 ල → l;
     96 ව → w;
     97 ශ → ʃ;
     98 ෂ → ʃ;  # same output as ශ
     99 ස → s;
    100 හ → h;
    101 ළ → l;  # same output as ල
    102 ෆ → f;
    103 \u0DCA → ;  # delete virama
    104 ා → aː;
    105 ැ → æ;
    106 ෑ → æː;
    107 \u0DD2 → i;
    108 \u0DD3 → iː;
    109 \u0DD4 → u;
    110 \u0DD6 → uː;
    111 ෘ → ru;
    112 ෙ → e;
    113 ේ → eː;
    114 ෛ → aj;
    115 ො → o;
    116 ෝ → oː;
    117 ෞ → aw;  # TODO: check if this is correct
    118 ෟ → lu;
    119 ෲ → ruː;
    120 ෳ → luː;
    121 # Heuristics for turning /ə/ into /a/ (and, in some contexts, /a/ back into /ə/). Based on [1].
    122 $c=[k ɡ ŋ {ᵑɡ} c ɟ ɲ ʈ ɖ {ⁿɖ} t d n {ⁿd} p b m {ᵐb} j r l w ʃ s z h f];  # consonant phonemes emitted by the pronunciation rules
    123 $s=[:^L:];  # any non-letter character
    124 # Rule #1: ə after one or two consonants that follow a non-letter becomes a, unless an exception matches first.
    125 ::Null;  # start a new pass over the text
    126 $s sv    { ə      → ə;  # exception (a)
    127 $s k     { ə } r  → ə;  # exception (b)
    128 $s $c    { ə } $s → ə;  # exception (c)
    129 $s $c $c { ə      → a;  # default after two consonants
    130 $s $c    { ə      → a;  # default after one consonant
    131 # Rule #2: vowel after consonant + r, when a consonant follows.
    132 ::Null;  # start a new pass over the text
    133 $c r { ə } $c → a;  # clause (a) and (b)
    134 $c r { a } h  → a;  # clause (d), exception; listed before clause (c) so that a is kept before h
    135 $c r { a } $c → ə;  # clause (c)
    136 # Rule #3
    137 # The paper is unclear about what this rule means. The interpretation here
    138 # assumes that "preceded" in the paper is a typo and should read "followed".
    139 ::Null;  # start a new pass over the text
    140 [a e æ o ə] h { ə → a;  # ə after one of these vowels plus h becomes a
    141 # Rules #4 through #7: ə is decided mainly by the text that follows it.
    142 ::Null;  # start a new pass over the text
    143 ə } $c $c     → a;  # Rule #4
    144 ə } [rbɖʈ] $s → ə;  # Rule #5 exception; listed before Rule #5 so that it takes precedence
    145 ə } $c     $s → a;  # Rule #5
    146 ə } ji     $s → a;  # Rule #6
    147 k { ə } [rl] u    → a;  # Rule #7
    148 # Rule #8
    149 # Note that the paper doesn't say explicitly that this rule should be
    150 # anchored at the beginning of a word, but the remarks before the rules
    151 # seem to imply this. Here every rule is anchored on a preceding non-letter ($s).
    152 ::Null;  # start a new pass over the text
    153 $s k { a } l[aeo]ːj   → ə;  # Typo in paper: /j/ was /y/.
    154 $s k { a } le[mh][ui] → ə;
    155 $s k { alə } h[ui]    → əle;  # rewrites the whole "alə" span, not just the first vowel
    156 $s k { a } lə         → ə;
    157 # Diphthongs: simplify w/j glide sequences left by the previous passes.
    158 ::Null;  # start a new pass over the text
    159 www+ → ww;  # three or more w collapse to two, e.g. යෞව\u0DCAවන
    160 [i {iː} e {eː} æ {æː} o {oː} a {aː}] { wu → w;  # wu after one of these vowels becomes w
    161 əji → aj;
    162 iji → iː;  # perhaps: ij
    163 [u {uː} e {eː} æ {æː} o {oː} a {aː}] { ji → j;  # ji after one of these vowels becomes j