si_si_FONIPA.txt (4343B)
# © 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html
# Generated using tools/cldr/cldr-to-icu/
#
# File: si_si_FONIPA.txt
# Generated from CLDR
#

# Sinhala pronunciation rules
#
# The rules run as a sequence of passes. Each "::Null;" ends the current pass,
# so the rules that follow it operate on the output of the rules before it.
# In a rule "before { match } after → result", only the text inside the braces
# is replaced; the text outside the braces is context that must be present.
#
# Output
#   k ɡ ŋ ᵑɡ c ɟ ɲ ʈ ɖ ⁿɖ t d n ⁿd p b m ᵐb j r l w ʃ s z h f
#   ə əː a aː æ æː i iː u uː e eː o oː
#
# References
# [1] Asanka Wasala, Ruvan Weerasinghe, and Kumudu Gamage:
#     Sinhala Grapheme-to-Phoneme Conversion and Rules for Schwa Epenthesis.
#     Proceedings of the COLING/ACL 2006 Main Conference Poster Sessions,
#     pages 890–897. http://www.aclweb.org/anthology/P06-2114

# Simplify ya + yansaya to plain ya after a consonant.
[\u0D9A-\u0DC6] \u0DCA (\u200D)? { ය\u0DCAය → ය;

# Delete ZWNJ and ZWJ to simplify further processing.
\u200C → ;
\u200D → ;

# Insert a schwa after every consonant that is not followed by a dependent vowel
# or virama.
::Null;
([\u0D9A-\u0DC6]) } [^\u0DCA-\u0DDF \u0DF2\u0DF3] → $1 ə;

# Pronunciation rules proper.
::Null;

# fප is an alternative spelling of ෆ.
# This occurs e.g. in ඩේව\u0DD2ඩ\u0DCA කොපර\u0DCAfප\u0DD3ල\u0DCAඩ\u0DCA (David Copperfield)
# [see http://bradshawofthefuture.blogspot.com/2013/02/f.html].
[Ff]ප → f;

# zස is seemingly the only way to unambiguously indicate a voiced /z/ sound.
# This occurs in e.g. ඇල\u0DCAzසය\u0DD2ම' රෝගය (Alzheimer's disease)
# [see https://si.wikipedia.org/wiki/ඇල\u0DCAzසය\u0DD2ම%27_රෝගය]
# or in zස\u0DD3බ\u0DCAරා (zebra) [see https://si.wikipedia.org/wiki/zස\u0DD3බ\u0DCAරා].
[Zz]ස → z;

# Anusvaraya and visargaya.
ං → ŋ;
o → ŋ;  # common substitution for anusvaraya
# Visargaya before a consonant: rewrite as consonant + virama + consonant
# (a doubled consonant). The "|" places the cursor before the rewritten text,
# so the consonant rules below are then applied to it.
ඃ ([\u0D9A-\u0DC6]) → | $1 \u0DCA $1;  # TODO: check which consonants geminate
ඃ → h;

# Independent vowels.
# NOTE(review): ඍ maps to "ri" here while its dependent form ෘ maps to "ru"
# further down; confirm whether this difference is intended.
අ → a;
ආ → aː;
ඇ → æ;
ඈ → æː;
ඉ → i;
ඊ → iː;
උ → u;
ඌ → uː;
ඍ → ri;
ඎ → ruː;
ඏ → ilu;
ඐ → iluː;
එ → e;
ඒ → eː;
ඓ → aj;
ඔ → o;
ඕ → oː;
ඖ → aw;  # TODO: check if this is correct

# Consonants.
ක → k;
ඛ → k;
ග → ɡ;
ඝ → ɡ;
ඞ → ŋ;
ඟ → ᵑɡ;
ච → c;
ඡ → c;
ජ → ɟ;
ඣ → ɟ;
ඤ → ɲ;
ඥ → kɲ;  # TODO: double-check
ඦ → ɟ;
ට → ʈ;
ඨ → ʈ;
ඩ → ɖ;
ඪ → ɖ;
ණ → n;
ඬ → ⁿɖ;
ත → t;
ථ → t;
ද → d;
ධ → d;
න → n;
ඳ → ⁿd;
ප → p;
ඵ → p;
බ → b;
භ → b;
ම → m;
ඹ → ᵐb;
ය → j;
ර → r;
ල → l;
ව → w;
ශ → ʃ;
ෂ → ʃ;
ස → s;
හ → h;
ළ → l;
ෆ → f;

\u0DCA → ;  # delete virama

# Dependent vowel signs.
ා → aː;
ැ → æ;
ෑ → æː;
\u0DD2 → i;
\u0DD3 → iː;
\u0DD4 → u;
\u0DD6 → uː;
ෘ → ru;
ෙ → e;
ේ → eː;
ෛ → aj;
ො → o;
ෝ → oː;
ෞ → aw;  # TODO: check if this is correct
ෟ → lu;
ෲ → ruː;
ෳ → luː;

# Heuristics for turning /ə/ into /a/. Based on [1].
# $c: any consonant phoneme written by the rules above.
# $s: any non-letter; used below as the word-boundary context.
$c=[k ɡ ŋ {ᵑɡ} c ɟ ɲ ʈ ɖ {ⁿɖ} t d n {ⁿd} p b m {ᵐb} j r l w ʃ s z h f];
$s=[:^L:];

# Rule #1
# The first three rules map ə to itself so that the matching exception is
# consumed before the two general ə → a rules can apply.
::Null;
# Exception (a) is the paper's /sv/ onset. The consonant rules above write ව
# as "w" and never emit "v", so the onset has to be matched as "sw" here.
# NOTE(review): this file is generated from CLDR; the same change is needed
# in the CLDR source rules or it will be lost on regeneration.
$s sw { ə → ə;  # exception (a)
$s k { ə } r → ə;  # exception (b)
$s $c { ə } $s → ə;  # exception (c)
$s $c $c { ə → a;
$s $c { ə → a;

# Rule #2
::Null;
$c r { ə } $c → a;  # clause (a) and (b)
$c r { a } h → a;  # clause (d), exception
$c r { a } $c → ə;  # clause (c)

# Rule #3
# The paper is unclear about what this rule means. The interpretation here
# assumes that "preceded" in the paper is a typo and should be read "followed".
::Null;
[a e æ o ə] h { ə → a;

# Rules #4 through #7
::Null;
ə } $c $c → a;  # Rule #4
ə } [rbɖʈ] $s → ə;  # Rule #5 exception
ə } $c $s → a;  # Rule #5
ə } ji $s → a;  # Rule #6
k { ə } [rl] u → a;  # Rule #7

# Rule #8
# Note that the paper doesn't say explicitly that this rule should be
# anchored at the beginning of a word, but the remarks before the rules
# seem to imply this.
::Null;
$s k { a } l[aeo]ːj → ə;  # Typo in paper: /j/ was /y/.
$s k { a } le[mh][ui] → ə;
$s k { alə } h[ui] → əle;
$s k { a } lə → ə;

# Diphthongs
::Null;
www+ → ww;  # යෞව\u0DCAවන
[i {iː} e {eː} æ {æː} o {oː} a {aː}] { wu → w;
əji → aj;
iji → iː;  # perhaps: ij
[u {uː} e {eː} æ {æː} o {oː} a {aː}] { ji → j;