und_FONIPA_ar.txt (3671B)
# © 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html
# Generated using tools/cldr/cldr-to-icu/
#
# File: und_FONIPA_ar.txt
# Generated from CLDR
#
# Transliterates IPA (FONIPA) text into Arabic script.
# The rules run as a sequence of passes separated by `::` directives:
#   1. on NFD text: strip modifiers and expand superscript letters;
#   2. on NFC text: rewrite a few vowel sequences (diphthongs);
#   3. main pass: map vowels and consonants to Arabic letters;
#   4. cleanup pass: drop syllable dots and shorten long runs of و.
# Within a pass, rule order is significant: earlier rules take precedence
# over later ones, so do not reorder rules.

# Vowels
# ------
# In these rules, we produce ي و ا both for short and for long vowels.
# This would be wrong for writing Arabic, but when transliterating
# foreign words and names, it is strongly preferred to vowel marks.
# However, we emit short schwa [ə] and a few other, schwa-like vowels.
$IVowel = [i ɪ e {e\u031E}];
$UVowel = [y {ɨ} {ʉ} ɯ u ʏ {ɪ\u0308} {ʊ\u0308} {ɯ\u033D} {ʊ} ø ɤ o {ø\u031E} {ɤ\u031E} {o\u031E} ɞ ɔ w {w\u0325} ʍ ʷ];
$AVowel = [ɛ œ ɜ ʌ æ ɐ a ɶ {ä} {ɒ\u0308} ɑ ɒ];
$SchwaVowel = [ɘ ɵ ə {ɵ\u031E}];
# Union of the four vowel classes above.
$Vowel = [$IVowel $UVowel $AVowel $SchwaVowel];
$Click = [ʘ ɋ ǀ ʇ ǃ ʗ ǂ ʄ ǁ ʖ];
# Any character that is not a letter, mark, or number.
$Boundary = [^[:L:][:M:][:N:]];

# Pass 1: work on decomposed text so combining marks can be matched
# individually.
::NFD;
# Delete modifiers that have no counterpart in the output: aspiration and
# ejective marks, combining tilde / tilde below, tone diacritics and tone
# letters, downstep/upstep and rise/fall arrows, tie bars (U+0361, U+035C),
# and the inverted breve below (U+032F).
[ʰ ʱ ʼ \u0303 \u0330 \u030B \u0301 \u0304 \u0300 \u030F \u030C \u0302 ˥ ˦ ˧ ˨ ˩ ꜜ ꜛ ↗ ↘ \u0361 \u035C \u032F] → ;
# Expand superscript modifier letters to the corresponding full letters.
ʲ → j;
ᵐ → m;
ⁿ → n;
ᵑ → ŋ;

# Pass 2: recompose, then adjust vowel sequences before the main mapping.
::NFC;
# TODO: Diphthongs probably need more work.
# Romanian [sekujesk] → [sekujask], for emitting سيكوياسك not سيكويسك
$UVowel [j $IVowel] [e {e\u031E} $SchwaVowel] → uia;
# Kazakh Аягөз [ɑjɑɡy\u032Fʉz] → [ɑjɑɡiuz], to emit TODO
# (U+032F has already been deleted in pass 1, hence the bare "yʉ" here.)
yʉ → iu;

# Pass 3: main IPA → Arabic mapping.
::NULL;
# Vowels
# For each vowel class the rules go from most to least specific:
# word-initial forms (after $Boundary, optionally with a leading ʔ),
# forms followed by ʔ, and finally the plain vowel with optional length ː.
# Text inside { } is what gets replaced; text outside is context only.
$Boundary {ʔ? $IVowel ː} → إ\u0650ي;
$Boundary {ʔ? $IVowel} → إ\u0650;
{$IVowel ʔ} $Boundary → ئ;
{$IVowel ː ʔ} $Boundary → يء;
{$IVowel ː ʔ} [$Vowel] → ئ;
$IVowel ː? → ي;
$Boundary {ʔ? $UVowel ː} → أو;
$Boundary {ʔ? $UVowel} → أ;
{$UVowel ʔ} $Boundary → ؤ;
{$UVowel ː ʔ} $Boundary → وء;
$UVowel ː? → و;
$Boundary {ʔ? $AVowel ː} → آ;
$Boundary {ʔ? $AVowel} → أ;
{$AVowel ʔ} $Boundary → أ;
{$AVowel ː ʔ} $Boundary → اء;
$AVowel ː? ʔ $AVowel ː? → اءا;
$AVowel ː? → ا;
$Boundary {ʔ? $SchwaVowel ː} → إ\u0650ي;
$Boundary {ʔ? $SchwaVowel} → أ;
$SchwaVowel ː → ي;
# A short schwa that is not word-initial produces no output.
$SchwaVowel → ;
# TODO: Handle glottal stop.
# Any glottal stop not consumed by the vowel rules above is dropped.
ʔ → ;
# Shadda for long (geminated) consonants
# All vowel+ː cases were consumed above, so a remaining ː follows a consonant.
ː → \u0651;
# Affricates
# NOTE(review): U+0361 is deleted in pass 1, so the {t\u0361ʃ} alternative
# looks unreachable by the time this rule runs — confirm upstream.
[{t\u0361ʃ} ʧ] → ت\u0652ش;
# Clicks
# A velar stop followed by a click is rendered as one unit; this must come
# before the bare $Click rule and before the plain k / ɡ rules below.
[ɡ g ɠ k] $Click → ك\u0652ش;
$Click → ت\u0652ش;
# Nasal stops
[{m\u0325} m ɱ] → م;
[{n\u033C\u030A} {n\u033C} {n\u0325} n {ɳ\u030A} ɳ {ɲ\u030A} {ɲ\u0325} ɲ] → ن;
# Velar/uvular nasal: the "+ k" case must precede the general case, which
# also absorbs an optional following ɡ.
[{ŋ\u030A} ŋ {ɴ\u0325} ɴ] k → نك;
[{ŋ\u030A} ŋ {ɴ\u0325} ɴ] [ɡ g ɠ]? → ن\u0652غ;
# Non-nasal stops
[p b {p\u032A} {b\u032A} ɓ] → ب;
[{d\u033C} d ɗ ᶑ] → د;
[{t\u033C} t] → ت;
[ʈ] → ط;
[ɖ] → ض;
c → ت\u0652ش;
ɟ → دج;
k → ك;
[ɡ g ɠ] → غ;
[q ɢ ʡ ʛ] → ق;
# Sibilant fricatives
s → س;
z → ز;
# NOTE(review): ʄ is also a member of $Click, whose rule appears earlier in
# this pass, so ʄ looks unreachable in the set below — confirm upstream.
[ʃ ʂ ɕ ʄ] → ش;
[ʒ ʐ ʑ] → ج;
# Non-sibilant fricatives
[ɸ f v] → ف;
β → ب;
[{θ\u033C} θ {θ\u0331}] → ث;
[{ð\u033C} ð {ð\u0320}] → ذ;
ç → ش;
# ʝ absorbs a following I-vowel (and length mark) so only one ي is emitted.
ʝ $IVowel? ː? → ي;
[x χ] → خ;
[ɣ ʁ] → غ;
ħ → ح;
ʕ → ع;
# NOTE(review): the earlier "ʔ → ;" rule matches every remaining ʔ first,
# so the {ʔ\u031E} alternative looks unreachable — confirm upstream.
[h ɦ {ʔ\u031E}] → ه;
# Approximants, trills, flaps
ʋ → و;
ʙ → بر;
# Raised r (r + U+031D) must be matched before the plain r in the set below.
{r\u031D} → رش;
[{ɹ\u0325} {ɹ} {ɻ\u030A} {ɻ} {ɾ\u0325} ɾ {ɽ\u030A} ɽ {r\u033C} {r\u0325} r] → ر;
[{ʀ\u0325} ʀ] → غ;
ʜ → ح;
ʢ → ع;
# j absorbs a following I-vowel (and length mark) so only one ي is emitted.
j $IVowel? ː? → ي;
# Laterals
ɬ → ش\u0652ل;
ɮ → ج\u0652ل;
# ʎ becomes لي unless the next character is an I-vowel, j, or ʝ (which
# would itself produce ي); in that case the general rule below emits ل only.
{[{ʎ\u0325} ʎ]} [^ $IVowel j ʝ] → لي;
[{l\u033C} {l\u0325} l {ɭ\u030A} ɭ {ʎ\u0325} ʎ] → ل;
[ʟ {ʟ\u0320}] → غ;

# Independent pass for misc cleanup.
::NULL;
# Strip off syllable markers
\. → ;
# Sequences of three or more ووو look very confusing; we shorten them.
# Polish Darłowo [darwɔvɔ] → داروو → داروووو
ووو+ → وو;