und_FONIPA_fa.txt (3645B)
# © 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html
# Generated using tools/cldr/cldr-to-icu/
#
# File: und_FONIPA_fa.txt
# Generated from CLDR
#

# Vowels
# ------
# In these rules, we produce ی و ا both for short and for long vowels.
# This would be wrong for writing Farsi or Arabic, but when transliterating
# foreign words and names, it is strongly preferred to vowel marks.
# Short schwa [ə] and a few other, schwa-like vowels get omitted entirely
# unless at the end of the word, in which case we emit ه whose Farsi
# word-final pronunciation comes close to [ə]; a word-initial schwa is
# written ای and a long schwa ی (see the vowel rules below). At the
# beginning of words, Farsi speakers prefer to see آ for [ɑ] and a few other
# similar-sounding dark vowels; note that this use of آ is quite different
# from Arabic.

# Character classes shared by the rules below.
$IVowel = [i ɪ e {e\u031E}];
$UVowel = [y {ɨ} {ʉ} ɯ u ʏ {ɪ\u0308} {ʊ\u0308} {ɯ\u033D} {ʊ} ø ɤ o {ø\u031E} {ɤ\u031E} {o\u031E} ɔ w {w\u0325} ʍ ʷ];
$AVowel = [ɛ œ ɜ æ ɶ];
$DarkAVowel = [ʌ a ɑ ɒ ɐ ɞ {ä} {ɒ\u0308}];  # آ instead of ا at beginning of words
$SchwaVowel = [ɘ ɵ ə {ɵ\u031E}];
$Click = [ʘ ɋ ǀ ʇ ǃ ʗ ǂ ʄ ǁ ʖ];
# Anything that is not a letter, mark or number counts as a word boundary.
$Boundary = [^[:L:][:M:][:N:]];

# Pass 1 (on decomposed text): normalize the IPA input.
::NFD;
# Fold the tie-bar affricate into its ligature BEFORE the tie bars are
# deleted by the next rule. Without this, [t\u0361ʃ] would reach the
# affricate rule further down as plain [tʃ] and be written تش instead of چ.
t [\u0361 \u035C] ʃ → ʧ;
# Delete the listed modifier letters, combining marks, tone letters, arrows
# and tie bars; none of them produces output.
[ʰ ʱ ʼ \u0303 \u0330 \u030B \u0301 \u0304 \u0300 \u030F \u030C \u0302 ˥ ˦ ˧ ˨ ˩ ꜜ ꜛ ↗ ↘ \u0361 \u035C \u032F] → ;
# Expand superscript letters to their full-size counterparts.
ʲ → j;
ᵐ → m;
ⁿ → n;
ᵑ → ŋ;
::NFC;

# Pass 2: vowel-sequence adjustments (IPA to IPA).
# TODO: Diphthongs probably need more work.
# Romanian [sekujesk] → [sekuiask], for emitting سیکویاسک not سیکویسک
$UVowel [j $IVowel] [e {e\u031E} $SchwaVowel] → uia;
# Kazakh Аягөз [ɑjɑɡy\u032Fʉz] → [ɑjɑɡiuz], to emit آیاگیوز not آیاگووز
# (U+032F has already been deleted in pass 1, hence the bare "yʉ" here.)
yʉ → iu;

# Pass 3: IPA to Persian script. "$Boundary {…}" means "at the start of a word".
::NULL;
# Vowels
$Boundary {$SchwaVowel ː?} → ای;
$SchwaVowel ː → ی;
# Schwa or [e] followed by a non-letter other than the syllable dot: word-final ه.
{[$SchwaVowel e {e\u031E}]} [^[:L:][:M:][:N:][\.]] → ه;
# Any remaining schwa is dropped.
$SchwaVowel → ;
$Boundary {$IVowel ː?} → ای;
$IVowel ː? j? → ی;
$Boundary {$UVowel ː?} → او;
$UVowel ː? → و;
$Boundary {$AVowel ː?} → ا;
$AVowel ː? → ا;
$Boundary {$DarkAVowel ː?} → آ;
$DarkAVowel ː? → ا;

# Shadda for long (geminated) consonants
# (every length mark after a vowel has been consumed by the rules above)
ː → \u0651;

# Affricates
# The {t\u0361ʃ} alternative is kept from the original rule, but tied input
# now arrives here as ʧ because pass 1 folds it before stripping the tie bar.
[{t\u0361ʃ} ʧ] → چ;

# Clicks
[ɡ g ɠ k] $Click → کچ;
[n ɲ]? $Click → نچ;

# Nasal stops
[{m\u0325} m ɱ] → م;
[{n\u033C\u030A} {n\u033C} {n\u0325} n {ɳ\u030A} ɳ {ɲ\u030A} {ɲ\u0325} ɲ] → ن;
[{ŋ\u030A} ŋ {ɴ\u0325} ɴ] k → نک;
[{ŋ\u030A} ŋ {ɴ\u0325} ɴ] [ɡ g]? → نگ;

# Non-nasal stops
[p {p\u032A}] → پ;
[b {b\u032A} ɓ] → ب;
[{d\u033C} d ɗ ᶑ] → د;
[{t\u033C} t] → ت;
[ʈ] → ط;
[ɖ] → ض;
c → چ;
ɟ → دج;
k → ک;
[ɡ g ɠ] → گ;
[q ɢ ʡ ʛ] → ق;
# The glottal stop is dropped.
ʔ → ;

# Sibilant fricatives
s → س;
z → ز;
[ʃ ʂ ɕ ʄ] → ش;
[ʒ ʐ ʑ] → ژ;

# Non-sibilant fricatives
[ɸ f] → ف;
[β v] → و;
[{θ\u033C} θ {θ\u0331}] → ث;
[{ð\u033C} ð {ð\u0320}] → ذ;
ç → ش;
ʝ $IVowel? ː? → ی;
[x χ] → خ;
[ɣ ʁ] → غ;
ħ → ح;
ʕ → ع;
[h ɦ {ʔ\u031E}] → ه;

# Approximants, trills, flaps
ʋ → و;
ʙ → بر;
{r\u031D} → رژ;
[{ɹ\u0325} {ɹ} {ɻ\u030A} {ɻ} {ɾ\u0325} ɾ {ɽ\u030A} ɽ {r\u033C} {r\u0325} r] → ر;
[{ʀ\u0325} ʀ] → غ;
ʜ → ح;
ʢ → ع;
j $IVowel? ː? → ی;

# Laterals
ɬ → شل;
ɮ → ژل;
# [ʎ] not followed by an I-vowel, [j] or [ʝ] gets an explicit ی; otherwise
# it falls through to the plain ل rule below.
{[{ʎ\u0325} ʎ]} [^ $IVowel j ʝ] → لی;
[{l\u033C} {l\u0325} l {ɭ\u030A} ɭ {ʎ\u0325} ʎ] → ل;
[ʟ {ʟ\u0320}] → غ;

# Independent pass for misc cleanup.
::NULL;
# Strip off syllable markers
\. → ;
# Sequences of three or more ووو look very confusing; we shorten them.
# Polish Darłowo [darwɔvɔ]: we emit داروو not داروووو
ووو+ → وو;