Grek_Latn_UNGEGN.txt (4878B)
# © 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html
# Generated using tools/cldr/cldr-to-icu/
#
# File: Grek_Latn_UNGEGN.txt
# Generated from CLDR
#

# For modern Greek, based on UNGEGN rules.
# Rules are predicated on running NFD first, and NFC afterwards
#
# Notation used below (ICU transform rule syntax):
#   X → Y       applies only in the forward (Greek-to-Latin) direction
#   X ← Y       applies only in the reverse (Latin-to-Greek) direction
#   X ↔ Y       applies in both directions
#   a { b } c   rewrites b only when it is preceded by a and followed by c
#   |           marks where processing resumes after a replacement
# Rules are tried in the order written, so the more specific rules must stay
# ahead of the general single-letter rules.
#
# MINIMAL FILTER GENERATED FOR: Greek-Latin/UNGEGN
# WARNING: need to add accents to both filters ###
# :: [\u0301\u0304\u0306\u0308;µ·ÀÂÈÊÌÎÒÔÙÛàâèêìîòôùûĈ-ĉĜ-ĝĤ-ĥĴ-ĵŜ-ŝŴ-ŷǛ-ǜǸ-ǹ\u0300\u0302\u0313-\u0314\u0340\u0342-\u0343\u0345ͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϖϰ-ϵЀЍѐѝḔ-ḕṐ-ṑẀ-ẁẐ-ẑẤ-ậẰ-ằẾ-ệỐ-ộỜ-ờỪ-ừỲ-ỳἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-῍῏-ΐῖ-Ί῝῟-῭ῲ-ῴῶ-ῼΩϷ-\u07FBϹ] ;
# Forward filter actually in use: Greek-script characters, combining/enclosing
# marks, and the punctuation handled by the "Fix punctuation" rules.
:: [[[:Greek:][:Mn:][:Me:]] [\:-;?·;·]] ;
# Decompose before the rules run; the parenthesised form is the counterpart
# used when the rules are run in the reverse direction.
::NFD (NFC) ;
# Useful variables
$lower = [[:Latin:][:Greek:] & [:Ll:]] ;
$upper = [[:Latin:][:Greek:] & [:Lu:]] ;
$accent = [[:Mn:][:Me:]] ;
$macron = \u0304 ;
$ddot = \u0308 ;
$lcgvowel = [αεηιουω] ;
$ucgvowel = [ΑΕΗΙΟΥΩ] ;
$gvowel = [$lcgvowel $ucgvowel] ;
$lcgvowelC = [$lcgvowel $accent] ;
$evowel = [aeiouyAEIOUY];
$vowel = [ $evowel $gvowel] ;
# Following context "next letter is lowercase" (accents may intervene); used to
# pick the title-case digraph (Ps, Th, Ch) instead of the all-caps one.
$beforeLower = $accent * $lower ;
# Greek letters before which γ/Γ is written n/N, and the Latin letters that
# serve as the matching context on the Latin side of those rules.
$gammaLike = [ΓΚΞΧγκξχϰ] ;
$egammaLike = [GKXCgkxc] ;
$smooth = \u0313 ;
$rough = \u0314 ;
$iotasub = \u0345 ;
# Letters before which υ in a diphthong is written v rather than f.
$softener = [βΒγΓδΔζΖλΛμΜνΝρΡ$gvowel] ;
# Mark appended to a Latin letter to flag a non-default mapping so the reverse
# direction can recover the original Greek letter (see η, ω, ς, υ rules).
$under = \u0331;
$caron = \u030C;
# "Letter" context for the sigma rules; apostrophes and accents may sit
# between the sigma and the neighbouring letter.
$afterLetter = [:L:] [\'$accent]* ;
$beforeLetter = [\'$accent]* [:L:] ;
# Fix punctuation
# preserve original
# Latin ':' and '?' that were already in the text get $under so that they are
# not confused with the ':' and '?' produced from Greek punctuation below.
\: ↔ \: $under ;
\? ↔ \? $under ;
\; ↔ \? ;
· ↔ \: ;
# Fix any ancient characters that creep in
# Other accent marks are folded into \u0301; breathing marks and iota
# subscript are removed. These rules are forward-only.
\u0342 → \u0301 ;
\u0302 → \u0301 ;
\u0300 → \u0301 ;
$smooth → ;
$rough → ;
$iotasub → ;
ͺ → ;
# need to have these up here so the rules don't mask
η ↔ i $under ;
Η ↔ I $under ;
Ψ } $beforeLower ↔ Ps ;
Ψ ↔ PS ;
ψ ↔ ps ;
ω ↔ o $under ;
Ω ↔ O $under;
# at beginning or end of word, convert mp to b
[^[:L:]$accent] { μπ → b ;
μπ } [^[:L:]$accent] → b ;
[^[:L:]$accent] { [Μμ][Ππ] → B ;
[Μμ][Ππ] } [^[:L:]$accent] → B ;
# Reverse direction: every b/B becomes μπ; the capitalisation of π follows
# whether the next letter is lowercase.
μπ ← b ;
Μπ ← B } $beforeLower ;
ΜΠ ← B ;
# handle diphthongs ending with upsilon
ου ↔ ou ;
ΟΥ ↔ OU ;
Ου ↔ Ou ;
οΥ ↔ oU ;
# After a/e/i (plain or carrying $under), υ is written v before a $softener
# and f otherwise; both outputs carry $under. Marks captured after υ are
# emitted ahead of the consonant ($1) so that they land on the preceding vowel.
$fmaker = [aeiAEI] $under ? ;
$shiftForwardVowels = [[:Mn:]-[\u0308]]; # note: a diaeresis keeps the items separate
$fmaker { υ ( $shiftForwardVowels )* } $softener → $1 v $under ;
υ $1 ← ( $shiftForwardVowels )* v $under ;
$fmaker { υ ( $shiftForwardVowels )* } → $1 f $under;
υ $1 ← ( $shiftForwardVowels )* f $under ;
$fmaker { Υ } $softener ↔ V $under ;
# NOTE(review): the lowercase rule above yields f $under here, whereas
# uppercase Υ without a following softener yields U $under — confirm that
# this asymmetry is intended.
$fmaker { Υ ↔ U $under ;
υ ↔ y ;
Υ ↔ Y ;
# NORMAL
α ↔ a ;
Α ↔ A ;
β ↔ v ;
Β ↔ V ;
# γ before a gamma-like letter is written n (e.g. γγ, γκ, γξ, γχ).
γ } $gammaLike ↔ n } $egammaLike ;
γ ↔ g ;
Γ } $gammaLike ↔ N } $egammaLike ;
Γ ↔ G ;
δ ↔ d ;
Δ ↔ D ;
ε ↔ e ;
Ε ↔ E ;
ζ ↔ z ;
Ζ ↔ Z ;
θ ↔ th ;
Θ } $beforeLower ↔ Th ;
Θ ↔ TH ;
ι ↔ i ;
Ι ↔ I ;
κ ↔ k ;
Κ ↔ K ;
λ ↔ l ;
Λ ↔ L ;
μ ↔ m ;
Μ ↔ M ;
# A real ν before a gamma-like letter gets a trailing apostrophe so that it is
# not read back as γ by the rules above; the apostrophe is removed again by
# the "trailing ' marks" rules at the end of the file.
# NOTE(review): the lowercase rule is forward-only (→) while the uppercase
# rule is two-way (↔) — confirm that the difference is intended.
ν } $gammaLike → n\' ;
ν ↔ n ;
Ν } $gammaLike ↔ N\' ;
Ν ↔ N ;
ξ ↔ x ;
Ξ ↔ X ;
ο ↔ o ;
Ο ↔ O ;
π ↔ p ;
Π ↔ P ;
ρ ↔ r ;
Ρ ↔ R ;
# insert separator before things that turn into s
# (keeps p + s distinct from the "ps" produced for ψ)
[Pp] { } [ςσΣϷϸϺϻ] → \' ;
# special S variants
Ϸ ↔ S\u030C ; # Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L
ϸ ↔ s\u030C ; #ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L
Ϻ ↔ S\u0302 ; # Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L
ϻ ↔ s\u0302 ; # ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L
# Caron means exception
# (the sigma rules below actually flag the exceptional form with $under,
# i.e. ς in non-final position and σ in final position; $caron is not
# referenced by them)
# before a letter, initial
ς } $beforeLetter ↔ s $under } $beforeLetter;
σ } $beforeLetter ↔ s } $beforeLetter;
# otherwise, after a letter = final
$afterLetter { σ ↔ $afterLetter { s $under;
$afterLetter { ς ↔ $afterLetter { s ;
# otherwise (isolated) = initial
ς ↔ s $under;
σ ↔ s ;
# [Pp] { Σ ↔ \'S ;
Σ ↔ S ;
τ ↔ t ;
Τ ↔ T ;
φ ↔ f ;
Φ ↔ F ;
χ ↔ ch ;
Χ } $beforeLower ↔ Ch ;
Χ ↔ CH ;
# Completeness for ASCII
# Reverse-only rules for Latin letters that the forward rules never produce
# on their own: each is rewritten to a Latin spelling the rules above do
# handle, with | placed before the replacement.
# $ignore = [[:Mark:]''] * ;
| ch ← h ;
| k ← c ;
| i ← j ;
| k ← q ;
| b ← u } $vowel ;
| b ← w } $vowel ;
| y ← u ;
| y ← w ;
| Ch ← H ;
| K ← C ;
| I ← J ;
| K ← Q ;
| B ← W } $vowel ;
| B ← U } $vowel ;
| Y ← W ;
| Y ← U ;
# Completeness for Greek
# Forward-only rules that replace Greek symbol/variant letter forms with the
# ordinary letter, with | placed before the replacement.
ϐ → | β ;
ϑ → | θ ;
ϒ → | Υ ;
ϕ → | φ ;
ϖ → | π ;
ϰ → | κ ;
ϱ → | ρ ;
ϲ → | σ ;
Ϲ → | Σ; #U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL
ϳ → j ;
ϴ → | Θ ;
ϵ → | ε ;
µ → | μ ;
# delete any trailing ' marks used for roundtripping
# (reverse-only; the left context is already Greek, the right context is
# still Latin)
← [Ππ] { \' } [Ss] ;
← [Νν] { \' } $egammaLike ;
::NFC (NFD) ;
# MINIMAL FILTER GENERATED FOR: Latin-Greek/UNGEGN BACKWARD
:: ([[[:Latin:][:Mn:][:Me:]] ['\:?]]) ;