Grek_Latn.txt (8513B)
# © 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html
# Generated using tools/cldr/cldr-to-icu/
#
# File: Grek_Latn.txt
# Generated from CLDR
#

# Rules are predicated on running NFD first, and NFC afterwards
# :: [\u0000-\u007F \u0370-Ͽ [:Greek:] [:Nonspacing_Mark:]] ;
# MINIMAL FILTER GENERATED FOR: Greek-Latin
:: [΄´;µ·ÄËÏÖÜäëïöüÿ-āĒ-ēĪ-īŌ-ōŪ-ūŸǕ-ǜǞ-ǣǬ-ǭȪ-ȭȰ-ȳ\u0304\u0308\u0313-\u0314\u0342-\u0345ͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϗϛϝϟϡϣϥϧϩϫϭϯ-ϵϷ-\u07FBЁЇёїӒ-ӓӚ-ӟӢ-ӧӪ-ӱӴ-ӵӸ-ӹḔ-ḗḠ-ḡḦ-ḧḮ-ḯḸ-ḹṎ-ṓṜ-ṝṺ-ṻẄ-ẅẌ-ẍẗἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-ῌ῏-ΐῖ-Ί῟-Ῥῲ-ῴῶ-ῼΩϹ] ;
:: NFD (NFC) ;

# TEST CASES
# Ὀλίγοι ἔμφονες πολλῶν ἀφρόνων φοβερώτεροι — Πλάτωνος
# ᾂ ᾒ ᾢ ᾃ ᾓ ᾣ
# ᾳ ῃ ῳ ὃ ὄ
# ὠς ὡς ὢς ὣς
# Ὠς Ὡς Ὢς Ὣς
# ὨΣ ὩΣ ὪΣ ὫΣ
# Ạ, ạ, Ẹ, ẹ, Ọ, ọ

# Useful variables
$lower = [[:Latin:][:Greek:] & [:Ll:]];
$glower = [[:Greek:] & [:Ll:]];
$upper = [[:Latin:][:Greek:] & [:Lu:]] ;
$accent = [:M:] ;
# NOTE: restrict to just the Greek & Latin accents that we care about
# TODO: broaden out once iteration is fixed
$accentMinus = [ [\u0300-\u0345] & [:M:] - [\u0338]] ;
$macron = \u0304 ;
$ddot = \u0308 ;
$ddotmac = [$ddot$macron];
$lcgvowel = [αεηιουω] ;
$ucgvowel = [ΑΕΗΙΟΥΩ] ;
$gvowel = [$lcgvowel $ucgvowel] ;
# NOTE(review): $lcgvowelC is not referenced by any rule visible in this file.
$lcgvowelC = [$lcgvowel $accent] ;
$evowel = [aeiouyAEIOUY];
$evowel2 = [iuyIUY];
$vowel = [ $evowel $gvowel] ;
$gammaLike = [ΓΚΞΧγκξχϰ] ;
$egammaLike = [GKXCgkxc] ;
$smooth = \u0313 ;
$rough = \u0314 ;
$iotasub = \u0345 ;
# NOTE(review): $evowel_i is not referenced by any rule visible in this file.
$evowel_i = [$evowel-[iI]] ;
$evowel2_i = [uyUY];
$underbar = \u0331;
$afterLetter = [:L:] [[:M:]\']* ;
$beforeLetter = [[:M:]\']* [:L:] ;
$beforeLower = $accent * $lower ;
$notLetter = [^[:L:][:M:]] ;
# NOTE(review): $under is defined with the same value (\u0331) as $underbar above;
# the punctuation rules use $under, the sigma rules use $underbar.
$under = \u0331;

# Fix punctuation
# preserve original
\: ↔ \: $under ;
\? ↔ \? $under ;
\; ↔ \? ;
· ↔ \: ;
΄ ↔ ´;

# CIRCUMFLEX: convert greek circumflex to normal one. Could use tilde or inverted breve
\u0342 ↔ \u0302 ;

# IOTA: convert iota subscript to iota
# first make previous alpha long!
# NOTE(review): $accent_minus (all marks except iota subscript and macron) is a
# different set from $accentMinus defined among the variables above.
$accent_minus = [[$accent]-[$iotasub$macron]];
Α } $accent_minus * $iotasub → | Α $macron ;
α } $accent_minus * $iotasub → | α $macron ;
# now convert to uppercase if after uppercase, otherwise to lowercase
$upper $accent * { $iotasub → I ;
$iotasub → i ;
| $1 $iotasub ← ($evowel $macron $accentMinus *) i ;
| $1 $iotasub ← ($evowel $macron $accentMinus *) I ;

# BREATHING
# Convert rough breathing to h, and move before letters.
# Make A ` x = → H a x
Α ($macron?) $rough } $beforeLower → H | α $1;
Ε $rough } $beforeLower → H | ε;
Η $rough } $beforeLower → H | η ;
Ι ($ddot?) $rough } $beforeLower → H | ι $1;
Ο $rough } $beforeLower → H | ο ;
Υ $rough } $beforeLower → H | υ ;
Ω ($ddot?) $rough } $beforeLower → H | ω $1;
# Make A x ` = → H a x
Α ($glower $macron?) $rough → H | α $1 ;
Ε ($glower) $rough → H | ε $1 ;
Η ($glower) $rough → H | η $1 ;
Ι ($glower $ddot?) $rough → H | ι $1 ;
Ο ($glower) $rough → H | ο $1 ;
Υ ($glower) $rough → H | υ $1 ;
Ω ($glower $ddot?) $rough → H | ω $1 ;
# Otherwise, make x ` into h x and X ` into H X
($lcgvowel + $ddotmac? ) $rough → h | $1 ;
($gvowel + $ddotmac? ) $rough → H | $1 ;
# Go backwards with H
| $1 $rough ← h ($evowel $macron $ddot? $evowel2_i $macron?) ;
| $1 $rough ← h ($evowel $ddot? $evowel2 $macron?) ;
| $1 $rough ← h ($evowel $macron? $ddot?) ;
| $1 $rough ← H ([AEIOUY] $macron $ddot? $evowel2_i $macron?) ;
| $1 $rough ← H ([AEIOUY] $ddot? $evowel2 $macron?) ;
| $1 $rough ← H ([AEIOUY] $macron? $ddot?) ;
# titlecase, have to fix individually
# in the future, we should add &uppercase() to make this easier
| A $1 $rough ← H a ($macron $ddot? $evowel2_i $macron?) ;
| E $1 $rough ← H e ($macron $ddot? $evowel2_i $macron?) ;
| I $1 $rough ← H i ($macron $ddot? $evowel2_i $macron?) ;
| O $1 $rough ← H o ($macron $ddot? $evowel2_i $macron?) ;
| U $1 $rough ← H u ($macron $ddot? $evowel2_i $macron?) ;
| Y $1 $rough ← H y ($macron $ddot? $evowel2_i $macron?) ;
| A $1 $rough ← H a ($ddot? $evowel2 $macron?) ;
| E $1 $rough ← H e ($ddot? $evowel2 $macron?) ;
| I $1 $rough ← H i ($ddot? $evowel2 $macron?) ;
| O $1 $rough ← H o ($ddot? $evowel2 $macron?) ;
| U $1 $rough ← H u ($ddot? $evowel2 $macron?) ;
| Y $1 $rough ← H y ($ddot? $evowel2 $macron?) ;
| A $1 $rough ← H a ($macron? $ddot? ) ;
| E $1 $rough ← H e ($macron? $ddot? ) ;
| I $1 $rough ← H i ($macron? $ddot? ) ;
| O $1 $rough ← H o ($macron? $ddot? ) ;
| U $1 $rough ← H u ($macron? $ddot? ) ;
| Y $1 $rough ← H y ($macron? $ddot? ) ;

# Now do smooth
# delete smooth breathing for Latin
$smooth → ;
# insert in Greek
# the assumption is that all Marks are on letters.
| $1 $smooth ← $notLetter { ([rR]) } [^hH$smooth$rough] ;
| $1 $smooth ← $notLetter { ($evowel $macron? $evowel2 $macron?) } [^$smooth$rough] ;
| $1 $smooth ← $notLetter { ($evowel $macron?) } [^$evowel2$smooth$rough] ;
# TODO: preserve smooth/rough breathing if not
# on initial vowel sequence

# need to have these up here so the rules don't mask
# remove now superfluous macron when returning
Α ← A $macron ;
α ← a $macron ;
η ↔ e $macron ;
Η ↔ E $macron ;
φ ↔ ph ;
Ψ } $beforeLower ↔ Ps ;
Ψ ↔ PS ;
Φ } $beforeLower ↔ Ph ;
Φ ↔ PH ;
ψ ↔ ps ;
ω ↔ o $macron ;
Ω ↔ O $macron;

# NORMAL
α ↔ a ;
Α ↔ A ;
β ↔ b ;
Β ↔ B ;
γ } $gammaLike ↔ n } $egammaLike ;
γ ↔ g ;
Γ } $gammaLike ↔ N } $egammaLike ;
Γ ↔ G ;
δ ↔ d ;
Δ ↔ D ;
ε ↔ e ;
Ε ↔ E ;
ζ ↔ z ;
Ζ ↔ Z ;
θ ↔ th ;
Θ } $beforeLower ↔ Th ;
Θ ↔ TH ;
ι ↔ i ;
Ι ↔ I ;
κ ↔ k ;
Κ ↔ K ;
λ ↔ l ;
Λ ↔ L ;
μ ↔ m ;
Μ ↔ M ;
# NOTE(review): the lowercase rule below is one-directional (→) while the
# uppercase counterpart two lines down is bidirectional (↔) — confirm intended.
ν } $gammaLike → n\' ;
ν ↔ n ;
Ν } $gammaLike ↔ N\' ;
Ν ↔ N ;
ξ ↔ x ;
Ξ ↔ X ;
ο ↔ o ;
Ο ↔ O ;
π ↔ p ;
Π ↔ P ;
ρ $rough ↔ rh;
Ρ $rough } $beforeLower ↔ Rh ;
Ρ $rough ↔ RH ;
ρ ↔ r ;
Ρ ↔ R ;

# insert separator before things that turn into s
[Pp] { } [ςσΣϷϸϺϻ] → \' ;

# special S variants
Ϸ ↔ S\u030C ; # Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L
ϸ ↔ s\u030C ; # ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L
Ϻ ↔ S\u0302 ; # Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L
ϻ ↔ s\u0302 ; # ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L

# underbar means exception
# before a letter, initial
ς } $beforeLetter ↔ s $underbar } $beforeLetter;
σ } $beforeLetter ↔ s } $beforeLetter;
# otherwise, after a letter = final
$afterLetter { σ ↔ $afterLetter { s $underbar;
$afterLetter { ς ↔ $afterLetter { s ;
# otherwise (isolated) = initial
ς ↔ s $underbar;
σ ↔ s ;
# [Pp] { Σ ↔ \'S ;
Σ ↔ S ;
τ ↔ t ;
Τ ↔ T ;
$vowel {υ } ↔ u ;
υ ↔ y ;
$vowel { Υ ↔ U ;
Υ ↔ Y ;
χ ↔ ch ;
Χ } $beforeLower ↔ Ch ;
Χ ↔ CH ;

# Completeness for ASCII
$ignore = [[:Mark:]''] * ;
| k ← c ;
| ph ← f ;
| i ← j ;
| k ← q ;
| b ← v } $vowel ;
| b ← w } $vowel;
| u ← v ;
| u ← w;
| K ← C ;
| Ph ← F ;
| I ← J ;
| K ← Q ;
| B ← V } $vowel ;
| B ← W } $vowel ;
| U ← V ;
| U ← W ;
$rough } $ignore [:Uppercase_Letter:] → H ;
$ignore [:Uppercase_Letter:] { $rough → H ;
$rough ← H ;
$rough ↔ h ;

# Completeness for Greek
ϐ → | β ;
ϑ → | θ ;
ϒ → | Υ ;
ϕ → | φ ;
ϖ → | π ;
ϰ → | κ ;
ϱ → | ρ ;
ϲ → | σ ;
Ϲ → | Σ; # U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL
ϳ → j ;
ϴ → | Θ ;
ϵ → | ε ;
µ → | μ ;
ͺ → i;

# delete any trailing ' marks used for roundtripping
← [Ππ] { \' } [Ss] ;
← [Νν] { \' } $egammaLike ;

::NFC (NFD) ;
# ([\u0000-\u007F [:Latin:] [:Greek:] [:Nonspacing_Mark:]]) ;
# ([\u0000-\u007F · [:Latin:] [:Nonspacing_Mark:]]) ;
# MINIMAL FILTER GENERATED FOR: Latin-Greek BACKWARD
:: ( [':?A-Za-zÀ-ÅÇ-ÏÑ-ÖÙ-Ýà-åç-ïñ-öù-ýÿ-ďĒ-ĥĨ-İĴ-ķĹ-ľŃ-ňŌ-őŔ-ťŨ-žƠ-ơƯ-ưǍ-ǜǞ-ǣǦ-ǰǴ-ǵǸ-țȞ-ȟȦ-ȳ\u0300-\u0337\u0339-\u0345΅-ΆΈ-ΊΌΎ-ΐΪ-ΰϊ-ώϓ-ϔЀ-ЁЃЇЌ-ЎЙйѐ-ёѓїќ-ўѶ-ѷӁ-ӂӐ-ӓӖ-ӗӚ-ӟӢ-ӧӪ-ӵӸ-ӹḀ-ẙẛẠ-ỹἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼ῁-ῄῆ-ΐῖ-Ί῝-΅ῲ-ῴῶ-ῼK-Å] ) ;