tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

Grek_Latn.txt (8513B)


      1 # © 2016 and later: Unicode, Inc. and others.
      2 # License & terms of use: http://www.unicode.org/copyright.html
      3 # Generated using tools/cldr/cldr-to-icu/
      4 #
      5 # File: Grek_Latn.txt
      6 # Generated from CLDR
      7 #
      8 
      9 # Rules are predicated on running NFD first, and NFC afterwards
     10 # :: [\u0000-\u007F \u0370-Ͽ [:Greek:] [:Nonspacing_Mark:]] ;
     11 # MINIMAL FILTER GENERATED FOR: Greek-Latin
     12 :: [΄´;µ·ÄËÏÖÜäëïöüÿ-āĒ-ēĪ-īŌ-ōŪ-ūŸǕ-ǜǞ-ǣǬ-ǭȪ-ȭȰ-ȳ\u0304\u0308\u0313-\u0314\u0342-\u0345ͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϗϛϝϟϡϣϥϧϩϫϭϯ-ϵϷ-\u07FBЁЇёїӒ-ӓӚ-ӟӢ-ӧӪ-ӱӴ-ӵӸ-ӹḔ-ḗḠ-ḡḦ-ḧḮ-ḯḸ-ḹṎ-ṓṜ-ṝṺ-ṻẄ-ẅẌ-ẍẗἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-ῌ῏-ΐῖ-Ί῟-Ῥῲ-ῴῶ-ῼΩϹ] ;
     13 :: NFD (NFC) ; # forward (Greek to Latin): decompose to NFD before the rules run; reverse (Latin to Greek): recompose to NFC as the final step
     14 # TEST CASES
     15 # Ὀλίγοι ἔμφονες πολλῶν ἀφρόνων φοβερώτεροι — Πλάτωνος
     16 # ᾂ ᾒ ᾢ ᾃ ᾓ ᾣ
     17 # ᾳ ῃ ῳ ὃ ὄ
     18 # ὠς ὡς ὢς ὣς
     19 # Ὠς Ὡς Ὢς Ὣς
     20 # ὨΣ ὩΣ ὪΣ ὫΣ
     21 # Ạ, ạ, Ẹ, ẹ, Ọ, ọ
     22 # Useful variables: character classes and combining marks shared by the rules below
     23 $lower = [[:Latin:][:Greek:] & [:Ll:]]; # lowercase letters of the Latin or Greek script
     24 $glower = [[:Greek:] & [:Ll:]]; # lowercase Greek letters only
     25 $upper = [[:Latin:][:Greek:] & [:Lu:]] ; # uppercase letters of the Latin or Greek script
     26 $accent = [:M:] ; # any combining mark
     27 # NOTE: restrict to just the Greek & Latin accents that we care about
     28 # TODO: broaden out once interation is fixed
     29 $accentMinus = [ [\u0300-\u0345] & [:M:] - [\u0338]] ; # marks in U+0300..U+0345 except U+0338 (COMBINING LONG SOLIDUS OVERLAY)
     30 $macron = \u0304 ; # COMBINING MACRON
     31 $ddot = \u0308 ; # COMBINING DIAERESIS
     32 $ddotmac = [$ddot$macron]; # either a diaeresis or a macron
     33 $lcgvowel = [αεηιουω] ; # lowercase Greek vowels
     34 $ucgvowel = [ΑΕΗΙΟΥΩ] ; # uppercase Greek vowels
     35 $gvowel = [$lcgvowel $ucgvowel] ; # Greek vowels of either case
     36 $lcgvowelC = [$lcgvowel $accent] ; # lowercase Greek vowel or any combining mark
     37 $evowel = [aeiouyAEIOUY]; # Latin vowels (y included)
     38 $evowel2 = [iuyIUY]; # Latin vowels accepted as the second letter of a vowel pair in the reverse rules
     39 $vowel = [ $evowel $gvowel] ; # Latin or Greek vowel
     40 $gammaLike = [ΓΚΞΧγκξχϰ] ; # Greek letters before which gamma is written n
     41 $egammaLike = [GKXCgkxc] ; # Latin letters before which n maps back to gamma
     42 $smooth = \u0313 ; # COMBINING COMMA ABOVE (smooth breathing)
     43 $rough = \u0314 ; # COMBINING REVERSED COMMA ABOVE (rough breathing)
     44 $iotasub = \u0345 ; # COMBINING GREEK YPOGEGRAMMENI (iota subscript)
     45 $evowel_i = [$evowel-[iI]] ; # $evowel without i and I
     46 $evowel2_i = [uyUY]; # $evowel2 without i and I
     47 $underbar = \u0331; # COMBINING MACRON BELOW, marks exceptional sigma forms
     48 $afterLetter = [:L:] [[:M:]\']* ; # a letter followed by any marks or apostrophes (used as preceding context)
     49 $beforeLetter = [[:M:]\']* [:L:] ; # any marks or apostrophes followed by a letter (used as following context)
     50 $beforeLower = $accent * $lower ; # optional marks followed by a lowercase letter
     51 $notLetter = [^[:L:][:M:]] ; # any character that is neither a letter nor a mark
     52 $under = \u0331; # same value as $underbar; used by the punctuation rules
     53 # Fix punctuation: map the Greek punctuation marks to their Latin counterparts
     54 # preserve original: a colon or question mark already in the source is tagged with $under so it round-trips unchanged
     55 \: ↔ \: $under ;
     56 \? ↔ \? $under ;
     57 \; ↔ \? ; # semicolon in Greek text becomes a question mark
     58 · ↔ \: ; # raised dot becomes a colon
     59 ΄ ↔ ´; # tonos becomes the acute accent
     60 # CIRCUMFLEX: convert greek circumflex to normal one. Could use tilde or inverted breve
     61 \u0342 ↔ \u0302 ; # U+0342 (Greek perispomeni) to and from U+0302 (combining circumflex)
     62 # IOTA: convert iota subscript to iota
     63 # first make previous alpha long!
     64 $accent_minus = [[$accent]-[$iotasub$macron]]; # any mark except iota subscript and macron (not the same set as $accentMinus); leaving out the macron stops the two alpha rules from matching again after they insert it
     65 Α } $accent_minus * $iotasub → | Α $macron ; # the cursor (|) stays before the alpha so later rules still transliterate it
     66 α } $accent_minus * $iotasub → | α $macron ;
     67 # now convert to uppercase if after uppercase, otherwise to lowercase
     68 $upper $accent * { $iotasub → I ;
     69 $iotasub → i ;
     70 | $1 $iotasub ← ($evowel $macron $accentMinus *) i ; # reverse: long Latin vowel (with any accents) followed by i gets an iota subscript instead
     71 | $1 $iotasub ← ($evowel $macron $accentMinus *) I ; # same for capital I
     72 # BREATHING
     73 # Convert rough breathing to h, and move before letters.
     74 # Capital vowel carrying the rough breathing, followed by a lowercase letter: emit H and lowercase the vowel (titlecase result)
     75 Α ($macron?) $rough } $beforeLower → H | α $1; # the cursor (|) is left before the lowercased vowel so later rules transliterate it
     76 Ε $rough } $beforeLower → H | ε;
     77 Η $rough } $beforeLower → H | η ;
     78 Ι ($ddot?) $rough } $beforeLower → H | ι  $1;
     79 Ο $rough } $beforeLower → H | ο ;
     80 Υ $rough } $beforeLower → H | υ ;
     81 Ω ($ddot?) $rough } $beforeLower → H | ω $1;
     82 # Capital vowel followed by a lowercase Greek letter that carries the rough breathing: emit H and lowercase the capital
     83 Α ($glower $macron?) $rough → H | α $1 ;
     84 Ε ($glower) $rough → H | ε $1 ;
     85 Η ($glower) $rough → H | η $1 ;
     86 Ι ($glower $ddot?) $rough → H | ι $1 ;
     87 Ο ($glower) $rough → H | ο $1 ;
     88 Υ ($glower) $rough → H | υ $1 ;
     89 Ω ($glower  $ddot?) $rough → H | ω $1 ;
     90 # Otherwise, turn a vowel run plus rough breathing into h (all-lowercase run) or H (any other Greek vowel run) placed before the run
     91 ($lcgvowel + $ddotmac? ) $rough → h | $1 ;
     92 ($gvowel + $ddotmac? ) $rough → H | $1 ;
     93 # Go backwards with H: in the reverse direction, h or H before a Latin vowel group becomes a rough breathing after that group
     94 | $1 $rough ← h ($evowel $macron $ddot? $evowel2_i $macron?) ; # long vowel plus u or y
     95 | $1 $rough ← h ($evowel $ddot? $evowel2 $macron?) ; # vowel plus i, u or y
     96 | $1 $rough ← h ($evowel $macron? $ddot?) ; # single vowel
     97 | $1 $rough ← H ([AEIOUY] $macron $ddot? $evowel2_i $macron?) ; # same three shapes for H followed by a capital vowel
     98 | $1 $rough ← H ([AEIOUY] $ddot? $evowel2 $macron?) ;
     99 | $1 $rough ← H ([AEIOUY] $macron? $ddot?) ;
    100 # titlecase (H followed by a lowercase vowel): the vowel must be uppercased, so each vowel needs its own rule
    101 # in the future, we should add &uppercase() to make this easier
    102 | A $1 $rough ← H a ($macron  $ddot? $evowel2_i $macron?) ;
    103 | E $1 $rough ← H e ($macron  $ddot? $evowel2_i $macron?) ;
    104 | I $1 $rough ← H i ($macron  $ddot? $evowel2_i $macron?) ;
    105 | O $1 $rough ← H o ($macron  $ddot? $evowel2_i $macron?) ;
    106 | U $1 $rough ← H u ($macron $ddot? $evowel2_i $macron?) ;
    107 | Y $1 $rough ← H y ($macron $ddot? $evowel2_i $macron?) ;
    108 | A $1 $rough ← H a ($ddot? $evowel2 $macron?) ;
    109 | E $1 $rough ← H e ($ddot? $evowel2 $macron?) ;
    110 | I $1 $rough ← H i ($ddot? $evowel2 $macron?) ;
    111 | O $1 $rough ← H o ($ddot? $evowel2 $macron?) ;
    112 | U $1 $rough ← H u ($ddot? $evowel2 $macron?) ;
    113 | Y $1 $rough ← H y ($ddot? $evowel2 $macron?) ;
    114 | A $1 $rough ← H a ($macron? $ddot? ) ;
    115 | E $1 $rough ← H e ($macron? $ddot? ) ;
    116 | I $1 $rough ← H i ($macron? $ddot? ) ;
    117 | O $1 $rough ← H o ($macron? $ddot? ) ;
    118 | U $1 $rough ← H u ($macron? $ddot? ) ;
    119 | Y $1 $rough ← H y ($macron? $ddot? ) ;
    120 # Now do smooth
    121 # delete smooth breathing for Latin (forward direction only)
    122 $smooth → ;
    123 # insert in Greek (reverse direction): add a smooth breathing to an initial r or vowel group that has no breathing
    124 # the assumption is that all Marks are on letters.
    125 | $1 $smooth ← $notLetter { ([rR]) } [^hH$smooth$rough] ; # r after a non-letter and not followed by h
    126 | $1 $smooth ← $notLetter { ($evowel $macron? $evowel2 $macron?) } [^$smooth$rough] ; # two-vowel group after a non-letter
    127 | $1 $smooth ← $notLetter { ($evowel $macron?) } [^$evowel2$smooth$rough] ; # single vowel after a non-letter, not followed by i, u or y
    128 # TODO: preserve smooth/rough breathing if not
    129 # on initial vowel sequence
    130 # The multi-character mappings below need to come before the single-letter rules so those rules do not mask them
    131 # remove now superfluous macron when returning (reverse only: Latin a with macron maps back to a plain alpha)
    132 Α ← A $macron ;
    133 α ← a $macron ;
    134 η ↔ e $macron ;
    135 Η ↔ E $macron ;
    136 φ ↔ ph ;
    137 Ψ } $beforeLower ↔ Ps ; # titlecase digraph when a lowercase letter follows, all caps otherwise
    138 Ψ ↔ PS ;
    139 Φ } $beforeLower ↔ Ph ;
    140 Φ ↔ PH ;
    141 ψ ↔ ps ;
    142 ω ↔ o $macron ;
    143 Ω ↔  O $macron;
    144 # NORMAL: letter-by-letter mappings, alpha through rho
    145 α ↔ a ;
    146 Α ↔ A ;
    147 β ↔ b ;
    148 Β ↔ B ;
    149 γ } $gammaLike ↔ n } $egammaLike ; # gamma before a gamma-like letter is written n; in reverse, n before g, k, x or c maps back to gamma
    150 γ ↔ g ;
    151 Γ } $gammaLike ↔ N } $egammaLike ;
    152 Γ ↔ G ;
    153 δ ↔ d ;
    154 Δ ↔ D ;
    155 ε ↔ e ;
    156 Ε ↔ E ;
    157 ζ ↔ z ;
    158 Ζ ↔ Z ;
    159 θ ↔ th ;
    160 Θ } $beforeLower ↔ Th ; # titlecase digraph when a lowercase letter follows, all caps otherwise
    161 Θ ↔ TH ;
    162 ι ↔ i ;
    163 Ι ↔ I ;
    164 κ ↔ k ;
    165 Κ ↔ K ;
    166 λ ↔ l ;
    167 Λ ↔ L ;
    168 μ ↔ m ;
    169 Μ ↔ M ;
    170 ν } $gammaLike → n\' ; # the apostrophe keeps nu + gamma-like distinct from gamma + gamma-like, which also yields n. NOTE(review): forward-only here, while the capital rule two lines down is bidirectional - confirm this asymmetry is intended
    171 ν ↔ n ;
    172 Ν } $gammaLike ↔ N\' ;
    173 Ν ↔ N ;
    174 ξ ↔ x ;
    175 Ξ ↔ X ;
    176 ο ↔ o ;
    177 Ο ↔ O ;
    178 π ↔ p ;
    179 Π ↔ P ;
    180 ρ $rough ↔ rh; # rho with rough breathing is written with a following h
    181 Ρ $rough } $beforeLower ↔ Rh ;
    182 Ρ $rough ↔ RH ;
    183 ρ ↔ r ;
    184 Ρ ↔ R ;
    185 # insert separator before things that turn into s, so that an already-converted P or p plus a sigma-like letter is not confused with the ps digraph
    186 [Pp] { } [ςσΣϷϸϺϻ] → \' ;
    187 # special S variants
    188 Ϸ ↔ S\u030C ; # Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L
    189 ϸ ↔ s\u030C ; #ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L
    190 Ϻ ↔ S\u0302 ; # Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L
    191 ϻ ↔ s\u0302 ; # ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L
    192 # underbar means exception: a sigma form that is unexpected for its position is written s + $underbar so it round-trips
    193 # before a letter, initial
    194 ς } $beforeLetter ↔ s $underbar } $beforeLetter; # final-form sigma in a non-final position is the exception
    195 σ } $beforeLetter ↔ s } $beforeLetter;
    196 # otherwise, after a letter = final
    197 $afterLetter { σ ↔ $afterLetter { s $underbar; # non-final-form sigma in final position is the exception
    198 $afterLetter { ς ↔ $afterLetter { s ;
    199 # otherwise (isolated) = initial
    200 ς ↔ s $underbar;
    201 σ ↔ s ;
    202 # [Pp] { Σ ↔ \'S ;
    203 Σ ↔ S ;
    204 τ ↔ t ;
    205 Τ ↔ T ;
    206 $vowel {υ } ↔ u ; # upsilon directly after a vowel is written u, otherwise y
    207 υ ↔ y ;
    208 $vowel { Υ ↔ U ;
    209 Υ ↔ Y ;
    210 χ ↔ ch ;
    211 Χ } $beforeLower ↔ Ch ; # titlecase digraph when a lowercase letter follows, all caps otherwise
    212 Χ ↔ CH ;
    213 # Completeness for ASCII: reverse-only rules that rewrite Latin letters with no direct Greek mapping into ones that have; the cursor (|) is left before the replacement so it is then transliterated
    214 $ignore = [[:Mark:]''] * ; # any run of marks and apostrophes (the doubled quote is an escaped apostrophe)
    215 | k  ← c ;
    216 | ph ← f ;
    217 | i  ← j ;
    218 | k ← q ;
    219 | b ← v } $vowel ; # v and w become b before a vowel, u otherwise
    220 | b ← w } $vowel;
    221 | u ← v ;
    222 | u ← w;
    223 | K ← C ;
    224 | Ph ← F ;
    225 | I ← J ;
    226 | K ← Q ;
    227 | B ← V  } $vowel ;
    228 | B ← W  } $vowel ;
    229 | U ← V ;
    230 | U ← W ;
    231 $rough } $ignore [:Uppercase_Letter:] → H ; # any rough breathing still left becomes H next to an uppercase letter, h otherwise
    232 $ignore [:Uppercase_Letter:] { $rough → H ;
    233 $rough ← H ; # reverse: a remaining H or h becomes a rough breathing
    234 $rough ↔ h ;
    235 # Completeness for Greek: forward-only rules that replace symbol and variant letterforms with the ordinary letter; the cursor (|) is left before the replacement so the main rules then transliterate it
    236 ϐ → | β ;
    237 ϑ → | θ ;
    238 ϒ → | Υ ;
    239 ϕ → | φ ;
    240 ϖ → | π ;
    241 ϰ → | κ ;
    242 ϱ → | ρ ;
    243 ϲ → | σ ;
    244 Ϲ → | Σ; #U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL
    245 ϳ → j ; # mapped straight to Latin, no reprocessing
    246 ϴ → | Θ ;
    247 ϵ → | ε ;
    248 µ → | μ ;
    249 ͺ → i; # mapped straight to Latin, no reprocessing
    250 # delete any trailing ' marks used for roundtripping (reverse direction only)
    251 ← [Ππ] { \' } [Ss] ; # separator between pi and a following S or s
    252 ← [Νν] { \' } $egammaLike ; # separator between nu and a following g, k, x or c
    253 ::NFC (NFD) ; # forward: recompose to NFC after the rules; reverse: decompose to NFD before the rules run
    254 # ([\u0000-\u007F [:Latin:] [:Greek:] [:Nonspacing_Mark:]]) ;
    255 # ([\u0000-\u007F · [:Latin:] [:Nonspacing_Mark:]]) ;
    256 # MINIMAL FILTER GENERATED FOR: Latin-Greek BACKWARD
    257 :: ( [':?A-Za-zÀ-ÅÇ-ÏÑ-ÖÙ-Ýà-åç-ïñ-öù-ýÿ-ďĒ-ĥĨ-İĴ-ķĹ-ľŃ-ňŌ-őŔ-ťŨ-žƠ-ơƯ-ưǍ-ǜǞ-ǣǦ-ǰǴ-ǵǸ-țȞ-ȟȦ-ȳ\u0300-\u0337\u0339-\u0345΅-ΆΈ-ΊΌΎ-ΐΪ-ΰϊ-ώϓ-ϔЀ-ЁЃЇЌ-ЎЙйѐ-ёѓїќ-ўѶ-ѷӁ-ӂӐ-ӓӖ-ӗӚ-ӟӢ-ӧӪ-ӵӸ-ӹḀ-ẙẛẠ-ỹἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼ῁-ῄῆ-ΐῖ-Ί῝-΅ῲ-ῴῶ-ῼK-Å] ) ;