tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

Grek_Latn_UNGEGN.txt (4878B)


      1 # © 2016 and later: Unicode, Inc. and others.
      2 # License & terms of use: http://www.unicode.org/copyright.html
      3 # Generated using tools/cldr/cldr-to-icu/
      4 #
      5 # File: Grek_Latn_UNGEGN.txt
      6 # Generated from CLDR
      7 #
      8 
      9 # For modern Greek, based on UNGEGN rules.
     10 # Rules are predicated on running NFD first, and NFC afterwards
     11 # MINIMAL FILTER GENERATED FOR: Greek-Latin/UNGEGN
     12 # WARNING: need to add accents to both filters ###
     13 # :: [\u0301\u0304\u0306\u0308;µ·ÀÂÈÊÌÎÒÔÙÛàâèêìîòôùûĈ-ĉĜ-ĝĤ-ĥĴ-ĵŜ-ŝŴ-ŷǛ-ǜǸ-ǹ\u0300\u0302\u0313-\u0314\u0340\u0342-\u0343\u0345ͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϖϰ-ϵЀЍѐѝḔ-ḕṐ-ṑẀ-ẁẐ-ẑẤ-ậẰ-ằẾ-ệỐ-ộỜ-ờỪ-ừỲ-ỳἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-῍῏-ΐῖ-Ί῝῟-῭ῲ-ῴῶ-ῼΩϷ-\u07FBϹ] ;
     14 :: [[[:Greek:][:Mn:][:Me:]] [\:-;?·;·]] ; # global filter: restricts the forward transform to Greek-script characters, combining/enclosing marks and the listed punctuation
     15 ::NFD (NFC) ; # forward pass starts by decomposing to NFD; the parenthesized NFC is the reverse-direction counterpart
     16 # Variables shared by the rules below (several, e.g. $upper, $macron, $ddot, $lcgvowelC and $caron, are not referenced by any rule visible in this file)
     17 $lower = [[:Latin:][:Greek:] & [:Ll:]] ; # lowercase letters of the Latin or Greek script
     18 $upper = [[:Latin:][:Greek:] & [:Lu:]] ; # uppercase letters of the Latin or Greek script
     19 $accent = [[:Mn:][:Me:]] ; # any nonspacing or enclosing mark
     20 $macron = \u0304 ; # U+0304 COMBINING MACRON
     21 $ddot = \u0308 ; # U+0308 COMBINING DIAERESIS
     22 $lcgvowel = [αεηιουω] ; # lowercase Greek vowels
     23 $ucgvowel = [ΑΕΗΙΟΥΩ] ; # uppercase Greek vowels
     24 $gvowel = [$lcgvowel $ucgvowel] ; # Greek vowels of either case
     25 $lcgvowelC = [$lcgvowel $accent] ; # lowercase Greek vowel or a mark
     26 $evowel = [aeiouyAEIOUY]; # Latin vowels (including y) of either case
     27 $vowel = [ $evowel $gvowel] ; # Latin or Greek vowel; used as look-ahead by the u/w rules in the ASCII-completeness section
     28 $beforeLower = $accent * $lower ; # look-ahead "optional marks, then a lowercase letter"; selects title-case digraphs such as Ps/Th/Ch
     29 $gammaLike = [ΓΚΞΧγκξχϰ] ; # Greek letters before which γ is written n and ν is written n'
     30 $egammaLike = [GKXCgkxc] ; # Latin-side counterpart of $gammaLike, used in the reverse direction
     31 $smooth = \u0313 ; # U+0313 COMBINING COMMA ABOVE (smooth breathing)
     32 $rough = \u0314 ; # U+0314 COMBINING REVERSED COMMA ABOVE (rough breathing)
     33 $iotasub = \u0345 ; # U+0345 COMBINING GREEK YPOGEGRAMMENI (iota subscript)
     34 $softener = [βΒγΓδΔζΖλΛμΜνΝρΡ$gvowel] ; # letters before which υ after a/e/i is written v instead of f (see the diphthong rules)
     35 $under = \u0331; # U+0331 COMBINING MACRON BELOW; marks Latin output that would otherwise be ambiguous when converting back
     36 $caron = \u030C; # U+030C COMBINING CARON (the SHO rules below spell out \u030C literally instead of using this variable)
     37 $afterLetter = [:L:] [\'$accent]* ; # preceding context: a letter followed by any number of apostrophes/marks
     38 $beforeLetter = [\'$accent]* [:L:] ; # following context: any number of apostrophes/marks, then a letter
     39 # Punctuation: map Greek punctuation to its Latin equivalent.
     40 # A ':' or '?' already present in the Greek text gets $under appended, so it stays distinguishable from the ':' and '?' produced by the two rules after it.
     41 \: ↔ \: $under ;
     42 \? ↔ \? $under ;
     43 \; ↔ \? ; # ';' on the Greek side corresponds to '?' on the Latin side
     44 · ↔ \: ; # raised dot on the Greek side corresponds to ':' on the Latin side
     45 # Normalize marks from polytonic ("ancient") text; all rules in this group are forward-only (→)
     46 \u0342 → \u0301 ; # U+0342 COMBINING GREEK PERISPOMENI becomes an acute accent
     47 \u0302 → \u0301 ; # circumflex becomes an acute accent
     48 \u0300 → \u0301 ; # grave becomes an acute accent
     49 $smooth → ; # breathing marks and iota subscript are dropped
     50 $rough → ;
     51 $iotasub → ;
     52 ͺ → ; # this character is deleted as well
     53 # These rules must come before the plain letter rules further down (e.g. ι ↔ i, ο ↔ o, Π ↔ P), which would otherwise match first and mask them
     54 η ↔ i $under ; # eta is i + $under, keeping it distinct from iota (plain i)
     55 Η ↔ I $under ;
     56 Ψ } $beforeLower ↔ Ps ; # title-case digraph when a lowercase letter follows
     57 Ψ ↔ PS ;
     58 ψ ↔ ps ;
     59 ω ↔ o $under ; # omega is o + $under, keeping it distinct from omicron (plain o)
     60 Ω ↔  O $under;
     61 # at beginning or end of word, convert mp to b
     62 [^[:L:]$accent] { μπ → b ; # μπ preceded by a character that is neither a letter nor a mark
     63 μπ } [^[:L:]$accent] → b ; # μπ followed by a character that is neither a letter nor a mark
     64 [^[:L:]$accent] { [Μμ][Ππ] → B ; # same two contexts for the remaining case combinations, which all give B
     65 [Μμ][Ππ] } [^[:L:]$accent] → B ;
     66 μπ ← b ; # reverse direction: Latin b always becomes mu + pi ...
     67 Μπ ← B } $beforeLower ; # ... title-cased when a lowercase letter follows ...
     68 ΜΠ ← B ; # ... and fully uppercase otherwise
     69 # Diphthongs ending in upsilon. ου is handled first, in all four case combinations.
     70 ου ↔ ou ;
     71 ΟΥ ↔ OU ;
     72 Ου ↔ Ou ;
     73 οΥ ↔ oU ;
     74 $fmaker = [aeiAEI] $under ? ; # preceding context: an already-romanized a/e/i, optionally carrying $under (i + $under is eta)
     75 $shiftForwardVowels = [[:Mn:]-[\u0308]]; # note: a diaeresis keeps the items separate
     76 $fmaker { υ ( $shiftForwardVowels )* } $softener → $1 v $under ; # before a $softener letter: υ becomes v + $under; marks that followed υ ($1) are emitted in front of the v
     77 υ $1 ← ( $shiftForwardVowels )* v $under ; # reverse: v + $under becomes υ, with the preceding marks placed after it
     78 $fmaker { υ ( $shiftForwardVowels )* } → $1 f $under; # otherwise: υ becomes f + $under, marks moved the same way
     79 υ $1 ← ( $shiftForwardVowels )* f $under ; # reverse of the f case
     80 $fmaker { Υ } $softener ↔ V $under ; # capital upsilon in the same position, before a $softener letter
     81 $fmaker { Υ ↔ U $under ; # capital upsilon after $fmaker in any other position is written U + $under
     82 υ ↔ y ; # any other upsilon is y
     83 Υ ↔ Y ;
     84 # Plain letter-for-letter correspondences (alpha through rho); context-dependent variants are listed before the plain rule for the same letter
     85 α ↔ a ;
     86 Α ↔ A ;
     87 β ↔ v ;
     88 Β ↔ V ;
     89 γ } $gammaLike ↔ n } $egammaLike ; # gamma before a $gammaLike letter is written n (reverse: n before a $egammaLike letter is gamma)
     90 γ ↔ g ;
     91 Γ } $gammaLike ↔ N } $egammaLike ;
     92 Γ ↔ G ;
     93 δ ↔ d ;
     94 Δ ↔ D ;
     95 ε ↔ e ;
     96 Ε ↔ E ;
     97 ζ ↔ z ;
     98 Ζ ↔ Z ;
     99 θ ↔ th ;
    100 Θ } $beforeLower ↔ Th ; # title-case digraph when a lowercase letter follows
    101 Θ ↔ TH ;
    102 ι ↔ i ;
    103 Ι ↔ I ;
    104 κ ↔ k ;
    105 Κ ↔ K ;
    106 λ ↔ l ;
    107 Λ ↔ L ;
    108 μ ↔ m ;
    109 Μ ↔ M ;
    110 ν } $gammaLike → n\' ; # nu before a $gammaLike letter gets a trailing apostrophe so it is not confused with the n produced from gamma. NOTE(review): this rule is forward-only (→) while the capital rule two lines below is ↔ — confirm this asymmetry is intended upstream
    111 ν ↔ n ;
    112 Ν } $gammaLike ↔ N\' ;
    113 Ν ↔ N ;
    114 ξ ↔ x ;
    115 Ξ ↔ X ;
    116 ο ↔ o ;
    117 Ο ↔ O ;
    118 π ↔ p ;
    119 Π ↔ P ;
    120 ρ ↔ r ;
    121 Ρ ↔ R ;
    122 # After an already-produced P/p, insert an apostrophe before any letter that turns into s, so that pi + sigma (p's) stays distinct from psi (ps)
    123 [Pp] { } [ςσΣϷϸϺϻ] → \' ;
    124 # special S variants
    125 Ϸ ↔ S\u030C ; # Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L
    126 ϸ ↔ s\u030C ; #ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L
    127 Ϻ ↔ S\u0302 ; # Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L
    128 ϻ ↔ s\u0302 ; # ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L
    129 # Sigma: the form expected for a position maps to plain s; the unexpected form is marked as an exception.
    130 # NOTE(review): the original comment here read "Caron means exception", but the rules below mark exceptions with $under (U+0331), not a caron — confirm.
    131 ς } $beforeLetter ↔ s $under } $beforeLetter; # before a letter (initial/medial position): final-form ς is the exception, so it is marked
    132 σ } $beforeLetter ↔ s } $beforeLetter; # before a letter: σ is the expected form
    133 # otherwise, after a letter = final
    134 $afterLetter { σ ↔ $afterLetter { s $under; # σ in final position is the exception, so it is marked
    135 $afterLetter { ς ↔ $afterLetter { s ; # ς in final position is the expected form
    136 # otherwise (isolated) = initial
    137 ς ↔ s $under;
    138 σ ↔ s ;
    139 # [Pp] { Σ ↔ \'S ;
    140 Σ ↔ S ;
    141 τ ↔ t ;
    142 Τ ↔ T ;
    143 φ ↔ f ;
    144 Φ ↔ F ;
    145 χ ↔ ch ;
    146 Χ } $beforeLower ↔ Ch ; # title-case digraph when a lowercase letter follows
    147 Χ ↔ CH ;
    148 # Completeness for ASCII (reverse direction only): Latin letters that no rule above produces are rewritten to a Latin spelling the rules do handle.
    149 # The '|' at the start of the replacement puts the cursor before the rewritten text, so it is run through the rules again.
    150 # $ignore = [[:Mark:]''] * ;
    151 | ch ← h ;
    152 | k  ← c ;
    153 | i  ← j ;
    154 | k ← q ;
    155 | b ← u } $vowel ; # u or w directly before a vowel is treated as b ...
    156 | b ← w } $vowel ;
    157 | y ← u ; # ... any other u or w is treated as y
    158 | y ← w ;
    159 | Ch ← H ; # uppercase counterparts of the rules above
    160 | K ← C ;
    161 | I ← J ;
    162 | K ← Q ;
    163 | B ← W } $vowel ;
    164 | B ← U } $vowel ;
    165 | Y ← W ;
    166 | Y ← U ;
    166 # Completeness for Greek (forward direction only): symbol/variant letter forms are rewritten to the ordinary letter.
    167 # The '|' before the replacement puts the cursor in front of it, so the ordinary letter is then handled by the rules above.
    168 ϐ → | β ;
    169 ϑ → | θ ;
    170 ϒ → | Υ ;
    171 ϕ → | φ ;
    172 ϖ → | π ;
    173 ϰ → | κ ;
    174 ϱ → | ρ ;
    175 ϲ → | σ ;
    176 Ϲ → | Σ; #U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL
    177 ϳ → j ; # mapped straight to Latin j (no cursor reset)
    178 ϴ → | Θ ;
    179 ϵ → | ε ;
    180 µ → | μ ;
    181 # Reverse direction: delete the apostrophe separators that the forward rules inserted for round-tripping
    182 ← [Ππ] { \' } [Ss] ; # apostrophe between an already-converted pi and a following S/s
    183 ← [Νν] { \' } $egammaLike ; # apostrophe between an already-converted nu and a following $egammaLike letter
    184 ::NFC (NFD) ; # forward pass ends by composing to NFC; the parenthesized NFD is the reverse-direction counterpart
    185 # MINIMAL FILTER GENERATED FOR: Latin-Greek/UNGEGN BACKWARD
    186 :: ([[[:Latin:][:Mn:][:Me:]] ['\:?]]) ; # parenthesized, so it applies to the reverse direction: Latin-script characters, marks, and the characters ' : ?