tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

ThaiLogical_Latin.txt (6784B)


      1 # © 2016 and later: Unicode, Inc. and others.
      2 # License & terms of use: http://www.unicode.org/copyright.html
      3 # Generated using tools/cldr/cldr-to-icu/
      4 #
      5 # File: ThaiLogical_Latin.txt
      6 # Generated from CLDR
      7 #
      8 
      9 # Thai-Latin
     10 # This set of rules follows ISO 11940
     11 #     see http://homepage.mac.com/sirbinks/pdf/Thai.r2.pdf
     12 # except that ISO 11940 does not mention an implicit vowel, so we use o\u0323
     13 #
     14 # The transcription is fairly ugly, so we ought to also do the UNGEGN version
     15 #     see: http://www.eki.ee/wgrs/rom1_th.pdf
     16 # and probably make that the main variant.
     17 #
     18 # Note: this is an internal file. NFD/NFC normalization is handled externally, in the index.
     19 # The insertion of spaces between words, the reversal of the vowels
     20 # and the conversion of space to semicolon are done *outside* of these rules.
     21 # So as far as these rules are concerned, the vowels are in logical order!
     22 # insert implicit vowel (and remove it going the other way)
     23 # COMMENTED out: the implicit vowel positions cannot be predicted algorithmically
     24 #$consonant = [ก-ฮ];
     25 #$vowel = [ะ-\u0E3Aเ-ไ\u0E47];
     26 #{ ( $consonant ) } [^$vowel \uE000] → | $1 \uE000 ;
     27 #\uE000 → o\u0323 ;
     28 # ← o\u0323 ;
     29 $notAbove = [^\p{ccc=0}\p{ccc=Above}] ; # characters whose canonical combining class is neither 0 nor Above
     30 $notBelow = [^\p{ccc=0}\p{ccc=Below}] ; # class neither 0 nor Below; only mentioned in the commented-out expression further down
     31 # Consonants
     32 # Warning: the 'h's need to be handled carefully: 'h' ends the digraphs kh/ph/ch/th and also starts h\u0304 and h\u0323.
     33 # What we really want to say is the following negated lookahead, but we can't
     34 # $notHAccent = !($notAbove*   \u0304 | $notBelow*   \u0323) ;
     35 # Since the only accents we care about that could cause problems are free-standing accents below, we use $notHAccent0 and $notHAccent1 instead:
     36 $freeStandingBelow = [\u0325  ]; # U+0325 COMBINING RING BELOW. NOTE(review): PHINTHU now maps to a spacing tick, not an under-ring; confirm this member is still wanted
     37 $hAccent =  [   \u0304     \u0323]; # the two accents that turn a following 'h' into its own letter (macron, dot below)
     38 $notHAccent0 = [^$freeStandingBelow$hAccent]; # one character that is neither a free-standing below mark nor an h accent
     39 $notHAccent1 = $freeStandingBelow [^$hAccent]; # a free-standing below mark followed by a character that is not an h accent
     40 ห → h\u0304 ; # THAI CHARACTER HO HIP (Thai-to-Latin only; the Latin-to-Thai direction is the next rule)
     41 ห | $1 ← h ($notAbove*)    \u0304; # backward case, account for reordering: marks captured between h and the macron are re-emitted after the cursor
     42 ฮ ↔ h\u0323 ; # THAI CHARACTER HO NOKHUK
     43 ข ↔ k\u0304h ; # THAI CHARACTER KHO KHAI
     44 ฃ ↔ k\u0323\u0304h ; # THAI CHARACTER KHO KHUAT
     45 ฅ ↔ kʹh ; # THAI CHARACTER KHO KHON
     46 ฆ ↔ k\u0323h ; # THAI CHARACTER KHO RAKHANG
     47 ค ← kh } $notHAccent1 ; # THAI CHARACTER KHO KHWAI (Latin-to-Thai only: kh before a free-standing below mark that is not followed by an h accent)
     48 ค ↔ kh } $notHAccent0 ; # THAI CHARACTER KHO KHWAI (the lookahead keeps k + h\u0304 / h\u0323 from being read as kh)
     49 ก ↔ k ; # THAI CHARACTER KO KAI
     50 ภ ↔ p\u0323h ; # THAI CHARACTER PHO SAMPHAO
     51 ผ ↔ p\u0304h ; # THAI CHARACTER PHO PHUNG
     52 พ ← ph } $notHAccent1 ; # THAI CHARACTER PHO PHAN (Latin-to-Thai only; same guard as kh)
     53 พ ↔ ph } $notHAccent0 ; # THAI CHARACTER PHO PHAN
     54 ป ↔ p ; # THAI CHARACTER PO PLA
     55 ฉ ↔ c\u0304h ; # THAI CHARACTER CHO CHING
     56 ฌ ↔ c\u0323h ; # THAI CHARACTER CHO CHOE
     57 ช ← ch } $notHAccent1 ; # THAI CHARACTER CHO CHANG (Latin-to-Thai only; same guard as kh)
     58 ช ↔ ch } $notHAccent0 ; # THAI CHARACTER CHO CHANG
     59 จ ↔ c ; # THAI CHARACTER CHO CHAN
     60 ฐ ↔ t\u0323\u0304h ; # THAI CHARACTER THO THAN
     61 ฑ ↔ t\u0331h ; # THAI CHARACTER THO NANGMONTHO
     62 ฒ ↔ tʹh ; # THAI CHARACTER THO PHUTHAO
     63 ถ ↔ t\u0304h ; # THAI CHARACTER THO THUNG
     64 ธ ↔ t\u0323h ; # THAI CHARACTER THO THONG
     65 ท ← th } $notHAccent1 ; # THAI CHARACTER THO THAHAN (Latin-to-Thai only; same guard as kh)
     66 ท ↔ th } $notHAccent0 ; # THAI CHARACTER THO THAHAN
     67 #Note: TO PATAK deviates from ISO since t-dotunder + h would be ambiguous (t\u0323h is already THO THONG). So it uses a vertical tick below (\u0329).
     68 ฏ ↔ t\u0329 ; # THAI CHARACTER TO PATAK
     69 ต ↔ t ; # THAI CHARACTER TO TAO
     70 # No rule in this file generates a singleton g, so 'ng' needs no lookahead guard.
     71 ง ↔ ng ; # THAI CHARACTER NGO NGU
     72 ณ ↔ n\u0323 ; # THAI CHARACTER NO NEN
     73 น ↔ n ; # THAI CHARACTER NO NU
     74 ญ ↔ y\u0323  ; # THAI CHARACTER YO YING
     75 ฎ ↔ d\u0323 ; # THAI CHARACTER DO CHADA
     76 ด ↔ d ; # THAI CHARACTER DO DEK
     77 บ ↔ b ; # THAI CHARACTER BO BAIMAI
     78 ฝ ↔ f\u0304 ; # THAI CHARACTER FO FA
     79 ฝ | $1 ← f ($notAbove*)    \u0304; # backward case, account for reordering: tolerate other non-Above marks before the macron
     80 ม ↔ m ; # THAI CHARACTER MO MA
     81 ย ↔ y ; # THAI CHARACTER YO YAK
     82 ร ↔ r ; # THAI CHARACTER RO RUA
     83 ฤ ↔ v ; # THAI CHARACTER RU
     84 ฦ ↔ ł ; # THAI CHARACTER LU
     85 ว ↔ w ; # THAI CHARACTER WO WAEN
     86 ศ ↔ s\u0323\u0304 ; # THAI CHARACTER SO SALA***
     87 ศ | $1 ← s    \u0323 ($notAbove*)    \u0304; # backward case, account for reordering: tolerate other non-Above marks between the dot below and the macron
     88 ษ ↔ s\u0304ʹ ; # THAI CHARACTER SO RUSI
     89 ส → s\u0304 ; # THAI CHARACTER SO SUA*** (Thai-to-Latin only; the Latin-to-Thai direction is the next rule)
     90 ส | $1 ← s ($notAbove*)    \u0304; # backward case, account for reordering: tolerate other non-Above marks before the macron
     91 ฬ ↔ l\u0323 ; # THAI CHARACTER LO CHULA
     92 ล ↔ l ; # THAI CHARACTER LO LING
     93 ฟ ↔ f ; # THAI CHARACTER FO FAN
     94 อ ↔ x ; # THAI CHARACTER O ANG
     95 ซ ↔ s ; # THAI CHARACTER SO SO
     96 # vowels (in logical order; visual reordering is done outside these rules)
     97 \u0E31 ↔ a\u0323 ; # THAI CHARACTER MAI HAN-AKAT
     98 า → a\u0304 ; # THAI CHARACTER SARA AA (Thai-to-Latin only; the Latin-to-Thai direction is the next rule)
     99 า | $1 ← a ($notAbove*)    \u0304; # backward case, account for reordering: marks captured before the macron are re-emitted after the cursor
    100 # We deviate from ISO for SARA AM for disambiguation: it is written as a + hook above (\u0309)
    101 ำ → a  \u0309; # THAI CHARACTER SARA AM
    102 ำ | $1 ← a ($notAbove*)  \u0309 ; # backward case, account for reordering: tolerate other non-Above marks before the hook above
    103 ะ ↔ a ; # THAI CHARACTER SARA A
    104 \u0E35 ↔ i\u0304 ; # THAI CHARACTER SARA II
    105 \u0E35 | $1 ← i ($notAbove*)    \u0304  ; # backward case, account for reordering: tolerate other non-Above marks before the macron
    106 \u0E37 ↔ u\u0323\u0304 ; # THAI CHARACTER SARA UEE
    107 \u0E37 | $1 ← u   \u0323 ($notAbove*)    \u0304  ; # backward case, account for reordering: tolerate other non-Above marks between the dot below and the macron
    108 \u0E36 ↔ u\u0323 ; # THAI CHARACTER SARA UE
    109 \u0E39 ↔ u\u0304 ; # THAI CHARACTER SARA UU
    110 \u0E39 | $1 ← u  ($notAbove*)    \u0304  ; # backward case, account for reordering: tolerate other non-Above marks before the macron
    111 \u0E38 ↔ u ; # THAI CHARACTER SARA U
    112 ฯ ↔ ‡ ; # THAI CHARACTER PAIYANNOI
    113 # ฿ ↔ XXX ; # THAI CURRENCY SYMBOL BAHT (disabled: no Latin mapping has been chosen)
    114 เ ↔ e ; # THAI CHARACTER SARA E
    115 แ ↔ æ ; # THAI CHARACTER SARA AE
    116 โ ↔ o ; # THAI CHARACTER SARA O
    117 ใ ↔ ı ; # THAI CHARACTER SARA AI MAIMUAN
    118 ไ ↔ i\u0323 ; # THAI CHARACTER SARA AI MAIMALAI
    119 ๅ ↔ ɨ ; # THAI CHARACTER LAKKHANGYAO
    120 \u0E47 ↔ \u0306 ; # THAI CHARACTER MAITAIKHU (combining breve)
    121 \u0E48 ↔ \u0300 ; # THAI CHARACTER MAI EK (combining grave)
    122 \u0E49 ↔ \u0302 ; # THAI CHARACTER MAI THO (combining circumflex)
    123 \u0E4A ↔ \u0301 ; # THAI CHARACTER MAI TRI (combining acute)
    124 \u0E4B ↔ \u030C ; # THAI CHARACTER MAI CHATTAWA (combining caron)
    125 \u0E4C ↔ \u0312 ; # THAI CHARACTER THANTHAKHAT (combining turned comma above)
    126 \u0E4E ↔ '~' ; # THAI CHARACTER YAMAKKAN
    127 # We deviate from ISO for disambiguation: NIKHAHIT is written as a combining ring above (\u030A)
    128 \u0E4D ↔  \u030A ; # THAI CHARACTER NIKHAHIT
    129 ๏ ↔ '§' ; # THAI CHARACTER FONGMAN
    130 ๐ ↔ 0 ; # THAI DIGIT ZERO
    131 ๑ ↔ 1 ; # THAI DIGIT ONE
    132 ๒ ↔ 2 ; # THAI DIGIT TWO
    133 ๓ ↔ 3 ; # THAI DIGIT THREE
    134 ๔ ↔ 4 ; # THAI DIGIT FOUR
    135 ๕ ↔ 5 ; # THAI DIGIT FIVE
    136 ๖ ↔ 6 ; # THAI DIGIT SIX
    137 ๗ ↔ 7 ; # THAI DIGIT SEVEN
    138 ๘ ↔ 8 ; # THAI DIGIT EIGHT
    139 ๙ ↔ 9 ; # THAI DIGIT NINE
    140 ๚ ↔ '||' ; # THAI CHARACTER ANGKHANKHU
    141 ๛ ↔ » ; # THAI CHARACTER KHOMUT
    142 ๆ ↔ « ; # THAI CHARACTER MAIYAMOK
    143 # moved down to make shorter first (NOTE(review): presumably an ordering constraint relative to the longer rules above; confirm)
    144 #Note: PHINTHU deviates from ISO since underring causes canonical problems. So it uses spacing tick below.
    145 \u0E3A ↔ ˌ ; # THAI CHARACTER PHINTHU
    146 \u0E34 ↔ i ; # THAI CHARACTER SARA I
    147 # fallbacks (Latin-to-Thai only): rewrite a Latin letter with no rule of its own and leave the cursor before the replacement so it is processed again
    148 | k ← g ; # bare g is treated as k
    149 | k ← h ; # h not consumed by an h-accent or digraph rule is treated as k
    150 | c ← j ; # j is treated as c
    151 | k ← q ; # q is treated as k
    152 | s ← z ; # z is treated as s
    153 :: (lower); # parenthesized, so the Lower transform applies in the reverse (Latin-to-Thai) direction only