tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

Arab_Latn.txt (6620B)


      1 # © 2016 and later: Unicode, Inc. and others.
      2 # License & terms of use: http://www.unicode.org/copyright.html
      3 # Generated using tools/cldr/cldr-to-icu/
      4 #
      5 # File: Arab_Latn.txt
      6 # Generated from CLDR
      7 #
      8 
      9 # Generally follows UNGEGN
     10 #     http://www.eki.ee/wgrs/rom1_ar.pdf
     11 # Occasionally deviates in the direction of ISO 233
     12 #     http://homepage.mac.com/sirbinks/pdf/Arabic.pdf
     13 # a) where required for disambiguation.
     14 # b) with underdot instead of cedilla for letters like SAD,
     15 #    since those are explicitly in Unicode for transliteration.
     16 # c) with extra non-Arabic-language letters, like PEH
     17 #
     18 # Does *not* do assimilation of "al", nor hyphenation.
     19 # While it could be done, we need to determine whether a prefix "al" could
     20 # occur other than as the definite article (since no space is used).
     21 :: [[:Arabic:][:Block=Arabic:][‎ⁿ،؛؟ـ\u064B-\u0655٠-٬۰-۹﷼ښ][\u0611\u0670]] ;
     22 :: NFKD (NFC); # normalize to NFKD on the forward (Arabic→Latin) pass, NFC on the reverse pass
     23 $disambig =  \u0331 ; # COMBINING MACRON BELOW: marks Latin output that would otherwise collide with another rule (e.g. THEH "t h" vs. TEH + HEH)
     24 $disambig2 =  \u0330 ; # COMBINING TILDE BELOW: second marker, used for FARSI YEH because "y" + $disambig is taken by ALEF MAKSURA
     25 $under =  \u0323 ; # COMBINING DOT BELOW: the underdot used for SAD, DAD, TAH, ZAH, HAH and ALEF
     26 $descender = ˌ; # U+02CC MODIFIER LETTER LOW VERTICAL LINE: used only for the "s" of U+069A (ښ)
     27 $notAbove = [[:^ccc=0:] & [:^ccc=230:]]; # combining marks whose canonical combining class is neither 0 nor 230 (Above); see the TEH MARBUTA special case
     28 # non-letters
     29 [:Nd:]{٫}[:Nd:] ↔ [:Nd:]{','}[:Nd:] ; # ARABIC DECIMAL SEPARATOR
     30 [:Nd:]{٬}[:Nd:] ↔ [:Nd:]{'.'}[:Nd:] ; # ARABIC THOUSANDS SEPARATOR
     31 ٫ ↔ ',' $disambig ; # ARABIC DECIMAL SEPARATOR
     32 ٬ ↔ '.' $disambig ; # ARABIC THOUSANDS SEPARATOR
     33 #  ٭ ↔ ; # ARABIC FIVE POINTED STAR // no need to transliterate
     34 ، ↔ ',' ; # ARABIC COMMA
     35 ؛ ↔ ';' ; # ARABIC SEMICOLON
     36 ؟ ↔ '?' ; # ARABIC QUESTION MARK
     37 ٪ ↔ '%' ; # ARABIC PERCENT SIGN
     38 ۰ ↔ 0 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ZERO
     39 ۱ ↔ 1 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ONE
     40 ۲ ↔ 2 $disambig ; # EXTENDED ARABIC-INDIC DIGIT TWO
     41 ۳ ↔ 3 $disambig ; # EXTENDED ARABIC-INDIC DIGIT THREE
     42 ۴ ↔ 4 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FOUR
     43 ۵ ↔ 5 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FIVE
     44 ۶ ↔ 6 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SIX
     45 ۷ ↔ 7 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SEVEN
     46 ۸ ↔ 8 $disambig ; # EXTENDED ARABIC-INDIC DIGIT EIGHT
     47 ۹ ↔ 9 $disambig ; # EXTENDED ARABIC-INDIC DIGIT NINE
     48 ٠ ↔ 0 ; # ARABIC-INDIC DIGIT ZERO
     49 ١ ↔ 1 ; # ARABIC-INDIC DIGIT ONE
     50 ٢ ↔ 2 ; # ARABIC-INDIC DIGIT TWO
     51 ٣ ↔ 3 ; # ARABIC-INDIC DIGIT THREE
     52 ٤ ↔ 4 ; # ARABIC-INDIC DIGIT FOUR
     53 ٥ ↔ 5 ; # ARABIC-INDIC DIGIT FIVE
     54 ٦ ↔ 6 ; # ARABIC-INDIC DIGIT SIX
     55 ٧ ↔ 7 ; # ARABIC-INDIC DIGIT SEVEN
     56 ٨ ↔ 8 ; # ARABIC-INDIC DIGIT EIGHT
     57 ٩ ↔ 9 ; # ARABIC-INDIC DIGIT NINE
     58 ؉ ↔ ‰ ;	# U+0609	ARABIC-INDIC PER MILLE SIGN
     59 ؊ ↔ ‱ ;	 # U+060A	ARABIC-INDIC PER TEN THOUSAND SIGN
     60 ‎۔‎ ↔ '.' ; 	# U+06D4	ARABIC FULL STOP
     61 # letters
     62 # long vowels
     63 \u064Eا↔ a\u0304 ; # ARABIC FATHA, ARABIC LETTER ALEF
     64 \u064Fو ↔ u\u0304 ; # ARABIC DAMMA, ARABIC LETTER WAW
     65 \u0650ي ↔ i\u0304 ; # ARABIC KASRA, ARABIC LETTER YEH
     66 # longer items moved here to prevent masking
     67 ث ↔ t h $disambig ; # ARABIC LETTER THEH
     68 ذ ↔ d h $disambig ; # ARABIC LETTER THAL
     69 ش ↔ s h $disambig ; # ARABIC LETTER SHEEN
     70 ص ↔ s $under ; # ARABIC LETTER SAD
     71 ض ↔ d $under ; # ARABIC LETTER DAD
     72 ط ↔ t $under ; # ARABIC LETTER TAH
     73 ظ ↔ z $under ; # ARABIC LETTER ZAH
     74 غ ↔ g h $disambig ; # ARABIC LETTER GHAIN
     75 # WARNING: special case
     76 # ←t, umlaut, half-ring below→ will be canonically ordered as ←t, half-ring below, umlaut→
     77 # so on the return, we have to skip over (but preserve) the half-ring below (or others like it)
     78 # ة\u0655 ← t\u0339\u0308 ; # LATIN SMALL LETTER T, COMBINING RIGHT HALF RING BELOW, COMBINING DIAERESIS
     79 ة ↔ t \u0308 ; # ARABIC LETTER TEH MARBUTA
     80 ة | $1 ← t ($notAbove+) \u0308 ; # ARABIC LETTER TEH MARBUTA
     81 # non-Arabic language
     82 ژ ↔ z h $disambig ; # ARABIC LETTER JEH
     83 ڭ ↔ n $disambig g ; # ARABIC LETTER NG
     84 ۋ ↔ v $disambig ; # ARABIC LETTER VE
     85 ی ↔ y $disambig2 ; # ARABIC LETTER FARSI YEH
     86 ښ ↔ s $descender;
     87 # Arabic language
     88 ء ↔ ʾ ; # ARABIC LETTER HAMZA
     89 ا ↔ a $under; # ARABIC LETTER ALEF
     90 ب ↔ b ; # ARABIC LETTER BEH
     91 ت ↔ t ; # ARABIC LETTER TEH
     92 ج ↔ j ; # ARABIC LETTER JEEM
     93 ح ↔ h $under ; # ARABIC LETTER HAH
     94 خ ↔ k h $disambig ; # ARABIC LETTER KHAH
     95 د ↔ d ; # ARABIC LETTER DAL
     96 ر ↔ r ; # ARABIC LETTER REH
     97 ز ↔ z ; # ARABIC LETTER ZAIN
     98 س ↔ s ; # ARABIC LETTER SEEN
     99 ع ↔ ʿ ; # ARABIC LETTER AIN
    100 ـ → ; # ARABIC TATWEEL
    101 ف ↔ f ; # ARABIC LETTER FEH
    102 ق ↔ q ; # ARABIC LETTER QAF
    103 ک ↔ k $disambig ; # ARABIC LETTER KEHEH
    104 ك ↔ k ; # ARABIC LETTER KAF
    105 ل ↔ l ; # ARABIC LETTER LAM
    106 م ↔ m ; # ARABIC LETTER MEEM
    107 ن ↔ n ; # ARABIC LETTER NOON
    108 ه ↔ h ; # ARABIC LETTER HEH
    109 و ↔ w ; # ARABIC LETTER WAW
    110 ى ↔ y $disambig ; # ARABIC LETTER ALEF MAKSURA
    111 ي ↔ y ; # ARABIC LETTER YEH
    112 \u064B ↔ aⁿ ; # ARABIC FATHATAN
    113 \u064C ↔ uⁿ ; # ARABIC DAMMATAN
    114 \u064D ↔ iⁿ ; # ARABIC KASRATAN
    115 \u064E ↔ a ; # ARABIC FATHA
    116 \u064F ↔ u ; # ARABIC DAMMA
    117 \u0650 ↔ i ; # ARABIC KASRA
    118 \u0651 ↔   \u0303 ; # ARABIC SHADDA
    119 \u0652 ↔   \u030A ; # ARABIC SUKUN
    120 # special combining marks
    121 \u0653 ↔  \u0302 ; # ARABIC MADDAH ABOVE
    122 \u0654 ↔  \u0309 ; # ARABIC HAMZA ABOVE
    123 \u0655 ↔  \u0339 ; # ARABIC HAMZA BELOW
    124 # Some non-Arabic language (not in UNGEGN)
    125 پ ↔ p ; # ARABIC LETTER PEH
    126 چ ↔ c h $disambig ; # ARABIC LETTER TCHEH
    127 ڤ ↔ v ; # ARABIC LETTER VEH
    128 # ڥ ↔ v $disambig ; # ARABIC LETTER FEH WITH THREE DOTS BELOW
    129 # ڢ ↔ f $disambig ; # ARABIC LETTER FEH WITH DOT MOVED BELOW
    130 گ ↔ g ; # ARABIC LETTER GAF
    131 # Fallbacks. TODO: round-trip where possible, using diacritics to distinguish.
    132 #https://en.wikipedia.org/wiki/Sindhi_transliteration
    133 ‎ٺ‎→ṭh;
    134 ‎ٿ‎→th;
    135 ‎ٽ‎→ṭ;
    136 ‎ڙ‎→ṛ;
    137 ‎ڦ‎→ph;
    138 ‎ڻ‎→ṇ;
    139 ‎ڱ‎→ṅ;
    140 ‎ڃ‎→ñ;
    141 ‎ڪ‎→k;
    142 ‎ڄ‎→j\u0308;
    143 ‎ۃ‎→ẖ;
    144 ‎ڳ‎→g\u0324;
    145 ‎ڍ‎→ḍh;
    146 ‎ڌ‎→dh;
    147 ‎ڏ‎→d\u0324;
    148 ‎ڊ‎→ḍ;
    149 ‎ڇ‎→ch;
    150 ‎ڀ‎→bh;
    151 ‎ٻ‎→ḇ;
    152 ‎۽‎→'&';
    153 ‎۾‎→'mn';
    154 #https://en.wiktionary.org/wiki/Wiktionary:Urdu_transliteration
    155 ‎ھ‎ → ʱ ;
    156 ‎ں‎ → ◌\u0303 ;
    157 ‎ے‎ → ai ;
    158 ‎ڈ‎ → ḍ ;
    159 ‎ڑ‎ → ṛ ;
    160 ‎ٹ‎ → ṭ ;
    161 #https://www.eki.ee/wgrs/rom2_ps.htm
    162 #https://en.wikipedia.org/wiki/Pashto_alphabet
    163 ‎ټ‎ → ṯ ;
    164 ‎ځ‎ → dz ;
    165 ‎څ‎ → ts ;
    166 ‎ډ‎ → ḏ ;
    167 ‎ړ‎ → ṟ ;
    168 ‎ږ‎ → z\u035Fh ;
    169 ‎ګ‎ → g ;
    170 ‎ڼ‎ → ṉ ;
    171 ‎ۍ‎ → ạy ;
    172 ‎ې‎ → e ;
    173 #https://www.eki.ee/wgrs/rom1_ug.pdf
    174 ‎ہ‎ → ḥ ;
    175 ‎ە‎ → ĥ ;
    176 # Delete marks without correspondents
    177 [\u0611\u0670] → ;
    178 # fallbacks
    179 | s ← c } [eiy]; # reverse only: "c" before e/i/y becomes "s"; the leading | puts the cursor before the output so it is re-processed by the rules above
    180 | k ← c ; # reverse only: any other "c" becomes "k" (must follow the more specific rule above)
    181 | i ← e ; # reverse only: "e" has no rule of its own, so fold it to "i"
    182 | u ← o ; # reverse only: "o" has no rule of its own, so fold it to "u"
    183 | ks ← x ; # reverse only: "x" is expanded to "ks" and re-processed
    184 | n ← ‎ⁿ;
    185 :: (lower) ; # no forward action; on the reverse (Latin→Arabic) pass, lowercase the input
    186 ::NFC (NFD); # compose to NFC at the end of the forward pass; decompose to NFD on the reverse pass
    187 :: ( [[:Latin:] [%,.0-9;?ʾ-ʿ\u0302-\u0304\u0308-\u030A\u0323\u0330-\u0331\u0339;ˌ]] ); # reverse-direction global filter: only Latin letters plus the listed punctuation, digits and combining marks are touched