Arab_Latn.txt (6620B)
# © 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html
# Generated using tools/cldr/cldr-to-icu/
#
# File: Arab_Latn.txt
# Generated from CLDR
#

# Generally follows UNGEGN
# http://www.eki.ee/wgrs/rom1_ar.pdf
# Occasionally deviates in the direction of ISO 233
# http://homepage.mac.com/sirbinks/pdf/Arabic.pdf
# a) where required for disambiguation.
# b) with underdot instead of cedilla for letters like SAD,
# since those are explicitly in Unicode for transliteration.
# c) with extra non-Arabic-language letters, like PEH
#
# Does *not* do assimilation of "al", nor hyphenation.
# While it could be done, we need to determine whether a prefix "al" could
# occur other than as the definite article (since no space is used).

# Forward (Arabic → Latin) global filter: only characters in this set are
# touched when the transform runs in the forward direction.
:: [[:Arabic:][:Block=Arabic:][ⁿ،؛؟ـ\u064B-\u0655٠-٬۰-۹﷼ښ][\u0611\u0670]] ;
# Forward: decompose (NFKD) before the rules run.
# Reverse: recompose (NFC) after the rules have run.
:: NFKD (NFC);

# Combining marks appended on the Latin side so that distinct Arabic letters
# keep distinct romanizations and can be mapped back.
$disambig = \u0331 ;   # COMBINING MACRON BELOW
$disambig2 = \u0330 ;  # COMBINING TILDE BELOW
$under = \u0323 ;      # COMBINING DOT BELOW
$descender = ˌ;        # MODIFIER LETTER LOW VERTICAL LINE
# Any combining mark whose canonical combining class is neither 0 nor 230;
# used by the TEH MARBUTA reverse rule to skip marks that canonical ordering
# places between the base letter and the diaeresis.
$notAbove = [[:^ccc=0:] & [:^ccc=230:]];

# non-letters
# The context-sensitive separator rules come first so that a separator between
# two digits maps to the bare ',' / '.'; elsewhere it carries $disambig.
[:Nd:]{٫}[:Nd:] ↔ [:Nd:]{','}[:Nd:] ; # ARABIC DECIMAL SEPARATOR
[:Nd:]{٬}[:Nd:] ↔ [:Nd:]{'.'}[:Nd:] ; # ARABIC THOUSANDS SEPARATOR
٫ ↔ ',' $disambig ; # ARABIC DECIMAL SEPARATOR
٬ ↔ '.' $disambig ; # ARABIC THOUSANDS SEPARATOR
# ٭ ↔ ; # ARABIC FIVE POINTED STAR // no need to transliterate
، ↔ ',' ; # ARABIC COMMA
؛ ↔ ';' ; # ARABIC SEMICOLON
؟ ↔ '?' ; # ARABIC QUESTION MARK
٪ ↔ '%' ; # ARABIC PERCENT SIGN
۰ ↔ 0 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ZERO
۱ ↔ 1 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ONE
۲ ↔ 2 $disambig ; # EXTENDED ARABIC-INDIC DIGIT TWO
۳ ↔ 3 $disambig ; # EXTENDED ARABIC-INDIC DIGIT THREE
۴ ↔ 4 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FOUR
۵ ↔ 5 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FIVE
۶ ↔ 6 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SIX
۷ ↔ 7 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SEVEN
۸ ↔ 8 $disambig ; # EXTENDED ARABIC-INDIC DIGIT EIGHT
۹ ↔ 9 $disambig ; # EXTENDED ARABIC-INDIC DIGIT NINE
٠ ↔ 0 ; # ARABIC-INDIC DIGIT ZERO
١ ↔ 1 ; # ARABIC-INDIC DIGIT ONE
٢ ↔ 2 ; # ARABIC-INDIC DIGIT TWO
٣ ↔ 3 ; # ARABIC-INDIC DIGIT THREE
٤ ↔ 4 ; # ARABIC-INDIC DIGIT FOUR
٥ ↔ 5 ; # ARABIC-INDIC DIGIT FIVE
٦ ↔ 6 ; # ARABIC-INDIC DIGIT SIX
٧ ↔ 7 ; # ARABIC-INDIC DIGIT SEVEN
٨ ↔ 8 ; # ARABIC-INDIC DIGIT EIGHT
٩ ↔ 9 ; # ARABIC-INDIC DIGIT NINE
؉ ↔ ‰ ; # U+0609 ARABIC-INDIC PER MILLE SIGN
؊ ↔ ‱ ; # U+060A ARABIC-INDIC PER TEN THOUSAND SIGN
۔ ↔ '.' ; # U+06D4 ARABIC FULL STOP

# letters
# long vowels
# These two-character sequences must precede the single-character vowel and
# letter rules below, otherwise the shorter rules would match first.
\u064Eا↔ a\u0304 ; # ARABIC FATHA, ARABIC LETTER ALEF
\u064Fو ↔ u\u0304 ; # ARABIC DAMMA, ARABIC LETTER WAW
\u0650ي ↔ i\u0304 ; # ARABIC KASRA, ARABIC LETTER YEH
# longer items moved here to prevent masking
ث ↔ t h $disambig ; # ARABIC LETTER THEH
ذ ↔ d h $disambig ; # ARABIC LETTER THAL
ش ↔ s h $disambig ; # ARABIC LETTER SHEEN
ص ↔ s $under ; # ARABIC LETTER SAD
ض ↔ d $under ; # ARABIC LETTER DAD
ط ↔ t $under ; # ARABIC LETTER TAH
ظ ↔ z $under ; # ARABIC LETTER ZAH
غ ↔ g h $disambig ; # ARABIC LETTER GHAIN
# WARNING: special case
# ←t, umlaut, half-ring below→ will be canonically ordered as ←t, half-ring below, umlaut→
# so on the return, we have to skip over (but preserve) the half-ring below (or others like it)
# ة\u0655 ← t\u0339\u0308 ; # LATIN SMALL LETTER T, COMBINING RIGHT HALF RING BELOW, COMBINING DIAERESIS
ة ↔ t \u0308 ; # ARABIC LETTER TEH MARBUTA
# Reverse only: the skipped marks are captured in $1 and re-emitted after the
# cursor ('|') so that later rules still get to process them.
ة | $1 ← t ($notAbove+) \u0308 ; # ARABIC LETTER TEH MARBUTA
# non-Arabic language
ژ ↔ z h $disambig ; # ARABIC LETTER JEH
ڭ ↔ n $disambig g ; # ARABIC LETTER NG
ۋ ↔ v $disambig ; # ARABIC LETTER VE
ی ↔ y $disambig2 ; # ARABIC LETTER FARSI YEH
ښ ↔ s $descender;
# Arabic language
ء ↔ ʾ ; # ARABIC LETTER HAMZA
ا ↔ a $under; # ARABIC LETTER ALEF
ب ↔ b ; # ARABIC LETTER BEH
ت ↔ t ; # ARABIC LETTER TEH
ج ↔ j ; # ARABIC LETTER JEEM
ح ↔ h $under ; # ARABIC LETTER HAH
خ ↔ k h $disambig ; # ARABIC LETTER KHAH
د ↔ d ; # ARABIC LETTER DAL
ر ↔ r ; # ARABIC LETTER REH
ز ↔ z ; # ARABIC LETTER ZAIN
س ↔ s ; # ARABIC LETTER SEEN
ع ↔ ʿ ; # ARABIC LETTER AIN
ـ → ; # ARABIC TATWEEL
ف ↔ f ; # ARABIC LETTER FEH
ق ↔ q ; # ARABIC LETTER QAF
ک ↔ k $disambig ; # ARABIC LETTER KEHEH
ك ↔ k ; # ARABIC LETTER KAF
ل ↔ l ; # ARABIC LETTER LAM
م ↔ m ; # ARABIC LETTER MEEM
ن ↔ n ; # ARABIC LETTER NOON
ه ↔ h ; # ARABIC LETTER HEH
و ↔ w ; # ARABIC LETTER WAW
ى ↔ y $disambig ; # ARABIC LETTER ALEF MAKSURA
ي ↔ y ; # ARABIC LETTER YEH
\u064B ↔ aⁿ ; # ARABIC FATHATAN
\u064C ↔ uⁿ ; # ARABIC DAMMATAN
\u064D ↔ iⁿ ; # ARABIC KASRATAN
\u064E ↔ a ; # ARABIC FATHA
\u064F ↔ u ; # ARABIC DAMMA
\u0650 ↔ i ; # ARABIC KASRA
\u0651 ↔ \u0303 ; # ARABIC SHADDA
\u0652 ↔ \u030A ; # ARABIC SUKUN
# special combining marks
\u0653 ↔ \u0302 ; # ARABIC MADDAH ABOVE
\u0654 ↔ \u0309 ; # ARABIC HAMZA ABOVE
\u0655 ↔ \u0339 ; # ARABIC HAMZA BELOW
# Some non-Arabic language (not in UNGEGN)
پ ↔ p ; # ARABIC LETTER PEH
چ ↔ c h $disambig ; # ARABIC LETTER TCHEH
ڤ ↔ v ; # ARABIC LETTER VEH
# ڥ ↔ v $disambig ; # ARABIC LETTER FEH WITH THREE DOTS BELOW
# ڢ ↔ f $disambig ; # ARABIC LETTER FEH WITH DOT MOVED BELOW
گ ↔ g ; # ARABIC LETTER GAF
# fallbacks TODO roundtrip where possible, using diacritics to distinguish
# (everything from here to the mark-deletion rule is forward-only: '→')
#https://en.wikipedia.org/wiki/Sindhi_transliteration
ٺ→ṭh;
ٿ→th;
ٽ→ṭ;
ڙ→ṛ;
ڦ→ph;
ڻ→ṇ;
ڱ→ṅ;
ڃ→ñ;
ڪ→k;
ڄ→j\u0308;
ۃ→ẖ;
ڳ→g\u0324;
ڍ→ḍh;
ڌ→dh;
ڏ→d\u0324;
ڊ→ḍ;
ڇ→ch;
ڀ→bh;
ٻ→ḇ;
۽→'&';
۾→'mn';
#https://en.wiktionary.org/wiki/Wiktionary:Urdu_transliteration
ھ → ʱ ;
ں → ◌\u0303 ;
ے → ai ;
ڈ → ḍ ;
ڑ → ṛ ;
ٹ → ṭ ;
#https://www.eki.ee/wgrs/rom2_ps.htm
#https://en.wikipedia.org/wiki/Pashto_alphabet
ټ → ṯ ;
ځ → dz ;
څ → ts ;
ډ → ḏ ;
ړ → ṟ ;
ږ → z\u035Fh ;
ګ → g ;
ڼ → ṉ ;
ۍ → ạy ;
ې → e ;
#https://www.eki.ee/wgrs/rom1_ug.pdf
ہ → ḥ ;
ە → ĥ ;
# Delete marks without correspondents
[\u0611\u0670] → ;
# fallbacks
# Reverse only: rewrite Latin letters that have no rule of their own into
# letters that do. The leading '|' puts the cursor before the replacement so
# the rewritten text is processed again by the rules above.
| s ← c } [eiy];
| k ← c ;
| i ← e ;
| u ← o ;
| ks ← x ;
| n ← ⁿ;
# Reverse only: lowercase the Latin input before the rules run.
:: (lower) ;
# Forward: recompose the Latin output (NFC). Reverse: decompose the input (NFD).
::NFC (NFD);
# Reverse (Latin → Arabic) global filter. Besides [:Latin:] it must list every
# non-Latin character that a reverse rule consumes. ‰ and ‱ are included so
# that the reverse halves of the PER MILLE / PER TEN THOUSAND rules can fire;
# both are Script=Common and therefore not covered by [:Latin:].
:: ( [[:Latin:] [%,.0-9;?ʾ-ʿ\u0302-\u0304\u0308-\u030A\u0323\u0330-\u0331\u0339;ˌ‰‱]] );