Latin_InterIndic.txt (8140B)
# © 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html
# Generated using tools/cldr/cldr-to-icu/
#
# File: Latin_InterIndic.txt
# Generated from CLDR
#

# Latin-InterIndic
#
# Rewrites romanized (Latin) text into the private-use "InterIndic" code points
# defined by the variables below. Rules are tried in the order they appear, so
# longer / more specific keys are listed before their shorter prefixes.
#:: NFD;

# ---------------------------------------------------------------------------
# InterIndic code point assignments (\uE000-\uE083)
# ---------------------------------------------------------------------------

# \uE000 reserved
# signs
$chandrabindu = \uE001;
$anusvara     = \uE002;
$visarga      = \uE003;
# \uE004 reserved

# Stand-alone (independent) vowels: the name is "w" + vowel.
$wa   = \uE005;
$waa  = \uE006;
$wi   = \uE007;
$wii  = \uE008;
$wu   = \uE009;
$wuu  = \uE00A;
$wr   = \uE00B;
$wl   = \uE00C;
$wce  = \uE00D; # LETTER CANDRA E
$wse  = \uE00E; # LETTER SHORT E
$we   = \uE00F; # ए LETTER E
$wai  = \uE010;
$wco  = \uE011; # LETTER CANDRA O
$wso  = \uE012; # LETTER SHORT O
$wo   = \uE013; # ओ LETTER O
$wau  = \uE014;

# Consonants.
$ka   = \uE015;
$kha  = \uE016;
$ga   = \uE017;
$gha  = \uE018;
$nga  = \uE019;
$ca   = \uE01A;
$cha  = \uE01B;
$ja   = \uE01C;
$jha  = \uE01D;
$nya  = \uE01E;
$tta  = \uE01F;
$ttha = \uE020;
$dda  = \uE021;
$ddha = \uE022;
$nna  = \uE023;
$ta   = \uE024;
$tha  = \uE025;
$da   = \uE026;
$dha  = \uE027;
$na   = \uE028;
$ena  = \uE029; # compatibility
$pa   = \uE02A;
$pha  = \uE02B;
$ba   = \uE02C;
$bha  = \uE02D;
$ma   = \uE02E;
$ya   = \uE02F;
$ra   = \uE030;
$rra  = \uE031;
$la   = \uE032;
$lla  = \uE033;
$ela  = \uE034; # compatibility
$va   = \uE035;
$vva  = \uE081; # note: assigned outside the contiguous consonant run
$sha  = \uE036;
$ssa  = \uE037;
$sa   = \uE038;
$ha   = \uE039;
# \uE03A reserved (original note cited \u093A)
# \uE03B reserved (original note cited \u093B)
$nukta    = \uE03C;
$avagraha = \uE03D; # SIGN AVAGRAHA

# Dependent vowel signs: the name is the bare vowel.
$aa  = \uE03E;
$i   = \uE03F;
$ii  = \uE040;
$u   = \uE041;
$uu  = \uE042;
$rh  = \uE043;
$rrh = \uE044;
$ce  = \uE045; # VOWEL SIGN CANDRA E
$se  = \uE046; # VOWEL SIGN SHORT E
$e   = \uE047;
$ai  = \uE048;
$co  = \uE049; # VOWEL SIGN CANDRA O
$so  = \uE04A; # VOWEL SIGN SHORT O
$o   = \uE04B; # ो
$au  = \uE04C;
$virama = \uE04D;
# \u094E Reserved
# \u094F Reserved
$om = \uE050; # OM
# \u0951→; # UNMAPPED STRESS SIGN UDATTA
# \u0952→; # UNMAPPED STRESS SIGN ANUDATTA
# \u0953→; # UNMAPPED GRAVE ACCENT
# \u0954→; # UNMAPPED ACUTE ACCENT
$lm   = \uE055; # Telugu Length Mark
$ailm = \uE056; # AI Length Mark
$aulm = \uE057; # AU Length Mark

# Urdu compatibility forms.
$uka   = \uE058;
$ukha  = \uE059;
$ugha  = \uE05A;
$ujha  = \uE05B;
$uddha = \uE05C;
$udha  = \uE05D;
$ufa   = \uE05E;
$uya   = \uE05F;
$wrr   = \uE060;
$wll   = \uE061;
$lh    = \uE062;
$llh   = \uE063;
$danda       = \uE064;
$doubleDanda = \uE065;

# Digits.
$zero  = \uE066; # DIGIT ZERO
$one   = \uE067; # DIGIT ONE
$two   = \uE068; # DIGIT TWO
$three = \uE069; # DIGIT THREE
$four  = \uE06A; # DIGIT FOUR
$five  = \uE06B; # DIGIT FIVE
$six   = \uE06C; # DIGIT SIX
$seven = \uE06D; # DIGIT SEVEN
$eight = \uE06E; # DIGIT EIGHT
$nine  = \uE06F; # DIGIT NINE
$dgs   = \uE082;

# For all other scripts.
$ecp0 = \uE070;
$ecp1 = \uE071;
$ecp2 = \uE072;
$ecp3 = \uE073;
$ecp4 = \uE074;
$ecp5 = \uE075;
$ecp6 = \uE076;
$ecp7 = \uE077;
$ecp8 = \uE078;
$ecp9 = \uE079;
$ecpA = \uE07A;
$ecpB = \uE07B;
$ecpC = \uE07C;
$ecpD = \uE07D;
$ecpE = \uE07E;
$ecpF = \uE07F;

# Khanda-ta
$kta = \uE083;
# ॰→; # nothing in Latin maps to InterIndic ABBREVIATION SIGN

# ---------------------------------------------------------------------------
# Character classes used as rule context
# ---------------------------------------------------------------------------
$depVowelAbove = [\uE03E-\uE040\uE045-\uE04C];
$depVowelBelow = [\uE041-\uE044];
$endThing = [$danda$doubleDanda];
# $x was originally called '§'; $z was '%'
# NOTE(review): $x is defined but not referenced by any rule in this file.
$x = [$virama$aa$ai$au$ii$i$uu$u$rrh$rh$lh$e$o$se$ce$so$co];
$z = [bcdfghjklmnpqrstvwxyz];
$consonants = [[$ka-$ha]$z[क-ह][ক-হ][ਕ-ਹ][ક-હ][କ-ହ][க-ஹ][క-హ][ಕ-ಹ][ക-ഹ]];

# ---------------------------------------------------------------------------
# Rules
# ---------------------------------------------------------------------------

# Combining marks and single letters that map straight to signs / clusters.
\u0315 → $avagraha;
\u0303 → $chandrabindu$anusvara;
m\u0310 → $chandrabindu;
h\u0323 → $visarga;
x → $ka$virama$sa;

# convert to independent forms at start of word or syllable:
# dependent forms for roundtrip
# (vowel prefixed with \u0314 yields the dependent sign; a bare "a" yields nothing)
\u0314a\u0304 → $aa;
\u0314ai → $ai;
\u0314au → $au;
\u0314ii → $ii;
\u0314i\u0304 → $ii;
\u0314i → $i;
\u0314u\u0304 → $uu;
\u0314u → $u;
\u0314r\u0325\u0304 → $rrh;
\u0314r\u0325 → $rh;
\u0314l\u0325\u0304 → $llh;
\u0314lh → $lh;
\u0314l\u0325 → $lh;
\u0314e\u0304 → $e;
\u0314o\u0304 → $o;
\u0314a → ;
\u0314e\u0306 → $ce;
\u0314o\u0306 → $co;
\u0314e → $se;
\u0314o → $so;

# preceded by consonants
# (the text before "{" is context only; just the vowel is replaced)
$consonants { a\u0304 → $aa;
$consonants { ai → $ai;
$consonants { au → $au;
$consonants { ii → $ii;
$consonants { i\u0304 → $ii;
$consonants { i → $i;
$consonants { u\u0304 → $uu;
$consonants { u → $u;
$consonants { r\u0325\u0304 → $rrh;
$consonants { r\u0325a → $rh;
$consonants { r\u0325 → $rh;
$consonants { l\u0325\u0304 → $llh;
$consonants { lh → $lh;
$consonants { l\u0325 → $lh;
$consonants { e\u0304 → $e;
$consonants { o\u0304 → $o;
$consonants { e\u0306 → $ce;
$consonants { o\u0306 → $co;
$consonants { e → $se;
$consonants { o → $so;

# Vowels with no preceding consonant become stand-alone forms.
# e.g. keai -→ {ka}{e}{wai}; k'ai -→ {ka}{wai}; (ai) -→ ({wai})
a\u0304 → $waa;
ai → $wai;
au → $wau;
i\u0304 → $wii;
i → $wi;
u\u0304 → $wuu;
u → $wu;
r\u0325\u0304 → $wrr;
r\u0325 → $wr;
l\u0325\u0304 → $wll;
lh → $wl;
l\u0325 → $wl;
e\u0304 → $we;
o\u0304 → $wo;
a → $wa;
e\u0306 → $wce;
o\u0306 → $wco;
e → $wse;
''om → $om;
o → $wso;

# rules for anusvara
# (text after "}" is look-ahead context and is not consumed)
n } r\u0325 → $na | $virama;
n } l\u0325 → $na | $virama;
n } na → $na | $virama;
n\u0307 } [kg] → $anusvara;
n\u0307 } n\u0307 → $anusvara;
n\u0304 } [cj] → $anusvara;
n\u0304 } n\u0303 → $anusvara;
n\u0323 } [tdn]\u0323 → $anusvara;
n } [tdn] → $anusvara;
m } [pbm] → $anusvara;
n } [ylvshr] → $anusvara;
m\u0307 → $anusvara;

# urdu compatibility
# In the consonant rules, "|" leaves the cursor in front of the emitted virama
# so the "$virama + vowel" rules further down can still match it.
q → $uka | $virama;
k\u0331h\u0331 → $ukha | $virama;
g\u0307 → $ugha | $virama;
z → $ujha | $virama;
f → $ufa | $virama;
t\u0331 → $kta;

# dev
y\u0307 → $uya | $virama;
l\u0331 → $ela | $virama;
n\u0331 → $ena | $virama;
n\u0307 → $nga | $virama;
n\u0303 → $nya | $virama;
n\u0323 → $nna | $virama;
t\u0323h → $ttha | $virama;
t\u0323 → $tta | $virama;
r\u0323h → $udha | $virama;
r\u0323 → $uddha | $virama;
d\u0323h → $ddha | $virama;
d\u0323 → $dda | $virama;
kh → $kha | $virama;
k → $ka | $virama;
gh → $gha | $virama;
g → $ga | $virama;
ch → $cha | $virama;
c → $ca | $virama;
jh → $jha | $virama;
j → $ja | $virama;
ny → $nya | $virama;
tth → $ttha | $virama;
ddh → $ddha | $virama;
th → $tha | $virama;
t → $ta | $virama;
dh → $dha | $virama;
d → $da | $virama;
n → $na | $virama;
ph → $pha | $virama;
p → $pa | $virama;
bh → $bha | $virama;
b → $ba | $virama;
m → $ma | $virama;
y → $ya | $virama;
r\u0331 → $rra | $virama;
r → $ra | $virama;
l\u0323 → $lla | $virama;
l → $la | $virama;
v → $va | $virama;
w\u0307 → $vva | $virama;
w → $va | $virama;
sh → $sha | $virama;
ss → $ssa | $virama;
s\u0323 → $ssa | $virama;
s\u0301 → $sha | $virama;
s → $sa | $virama;
h → $ha | $virama;

# Punctuation and tilde handling.
'.' → $danda;
# NOTE(review): this key starts with $danda, which the rule above has already
# stepped past for plain ".." input - confirm whether this rule can ever match.
$danda'.' → $doubleDanda;
$depVowelAbove { '~' → $anusvara;
$depVowelBelow { '~' → $chandrabindu;

# convert to dependent forms after consonant with no vowel:
# e.g. kai -→ {ka}{virama}ai -→ {ka}{ai}
#$virama aa→$aa;
$virama a\u0304 → $aa;
$virama ai → $ai;
$virama au → $au;
$virama ii → $ii;
$virama i\u0304 → $ii;
$virama i → $i;
#$virama uu→$uu;
$virama u\u0304 → $uu;
$virama u → $u;
#$virama rrh→$rrh;
$virama r\u0325\u0304 → $rrh;
#$virama rh→$rh;
$virama r\u0325a → $rh;
$virama r\u0325 → $rh;
$virama l\u0325\u0304 → $llh;
$virama lh → $lh;
$virama l\u0325 → $lh;
$virama e\u0304 → $e;
$virama o\u0304 → $o;
$virama a → ;
$virama e\u0306 → $ce;
$virama o\u0306 → $co;
$virama e → $se;
$virama o → $so;

# otherwise convert independent forms when separated by ': k'ai -→ {ka}{virama}{wai}
# ('' is a literal apostrophe)
#$virama''aa→$waa;
$virama''a\u0304 → $waa;
$virama''ai → $wai;
$virama''au → $wau;
#$virama''ii→$wii;
$virama''i\u0304 → $wii;
$virama''i → $wi;
#$virama''uu→$wuu;
$virama''u\u0304 → $wuu;
$virama''u → $wu;
#$virama''rrh→$wrr;
$virama''r\u0325\u0304 → $wrr;
#$virama''rh→$wr;
$virama''r\u0325 → $wr;
$virama''l\u0325\u0304 → $wll;
#$virama''lh→$wl;
$virama''l\u0325 → $wl;
$virama''e\u0304 → $we;
$virama''o\u0304 → $wo;
$virama''a → $wa;
$virama''e\u0306 → $wce;
$virama''o\u0306 → $wco;
$virama''e → $wse;
$virama''o → $wso;

# no virama
''a\u0304 → $waa;
''ai → $wai;
''au → $wau;
''i\u0304 → $wii;
''i → $wi;
''u\u0304 → $wuu;
''u → $wu;
''r\u0325\u0304 → $wrr;
''r\u0325 → $wr;
''l\u0325\u0304 → $wll;
''l\u0325 → $wl;
''e\u0304 → $we;
''o\u0304 → $wo;
''a → $wa;
''e\u0306 → $wce;
''o\u0306 → $wco;
''e → $wse;
''o → $wso;

# Virama clean-up: kept before a Latin consonant or a space, dropped before a danda.
$virama } [$z] → $virama;
$virama } ' ' → $virama;
$virama } $endThing → ;

ʔ → $dgs; # Glottal Stop

# ASCII digits.
0 → $zero;
1 → $one;
2 → $two;
3 → $three;
4 → $four;
5 → $five;
6 → $six;
7 → $seven;
8 → $eight;
9 → $nine;

# Any apostrophe still left over is removed.
'' → ;
#:: NFC (NFD) ;