Latin_ConjoiningJamo.txt (19017B)
1 # © 2016 and later: Unicode, Inc. and others. 2 # License & terms of use: http://www.unicode.org/copyright.html 3 # Generated using tools/cldr/cldr-to-icu/ 4 # 5 # File: Latin_ConjoiningJamo.txt 6 # Generated from CLDR 7 # 8 9 # Follows the Ministry of Culture and Tourism romanization: see http://www.korea.net/korea/kor_loca.asp?code=A020303 10 # http://www.unicode.org/cldr/transliteration_guidelines.html#Korean 11 #- N.B. DO NOT put any filters, NFD, etc. here -- those are aliased in 12 #- the INDEX file. This transliterator is, by itself, not 13 #- instantiated. It is used as a part of Latin-Jamo, Latin-Hangul, or 14 #- inverses thereof. 15 # Transliteration from Latin characters to Korean script is done in 16 # two steps: Latin to Jamo, then Jamo to Hangul. The Jamo-Hangul 17 # transliteration is done algorithmically following Unicode 3.0 18 # section 3.11. This file implements the Latin to Jamo 19 # transliteration using rules. 20 # Jamo occupy the block 1100-11FF. Within this block there are three 21 # groups of characters: initial consonants or choseong (I), medial 22 # vowels or jungseong (M), and trailing consonants or jongseong (F). 23 # Standard Korean syllables are of the form I+M+F*. 24 # Section 3.11 describes the use of 'filler' jamo to convert 25 # nonstandard syllables to standard form: the choseong filler 115F and 26 # the jungseong filler 1160. In this transliterator, we will not use 27 # 115F or 1160. 28 # We will, however, insert two 'null' jamo to make foreign words 29 # conform to Korean syllable structure. These are the null initial 30 # consonant 110B (IEUNG) and the null vowel 1173 (EU). In Latin text, 31 # we will use the separator in order to disambiguate strings, 32 # e.g. "kan-ggan" (initial GG) vs. "kanggan" (final NG + initial G). 33 # We will not use all of the characters in the jamo block.
We will 34 # only use the 19 initials, 21 medials, and 27 finals possessing a 35 # jamo short name as defined in section 4.4 of the Unicode book. 36 # Rules of thumb. These guidelines provide the basic framework 37 # for the rules. They are phrased in terms of Latin-Jamo transliteration. 38 # The Jamo-Latin rules derive from these, since the Jamo-Latin rules are 39 # just context-free transliteration of jamo to corresponding short names, 40 # with the addition of separators to maintain round-trip integrity 41 # in the context of the Latin-Jamo rules. 42 # A sequence of vowels: 43 # - Take the longest sequence you can. If there are too many, or you don't 44 # have a starting consonant, introduce a 110B as necessary. 45 # A sequence of consonants. 46 # - First join the double consonants: G + G → GG 47 # - In the remaining list, 48 # -- If there is no preceding vowel, take the first consonant, and insert EU 49 # after it. Continue with the rest of the consonants. 50 # -- If there is one consonant, attach to the following vowel 51 # -- If there are two consonants and a following vowel, attach one to the 52 # preceding vowel, and one to the following vowel. 53 # -- If there are more than two consonants, join the first two together if you 54 # can: L + G → LG 55 # -- If you still end up with more than 2 consonants, insert EU after the 56 # first one, and continue with the rest of the consonants. 57 #---------------------------------------------------------------------- 58 # Variables 59 # Some latin consonants or consonant pairs only occur as initials, and 60 # some only as finals, but some occur as both. This makes some jamo 61 # consonants ambiguous when transliterated into latin.
62 # Initial only: IEUNG JJ PP TT 63 # Final only: BS GS LB LG LH LM LP LS LT NG NH NJ 64 # Initial and Final: B CH D G H J K KK L M N P S SS T 65 $Gi = ᄀ; 66 $KKi = ᄁ; 67 $Ni = ᄂ; 68 $Di = ᄃ; 69 $TTi = ᄄ; 70 $Li = ᄅ; 71 $Mi = ᄆ; 72 $Bi = ᄇ; 73 $PPi = ᄈ; 74 $Si = ᄉ; 75 $SSi = ᄊ; 76 $IEUNG = ᄋ; # null initial, inserted during Latin-Jamo 77 $Ji = ᄌ; 78 $JJi = ᄍ; 79 $CHi = ᄎ; 80 $Ki = ᄏ; 81 $Ti = ᄐ; 82 $Pi = ᄑ; 83 $Hi = ᄒ; 84 $A = ᅡ; 85 $AE = ᅢ; 86 $YA = ᅣ; 87 $YAE = ᅤ; 88 $EO = ᅥ; 89 $E = ᅦ; 90 $YEO = ᅧ; 91 $YE = ᅨ; 92 $O = ᅩ; 93 $WA = ᅪ; 94 $WAE = ᅫ; 95 $OE = ᅬ; 96 $YO = ᅭ; 97 $U = ᅮ; 98 $WO = ᅯ; 99 $WE = ᅰ; 100 $WI = ᅱ; 101 $YU = ᅲ; 102 $EU = ᅳ; # null medial, inserted during Latin-Jamo 103 $UI = ᅴ; 104 $I = ᅵ; 105 $Gf = ᆨ; 106 $GGf = ᆩ; 107 $GS = ᆪ; 108 $Nf = ᆫ; 109 $NJ = ᆬ; 110 $NH = ᆭ; 111 $Df = ᆮ; 112 $L = ᆯ; 113 $LG = ᆰ; 114 $LM = ᆱ; 115 $LB = ᆲ; 116 $LS = ᆳ; 117 $LT = ᆴ; 118 $LP = ᆵ; 119 $LH = ᆶ; 120 $Mf = ᆷ; 121 $Bf = ᆸ; 122 $BS = ᆹ; 123 $Sf = ᆺ; 124 $SSf = ᆻ; 125 $NG = ᆼ; 126 $Jf = ᆽ; 127 $Cf = ᆾ; 128 $Kf = ᆿ; 129 $Tf = ᇀ; 130 $Pf = ᇁ; 131 $Hf = ᇂ; 132 $jamoInitial = [ᄀ-ᄒ]; 133 $jamoMedial = [ᅡ-ᅵ]; 134 $latinInitial = [bcdghjklmnprst]; 135 # Any character in the latin transliteration of a medial 136 $latinMedial = [aeiouwy]; 137 # The last character of the latin transliteration of a medial 138 $latinMedialEnd = [aeiou]; 139 # Disambiguation separator 140 $sep = \-; 141 #---------------------------------------------------------------------- 142 # Jamo-Latin 143 # 144 # Jamo to latin is relatively simple, since it is the latin that is 145 # ambiguous. Most rules are straightforward, and we encode them below 146 # as a simple add-on back rule, e.g.: 147 # $jamoMedial {bs} → $BS; 148 # becomes 149 # $jamoMedial {bs} ↔ $BS; 150 # 151 # Furthermore, we don't care about the ordering for Jamo-Latin because 152 # we are going from single characters, so we can very easily piggyback 153 # on the Latin-Jamo.
154 # 155 # The main issue with Jamo-Latin is when to insert separators. 156 # Separators are inserted to obtain correct round trip behavior. For 157 # example, the sequence Ki A Kf Ki E, if transliterated to "kakke", 158 # would then round trip to Ki A KKi E. To prevent this, we insert a 159 # separator: "kak-ke". IMPORTANT: The need for separators depends 160 # very specifically on the behavior of the Latin-Jamo rules. A change 161 # in the Latin-Jamo behavior can completely change the way the 162 # separator insertion must be done. 163 # First try to preserve actual separators in the jamo text by doubling 164 # them. This fixes problems like: 165 # (Di)(A)(Ji)(U)(NG)-(IEUNG)(YEO)(Nf)(Gi)(YEO)(L) → dajung-yeongyeol 166 # → (Di)(A)(Ji)(U)(NG)(IEUNG)(YEO)(Nf)(Gi)(YEO)(L). This is optional 167 # -- if we don't care about losing separators in the jamo, we can delete 168 # this rule. 169 $sep $sep ↔ $sep; 170 # Triple consonants. For three consonants "axxx" we insert a 171 # separator between the first and second "x" if XXf, Xf, and Xi all 172 # exist, and we have A Xf XXi. This prevents the reverse 173 # transliteration to A XXf Xi. 174 $sep ← $latinMedialEnd s {} $SSi; 175 # For vowels the rule is similar. If there is a vowel "ae" such that 176 # "a" by itself and "e" by itself are vowels, then we want to map A E 177 # to "a-e" so as not to round trip to AE. However, in the text Ki EO 178 # IEUNG E we don't need to map to "keo-e". "keoe" suffices. For 179 # vowels of the form "aei", both "ae" + "i" and "a" + "ei" must be 180 # tested. NOTE: These rules used to have a left context of 181 # $latinInitial instead of [^$latinMedial]. The problem with this is 182 # sequences where an initial IEUNG is transliterated away: 183 # (IEUNG)(A)(IEUNG)(EO) → aeo → (IEUNG)(AE)(IEUNG)(O) 184 # Also problems in cases like gayeo, which needs to be gaye-o 185 # The hard case is a chain, like aeoeu. Normally interpreted as ae oe u.
So for a-eoeu, we have to insert $sep 186 # But, we don't insert between the o and the e. 187 # 188 # a ae 189 # e eo eu 190 # i 191 # o oe 192 # u 193 # ui 194 # wa wae we wi wo 195 # yae ya yeo ye yo yu 196 # These are simple, since they can't chain. Note that we don't handle extreme cases like [ga][eo][e][o] 197 $sep ← a {} [$E $EO $EU]; 198 $sep ← [^aow] e {} [$O $OE]; 199 $sep ← [^aowy] e {} [$U $UI]; 200 $sep ← [^ey] o {} [$E $EO $EU]; 201 $sep ← [^y] u {} [$I]; 202 # Similar to the above, but with an intervening $IEUNG. 203 $sep ← [^$latinMedial] [y] e {} $IEUNG [$O $OE]; 204 $sep ← [^$latinMedial] e {} $IEUNG [$O $OE $U]; 205 $sep ← [^$latinMedial] [o a] {} $IEUNG [$E $EO $EU]; 206 $sep ← [^$latinMedial] [w y] a {} $IEUNG [$E $EO $EU]; 207 # Single finals followed by IEUNG. The jamo sequence A Xf IEUNG E, 208 # where Xi also exists, must be transliterated as "ax-e" to prevent 209 # the round trip conversion to A Xi E. 210 $sep ← $latinMedialEnd b {} $IEUNG $jamoMedial; 211 $sep ← $latinMedialEnd d {} $IEUNG $jamoMedial; 212 $sep ← $latinMedialEnd g {} $IEUNG $jamoMedial; 213 $sep ← $latinMedialEnd h {} $IEUNG $jamoMedial; 214 $sep ← $latinMedialEnd j {} $IEUNG $jamoMedial; 215 $sep ← $latinMedialEnd k {} $IEUNG $jamoMedial; 216 $sep ← $latinMedialEnd m {} $IEUNG $jamoMedial; 217 $sep ← $latinMedialEnd n {} $IEUNG $jamoMedial; 218 $sep ← $latinMedialEnd p {} $IEUNG $jamoMedial; 219 $sep ← $latinMedialEnd s {} $IEUNG $jamoMedial; 220 $sep ← $latinMedialEnd t {} $IEUNG $jamoMedial; 221 $sep ← $latinMedialEnd l {} $IEUNG $jamoMedial; 222 # Double finals followed by IEUNG. Similar to the single finals 223 # followed by IEUNG.
Any latin consonant pair X Y, between medials, 224 # that we would split by Latin-Jamo, we must handle when it occurs as 225 # part of A XYf IEUNG E, to prevent round trip conversion to A Xf Yi E 226 $sep ← $latinMedialEnd b s {} $IEUNG $jamoMedial; 227 $sep ← $latinMedialEnd k k {} $IEUNG $jamoMedial; 228 $sep ← $latinMedialEnd g s {} $IEUNG $jamoMedial; 229 $sep ← $latinMedialEnd l b {} $IEUNG $jamoMedial; 230 $sep ← $latinMedialEnd l g {} $IEUNG $jamoMedial; 231 $sep ← $latinMedialEnd l h {} $IEUNG $jamoMedial; 232 $sep ← $latinMedialEnd l m {} $IEUNG $jamoMedial; 233 $sep ← $latinMedialEnd l p {} $IEUNG $jamoMedial; 234 $sep ← $latinMedialEnd l s {} $IEUNG $jamoMedial; 235 $sep ← $latinMedialEnd l t {} $IEUNG $jamoMedial; 236 $sep ← $latinMedialEnd n g {} $IEUNG $jamoMedial; 237 $sep ← $latinMedialEnd n h {} $IEUNG $jamoMedial; 238 $sep ← $latinMedialEnd n j {} $IEUNG $jamoMedial; 239 $sep ← $latinMedialEnd s s {} $IEUNG $jamoMedial; 240 $sep ← $latinMedialEnd ch {} $IEUNG $jamoMedial; 241 # Split doubles. Text of the form A Xf Xi E, where XXi also occurs, 242 # we transliterate as "ax-xe" to prevent round trip transliteration as 243 # A XXi E. 244 $sep ← $latinMedialEnd j {} $Ji $jamoMedial; 245 $sep ← $latinMedialEnd k {} $Ki $jamoMedial; 246 $sep ← $latinMedialEnd s {} $Si $jamoMedial; 247 # XYY. This corresponds to the XYY rule in Latin-Jamo. By default 248 # Latin-Jamo maps "xyy" to Xf YYi, to keep YY together. As a result, 249 # "xyy" forms that correspond to XYf Yi must be transliterated as 250 # "xy-y".
251 $sep ← $latinMedialEnd b s {} [$Si $SSi]; 252 $sep ← $latinMedialEnd g s {} [$Si $SSi]; 253 $sep ← $latinMedialEnd l b {} [$Bi]; 254 $sep ← $latinMedialEnd l g {} [$Gi]; 255 $sep ← $latinMedialEnd l s {} [$Si $SSi]; 256 $sep ← $latinMedialEnd n g {} [$Gi]; 257 $sep ← $latinMedialEnd n j {} [$Ji $JJi]; 258 # $sep ← $latinMedialEnd l {} [$PPi]; 259 # $sep ← $latinMedialEnd l {} [$TTi]; 260 $sep ← $latinMedialEnd l p {} [$Pi]; 261 $sep ← $latinMedialEnd l t {} [$Ti]; 262 $sep ← $latinMedialEnd k {} [$KKi $Ki]; 263 $sep ← $latinMedialEnd p {} $Pi; 264 $sep ← $latinMedialEnd t {} $Ti; 265 $sep ← $latinMedialEnd c {} [$Hi]; # NOTE(review): presumably keeps a literal 'c' followed by Hi ('h') from reading back as the digraph 'ch' -- confirm 266 # Deletion of IEUNG is handled below. 267 #---------------------------------------------------------------------- 268 # Latin-Jamo 269 # [Basic, context-free Jamo-Latin rules are embedded here too. See 270 # above.] 271 # Split digraphs: Text of the form 'axye', where 'xy' is a final 272 # digraph, 'x' is a final (by itself), 'y' is an initial, and 'a' and 273 # 'e' are medials, we want to transliterate this as A Xf Yi E rather 274 # than A XYf IEUNG E. We do NOT include text of the form "axxe", 275 # since that is handled differently below. These rules are generated 276 # programmatically from the jamo data. 277 $jamoMedial {b s} $latinMedial → $Bf $Si; 278 $jamoMedial {g s} $latinMedial → $Gf $Si; 279 $jamoMedial {l b} $latinMedial → $L $Bi; 280 $jamoMedial {l g} $latinMedial → $L $Gi; 281 $jamoMedial {l h} $latinMedial → $L $Hi; 282 $jamoMedial {l m} $latinMedial → $L $Mi; 283 $jamoMedial {l p} $latinMedial → $L $Pi; 284 $jamoMedial {l s} $latinMedial → $L $Si; 285 $jamoMedial {l t} $latinMedial → $L $Ti; 286 $jamoMedial {n g} $latinMedial → $Nf $Gi; 287 $jamoMedial {n h} $latinMedial → $Nf $Hi; 288 $jamoMedial {n j} $latinMedial → $Nf $Ji; 289 # Single consonants are initials: Text of the form 'axe', where 'x' 290 # can be an initial or a final, and 'a' and 'e' are medials, we want 291 # to transliterate as A Xi E rather than A Xf IEUNG E.
292 $jamoMedial {b} $latinMedial → $Bi; 293 $jamoMedial {ch} $latinMedial → $CHi; 294 $jamoMedial {d} $latinMedial → $Di; 295 $jamoMedial {g} $latinMedial → $Gi; 296 $jamoMedial {h} $latinMedial → $Hi; 297 $jamoMedial {j} $latinMedial → $Ji; 298 $jamoMedial {k} $latinMedial → $Ki; 299 $jamoMedial {m} $latinMedial → $Mi; 300 $jamoMedial {n} $latinMedial → $Ni; 301 $jamoMedial {p} $latinMedial → $Pi; 302 $jamoMedial {s} $latinMedial → $Si; 303 $jamoMedial {t} $latinMedial → $Ti; 304 $jamoMedial {l} $latinMedial → $Li; 305 # Doubled initials. The sequence "axxe", where XX exists as an initial 306 # (XXi), and also Xi and Xf exist (true of all digraphs XX), we want 307 # to transliterate as A XXi E, rather than split to A Xf Xi E. 308 $jamoMedial {p p} $latinMedial → $PPi; 309 $jamoMedial {t t} $latinMedial → $TTi; 310 $jamoMedial {j j} $latinMedial → $JJi; 311 $jamoMedial {k k} $latinMedial → $KKi; 312 $jamoMedial {s s} $latinMedial → $SSi; 313 # XYY. Because doubled consonants bind more strongly than XY 314 # consonants, we must handle the sequence "axyy" specially. Here XYf 315 # and YYi must exist. In these cases, we map to Xf YYi rather than 316 # XYf. 317 # However, there are two special cases: for "alppp" and "alttt" the two rules below keep the digraph final (LP, LT) and leave the doubled "pp" / "tt" unconsumed. 318 $jamoMedial {lp} p p → $LP; 319 $jamoMedial {lt} t t → $LT; 320 # End special cases 321 $jamoMedial {b} s s → $Bf; 322 $jamoMedial {g} s s → $Gf; 323 $jamoMedial {l} b b → $L; 324 $jamoMedial {l} g g → $L; 325 $jamoMedial {l} s s → $L; 326 $jamoMedial {l} t t → $L; 327 $jamoMedial {l} p p → $L; 328 $jamoMedial {n} g g → $Nf; 329 $jamoMedial {n} j j → $Nf; 330 # Finals: Attach consonant with preceding medial to preceding medial. 331 # Do this BEFORE mapping consonants to initials. Longer keys must 332 # precede shorter keys that they start with, e.g., the rule for 'bs' 333 # must precede 'b'. 334 # [BASIC Jamo-Latin FINALS handled here. Order irrelevant within this 335 # block for Jamo-Latin.]
336 $jamoMedial {bs} ↔ $BS; 337 $jamoMedial {b} ↔ $Bf; 338 $jamoMedial {ch} ↔ $Cf; 339 $jamoMedial {c} → $Cf; 340 $jamoMedial {d} ↔ $Df; 341 $jamoMedial {kk} ↔ $GGf; 342 $jamoMedial {gs} ↔ $GS; 343 $jamoMedial {g} ↔ $Gf; 344 $jamoMedial {h} ↔ $Hf; 345 $jamoMedial {j} ↔ $Jf; 346 $jamoMedial {k} ↔ $Kf; 347 $jamoMedial {lb} ↔ $LB; $jamoMedial {lg} ↔ $LG; 348 $jamoMedial {lh} ↔ $LH; 349 $jamoMedial {lm} ↔ $LM; 350 $jamoMedial {lp} ↔ $LP; 351 $jamoMedial {ls} ↔ $LS; 352 $jamoMedial {lt} ↔ $LT; 353 $jamoMedial {l} ↔ $L; 354 $jamoMedial {m} ↔ $Mf; 355 $jamoMedial {ng} ↔ $NG; 356 $jamoMedial {nh} ↔ $NH; 357 $jamoMedial {nj} ↔ $NJ; 358 $jamoMedial {n} ↔ $Nf; 359 $jamoMedial {p} ↔ $Pf; 360 $jamoMedial {ss} ↔ $SSf; 361 $jamoMedial {s} ↔ $Sf; 362 $jamoMedial {t} ↔ $Tf; 363 # Initials: Attach single consonant to following medial. Do this 364 # AFTER mapping finals. Longer keys must precede shorter keys that 365 # they start with, e.g., the rule for 'kk' must precede 'k'. 366 # [BASIC Jamo-Latin INITIALS handled here. Order irrelevant within 367 # this block for Jamo-Latin.] 368 {kk} $latinMedial ↔ $KKi; 369 {g} $latinMedial ↔ $Gi; 370 {n} $latinMedial ↔ $Ni; 371 {tt} $latinMedial ↔ $TTi; 372 {d} $latinMedial ↔ $Di; 373 {l} $latinMedial ↔ $Li; 374 {m} $latinMedial ↔ $Mi; 375 {pp} $latinMedial ↔ $PPi; 376 {b} $latinMedial ↔ $Bi; 377 {ss} $latinMedial ↔ $SSi; 378 {s} $latinMedial ↔ $Si; 379 {jj} $latinMedial ↔ $JJi; 380 {j} $latinMedial ↔ $Ji; 381 {ch} $latinMedial ↔ $CHi; 382 {c} $latinMedial → $CHi; 383 {k} $latinMedial ↔ $Ki; 384 {t} $latinMedial ↔ $Ti; 385 {p} $latinMedial ↔ $Pi; 386 {h} $latinMedial ↔ $Hi; 387 # 'r' in final position. Because of the equivalency of the 'l' and 388 # 'r' jamo (the glyphs are the same), we try to provide the same 389 # equivalency in Latin-Jamo. The 'l' to 'r' conversion is handled 390 # below. If we see an 'r' in an apparent final position, treat it 391 # like 'l'. For example, "karka" → Ki A R EU Ki A without this rule.
392 # Instead, we want Ki A L Ki A. 393 # Initial + Final: If we match the next rule, we have initial then 394 # final consonant with no intervening medial. We insert the null 395 # vowel BEFORE it to create a well-formed syllable. (In the next rule 396 # we insert a null vowel AFTER an anomalous initial.) NOTE(review): no rule follows either this paragraph or the 'r' paragraph before it in this file; both appear to be left over from an earlier revision (a final 'r' seems to be covered by the later fallback rule that rewrites 'r' to 'l') -- confirm. 397 # Initial + X: This block matches an initial consonant not followed by 398 # a medial. We insert the null vowel after it. We handle double 399 # initials explicitly here; for single initial consonants we insert EU 400 # (as Latin) after them and let standard rules do the rest. 401 # BREAKS ROUND TRIP INTEGRITY 402 kk → $KKi $EU; 403 tt → $TTi $EU; 404 pp → $PPi $EU; 405 ss → $SSi $EU; 406 jj → $JJi $EU; 407 ch → $CHi $EU; 408 ([lbdghjkmnpst]) → | $1 eu; 409 # X + Final: Finally we have to deal with a consonant that can only be 410 # interpreted as a final (not an initial) and which is preceded 411 # neither by an initial nor a medial. It is the start of the 412 # syllable, but cannot be. Most of these will already be handled by 413 # the above rules. 'bs' splits into Bi EU Sf. Similar for 'gs' 'ng' 414 # 'nh' 'nj'. The only problem is 'l' and digraphs starting with 'l'. 415 # For this isolated case, we could add a null initial and medial, 416 # which would give "la" → IEUNG EU L IEUNG A, for example. A more 417 # economical solution is to transliterate isolated "l" (that is, 418 # initial "l") to "r". (Other similar conversions of consonants that 419 # occur neither as initials nor as finals are handled below.) 420 l → | r; # NOTE(review): looks unreachable in Latin-Jamo, since the catch-all consonant rule above already matches a bare 'l' -- confirm 421 # Medials. If a medial is preceded by an initial, then we proceed 422 # normally. As usual, longer keys must precede shorter ones. 423 # [BASIC Jamo-Latin MEDIALS handled here. Order irrelevant within 424 # this block for Jamo-Latin.]
425 # 426 # a e i o u 427 # ae 428 # eo eu 429 # oe 430 # ui 431 # wa we wi wo 432 # wae 433 # yae ya yeo ye yo yu 434 $jamoInitial {ae} ↔ $AE; 435 $jamoInitial {a} ↔ $A; 436 $jamoInitial {eo} ↔ $EO; 437 $jamoInitial {eu} ↔ $EU; 438 $jamoInitial {e} ↔ $E; 439 $jamoInitial {i} ↔ $I; 440 $jamoInitial {oe} ↔ $OE; 441 $jamoInitial {o} ↔ $O; 442 $jamoInitial {ui} ↔ $UI; 443 $jamoInitial {u} ↔ $U; 444 $jamoInitial {wae} ↔ $WAE; 445 $jamoInitial {wa} ↔ $WA; 446 $jamoInitial {wo} ↔ $WO; 447 $jamoInitial {we} ↔ $WE; 448 $jamoInitial {wi} ↔ $WI; 449 $jamoInitial {yae} ↔ $YAE; 450 $jamoInitial {ya} ↔ $YA; 451 $jamoInitial {yeo} ↔ $YEO; 452 $jamoInitial {ye} ↔ $YE; 453 $jamoInitial {yo} ↔ $YO; 454 $jamoInitial {yu} ↔ $YU; 455 # We may see an anomalous isolated 'w' or 'y'. In that case, we 456 # interpret it as 'wi' and 'yu', respectively. 457 # BREAKS ROUND TRIP INTEGRITY 458 $jamoInitial {w} → | wi; 459 $jamoInitial {y} → | yu; 460 # Otherwise, insert a null consonant IEUNG before the medial (which is 461 # still an untransliterated latin vowel). 462 ($latinMedial) → $IEUNG | $1; 463 # Convert non-jamo latin consonants to equivalents. These occur as 464 # neither initials nor finals in jamo. 'r' is folded into 'l', which 465 # is handled above as both an initial and a final. The following letters (left hand 466 # side) will never be output by Jamo-Latin. 467 f → | p; 468 q → | k; 469 v → | b; 470 x → | ks; 471 z → | s; 472 r → | l; 473 c → | k; 474 # Delete separators (Latin-Jamo). 475 $sep → ; 476 # Delete null consonants (Jamo-Latin). Do NOT delete null EU vowels, 477 # since these may also occur in text. 478 ← $IEUNG; 479 #- N.B. DO NOT put any filters, NFD, etc. here -- those are aliased in 480 #- the INDEX file. This transliterator is, by itself, not 481 #- instantiated. It is used as a part of Latin-Jamo, Latin-Hangul, or 482 #- inverses thereof. 483 # eof