[ tor-browser ].git.dasho

collationtest.txt (45308B)
      1 # Copyright (C) 2016 and later: Unicode, Inc. and others.
      2 # License & terms of use: http://www.unicode.org/copyright.html
      3 # Copyright (c) 2012-2015 International Business Machines
      4 # Corporation and others. All Rights Reserved.
      5 #
      6 # This file should be in UTF-8 with a signature byte sequence ("BOM").
      7 #
      8 # collationtest.txt: Collation test data.
      9 #
     10 # created on: 2012apr13
     11 # created by: Markus W. Scherer
     12 
     13 # A line with "** test: description" is used for verbose and error output.
     14 
     15 # A collator can be set with "@ root" or "@ locale language-tag",
     16 # for example "@ locale de-u-co-phonebk".
     17 # An old-style locale ID can also be used, for example "@ locale de@collation=phonebook".
     18 
     19 # A collator can be built with "@ rules".
     20 # An "@ rules" line is followed by one or more lines with the tailoring rules.
     21 
     22 # A collator can be modified with "% attribute=value" or with "% reorder codes" (for example "% reorder Hani Zzzz digit" or "% reorder default").
     23 
     24 # "* compare" tests the order (= or <) of the following strings.
     25 # The relation can be "=" or "<" (the level of the difference is not specified)
     26 # or "<1", "<2", "<c", "<3", "<4", "<i" (indicating the level of the difference).
     27 
     28 # Test sections ("* compare") are terminated by
     29 # definitions of new collators, changing attributes, or new test sections.
     30 
     31 ** test: simple CEs & expansions
     32 # Many types of mappings are tested elsewhere, including via the UCA conformance tests.
     33 # Here we mostly cover a few unusual mappings.
     34 @ rules
     35 &\x01                           # most control codes are ignorable
     36 <<<\u0300                       # tertiary CE
     37 &9<\x00                         # NUL not ignorable
     38 &\uA00A\uA00B=\uA002            # two long-primary CEs
     39 &\uA00A\uA00B\u00050005=\uA003  # three CEs, require 64 bits
     40 
     41 * compare
     42 =  \x01
     43 =  \x02
     44 <3 \u0300
     45 <1 9
     46 <1 \x00
     47 =  \x01\x00\x02
     48 <1 a
     49 <3 a\u0300
     50 <2 a\u0308
     51 =  ä
     52 <1 b
     53 <1 か        # Hiragana Ka (U+304B)
     54 <2 か\u3099  # plus voiced sound mark
     55 =  が        # Hiragana Ga (U+304C)
     56 <1 \uA00A\uA00B
     57 =  \uA002
     58 <1 \uA00A\uA00B\u00050004
     59 <1 \uA00A\uA00B\u00050005
     60 =  \uA003
     61 <1 \uA00A\uA00B\u00050006
     62 
     63 ** test: contractions
     64 # Create some interesting mappings, and map some normalization-inert characters
     65 # (which are not subject to canonical reordering)
     66 # to some of the same CEs to check the sequence of CEs.
     67 @ rules
     68 
     69 # Contractions starting with 'a' should not continue with any character < U+0300
     70 # so that we can test a shortcut for that.
     71 &a=ⓐ
     72 &b<bz=ⓑ
     73 &d<dz\u0301=ⓓ           # d+z+acute
     74 &z
     75 <a\u0301=Ⓐ              # a+acute sorts after z
     76 <a\u0301\u0301=Ⓑ        # a+acute+acute
     77 <a\u0301\u0301\u0358=Ⓒ  # a+acute+acute+dot above right
     78 <a\u030a=Ⓓ              # a+ring
     79 <a\u0323=Ⓔ              # a+dot below
     80 <a\u0323\u0358=Ⓕ        # a+dot below+dot above right
     81 <a\u0327\u0323\u030a=Ⓖ  # a+cedilla+dot below+ring
     82 <a\u0327\u0323bz=Ⓗ      # a+cedilla+dot below+b+z
     83 
     84 &\U0001D158=⁰           # musical notehead black (has a symbol primary)
     85 <\U0001D158\U0001D165=¼ # musical quarter note
     86 
     87 # deliberately missing prefix contractions:
     88 # dz
     89 # a\u0327
     90 # a\u0327\u0323
     91 # a\u0327\u0323b
     92 
     93 &\x01
     94 <<<\U0001D165=¹         # musical stem (ccc=216)
     95 <<<\U0001D16D=²         # musical augmentation dot (ccc=226)
     96 <<<\U0001D165\U0001D16D=³  # stem+dot (ccc=216 226)
     97 &\u0301=❶               # acute (ccc=230)
     98 &\u030a=❷               # ring (ccc=230)
     99 &\u0308=❸               # diaeresis (ccc=230)
    100 <<\u0308\u0301=❹        # diaeresis+acute (=dialytika tonos) (ccc=230 230)
    101 &\u0327=❺               # cedilla (ccc=202)
    102 &\u0323=❻               # dot below (ccc=220)
    103 &\u0331=❼               # macron below (ccc=220)
    104 <<\u0331\u0358=❽        # macron below+dot above right (ccc=220 232)
    105 &\u0334=❾               # tilde overlay (ccc=1)
    106 &\u0358=❿               # dot above right (ccc=232)
    107 
    108 &\u0f71=①               # tibetan vowel sign aa
    109 &\u0f72=②               # tibetan vowel sign i
    110 #  \u0f71\u0f72         # tibetan vowel sign aa + i = ii = U+0F73
    111 &\u0f73=③               # tibetan vowel sign ii (ccc=0 but lccc=129)
    112 
    113 ** test: simple contractions
    114 
    115 # Some strings are chosen to cause incremental contiguous contraction matching to
    116 # go into partial matches for prefixes of contractions
    117 # (where the prefixes are deliberately not also contractions).
    118 # When there is no complete match, then the matching code must back out of those
    119 # so that discontiguous contractions work as specified.
    120 
    121 * compare
    122 # contraction starter with no following text, or mismatch, or blocked
    123 <1 a
    124 =  ⓐ
    125 <1 aa
    126 =  ⓐⓐ
    127 <1 ab
    128 =  ⓐb
    129 <1 az
    130 =  ⓐz
    131 
    132 * compare
    133 <1 a
    134 <2 a\u0308\u030a  # ring blocked by diaeresis
    135 =  ⓐ❸❷
    136 <2 a\u0327
    137 =  ⓐ❺
    138 
    139 * compare
    140 <2 \u0308
    141 =  ❸
    142 <2 \u0308\u030a\u0301  # acute blocked by ring
    143 =  ❸❷❶
    144 
    145 * compare
    146 <1 \U0001D158
    147 =  ⁰
    148 <1 \U0001D158\U0001D165
    149 =  ¼
    150 
    151 # no discontiguous contraction because of missing prefix contraction d+z,
    152 # and a starter ('z') after the 'd'
    153 * compare
    154 <1 dz\u0323\u0301
    155 =  dz❻❶
    156 
    157 # contiguous contractions
    158 * compare
    159 <1 abz
    160 =  ⓐⓑ
    161 <1 abzz
    162 =  ⓐⓑz
    163 
    164 * compare
    165 <1 a
    166 <1 z
    167 <1 a\u0301
    168 =  Ⓐ
    169 <1 a\u0301\u0301
    170 =  Ⓑ
    171 <1 a\u0301\u0301\u0358
    172 =  Ⓒ
    173 <1 a\u030a
    174 =  Ⓓ
    175 <1 a\u0323\u0358
    176 =  Ⓕ
    177 <1 a\u0327\u0323\u030a  # match despite missing prefix
    178 =  Ⓖ
    179 <1 a\u0327\u0323bz
    180 =  Ⓗ
    181 
    182 * compare
    183 <2 \u0308\u0308\u0301  # acute blocked from first diaeresis, contracts with second
    184 =  ❸❹
    185 
    186 * compare
    187 <1 \U0001D158\U0001D165
    188 =  ¼
    189 
    190 * compare
    191 <3 \U0001D165\U0001D16D
    192 =  ³
    193 
    194 ** test: discontiguous contractions
    195 * compare
    196 <1 a\u0327\u030a                # a+ring skips cedilla
    197 =  Ⓓ❺
    198 <2 a\u0327\u0327\u030a          # a+ring skips 2 cedillas
    199 =  Ⓓ❺❺
    200 <2 a\u0327\u0327\u0327\u030a    # a+ring skips 3 cedillas
    201 =  Ⓓ❺❺❺
    202 <2 a\u0334\u0327\u0327\u030a    # a+ring skips tilde overlay & 2 cedillas
    203 =  Ⓓ❾❺❺
    204 <1 a\u0327\u0323                # a+dot below skips cedilla
    205 =  Ⓔ❺
    206 <1 a\u0323\u0301\u0358          # a+dot below+dot ab.r.: 2-char match, then skips acute
    207 =  Ⓕ❶
    208 <2 a\u0334\u0323\u0358          # a+dot below skips tilde overlay
    209 =  Ⓕ❾
    210 
    211 * compare
    212 <2 \u0331\u0331\u0358           # macron below+dot ab.r. skips the second macron below
    213 =  ❽❼
    214 
    215 * compare
    216 <1 a\u0327\u0331\u0323\u030a    # a+ring skips cedilla, macron below, dot below (dot blocked by macron)
    217 =  Ⓓ❺❼❻
    218 <1 a\u0327\u0323\U0001D16D\u030a  # a+dot below skips cedilla
    219 =  Ⓔ❺²❷
    220 <2 a\u0327\u0327\u0323\u030a    # a+dot below skips 2 cedillas
    221 =  Ⓔ❺❺❷
    222 <2 a\u0327\u0323\u0323\u030a    # a+dot below skips cedilla
    223 =  Ⓔ❺❻❷
    224 <2 a\u0334\u0327\u0323\u030a    # a+dot below skips tilde overlay & cedilla
    225 =  Ⓔ❾❺❷
    226 
    227 * compare
    228 <1 \U0001D158\u0327\U0001D165   # quarter note skips cedilla
    229 =  ¼❺
    230 <1 a\U0001D165\u0323            # a+dot below skips stem
    231 =  Ⓔ¹
    232 
    233 # partial contiguous match, backs up, matches discontiguous contraction
    234 <1 a\u0327\u0323b
    235 =  Ⓔ❺b
    236 <1 a\u0327\u0323ba
    237 =  Ⓔ❺bⓐ
    238 
    239 # a+acute+acute+dot above right skips cedilla, continues matching 2 same-ccc combining marks
    240 * compare
    241 <1 a\u0327\u0301\u0301\u0358
    242 =  Ⓒ❺
    243 
    244 # FCD but not NFD
    245 * compare
    246 <1 a\u0f73\u0301                # a+acute skips tibetan ii
    247 =  Ⓐ③
    248 
    249 # FCD but the 0f71 inside the 0f73 must be skipped
    250 # to match the discontiguous contraction of the first 0f71 with the trailing 0f72 inside the 0f73
    251 * compare
    252 <1 \u0f71\u0f73                 # == \u0f73\u0f71 == \u0f71\u0f71\u0f72
    253 =  ③①
    254 
    255 ** test: discontiguous contractions with nested contractions
    256 * compare
    257 <1 a\u0323\u0308\u0301\u0358
    258 =  Ⓕ❹
    259 <2 a\u0323\u0308\u0301\u0308\u0301\u0358
    260 =  Ⓕ❹❹
    261 
    262 ** test: discontiguous contractions with interleaved contractions
    263 * compare
    264 # a+ring & cedilla & macron below+dot above right
    265 <1 a\u0327\u0331\u030a\u0358
    266 =  Ⓓ❺❽
    267 
    268 # a+ring & 1x..3x macron below+dot above right
    269 <2 a\u0331\u030a\u0358
    270 =  Ⓓ❽
    271 <2 a\u0331\u0331\u030a\u0358\u0358
    272 =  Ⓓ❽❽
    273 # also skips acute
    274 <2 a\u0331\u0331\u0331\u030a\u0301\u0358\u0358\u0358
    275 =  Ⓓ❽❽❽❶
    276 
    277 # a+dot below & stem+augmentation dot, followed by contiguous d+z+acute
    278 <1 a\U0001D165\u0323\U0001D16Ddz\u0301
    279 =  Ⓔ³ⓓ
    280 
    281 ** test: some simple string comparisons
    282 @ root
    283 * compare
    284 # first string compares against ""
    285 = \u0000
    286 < a
    287 <1 b
    288 <3 B
    289 = \u0000B\u0000
    290 
    291 ** test: compare with strength=primary
    292 % strength=primary
    293 * compare
    294 <1 a
    295 <1 b
    296 = B
    297 
    298 ** test: compare with strength=secondary
    299 % strength=secondary
    300 * compare
    301 <1 a
    302 <1 b
    303 = B
    304 
    305 ** test: compare with strength=tertiary
    306 % strength=tertiary
    307 * compare
    308 <1 a
    309 <1 b
    310 <3 B
    311 
    312 ** test: compare with strength=quaternary
    313 % strength=quaternary
    314 * compare
    315 <1 a
    316 <1 b
    317 <3 B
    318 
    319 ** test: compare with strength=identical
    320 % strength=identical
    321 * compare
    322 <1 a
    323 <1 b
    324 <3 B
    325 
    326 ** test: côté with forwards secondary
    327 @ root
    328 * compare
    329 <1 cote
    330 <2 coté
    331 <2 côte
    332 <2 côté
    333 
    334 ** test: côté with forwards secondary vs. U+FFFE merge separator
    335 # Merged sort keys: On each level, any difference in the first segment
    336 # must trump any further difference.
    337 * compare
    338 <1 cote\uFFFEcôté
    339 <2 coté\uFFFEcôte
    340 <2 côte\uFFFEcoté
    341 <2 côté\uFFFEcote
    342 
    343 ** test: côté with backwards secondary
    344 % backwards=on
    345 * compare
    346 <1 cote
    347 <2 côte
    348 <2 coté
    349 <2 côté
    350 
    351 ** test: côté with backwards secondary vs. U+FFFE merge separator
    352 # Merged sort keys: On each level, any difference in the first segment
    353 # must trump any further difference.
    354 * compare
    355 <1 cote\uFFFEcôté
    356 <2 côte\uFFFEcoté
    357 <2 coté\uFFFEcôte
    358 <2 côté\uFFFEcote
    359 
    360 ** test: U+FFFE on identical level
    361 @ root
    362 % strength=identical
    363 * compare
    364 # All of these control codes are completely-ignorable, so that
    365 # their low code points are compared with the merge separator.
    366 # The merge separator must compare less than any other character.
    367 <1 \uFFFE\u0001\u0002\u0003
    368 <i \u0001\uFFFE\u0002\u0003
    369 <i \u0001\u0002\uFFFE\u0003
    370 <i \u0001\u0002\u0003\uFFFE
    371 
    372 * compare
    373 # The merge separator must even compare less than U+0000.
    374 <1 \uFFFE\u0000\u0000
    375 <i \u0000\uFFFE\u0000
    376 <i \u0000\u0000\uFFFE
    377 
    378 ** test: Hani < surrogates < U+FFFD
    379 # Note: compareUTF8() treats unpaired surrogates like U+FFFD,
    380 # so when it is used, the strings with surrogates compare equal to each other
    381 # and equal to the string with U+FFFD.
    382 @ root
    383 % strength=identical
    384 * compare
    385 <1 abz
    386 <1 a\u4e00z
    387 <1 a\U00020000z
    388 <1 a\ud800z
    389 <1 a\udbffz
    390 <1 a\udc00z
    391 <1 a\udfffz
    392 <1 a\ufffdz
    393 
    394 ** test: script reordering
    395 @ root
    396 % reorder Hani Zzzz digit
    397 * compare
    398 <1 ?
    399 <1 +
    400 <1 丂
    401 <1 a
    402 <1 α
    403 <1 5
    404 
    405 % reorder default
    406 * compare
    407 <1 ?
    408 <1 +
    409 <1 5
    410 <1 a
    411 <1 α
    412 <1 丂
    413 
    414 ** test: empty rules
    415 @ rules
    416 * compare
    417 <1 a
    418 <2 ä
    419 <3 Ä
    420 <1 b
    421 
    422 ** test: very simple rules
    423 @ rules
    424 &a=e<<<<q<<<<r<x<<<X<<y<<<Y;z,Z
    425 % strength=quaternary
    426 * compare
    427 <1 a
    428 =  e
    429 <4 q
    430 <4 r
    431 <1 x
    432 <3 X
    433 <2 y
    434 <3 Y
    435 <2 z
    436 <3 Z
    437 
    438 ** test: tailoring twice before a root position: primary
    439 @ rules
    440 &[before 1]b<p
    441 &[before 1]b<q
    442 * compare
    443 <1 a
    444 <1 p
    445 <1 q
    446 <1 b
    447 
    448 ** test: tailoring twice before a root position: secondary
    449 @ rules
    450 &[before 2]ſ<<p
    451 &[before 2]ſ<<q
    452 * compare
    453 <1 s
    454 <2 p
    455 <2 q
    456 <2 ſ
    457 
    458 # secondary-before common weight
    459 @ rules
    460 &[before 2]b<<p
    461 &[before 2]b<<q
    462 * compare
    463 <1 a
    464 <1 p
    465 <2 q
    466 <2 b
    467 
    468 ** test: tailoring twice before a root position: tertiary
    469 @ rules
    470 &[before 3]B<<<p
    471 &[before 3]B<<<q
    472 * compare
    473 <1 b
    474 <3 p
    475 <3 q
    476 <3 B
    477 
    478 # tertiary-before common weight
    479 @ rules
    480 &[before 3]b<<<p
    481 &[before 3]b<<<q
    482 * compare
    483 <1 a
    484 <1 p
    485 <3 q
    486 <3 b
    487 
    488 @ rules
    489 &[before 2]b<<s
    490 &[before 3]s<<<p
    491 &[before 3]s<<<q
    492 * compare
    493 <1 a
    494 <1 p
    495 <3 q
    496 <3 s
    497 <2 b
    498 
    499 ** test: tailor after completely ignorable
    500 @ rules
    501 &\x00<<<x<<y
    502 * compare
    503 = \x00
    504 = \x1F
    505 <3 x
    506 <2 y
    507 
    508 ** test: secondary tailoring gaps, ICU ticket 9362
    509 @ rules
    510 &[before 2]s<<'_'
    511 &s<<r  # secondary between s and ſ (long s)
    512 &ſ<<*a-q  # more than 15 between ſ and secondary CE boundary
    513 &[before 2][first primary ignorable]<<u<<v  # between secondary CE boundary & lowest secondary CE
    514 &[last primary ignorable]<<y<<z
    515 
    516 * compare
    517 <2 u
    518 <2 v
    519 <2 \u0332  # lowest secondary CE
    520 <2 \u0308
    521 <2 y
    522 <2 z
    523 <1 s_
    524 <2 ss
    525 <2 sr
    526 <2 sſ
    527 <2 sa
    528 <2 sb
    529 <2 sp
    530 <2 sq
    531 <2 sus
    532 <2 svs
    533 <2 rs
    534 
    535 ** test: tertiary tailoring gaps, ICU ticket 9362
    536 @ rules
    537 &[before 3]t<<<'_'
    538 &t<<<r  # tertiary between t and fullwidth t
    539 &ᵀ<<<*a-q  # more than 15 between ᵀ (modifier letter T) and tertiary CE boundary
    540 &[before 3][first secondary ignorable]<<<u<<<v  # between tertiary CE boundary & lowest tertiary CE
    541 &[last secondary ignorable]<<<y<<<z
    542 
    543 * compare
    544 <3 u
    545 <3 v
    546 # Note: The root collator currently does not map any characters to tertiary CEs.
    547 <3 y
    548 <3 z
    549 <1 t_
    550 <3 tt
    551 <3 tr
    552 <3 tｔ
    553 <3 tᵀ
    554 <3 ta
    555 <3 tb
    556 <3 tp
    557 <3 tq
    558 <3 tut
    559 <3 tvt
    560 <3 rt
    561 
    562 ** test: secondary & tertiary around root character
    563 @ rules
    564 &[before 2]m<<r
    565 &m<<s
    566 &[before 3]m<<<u
    567 &m<<<v
    568 * compare
    569 <1 l
    570 <1 r
    571 <2 u
    572 <3 m
    573 <3 v
    574 <2 s
    575 <1 n
    576 
    577 ** test: secondary & tertiary around tailored item
    578 @ rules
    579 &m<x
    580 &[before 2]x<<r
    581 &x<<s
    582 &[before 3]x<<<u
    583 &x<<<v
    584 * compare
    585 <1 m
    586 <1 r
    587 <2 u
    588 <3 x
    589 <3 v
    590 <2 s
    591 <1 n
    592 
    593 ** test: more nesting of secondary & tertiary before
    594 @ rules
    595 &[before 3]m<<<u
    596 &[before 2]m<<r
    597 &[before 3]r<<<q
    598 &m<<<w
    599 &m<<t
    600 &[before 3]w<<<v
    601 &w<<<x
    602 &w<<s
    603 * compare
    604 <1 l
    605 <1 q
    606 <3 r
    607 <2 u
    608 <3 m
    609 <3 v
    610 <3 w
    611 <3 x
    612 <2 s
    613 <2 t
    614 <1 n
    615 
    616 ** test: case bits
    617 @ rules
    618 &w<x  # tailored CE getting case bits
    619  =uv=uV=Uv=UV  # 2 chars -> 1 CE
    620 &ae=ch=cH=Ch=CH  # 2 chars -> 2 CEs
    621 &rst=yz=yZ=Yz=YZ  # 2 chars -> 3 CEs
    622 % caseFirst=lower
    623 * compare
    624 <1 ae
    625 =  ch
    626 <3 cH
    627 <3 Ch
    628 <3 CH
    629 <1 rst
    630 =  yz
    631 <3 yZ
    632 <3 Yz
    633 <3 YZ
    634 <1 w
    635 <1 x
    636 =  uv
    637 <3 uV
    638 =  Uv  # mixed case on single CE cannot distinguish variations
    639 <3 UV
    640 
    641 ** test: tertiary CEs, tertiary, caseLevel=off, caseFirst=lower
    642 @ rules
    643 &\u0001<<<t<<<T  # tertiary CEs
    644 % caseFirst=lower
    645 * compare
    646 <1 aa
    647 <3 aat
    648 <3 aaT
    649 <3 aA
    650 <3 aAt
    651 <3 ata
    652 <3 aTa
    653 
    654 ** test: tertiary CEs, tertiary, caseLevel=off, caseFirst=upper
    655 % caseFirst=upper
    656 * compare
    657 <1 aA
    658 <3 aAt
    659 <3 aa
    660 <3 aat
    661 <3 aaT
    662 <3 ata
    663 <3 aTa
    664 
    665 ** test: reset on expansion, ICU tickets 9415 & 9593
    666 @ rules
    667 &æ<x    # tailor the last primary CE so that x sorts between ae and af
    668 &æb=bæ  # copy all reset CEs to make bæ sort the same
    669 &각<h    # copy/tailor 3 CEs to make h sort before the next Hangul syllable 갂
    670 &⒀<<y   # copy/tailor 4 CEs to make y sort with only a secondary difference
    671 &l·=z   # handle the pre-context for · when fetching reset CEs
    672   <<u  # copy/tailor 2 CEs
    673 
    674 * compare
    675 <1 ae
    676 <2 æ
    677 <1 x
    678 <1 af
    679 
    680 * compare
    681 <1 aeb
    682 <2 æb
    683 =  bæ
    684 
    685 * compare
    686 <1 각
    687 <1 h
    688 <1 갂
    689 <1 갃
    690 
    691 * compare
    692 <1 ·    # by itself: primary CE
    693 <1 l
    694 <2 l·   # l+middle dot has only a secondary difference from l
    695 =  z
    696 <2 u
    697 
    698 * compare
    699 <1 (13)
    700 <3 ⒀  # DUCET sets special tertiary weights in all CEs
    701 <2 y
    702 <1 (13[
    703 
    704 % alternate=shifted
    705 * compare
    706 <1 (13)
    707 =  13
    708 <3 ⒀
    709 =  y  # alternate=shifted removes the tailoring difference on the last CE
    710 <1 14
    711 
    712 ** test: contraction inside extension, ICU ticket 9378
    713 @ rules
    714 &а<<х/й     # all letters are Cyrillic
    715 * compare
    716 <1 ай
    717 <2 х
    718 
    719 ** test: no duplicate tailored CEs for different reset positions with same CEs, ICU ticket 10104
    720 @ rules
    721 &t<x &ᵀ<y           # same primary weights
    722 &q<u &[before 1]ꝗ<v # q and ꝗ are primary adjacent
    723 * compare
    724 <1 q
    725 <1 u
    726 <1 v
    727 <1 ꝗ
    728 <1 t
    729 <3 ᵀ
    730 <1 y
    731 <1 x
    732 
    733 # Principle: Each rule builds on the state of preceding rules and ignores following rules.
    734 
    735 ** test: later rule does not affect earlier reset position, ICU ticket 10105
    736 @ rules
    737 &a < u < v < w  &ov < x  &b < v
    738 * compare
    739 <1 oa
    740 <1 ou
    741 <1 x    # CE(o) followed by CE between u and w
    742 <1 ow
    743 <1 ob
    744 <1 ov
    745 
    746 ** test: later rule does not affect earlier extension (1), ICU ticket 10105
    747 @ rules
    748 &a=x/b &v=b
    749 % strength=secondary
    750 * compare
    751 <1 B
    752 <1 c
    753 <1 v
    754 =  b
    755 * compare
    756 <1 AB
    757 =  x
    758 <1 ac
    759 <1 av
    760 =  ab
    761 
    762 ** test: later rule does not affect earlier extension (2), ICU ticket 10105
    763 @ rules
    764 &a <<< c / e &g <<< e / l
    765 % strength=secondary
    766 * compare
    767 <1 AE
    768 =  c
    769 <2 æ
    770 <1 agl
    771 =  ae
    772 
    773 ** test: later rule does not affect earlier extension (3), ICU ticket 10105
    774 @ rules
    775 &a = b / c  &d = c / e
    776 % strength=secondary
    777 * compare
    778 <1 AC  # C is still only tertiary different from the original c
    779 =  b
    780 <1 ade
    781 =  ac
    782 
    783 ** test: extension contains tailored character, ICU ticket 10105
    784 @ rules
    785 &a=e &b=u/e
    786 * compare
    787 <1 a
    788 =  e
    789 <1 ba
    790 =  be
    791 =  u
    792 
    793 ** test: add simple mappings for characters with root context
    794 @ rules
    795 &z=·    # middle dot has a prefix mapping in the CLDR root
    796 &n=и    # и (U+0438) has contractions in the root
    797 * compare
    798 <1 l
    799 <2 l·   # root mapping for l|· still works
    800 <1 z
    801 =  ·
    802 * compare
    803 <1 n
    804 =  и
    805 <1 И
    806 <1 и\u0306  # root mapping for й=и\u0306 still works
    807 =  й
    808 <3 Й
    809 
    810 ** test: add context mappings around characters with root context
    811 @ rules
    812 &z=·h   # middle dot has a prefix mapping in the CLDR root
    813 &n=ә|и  # и (U+0438) has contractions in the root
    814 * compare
    815 <1 l
    816 <2 l·   # root mapping for l|· still works
    817 <1 z
    818 =  ·h
    819 * compare
    820 <1 и
    821 <3 И
    822 <1 и\u0306  # root mapping for й=и\u0306 still works
    823 =  й
    824 * compare
    825 <1 әn
    826 =  әи
    827 <1 әo
    828 
    829 ** test: many secondary CEs at the top of their range
    830 @ rules
    831 &[last primary ignorable]<<*\u2801-\u28ff
    832 * compare
    833 <2 \u0308
    834 <2 \u2801
    835 <2 \u2802
    836 <2 \u2803
    837 <2 \u2804
    838 <2 \u28fd
    839 <2 \u28fe
    840 <2 \u28ff
    841 <1 \x20
    842 
    843 ** test: many tertiary CEs at the top of their range
    844 @ rules
    845 &[last secondary ignorable]<<<*a-z
    846 * compare
    847 <3 a
    848 <3 b
    849 <3 c
    850 <3 d
    851 # e..w
    852 <3 x
    853 <3 y
    854 <3 z
    855 <2 \u0308
    856 
    857 ** test: tailor contraction together with nearly equivalent prefix, ICU ticket 10101
    858 @ rules
    859 &a=p|x &b=px &c=op
    860 * compare
    861 <1 b
    862 =  px
    863 <3 B
    864 <1 c
    865 =  op
    866 <3 C
    867 * compare
    868 <1 ca
    869 =  opx  # first contraction op, then prefix p|x
    870 <3 cA
    871 <3 Ca
    872 
    873 ** test: reset position with prefix (pre-context), ICU ticket 10102
    874 @ rules
    875 &a=p|x &px=y
    876 * compare
    877 <1 pa
    878 =  px
    879 =  y
    880 <3 pA
    881 <1 q
    882 <1 x
    883 
    884 ** test: prefix+contraction together (1), ICU ticket 10071
    885 @ rules
    886 &x=a|bc
    887 * compare
    888 <1 ab
    889 <1 Abc
    890 <1 abd
    891 <1 ac
    892 <1 aw
    893 <1 ax
    894 =  abc
    895 <3 aX
    896 <3 Ax
    897 <1 b
    898 <1 bb
    899 <1 bc
    900 <3 bC
    901 <3 Bc
    902 <1 bd
    903 
    904 ** test: prefix+contraction together (2), ICU ticket 10071
    905 @ rules
    906 &w=bc &x=a|b
    907 * compare
    908 <1 w
    909 =  bc
    910 <3 W
    911 * compare
    912 <1 aw
    913 <1 ax
    914 =  ab
    915 <3 aX
    916 <1 axb
    917 <1 axc
    918 =  abc  # prefix match a|b takes precedence over contraction match bc
    919 <3 abC
    920 <1 abd
    921 <1 ay
    922 
    923 ** test: prefix+contraction together (3), ICU ticket 10071
    924 @ rules
    925 &x=a|b &w=bc    # same rules as in the previous test but in reverse order; the order should not matter here
    926 * compare       # same "compare" sequences as previous test
    927 <1 w
    928 =  bc
    929 <3 W
    930 * compare
    931 <1 aw
    932 <1 ax
    933 =  ab
    934 <3 aX
    935 <1 axb
    936 <1 axc
    937 =  abc  # prefix match a|b takes precedence over contraction match bc
    938 <3 abC
    939 <1 abd
    940 <1 ay
    941 
    942 ** test: no mapping p|c, falls back to contraction ch, CLDR ticket 5962
    943 @ rules
    944 &d=ch &v=p|ci
    945 * compare
    946 <1 pc
    947 <3 pC
    948 <1 pcH
    949 <1 pcI
    950 <1 pd
    951 =  pch  # no-prefix contraction ch matches
    952 <3 pD
    953 <1 pv
    954 =  pci  # prefix+contraction p|ci matches
    955 <3 pV
    956 
    957 ** test: tailor in & around compact ranges of root primaries
    958 # The Ogham characters U+1681..U+169A are in simple ascending order of primary CEs
    959 # which should be reliably encoded as one range in the root elements data.
    960 @ rules
    961 &[before 1]ᚁ<a
    962 &ᚁ<b
    963 &[before 1]ᚂ<c
    964 &ᚂ<d
    965 &[before 1]ᚚ<y
    966 &ᚚ<z
    967 &[before 2]ᚁ<<r
    968 &ᚁ<<s
    969 &[before 3]ᚚ<<<t
    970 &ᚚ<<<u
    971 * compare
    972 <1 ᣵ    # U+18F5 last Canadian Aboriginal
    973 <1 a
    974 <1 r
    975 <2 ᚁ
    976 <2 s
    977 <1 b
    978 <1 c
    979 <1 ᚂ
    980 <1 d
    981 <1 ᚃ
    982 <1 ᚙ
    983 <1 y
    984 <1 t
    985 <3 ᚚ
    986 <3 u
    987 <1 z
    988 <1 ᚠ    # U+16A0 first Runic
    989 
    990 ** test: suppressContractions
    991 @ rules
    992 &z<ch<әж [suppressContractions [·cә]]
    993 * compare
    994 <1 ch
    995 <3 cH   # ch was suppressed
    996 <1 l
    997 <1 l·   # primary difference, not secondary, because l|· was suppressed
    998 <1 ә
    999 <2 ә\u0308  # secondary difference, not primary, because contractions for ә were suppressed
   1000 <1 әж
   1001 <3 әЖ
   1002 
   1003 ** test: Hangul & Jamo
   1004 @ rules
   1005 &L=\u1100  # first Jamo L
   1006 &V=\u1161  # first Jamo V
   1007 &T=\u11A8  # first Jamo T
   1008 &\uAC01<<*\u4E00-\u4EFF  # first Hangul LVT syllable & lots of secondary diffs
   1009 * compare
   1010 <1 Lv
   1011 <3 LV
   1012 =  \u1100\u1161
   1013 =  \uAC00
   1014 <1 LVt
   1015 <3 LVT
   1016 =  \u1100\u1161\u11A8
   1017 =  \uAC00\u11A8
   1018 =  \uAC01
   1019 <2 LVT\u0308
   1020 <2 \u4E00
   1021 <2 \u4E01
   1022 <2 \u4E80
   1023 <2 \u4EFF
   1024 <2 LV\u0308T
   1025 <1 \uAC02
   1026 
   1027 ** test: adjust special reset positions according to previous rules, CLDR ticket 6070
   1028 @ rules
   1029 &[last variable]<x
   1030 [maxVariable space]  # has effect only after building, no effect on following rules
   1031 &[last variable]<y
   1032 &[before 1][first regular]<z
   1033 * compare
   1034 <1 ?  # some punctuation
   1035 <1 x
   1036 <1 y
   1037 <1 z
   1038 <1 $  # some symbol
   1039 
   1040 @ rules
   1041 &[last primary ignorable]<<x<<<y
   1042 &[last primary ignorable]<<z
   1043 * compare
   1044 <2 \u0358
   1045 <2 x
   1046 <3 y
   1047 <2 z
   1048 <1 \x20
   1049 
   1050 @ rules
   1051 &[last secondary ignorable]<<<x
   1052 &[last secondary ignorable]<<<y
   1053 * compare
   1054 <3 x
   1055 <3 y
   1056 <2 \u0358
   1057 
   1058 @ rules
   1059 &[before 2][first variable]<<z
   1060 &[before 2][first variable]<<y
   1061 &[before 3][first variable]<<<x
   1062 &[before 3][first variable]<<<w
   1063 &[before 1][first variable]<v
   1064 &[before 2][first variable]<<u
   1065 &[before 3][first variable]<<<t
   1066 &[before 2]\uFDD1\xA0<<s  # FractionalUCA.txt: FDD1 00A0, SPACE first primary
   1067 * compare
   1068 <2 \u0358
   1069 <1 s
   1070 <2 \uFDD1\xA0
   1071 <1 t
   1072 <3 u
   1073 <2 v
   1074 <1 w
   1075 <3 x
   1076 <3 y
   1077 <2 z
   1078 <2 \t
   1079 
   1080 @ rules
   1081 &[before 2][first regular]<<z
   1082 &[before 3][first regular]<<<y
   1083 &[before 1][first regular]<x
   1084 &[before 3][first regular]<<<w
   1085 &[before 2]\uFDD1\u263A<<v  # FractionalUCA.txt: FDD1 263A, SYMBOL first primary
   1086 &[before 3][first regular]<<<u
   1087 &[before 1][first regular]<p  # primary before the boundary: becomes variable
   1088 &[before 3][first regular]<<<t  # not affected by p
   1089 &[last variable]<q              # after p!
   1090 * compare
   1091 <1 ?
   1092 <1 p
   1093 <1 q
   1094 <1 t
   1095 <3 u
   1096 <3 v
   1097 <1 w
   1098 <3 x
   1099 <1 y
   1100 <3 z
   1101 <1 $
   1102 
   1103 # check that p & q are indeed variable
   1104 % alternate=shifted
   1105 * compare
   1106 =  ?
   1107 =  p
   1108 =  q
   1109 <1 t
   1110 <3 u
   1111 <3 v
   1112 <1 w
   1113 <3 x
   1114 <1 y
   1115 <3 z
   1116 <1 $
   1117 
   1118 @ rules
   1119 &[before 2][first trailing]<<z
   1120 &[before 1][first trailing]<y
   1121 &[before 3][first trailing]<<<x
   1122 * compare
   1123 <1 \u4E00  # first Han, first implicit
   1124 <1 \uFDD1\uFDD0  # FractionalUCA.txt: unassigned first primary
   1125 # Note: The root collator currently does not map any characters to the trailing first boundary primary.
   1126 <1 x
   1127 <3 y
   1128 <1 z
   1129 <2 \uFFFD  # The root collator currently maps U+FFFD to the first real trailing primary.
   1130 
   1131 @ rules
   1132 &[before 2][first primary ignorable]<<z
   1133 &[before 2][first primary ignorable]<<y
   1134 &[before 3][first primary ignorable]<<<x
   1135 &[before 3][first primary ignorable]<<<w
   1136 * compare
   1137 =  \x01
   1138 <2 w
   1139 <3 x
   1140 <3 y
   1141 <2 z
   1142 <2 \u0301
   1143 
   1144 @ rules
   1145 &[before 3][first secondary ignorable]<<<y
   1146 &[before 3][first secondary ignorable]<<<x
   1147 * compare
   1148 =  \x01
   1149 <3 x
   1150 <3 y
   1151 <2 \u0301
   1152 
   1153 ** test: canonical closure
   1154 @ rules
   1155 &X=A &U=Â
   1156 * compare
   1157 <1 U
   1158 =  Â
   1159 =  A\u0302
   1160 <2 Ú  # U with acute
   1161 =  U\u0301
   1162 =  Ấ  # A with circumflex & acute
   1163 =  Â\u0301
   1164 =  A\u0302\u0301
   1165 <1 X
   1166 =  A
   1167 <2 X\u030A  # with ring above
   1168 =  Å
   1169 =  A\u030A
   1170 =  \u212B  # Angstrom sign
   1171 
   1172 @ rules
   1173 &x=\u5140\u55C0
   1174 * compare
   1175 <1 x
   1176 =  \u5140\u55C0
   1177 =  \u5140\uFA0D
   1178 =  \uFA0C\u55C0
   1179 =  \uFA0C\uFA0D  # CJK compatibility characters
   1180 <3 X
   1181 
   1182 # canonical closure on prefix rules, ICU ticket 9444
   1183 @ rules
   1184 &x=ä|ŝ
   1185 * compare
   1186 <1 äs  # not tailored
   1187 <1 äx
   1188 =  äŝ
   1189 =  a\u0308s\u0302
   1190 =  a\u0308ŝ
   1191 =  äs\u0302
   1192 <3 äX
   1193 
   1194 ** test: conjoining Jamo map to expansions
   1195 @ rules
   1196 &gg=\u1101  # Jamo Lead consonant GG
   1197 &nj=\u11AC  # Jamo Trail consonant NJ
   1198 * compare
   1199 <1 gg\u1161nj
   1200 =  \u1101\u1161\u11AC
   1201 =  \uAE4C\u11AC
   1202 =  \uAE51
   1203 <3 gg\u1161nJ
   1204 <1 \u1100\u1100
   1205 
   1206 ** test: canonical tail closure, ICU ticket 5913
   1207 @ rules
   1208 &a<â
   1209 * compare
   1210 <1 a
   1211 <1 â              # tailored
   1212 =  a\u0302
   1213 <2 a\u0323\u0302  # discontiguous contraction
   1214 =  ạ\u0302        # equivalent
   1215 =  ậ              # equivalent
   1216 <1 b
   1217 
   1218 @ rules
   1219 &a<ạ
   1220 * compare
   1221 <1 a
   1222 <1 ạ              # tailored
   1223 =  a\u0323
   1224 <2 a\u0323\u0302  # contiguous contraction plus extra diacritic
   1225 =  ạ\u0302        # equivalent
   1226 =  ậ              # equivalent
   1227 <1 b
   1228 
   1229 # Tail closure should work even if there is a prefix and/or contraction.
   1230 @ rules
   1231 &a<\u5140|câ
   1232 # In order to find discontiguous contractions for \u5140|câ
   1233 # there must exist a mapping for \u5140|ca, regardless of what it maps to.
   1234 # (This follows from the UCA spec.)
   1235 &x=\u5140|ca
   1236 * compare
   1237 <1 \u5140a
   1238 =  \uFA0Ca
   1239 <1 \u5140câ              # tailored
   1240 =  \uFA0Ccâ
   1241 =  \u5140ca\u0302
   1242 =  \uFA0Cca\u0302
   1243 <2 \u5140ca\u0323\u0302  # discontiguous contraction
   1244 =  \uFA0Cca\u0323\u0302
   1245 =  \u5140cạ\u0302
   1246 =  \uFA0Ccạ\u0302
   1247 =  \u5140cậ
   1248 =  \uFA0Ccậ
   1249 <1 \u5140b
   1250 =  \uFA0Cb
   1251 <1 \u5140x
   1252 =  \u5140ca
   1253 
   1254 # Double-check that without the extra mapping there will be no discontiguous match.
   1255 @ rules
   1256 &a<\u5140|câ
   1257 * compare
   1258 <1 \u5140a
   1259 =  \uFA0Ca
   1260 <1 \u5140câ              # tailored
   1261 =  \uFA0Ccâ
   1262 =  \u5140ca\u0302
   1263 =  \uFA0Cca\u0302
   1264 <1 \u5140b
   1265 =  \uFA0Cb
   1266 <1 \u5140ca\u0323\u0302  # no discontiguous contraction
   1267 =  \uFA0Cca\u0323\u0302
   1268 =  \u5140cạ\u0302
   1269 =  \uFA0Ccạ\u0302
   1270 =  \u5140cậ
   1271 =  \uFA0Ccậ
   1272 
   1273 @ rules
   1274 &a<cạ
   1275 * compare
   1276 <1 a
   1277 <1 cạ              # tailored
   1278 =  ca\u0323
   1279 <2 ca\u0323\u0302  # contiguous contraction plus extra diacritic
   1280 =  cạ\u0302        # equivalent
   1281 =  cậ              # equivalent
   1282 <1 b
   1283 
   1284 # ᾢ = U+1FA2 GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI
   1285 #   = 03C9 0313 0300 0345
   1286 # ccc = 0, 230, 230, 240
   1287 @ rules
   1288 &δ=αῳ
   1289 # In order to find discontiguous contractions for αῳ
   1290 # there must exist a mapping for αω, regardless of what it maps to.
   1291 # (This follows from the UCA spec.)
   1292 &ε=αω
   1293 * compare
   1294 <1 δ
   1295 =  αῳ
   1296 =  αω\u0345
   1297 <2 αω\u0313\u0300\u0345  # discontiguous contraction
   1298 =  αὠ\u0300\u0345
   1299 =  αὢ\u0345
   1300 =  αᾢ
   1301 <2 αω\u0300\u0313\u0345
   1302 =  αὼ\u0313\u0345
   1303 =  αῲ\u0313  # not FCD
   1304 <1 ε
   1305 =  αω
   1306 
   1307 # Double-check that without the extra mapping there will be no discontiguous match.
   1308 @ rules
   1309 &δ=αῳ
   1310 * compare
   1311 <1 αω\u0313\u0300\u0345  # no discontiguous contraction
   1312 =  αὠ\u0300\u0345
   1313 =  αὢ\u0345
   1314 =  αᾢ
   1315 <2 αω\u0300\u0313\u0345
   1316 =  αὼ\u0313\u0345
   1317 =  αῲ\u0313  # not FCD
   1318 <1 δ
   1319 =  αῳ
   1320 =  αω\u0345
   1321 
   1322 # Add U+0315 COMBINING COMMA ABOVE RIGHT which has ccc=232.
   1323 # Tests code paths where the tailored string has a combining mark
   1324 # that does not occur in any composite's decomposition.
   1325 @ rules
   1326 &δ=αὼ\u0315
   1327 * compare
   1328 <1 αω\u0313\u0300\u0315  # Not tailored: The comma above (U+0313) blocks the grave accent (both have ccc=230).
   1329 =  αὠ\u0300\u0315
   1330 =  αὢ\u0315
   1331 <1 δ
   1332 =  αὼ\u0315
   1333 =  αω\u0300\u0315
   1334 <2 αω\u0300\u0315\u0345
   1335 =  αὼ\u0315\u0345
   1336 =  αῲ\u0315  # not FCD
   1337 
   1338 ** test: danish a+a vs. a-umlaut, ICU ticket 9319
   1339 @ rules
   1340 &z<aa
   1341 * compare
   1342 <1 z
   1343 <1 aa
   1344 <2 aa\u0308
   1345 =  aä
   1346 
   1347 ** test: Jamo L with and in prefix
   1348 # Useful for the Korean "searchjl" tailoring (instead of contractions of pairs of Jamo L).
   1349 @ rules
   1350 # Jamo Lead consonant G after G or GG
   1351 &[last primary ignorable]<<\u1100|\u1100=\u1101|\u1100
   1352 # Jamo Lead consonant GG sorts like G+G
   1353 &\u1100\u1100=\u1101
   1354 # Note: Making G|GG and GG|GG sort the same as G|G+G
   1355 # would require the ability to reset on G|G+G,
   1356 # or we could make G-after-G equal to some secondary-CE character,
   1357 # and reset on a pair of those.
   1358 # (It does not matter much if there are at most two G in a row in real text.)
   1359 * compare
   1360 <1 \u1100
   1361 <2 \u1100\u1100  # only one primary from a sequence of G lead consonants
   1362 =  \u1101
   1363 <2 \u1100\u1100\u1100
   1364 =  \u1101\u1100
   1365 # but not = \u1100\u1101, see above
   1366 <1 \u1100\u1161
   1367 =  \uAC00
   1368 <2 \u1100\u1100\u1161
   1369 =  \u1100\uAC00  # prefix match from the L of the LV syllable
   1370 =  \u1101\u1161
   1371 =  \uAE4C
   1372 
   1373 ** test: proposed Korean "searchjl" tailoring with prefixes, CLDR ticket 6546
   1374 @ rules
   1375 # Low secondary CEs for Jamo V & T.
   1376 # Note: T should sort before V for proper syllable order.
   1377 &\u0332  # COMBINING LOW LINE (first primary ignorable)
   1378 <<\u1161<<\u1162
   1379 
   1380 # Korean Jamo lead consonant search rules, part 2:
   1381 # Make modern compound L jamo primary equivalent to non-compound forms.
   1382 
   1383 # Secondary CEs for Jamo L-after-L, greater than Jamo V & T.
   1384 &\u0313  # COMBINING COMMA ABOVE (second primary ignorable)
   1385 =\u1100|\u1100
   1386 =\u1103|\u1103
   1387 =\u1107|\u1107
   1388 =\u1109|\u1109
   1389 =\u110C|\u110C
   1390 
   1391 # Compound L Jamo map to equivalent expansions of primary+secondary CE.
   1392 &\u1100\u0313=\u1101<<<\u3132  # HANGUL CHOSEONG SSANGKIYEOK, HANGUL LETTER SSANGKIYEOK
   1393 &\u1103\u0313=\u1104<<<\u3138  # HANGUL CHOSEONG SSANGTIKEUT, HANGUL LETTER SSANGTIKEUT
   1394 &\u1107\u0313=\u1108<<<\u3143  # HANGUL CHOSEONG SSANGPIEUP, HANGUL LETTER SSANGPIEUP
   1395 &\u1109\u0313=\u110A<<<\u3146  # HANGUL CHOSEONG SSANGSIOS, HANGUL LETTER SSANGSIOS
   1396 &\u110C\u0313=\u110D<<<\u3149  # HANGUL CHOSEONG SSANGCIEUC, HANGUL LETTER SSANGCIEUC
   1397 
   1398 * compare
   1399 <1 \u1100\u1161
   1400 =  \uAC00
   1401 <2 \u1100\u1162
   1402 =  \uAC1C
   1403 <2 \u1100\u1100\u1161
   1404 =  \u1100\uAC00
   1405 =  \u1101\u1161
   1406 =  \uAE4C
   1407 <3 \u3132\u1161
   1408 
   1409 ** test: Hangul syllables in prefix & in the interior of a contraction
   1410 @ rules
   1411 &x=\u1100\u1161|a\u1102\u1162z
   1412 * compare
   1413 <1 \u1100\u1161x
   1414 =  \u1100\u1161a\u1102\u1162z
   1415 =  \u1100\u1161a\uB0B4z
   1416 =  \uAC00a\u1102\u1162z
   1417 =  \uAC00a\uB0B4z
   1418 
   1419 ** test: digits are unsafe-backwards when numeric=on
   1420 @ root
   1421 % numeric=on
   1422 * compare
   1423 # If digits are not unsafe, then numeric collation sees "1"=="01" and "b">"a".
   1424 # We need to back up before the identical prefix "1" and compare the full numbers.
   1425 <1 11b
   1426 <1 101a
   1427 
   1428 ** test: simple locale data test
   1429 @ locale de
   1430 * compare
   1431 <1 a
   1432 <2 ä
   1433 <1 ae
   1434 <2 æ
   1435 
   1436 @ locale de-u-co-phonebk
   1437 * compare
   1438 <1 a
   1439 <1 ae
   1440 <2 ä
   1441 <2 æ
   1442 
   1443 # The following test cases were moved here from ICU 52's DataDrivenCollationTest.txt.
   1444 
   1445 ** test: DataDrivenCollationTest/TestMorePinyin
   1446 # Testing the primary strength.
   1447 @ locale zh
   1448 % strength=primary
   1449 * compare
   1450 < lā
   1451 = lĀ
   1452 = Lā
   1453 = LĀ
   1454 < lān
   1455 = lĀn
   1456 < lē
   1457 = lĒ
   1458 = Lē
   1459 = LĒ
   1460 < lēn
   1461 = lĒn
   1462 
   1463 ** test: DataDrivenCollationTest/TestLithuanian
   1464 # Lithuanian sort order.
   1465 @ locale lt
   1466 * compare
   1467 < cz
   1468 < č
   1469 < d
   1470 < iz
   1471 < j
   1472 < sz
   1473 < š
   1474 < t
   1475 < zz
   1476 < ž
   1477 
   1478 ** test: DataDrivenCollationTest/TestLatvian
   1479 # Latvian sort order.
   1480 @ locale lv
   1481 * compare
   1482 < az
   1483 < ā
   1484 < b
   1485 < cz
   1486 < č
   1487 < d
   1488 < ez
   1489 < ē
   1490 < f
   1491 < gz
   1492 < ģ
   1493 < h
   1494 < iz
   1495 < y
   1496 < ī
   1497 < j
   1498 < kz
   1499 < ķ
   1500 < l
   1501 < lz
   1502 < ļ
   1503 < m
   1504 < nz
   1505 < ņ
   1506 < o
   1507 < oz
   1508 < ō
   1509 < p
   1510 < rz
   1511 < ŗ
   1512 < s
   1513 < sz
   1514 < š
   1515 < t
   1516 < uz
   1517 < ū
   1518 < v
   1519 < zz
   1520 < ž
   1521 
   1522 ** test: DataDrivenCollationTest/TestEstonian
   1523 # Estonian sort order.
   1524 @ locale et
   1525 * compare
   1526 < sy
   1527 < š
   1528 < šy
   1529 < z
   1530 < zy
   1531 < ž
   1532 < v
   1533 < va
   1534 < w
   1535 < õ
   1536 < õy
   1537 < ä
   1538 < äy
   1539 < ö
   1540 < öy
   1541 < ü
   1542 < üy
   1543 < x
   1544 
   1545 ** test: DataDrivenCollationTest/TestAlbanian
   1546 # Albanian sort order.
   1547 @ locale sq
   1548 * compare
   1549 < cz
   1550 < ç
   1551 < d
   1552 < dz
   1553 < dh
   1554 < e
   1555 < ez
   1556 < ë
   1557 < f
   1558 < gz
   1559 < gj
   1560 < h
   1561 < lz
   1562 < ll
   1563 < m
   1564 < nz
   1565 < nj
   1566 < o
   1567 < rz
   1568 < rr
   1569 < s
   1570 < sz
   1571 < sh
   1572 < t
   1573 < tz
   1574 < th
   1575 < u
   1576 < xz
   1577 < xh
   1578 < y
   1579 < zz
   1580 < zh
   1581 
   1582 ** test: DataDrivenCollationTest/TestSimplifiedChineseOrder
   1583 # Sorted file has different order.
   1584 @ root
   1585 # normalization=on turned on & off automatically.
   1586 * compare
   1587 < \u5F20
   1588 < \u5F20\u4E00\u8E3F
   1589 
   1590 ** test: DataDrivenCollationTest/TestTibetanNormalizedIterativeCrash
   1591 # Regression test: this input used to crash the collator.
   1592 @ root
   1593 * compare
   1594 < \u0f71\u0f72\u0f80\u0f71\u0f72
   1595 < \u0f80
   1596 
   1597 ** test: DataDrivenCollationTest/TestThaiPartialSortKeyProblems
   1598 # These are examples of strings that caused trouble in partial sort key testing.
   1599 @ locale th-TH
   1600 * compare
   1601 < \u0E01\u0E01\u0E38\u0E18\u0E20\u0E31\u0E13\u0E11\u0E4C
   1602 < \u0E01\u0E01\u0E38\u0E2A\u0E31\u0E19\u0E42\u0E18
   1603 * compare
   1604 < \u0E01\u0E07\u0E01\u0E32\u0E23
   1605 < \u0E01\u0E07\u0E42\u0E01\u0E49
   1606 * compare
   1607 < \u0E01\u0E23\u0E19\u0E17\u0E32
   1608 < \u0E01\u0E23\u0E19\u0E19\u0E40\u0E0A\u0E49\u0E32
   1609 * compare
   1610 < \u0E01\u0E23\u0E30\u0E40\u0E08\u0E35\u0E22\u0E27
   1611 < \u0E01\u0E23\u0E30\u0E40\u0E08\u0E35\u0E4A\u0E22\u0E27
   1612 * compare
   1613 < \u0E01\u0E23\u0E23\u0E40\u0E0A\u0E2D
   1614 < \u0E01\u0E23\u0E23\u0E40\u0E0A\u0E49\u0E32
   1615 
   1616 ** test: DataDrivenCollationTest/TestJavaStyleRule
   1617 # java.text allows rules to start with a relation, as in '<<<x<<<y...'.
   1618 # We emulate this by assuming a leading &[first tertiary ignorable] in this case.
   1619 @ rules
   1620 &\u0001=equal<<<z<<x<<<w &[first tertiary ignorable]=a &[first primary ignorable]=b
   1621 * compare
   1622 = a
   1623 = equal
   1624 < z
   1625 < x
   1626 = b  # x had become the new first primary ignorable
   1627 < w
   1628 
   1629 ** test: DataDrivenCollationTest/TestShiftedIgnorable
   1630 # The UCA states that primary ignorables should be completely
   1631 # ignorable when following a shifted code point.
   1632 @ root
   1633 % alternate=shifted
   1634 % strength=quaternary
   1635 * compare
   1636 < a\u0020b
   1637 = a\u0020\u0300b
   1638 = a\u0020\u0301b
   1639 < a_b
   1640 = a_\u0300b
   1641 = a_\u0301b
   1642 < A\u0020b
   1643 = A\u0020\u0300b
   1644 = A\u0020\u0301b
   1645 < A_b
   1646 = A_\u0300b
   1647 = A_\u0301b
   1648 < a\u0301b
   1649 < A\u0301b
   1650 < a\u0300b
   1651 < A\u0300b
   1652 
   1653 ** test: DataDrivenCollationTest/TestNShiftedIgnorable
   1654 # Counterpart to TestShiftedIgnorable: with alternate=non-ignorable nothing is
   1655 # shifted, so primary ignorables after a space or underscore still make a difference.
   1656 @ root
   1657 % alternate=non-ignorable
   1658 % strength=tertiary
   1659 * compare
   1660 < a\u0020b
   1661 < A\u0020b
   1662 < a\u0020\u0301b
   1663 < A\u0020\u0301b
   1664 < a\u0020\u0300b
   1665 < A\u0020\u0300b
   1666 < a_b
   1667 < A_b
   1668 < a_\u0301b
   1669 < A_\u0301b
   1670 < a_\u0300b
   1671 < A_\u0300b
   1672 < a\u0301b
   1673 < A\u0301b
   1674 < a\u0300b
   1675 < A\u0300b
   1676 
   1677 ** test: DataDrivenCollationTest/TestSafeSurrogates
   1678 # It turned out that surrogates were not skipped properly
   1679 # when iterating backwards if they were in the middle of a
   1680 # contraction. This test ensures that this stays fixed.
   1681 @ rules
   1682 &a < x\ud800\udc00b
   1683 * compare
   1684 < a
   1685 < x\ud800\udc00b
   1686 
   1687 ** test: DataDrivenCollationTest/da_TestPrimary
   1688 # This test goes through primary strength cases
   1689 @ locale da
   1690 % strength=primary
   1691 * compare
   1692 < Lvi
   1693 < Lwi
   1694 * compare
   1695 < L\u00e4vi
   1696 < L\u00f6wi
   1697 * compare
   1698 < L\u00fcbeck
   1699 = Lybeck
   1700 
   1701 ** test: DataDrivenCollationTest/da_TestTertiary
   1702 # This test goes through tertiary strength cases
   1703 @ locale da
   1704 % strength=tertiary
   1705 * compare
   1706 < Luc
   1707 < luck
   1708 * compare
   1709 < luck
   1710 < L\u00fcbeck
   1711 * compare
   1712 < lybeck
   1713 < L\u00fcbeck
   1714 * compare
   1715 < L\u00e4vi
   1716 < L\u00f6we
   1717 * compare
   1718 < L\u00f6ww
   1719 < mast
   1720 
   1721 * compare
   1722 < A/S
   1723 < ANDRE
   1724 < ANDR\u00c9
   1725 < ANDREAS
   1726 < AS
   1727 < CA
   1728 < \u00c7A
   1729 < CB
   1730 < \u00c7C
   1731 < D.S.B.
   1732 < DA
   1733 < \u00d0A
   1734 < DB
   1735 < \u00d0C
   1736 < DSB
   1737 < DSC
   1738 < EKSTRA_ARBEJDE
   1739 < EKSTRABUD0
   1740 < H\u00d8ST
   1741 < HAAG
   1742 < H\u00c5NDBOG
   1743 < HAANDV\u00c6RKSBANKEN
   1744 < Karl
   1745 < karl
   1746 < NIELS\u0020J\u00d8RGEN
   1747 < NIELS-J\u00d8RGEN
   1748 < NIELSEN
   1749 < R\u00c9E,\u0020A
   1750 < REE,\u0020B
   1751 < R\u00c9E,\u0020L
   1752 < REE,\u0020V
   1753 < SCHYTT,\u0020B
   1754 < SCHYTT,\u0020H
   1755 < SCH\u00dcTT,\u0020H
   1756 < SCHYTT,\u0020L
   1757 < SCH\u00dcTT,\u0020M
   1758 < SS
   1759 < \u00df
   1760 < SSA
   1761 < STORE\u0020VILDMOSE
   1762 < STOREK\u00c6R0
   1763 < STORM\u0020PETERSEN
   1764 < STORMLY
   1765 < THORVALD
   1766 < THORVARDUR
   1767 < \u00feORVAR\u00d0UR
   1768 < THYGESEN
   1769 < VESTERG\u00c5RD,\u0020A
   1770 < VESTERGAARD,\u0020A
   1771 < VESTERG\u00c5RD,\u0020B
   1772 < \u00c6BLE
   1773 < \u00c4BLE
   1774 < \u00d8BERG
   1775 < \u00d6BERG
   1776 
   1777 * compare
   1778 < andere
   1779 < chaque
   1780 < chemin
   1781 < cote
   1782 < cot\u00e9
   1783 < c\u00f4te
   1784 < c\u00f4t\u00e9
   1785 < \u010du\u010d\u0113t
   1786 < Czech
   1787 < hi\u0161a
   1788 < irdisch
   1789 < lie
   1790 < lire
   1791 < llama
   1792 < l\u00f5ug
   1793 < l\u00f2za
   1794 < lu\u010d
   1795 < luck
   1796 < L\u00fcbeck
   1797 < lye
   1798 < l\u00e4vi
   1799 < L\u00f6wen
   1800 < m\u00e0\u0161ta
   1801 < m\u00eer
   1802 < myndig
   1803 < M\u00e4nner
   1804 < m\u00f6chten
   1805 < pi\u00f1a
   1806 < pint
   1807 < pylon
   1808 < \u0161\u00e0ran
   1809 < savoir
   1810 < \u0160erb\u016bra
   1811 < Sietla
   1812 < \u015blub
   1813 < subtle
   1814 < symbol
   1815 < s\u00e4mtlich
   1816 < verkehrt
   1817 < vox
   1818 < v\u00e4ga
   1819 < waffle
   1820 < wood
   1821 < yen
   1822 < yuan
   1823 < yucca
   1824 < \u017eal
   1825 < \u017eena
   1826 < \u017den\u0113va
   1827 < zoo0
   1828 < Zviedrija
   1829 < Z\u00fcrich
   1830 < zysk0
   1831 < \u00e4ndere
   1832 
   1833 ** test: DataDrivenCollationTest/hi_TestNewRules
   1834 # This test goes through new rules and tests against old rules
   1835 @ locale hi
   1836 * compare
   1837 < कॐ
   1838 < कं
   1839 < कँ
   1840 < कः
   1841 
   1842 ** test: DataDrivenCollationTest/ro_TestNewRules
   1843 # This test goes through new rules and tests against old rules
   1844 @ locale ro
   1845 * compare
   1846 < xAx
   1847 < xă
   1848 < xĂ
   1849 < Xă
   1850 < XĂ
   1851 < xăx
   1852 < xĂx
   1853 < xâ
   1854 < xÂ
   1855 < Xâ
   1856 < XÂ
   1857 < xâx
   1858 < xÂx
   1859 < xb
   1860 < xIx
   1861 < xî
   1862 < xÎ
   1863 < Xî
   1864 < XÎ
   1865 < xîx
   1866 < xÎx
   1867 < xj
   1868 < xSx
   1869 < xș
   1870 = xş
   1871 < xȘ
   1872 = xŞ
   1873 < Xș
   1874 = Xş
   1875 < XȘ
   1876 = XŞ
   1877 < xșx
   1878 = xşx
   1879 < xȘx
   1880 = xŞx
   1881 < xT
   1882 < xTx
   1883 < xț
   1884 = xţ
   1885 < xȚ
   1886 = xŢ
   1887 < Xț
   1888 = Xţ
   1889 < XȚ
   1890 = XŢ
   1891 < xțx
   1892 = xţx
   1893 < xȚx
   1894 = xŢx
   1895 < xU
   1896 
   1897 ** test: DataDrivenCollationTest/testOffsets
   1898 # This tests cases where forwards and backwards iteration get different offsets
   1899 @ locale en
   1900 % strength=tertiary
   1901 * compare
   1902 < a\uD800\uDC00\uDC00
   1903 < b\uD800\uDC00\uDC00
   1904 * compare
   1905 < \u0301A\u0301\u0301
   1906 < \u0301B\u0301\u0301
   1907 * compare
   1908 < abcd\r\u0301
   1909 < abce\r\u0301
   1910 # TODO: test offsets in new CollationTest
   1911 
   1912 # End of test cases moved here from ICU 52's DataDrivenCollationTest.txt.
   1913 
   1914 ** test: was ICU 52 cmsccoll/TestRedundantRules
   1915 @ rules
   1916 & a < b < c < d& [before 1] c < m
   1917 * compare
   1918 <1 a
   1919 <1 b
   1920 <1 m
   1921 <1 c
   1922 <1 d
   1923 
   1924 @ rules
   1925 & a < b <<< c << d <<< e& [before 3] e <<< x
   1926 * compare
   1927 <1 a
   1928 <1 b
   1929 <3 c
   1930 <2 d
   1931 <3 x
   1932 <3 e
   1933 
   1934 @ rules
   1935 & a < b <<< c << d <<< e <<< f < g& [before 1] g < x
   1936 * compare
   1937 <1 a
   1938 <1 b
   1939 <3 c
   1940 <2 d
   1941 <3 e
   1942 <3 f
   1943 <1 x
   1944 <1 g
   1945 
   1946 @ rules
   1947 & a <<< b << c < d& a < m
   1948 * compare
   1949 <1 a
   1950 <3 b
   1951 <2 c
   1952 <1 m
   1953 <1 d
   1954 
   1955 @ rules
   1956 &a<b<<b\u0301 &z<b
   1957 * compare
   1958 <1 a
   1959 <1 b\u0301
   1960 <1 z
   1961 <1 b
   1962 
   1963 @ rules
   1964 &z<m<<<q<<<m
   1965 * compare
   1966 <1 z
   1967 <1 q
   1968 <3 m
   1969 
   1970 @ rules
   1971 &z<<<m<q<<<m
   1972 * compare
   1973 <1 z
   1974 <1 q
   1975 <3 m
   1976 
   1977 @ rules
   1978 & a < b < c < d& r < c
   1979 * compare
   1980 <1 a
   1981 <1 b
   1982 <1 d
   1983 <1 r
   1984 <1 c
   1985 
   1986 @ rules
   1987 & a < b < c < d& c < m
   1988 * compare
   1989 <1 a
   1990 <1 b
   1991 <1 c
   1992 <1 m
   1993 <1 d
   1994 
   1995 @ rules
   1996 & a < b < c < d& a < m
   1997 * compare
   1998 <1 a
   1999 <1 m
   2000 <1 b
   2001 <1 c
   2002 <1 d
   2003 
   2004 ** test: was ICU 52 cmsccoll/TestExpansionSyntax
   2005 # The following two rules should sort the particular list of strings the same.
   2006 @ rules
   2007 &AE <<< a << b <<< c &d <<< f
   2008 * compare
   2009 <1 AE
   2010 <3 a
   2011 <2 b
   2012 <3 c
   2013 <1 d
   2014 <3 f
   2015 
   2016 @ rules
   2017 &A <<< a / E << b / E <<< c /E  &d <<< f
   2018 * compare
   2019 <1 AE
   2020 <3 a
   2021 <2 b
   2022 <3 c
   2023 <1 d
   2024 <3 f
   2025 
   2026 # The following two rules should sort the particular list of strings the same.
   2027 @ rules
   2028 &AE <<< a <<< b << c << d < e < f <<< g
   2029 * compare
   2030 <1 AE
   2031 <3 a
   2032 <3 b
   2033 <2 c
   2034 <2 d
   2035 <1 e
   2036 <1 f
   2037 <3 g
   2038 
   2039 @ rules
   2040 &A <<< a / E <<< b / E << c / E << d / E < e < f <<< g
   2041 * compare
   2042 <1 AE
   2043 <3 a
   2044 <3 b
   2045 <2 c
   2046 <2 d
   2047 <1 e
   2048 <1 f
   2049 <3 g
   2050 
   2051 # The following two rules should sort the particular list of strings the same.
   2052 @ rules
   2053 &AE <<< B <<< C / D <<< F
   2054 * compare
   2055 <1 AE
   2056 <3 B
   2057 <3 F
   2058 <1 AED
   2059 <3 C
   2060 
   2061 @ rules
   2062 &A <<< B / E <<< C / ED <<< F / E
   2063 * compare
   2064 <1 AE
   2065 <3 B
   2066 <3 F
   2067 <1 AED
   2068 <3 C
   2069 
   2070 ** test: never reorder trailing primaries
   2071 @ root
   2072 % reorder Zzzz Grek
   2073 * compare
   2074 <1 L
   2075 <1 字
   2076 <1 Ω
   2077 <1 \uFFFD
   2078 <1 \uFFFF
   2079 
   2080 ** test: fall back to mappings with shorter prefixes, not immediately to ones with no prefixes
   2081 @ rules
   2082 &u=ab|cd
   2083 &v=b|ce
   2084 * compare
   2085 <1 abc
   2086 <1 abcc
   2087 <1 abcf
   2088 <1 abcd
   2089 =  abu
   2090 <1 abce
   2091 =  abv
   2092 
   2093 # With the following rules, there is only one prefix per composite ĉ or ç,
   2094 # but both prefixes apply to just c in NFD form.
   2095 # We would get different results for composed vs. NFD input
   2096 # if we fell back directly from longest-prefix mappings to no-prefix mappings.
   2097 @ rules
   2098 &x=op|ĉ
   2099 &y=p|ç
   2100 * compare
   2101 <1 opc
   2102 <2 opć
   2103 <1 opcz
   2104 <1 opd
   2105 <1 opĉ
   2106 =  opc\u0302
   2107 =  opx
   2108 <1 opç
   2109 =  opc\u0327
   2110 =  opy
   2111 
   2112 # The mapping is used which has the longest matching prefix for which
   2113 # there is also a suffix match, with the longest suffix match among several for that prefix.
   2114 @ rules
   2115 &❶=d
   2116 &❷=de
   2117 &❸=def
   2118 &①=c|d
   2119 &②=c|de
   2120 &③=c|def
   2121 &④=bc|d
   2122 &⑤=bc|de
   2123 &⑥=bc|def
   2124 &⑦=abc|d
   2125 &⑧=abc|de
   2126 &⑨=abc|def
   2127 * compare
   2128 <1 9aadzz
   2129 =  9aa❶zz
   2130 <1 9aadez
   2131 =  9aa❷z
   2132 <1 9aadef
   2133 =  9aa❸
   2134 <1 9acdzz
   2135 =  9ac①zz
   2136 <1 9acdez
   2137 =  9ac②z
   2138 <1 9acdef
   2139 =  9ac③
   2140 <1 9bcdzz
   2141 =  9bc④zz
   2142 <1 9bcdez
   2143 =  9bc⑤z
   2144 <1 9bcdef
   2145 =  9bc⑥
   2146 <1 abcdzz
   2147 =  abc⑦zz
   2148 <1 abcdez
   2149 =  abc⑧z
   2150 <1 abcdef
   2151 =  abc⑨
   2152 
   2153 ** test: prefix + discontiguous contraction with missing prefix contraction
   2154 # Unfortunate terminology: The first "prefix" here is the pre-context,
   2155 # the second "prefix" refers to the contraction/relation string that is
   2156 # one shorter than the one being tested.
   2157 @ rules
   2158 &x=p|e
   2159 &y=p|ê
   2160 &z=op|ê
   2161 # No mapping for op|e:
   2162 # Discontiguous contraction matching should not match op|ê in opệ
   2163 # because it would have to skip the dot below and extend a match on op|e by the circumflex,
   2164 # but there is no match on op|e.
   2165 * compare
   2166 <1 oPe
   2167 <1 ope
   2168 =  opx
   2169 <1 opệ
   2170 =  opy\u0323  # y not z
   2171 <1 opê
   2172 =  opz
   2173 
   2174 # We cannot test for fallback by whether the contraction default CE32
   2175 # is for another contraction. With the following rules, there is no mapping for op|e,
   2176 # and the fallback to prefix p has no contractions.
   2177 @ rules
   2178 &x=p|e
   2179 &z=op|ê
   2180 * compare
   2181 <1 oPe
   2182 <1 ope
   2183 =  opx
   2184 <2 opệ
   2185 =  opx\u0323\u0302  # x not z
   2186 <1 opê
   2187 =  opz
   2188 
   2189 # One more variation: Fallback to the simple code point, no shorter non-empty prefix.
   2190 @ rules
   2191 &x=e
   2192 &z=op|ê
   2193 * compare
   2194 <1 ope
   2195 =  opx
   2196 <3 oPe
   2197 =  oPx
   2198 <2 opệ
   2199 =  opx\u0323\u0302  # x not z
   2200 <1 opê
   2201 =  opz
   2202 
   2203 ** test: maxVariable via rules
   2204 @ rules
   2205 [maxVariable space][alternate shifted]
   2206 * compare
   2207 =  \u0020
   2208 =  \u000A
   2209 <1 .
   2210 <1 °  # degree sign
   2211 <1 $
   2212 <1 0
   2213 
   2214 ** test: maxVariable via setting
   2215 @ root
   2216 % maxVariable=currency
   2217 % alternate=shifted
   2218 * compare
   2219 =  \u0020
   2220 =  \u000A
   2221 =  .
   2222 =  °  # degree sign
   2223 =  $
   2224 <1 0
   2225 
   2226 ** test: ICU4J CollationMiscTest/TestContractionClosure (ää)
   2227 # This tests canonical closure, but it also tests that CollationFastLatin
   2228 # bails out properly for contractions with combining marks.
   2229 # For that we need pairs of strings that remain in the Latin fastpath
   2230 # long enough, hence the extra "= b" lines.
   2231 @ rules
   2232 &b=\u00e4\u00e4
   2233 * compare
   2234 <1 b
   2235 =  \u00e4\u00e4
   2236 =  b
   2237 =  a\u0308a\u0308
   2238 =  b
   2239 =  \u00e4a\u0308
   2240 =  b
   2241 =  a\u0308\u00e4
   2242 
   2243 ** test: ICU4J CollationMiscTest/TestContractionClosure (Å)
   2244 @ rules
   2245 &b=\u00C5
   2246 * compare
   2247 <1 b
   2248 =  \u00C5
   2249 =  b
   2250 =  A\u030A
   2251 =  b
   2252 =  \u212B
   2253 
   2254 ** test: reset-before on already-tailored characters, ICU ticket 10108
   2255 @ rules
   2256 &a<w<<x &[before 2]x<<y
   2257 * compare
   2258 <1 a
   2259 <1 w
   2260 <2 y
   2261 <2 x
   2262 
   2263 @ rules
   2264 &a<<w<<<x &[before 2]x<<y
   2265 * compare
   2266 <1 a
   2267 <2 y
   2268 <2 w
   2269 <3 x
   2270 
   2271 @ rules
   2272 &a<w<x &[before 2]x<<y
   2273 * compare
   2274 <1 a
   2275 <1 w
   2276 <1 y
   2277 <2 x
   2278 
   2279 @ rules
   2280 &a<w<<<x &[before 2]x<<y
   2281 * compare
   2282 <1 a
   2283 <1 y
   2284 <2 w
   2285 <3 x
   2286 
   2287 ** test: numeric collation with other settings, ICU ticket 9092
   2288 @ root
   2289 % strength=identical
   2290 % caseFirst=upper
   2291 % numeric=on
   2292 * compare
   2293 <1 100\u0020a
   2294 <1 101
   2295 
   2296 ** test: collation type fallback from unsupported type, ICU ticket 10149
   2297 @ locale fr-CA-u-co-phonebk
   2298 # Expect the same result as with fr-CA, using backwards-secondary order.
   2299 # That is, we should fall back from the unsupported collation type
   2300 # to the locale's default collation type.
   2301 * compare
   2302 <1 cote
   2303 <2 côte
   2304 <2 coté
   2305 <2 côté
   2306 
   2307 ** test: @ is equivalent to [backwards 2], ICU ticket 9956
   2308 @ rules
   2309 &b<a @ &v<<w
   2310 * compare
   2311 <1 b
   2312 <1 a
   2313 <1 cote
   2314 <2 côte
   2315 <2 coté
   2316 <2 côté
   2317 <1 v
   2318 <2 w
   2319 <1 x
   2320 
   2321 ** test: shifted+reordering, ICU ticket 9507
   2322 @ root
   2323 % reorder Grek punct space
   2324 % alternate=shifted
   2325 % strength=quaternary
   2326 # Which primaries are "variable" should be determined without script reordering,
   2327 # and then primaries should be reordered whether they are shifted to quaternary or not.
   2328 * compare
   2329 <4 (  # punctuation
   2330 <4 )
   2331 <4 \u0020  # space
   2332 <1 `  # symbol
   2333 <1 ^
   2334 <1 $  # currency symbol
   2335 <1 €
   2336 <1 0  # numbers
   2337 <1 ε  # Greek
   2338 <1 e  # Latin
   2339 <1 e(e
   2340 <4 e)e
   2341 <4 e\u0020e
   2342 <4 ee
   2343 <3 e(E
   2344 <4 e)E
   2345 <4 e\u0020E
   2346 <4 eE
   2347 
   2348 ** test: "uppercase first" could sort a string before its prefix, ICU ticket 9351
   2349 @ rules
   2350 &\u0001<<<b<<<B
   2351 % caseFirst=upper
   2352 * compare
   2353 <1 aaa
   2354 <3 aaaB
   2355 
   2356 ** test: secondary+case ignores secondary ignorables, ICU ticket 9355
   2357 @ rules
   2358 &\u0001<<<b<<<B
   2359 % strength=secondary
   2360 % caseLevel=on
   2361 * compare
   2362 <1 a
   2363 =  ab
   2364 =  aB
   2365 
   2366 ** test: custom collation rules involving tail of a contraction in Malayalam, ICU ticket 6328
   2367 @ rules
   2368 &[before 2] ൌ << ൗ  # U+0D57 << U+0D4C == 0D46+0D57
   2369 * compare
   2370 <1 ൗx
   2371 <2 ൌx
   2372 <1 ൗy
   2373 <2 ൌy
   2374 
   2375 ** test: quoted apostrophe in compact syntax, ICU ticket 8204
   2376 @ rules
   2377 &q<<*a''c
   2378 * compare
   2379 <1 d
   2380 <1 p
   2381 <1 q
   2382 <2 a
   2383 <2 \u0027
   2384 <2 c
   2385 <1 r
   2386 
   2387 # ICU ticket #8260 "Support all collation-related keywords in Collator.getInstance()"
   2388 ** test: locale -u- with collation keywords, ICU ticket 8260
   2389 @ locale de-u-kv-sPace-ka-shifTed-kn-kk-falsE-kf-Upper-kc-tRue-ks-leVel4
   2390 * compare
   2391 <4 \u0020  # space is shifted, strength=quaternary
   2392 <1 !  # punctuation is regular
   2393 <1 2
   2394 <1 12  # numeric sorting
   2395 <1 B
   2396 <c b  # uppercase first on case level
   2397 <1 x\u0301\u0308
   2398 <2 x\u0308\u0301  # normalization off
   2399 
   2400 ** test: locale @ with collation keywords, ICU ticket 8260
   2401 @ locale fr@colbAckwards=yes;ColStrength=Quaternary;kv=currencY;colalternate=shifted
   2402 * compare
   2403 <4 $  # currency symbols are shifted, strength=quaternary
   2404 <1 àla
   2405 <2 alà  # backwards secondary level
   2406 
   2407 ** test: locale -u- with script reordering, ICU ticket 8260
   2408 @ locale el-u-kr-kana-SYMBOL-Grek-hani-cyrl-latn-digit-armn-deva-ethi-thai
   2409 * compare
   2410 <1 \u0020
   2411 <1 あ
   2412 <1 ☂
   2413 <1 Ω
   2414 <1 丂
   2415 <1 ж
   2416 <1 L
   2417 <1 4
   2418 <1 Ձ
   2419 <1 अ
   2420 <1 ሄ
   2421 <1 ฉ
   2422 
   2423 ** test: locale @collation=type should be case-insensitive
   2424 @ locale de@coLLation=PhoneBook
   2425 * compare
   2426 <1 ae
   2427 <2 ä
   2428 <3 Ä
   2429 
   2430 ** test: import root search rules plus German phonebook rules, ICU ticket 8962
   2431 @ locale de-u-co-search
   2432 * compare
   2433 <1 =
   2434 <1 ≠
   2435 <1 a
   2436 <1 ae
   2437 <2 ä
   2438 
   2439 # Once more, but with runtime builder.
   2440 @ rules
   2441 [import und-u-co-search][import de-u-co-phonebk]
   2442 * compare
   2443 <1 =
   2444 <1 ≠
   2445 <1 a
   2446 <1 ae
   2447 <2 ä
   2448 
   2449 # Once again, with import from "root" not "und" (as in a proper language tag).
   2450 @ rules
   2451 [import root-u-co-search][import de-u-co-phonebk]
   2452 * compare
   2453 <1 =
   2454 <1 ≠
   2455 <1 a
   2456 <1 ae
   2457 <2 ä
   2458 
   2459 ** test: import rules from a language with non-Latin native script, and reset the reordering, ICU ticket 10998
   2460 # Greek should sort Greek first.
   2461 @ rules
   2462 [import el]
   2463 * compare
   2464 <1 4
   2465 <1 Ω
   2466 <1 L
   2467 
   2468 # Import Greek, and then reset the reordering.
   2469 @ rules
   2470 [import el][reorder Zzzz]
   2471 * compare
   2472 <1 4
   2473 <1 L
   2474 <1 Ω
   2475 
   2476 # "others" is a synonym for Zzzz.
   2477 @ rules
   2478 [import el][reorder others]
   2479 * compare
   2480 <1 4
   2481 <1 L
   2482 <1 Ω
   2483 
   2484 ** test: regression test for CollationFastLatinBuilder, ICU ticket 11388
   2485 @ rules
   2486 &x<<aa<<<Aa<<<AA
   2487 % strength=secondary
   2488 * compare
   2489 <1 AA
   2490 <2 Aẩ
   2491 <2 aą
   2492 * compare
   2493 <1 AA
   2494 <2 aą
   2495 
   2496 ** test: tailor tertiary-after a common tertiary where there is a lower one
   2497 # Assume that Hiragana small A has a below-common tertiary, and Hiragana A has a common one.
   2498 # See ICU ticket 11448 & CLDR ticket 7222.
   2499 @ rules
   2500 &あ<<<x<<<y<<<z
   2501 * compare
   2502 <1 ぁ
   2503 <3 あ
   2504 <3 x
   2505 <3 y
   2506 <3 z
   2507 <3 ァ
   2508 <1 い
   2509 
   2510 ** test: tailor tertiary-after a below-common tertiary
   2511 @ rules
   2512 &ぁ<<<x<<<y<<<z
   2513 * compare
   2514 <1 ぁ
   2515 <3 x
   2516 <3 y
   2517 <3 z
   2518 <3 あ
   2519 <3 ァ
   2520 <1 い
   2521 
   2522 ** test: tailor tertiary-before a common tertiary where there is a lower one
   2523 @ rules
   2524 &[before 3]あ<<<x<<<y<<<z
   2525 * compare
   2526 <1 ぁ
   2527 <3 x
   2528 <3 y
   2529 <3 z
   2530 <3 あ
   2531 <3 ァ
   2532 <1 い
   2533 
   2534 ** test: tailor tertiary-before a below-common tertiary
   2535 @ rules
   2536 &[before 3]ぁ<<<x<<<y<<<z
   2537 * compare
   2538 <1 x
   2539 <3 y
   2540 <3 z
   2541 <3 ぁ
   2542 <3 あ
   2543 <3 ァ
   2544 <1 い
   2545 
   2546 ** test: reorder single scripts not groups, ICU ticket 11449
   2547 @ root
   2548 % reorder Goth Latn
   2549 * compare
   2550 <1 4
   2551 <1 𐌰  # Gothic
   2552 <1 L
   2553 <1 Ω
   2554 # Before ICU 55, the following reordered together with Gothic.
   2555 <1 𐌈  # Old Italic
   2556 <1 𐑐  # Shavian
   2557 
   2558 # Check for presence of certain chars 乛冂刂卜又小彑艹日月爫牛辶 in
   2559 # zh pinyin and stroke, ICU-13790
   2560 # (bracket pinyin test with 卬..作, stroke test with 一..乾)
   2561 
   2562 ** test: DataDrivenCollationTest/VerifyCertainCharsInPinyin
   2563 @ locale zh-u-co-pinyin
   2564 * compare
   2565 < 卬
   2566 < 卜
   2567 < 艹
   2568 < 辶
   2569 < 刂
   2570 < 彑
   2571 < 冂
   2572 < 牛
   2573 < 日
   2574 < 小
   2575 < 乛
   2576 < 又
   2577 < 月
   2578 < 爫
   2579 < 作
   2580 
   2581 ** test: DataDrivenCollationTest/VerifyCertainCharsInStroke
   2582 @ locale zh-u-co-stroke
   2583 * compare
   2584 < 一
   2585 < 乛
   2586 < 冂
   2587 < 刂
   2588 < 卜
   2589 < 又
   2590 < 小
   2591 < 彑
   2592 < 艹
   2593 < 日
   2594 < 月
   2595 < 爫
   2596 < 牛
   2597 < 辶
   2598 < 乾
	tor-browser The Tor Browser
	git clone https://git.dasho.dev/tor-browser.git
	Log \| Files \| Refs \| README \| LICENSE