tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

ssearch.xml (14300B)


      1 <?xml version="1.0" encoding="UTF-8"?>
      2 
      3 <!-- Copyright (C) 2016 and later: Unicode, Inc. and others. License & terms of use: http://www.unicode.org/copyright.html -->
      4 <!-- Copyright (c) 2007-2009 IBM Corporation and others. All rights reserved -->
      5 
      6 <!-- Test data file for string search  -->
      7 <!DOCTYPE stringsearch-tests [
      8 <!ELEMENT stringsearch-tests (test-case+)>  <!-- root element: one or more test cases -->
      9 <!ATTLIST stringsearch-tests debug IDREF #IMPLIED >  <!-- optional reference to one test-case id; NOTE(review): presumably singles out that case for debugging, confirm against the harness -->
     10 <!ELEMENT test-case (pattern, pre?, m?, post?)>  <!-- children appear in this order; only pattern is required -->
     11 <!ATTLIST test-case 
     12          id ID #REQUIRED
     13          locale CDATA "en" 
     14          strength (PRIMARY | SECONDARY | TERTIARY | QUATERNARY | IDENTICAL) "TERTIARY" 
     15          norm (ON | OFF) "OFF"
     16          alternate_handling (NON_IGNORABLE | SHIFTED) "NON_IGNORABLE"
     17          >  <!-- when omitted: locale "en", strength TERTIARY, norm OFF, alternate_handling NON_IGNORABLE -->
     18 
     19 <!ELEMENT pattern (#PCDATA)>  <!-- the string being searched for -->
     20 <!ELEMENT pre  (#PCDATA)>  <!-- NOTE(review): presumably the text that precedes the expected match; confirm against the harness -->
     21 <!ELEMENT m    (#PCDATA)>  <!-- the text expected to match; the cases commented as no-match omit this element -->
     22 <!ELEMENT post (#PCDATA)>  <!-- NOTE(review): presumably the text that follows the expected match; confirm against the harness -->
     23 ]>
     24 
     25 <stringsearch-tests>
     26  <!-- debug="test11"     (for copying into the above element)  -->
     27    
     28    <!-- Very simple match  -->
     29    <test-case id="test01" >  <!-- all attributes defaulted; the m text is identical to the pattern and sits between "xxx" and "yyy" -->
     30       <pattern>abc</pattern>
     31       <pre>xxx</pre><m>abc</m><post>yyy</post>
     32    </test-case>
     33    
     34    <!-- Very simple no-match  -->
     35    <test-case id="test02" >  <!-- no m element: neither "xxx" nor "yyy" contains the pattern -->
     36       <pattern>abc</pattern>
     37       <pre>xxx</pre><post>yyy</post>
     38    </test-case>
     39 
     40    <!-- Match after several near-misses. -->
     41    <test-case id="test03" >  <!-- pre holds one-letter-off variants (spring, stling, strxng, strilg, strinx) and a truncated "stri" before the real match -->
     42       <pattern>string</pattern>
     43       <pre>silly spring stling strxng strilg strinx stri</pre><m>string</m><post> fling</post>
     44    </test-case>
     45    
     46    <test-case id="test04" strength="PRIMARY" >  <!-- PRIMARY: upper-case pattern is expected to match lower-case "fuss"; post begins with more s characters -->
     47       <pattern>FUSS</pattern>
     48       <pre>abc</pre><m>fuss</m><post>sss</post>
     49    </test-case>
     50    
     51    <test-case id="test05" strength="PRIMARY" >  <!-- PRIMARY: "FUSS" is expected to match "fuß" (sharp s in the text) -->
     52       <pattern>FUSS</pattern>
     53       <pre>abc</pre><m>fuß</m><post>sss</post>
     54    </test-case>
     55 
     56  <test-case id="test05.5" strength="PRIMARY" >  <!-- same ss-versus-sharp-s expectation, with the match at the start of a longer word ("fußball") -->
     57    <pattern>fuss</pattern>
     58    <pre>a </pre>
     59    <m>fuß</m>
     60    <post>ball table</post>
     61  </test-case>
     62 
     63  <test-case id="test06" strength="PRIMARY" >  <!-- reverse direction: sharp s in the pattern, "ss" in the text -->
     64      <pattern>fuß</pattern>
     65       <pre>abc</pre><m>fuss</m><post>xyz</post>
     66    </test-case>
     67    
     68    <test-case id="test07" strength="SECONDARY" >  <!-- SECONDARY: no m, so "fuß" is expected not to match "fuss" (contrast with test06 at PRIMARY) -->
     69      <pattern>fuß</pattern>
     70      <pre>abcfussxyz</pre>
     71    </test-case>
     72    
     73    <test-case id="test08" strength="PRIMARY" >  <!-- no m: "fus" is expected not to match the front of "fuß"; presumably because the match would end part-way through the sharp s, cf. the comment on test09 -->
     74      <pattern>fus</pattern>
     75      <pre>abcfuß</pre><post>xyz</post>
     76    </test-case>
     77    
     78    <!-- A good match following an initial candidate match that was rejected
     79         because it did not end on a character boundary -->
     80    <test-case id="test09" strength="PRIMARY">  <!-- pre contains "fuß" followed by two spaces; the expected match is the later plain "fus" -->
     81      <pattern>fus</pattern>
     82      <pre>fuß  </pre><m>fus</m><post>sss</post>
     83    </test-case>
     84 
     85 
     86    <!-- Test cases from usrchdat.inc  BREAKITERATOREXACT -->
     87 
     88    <test-case id="test10" strength="TERTIARY">  <!-- no pre: the marked match is at the very start of the text; post ("y fox") contains a further "fox" -->
     89      <pattern>fox</pattern>
     90      <m>fox</m><post>y fox</post>
     91    </test-case>
     92 
     93    <test-case id="test11" strength="PRIMARY" locale="de_DE@collation=phonebook">  <!-- NOTE(review): m is empty, so no matched text is recorded between "This is a " and "ne"; a non-ASCII match string may have been lost from this copy, verify against the upstream file -->
     94      <pattern>toe</pattern>
     95      <pre>This is a </pre><m></m><post>ne</post>
     96    </test-case>
     97    
     98    <test-case id="test11a" strength="SECONDARY" locale="de_DE@collation=phonebook">  <!-- SECONDARY with phonebook collation: no m, so "toe" is expected not to match in "This is a " + "Töne" -->
     99      <pattern>toe</pattern>
    100      <pre>This is a </pre><post>Töne</post>
    101    </test-case>
    102    
    103    <test-case id="test12" strength="TERTIARY">  <!-- every é in pre is expected to be passed over; the match is the unaccented e at the very end of the text (post is empty) -->
    104      <pattern>e</pattern>
    105      <pre>tésting that é doés not match </pre><m>e</m><post></post>
    106    </test-case>
    107    
    108    <test-case id="test13" strength="PRIMARY" locale="fr">  <!-- PRIMARY, fr: unaccented lower-case pattern is expected to match the first É at the start of the text; a second É follows in post -->
    109      <pattern>e</pattern>
    110      <pre></pre><m>É</m><post>É</post>
    111    </test-case>
    112    
    113    <test-case id="test14" strength="PRIMARY" locale="fr">  <!-- the expected match covers O together with the following \u0302 (combining circumflex), not the bare O -->
    114      <pattern>O</pattern>
    115      <pre>C</pre><m>O\u0302</m><post></post>
    116    </test-case>
    117 
    118 
    119    <!-- Test cases from usrchdat.inc  STRENGTH -->
    120 
    121 
    122    <test-case id="test15" strength="PRIMARY" locale="en">  <!-- the marked match is the stand-alone word "fox"; post also contains "foxes" -->
    123      <pattern>fox</pattern>
    124      <pre>The quick brown </pre><m>fox</m><post> jumps over the lazy foxes</post>
    125    </test-case>
    126    
    127    <test-case id="test16" strength="PRIMARY" locale="fr">  <!-- PRIMARY, fr: unaccented pattern against text with \u00E9 (e acute) in both e positions -->
    128      <pattern>peche</pattern>
    129      <pre>blackbirds pat </pre><m>p\u00E9ch\u00E9</m><post> </post>
    130    </test-case>
    131    
    132    <test-case id="test17" strength="PRIMARY" locale="fr">  <!-- text has \u00EA (e circumflex) in the first e position only -->
    133      <pattern>peche</pattern>
    134      <pre>blackbirds pat </pre><m>p\u00EAche</m><post> </post>
    135    </test-case>
    136    
    137    <test-case id="test18" strength="PRIMARY" locale="fr">  <!-- the match is the front of a longer word: post begins with "r" -->
    138      <pattern>peche</pattern>
    139      <pre>blackbirds pat </pre><m>p\u00E9che</m><post>r </post>
    140    </test-case>
    141    
    142    <test-case id="test19" strength="PRIMARY" locale="fr">  <!-- as test18, with \u00EA in place of \u00E9 -->
    143      <pattern>peche</pattern>
    144      <pre>blackbirds pat </pre><m>p\u00EAche</m><post>r </post>
    145    </test-case>
    146    
    147    <test-case id="test20" strength="PRIMARY" locale="es">  <!-- PRIMARY, es: same-case match followed by a comma -->
    148      <pattern>channel</pattern>
    149      <pre>A </pre><m>channel</m><post>, </post>
    150    </test-case>
    151    
    152    <test-case id="test21" strength="PRIMARY" locale="es">  <!-- all upper-case text matched by the lower-case pattern -->
    153      <pattern>channel</pattern>
    154      <pre>A </pre><m>CHANNEL</m><post>, </post>
    155    </test-case>
    156    
    157    <test-case id="test22" strength="PRIMARY" locale="es">  <!-- capitalised text; the match is followed directly by "s" -->
    158      <pattern>channel</pattern>
    159      <pre>A </pre><m>Channel</m><post>s, </post>
    160    </test-case>
    161    
    162    <test-case id="test23" strength="PRIMARY" locale="es">  <!-- match followed by three dots -->
    163      <pattern>channel</pattern>
    164      <pre>A </pre><m>channel</m><post>... </post>
    165    </test-case>
    166    
    167    <test-case id="test24" strength="TERTIARY" locale="en">  <!-- decomposed pattern (A + \u0300) is expected to match precomposed \u00c0; the bare A in pre and post is not marked as a match. NOTE(review): the trailing double quote in post looks like a leftover from the C source, confirm -->
    168      <pattern>A\u0300</pattern>
    169      <pre>A miss, and then </pre><m>\u00c0</m><post> should match but not A"</post>
    170    </test-case>
    171    
    172    <!-- TODO:  In the original test data, this test matched at IDENTICAL strength.
    173                Doesn't seem right.  The characters are different.
    174                -->
    175    <test-case id="test24a" strength="IDENTICAL" locale="en">  <!-- as written, expects decomposed A + \u0300 to match precomposed \u00c0 even at IDENTICAL strength (questioned by the TODO above) -->
    176      <pattern>A\u0300</pattern>
    177      <pre>At IDENTICAL, should this match?  </pre><m>\u00c0</m><post></post>
    178    </test-case>
    179 
    180  <test-case id="test24b" strength="IDENTICAL" alternate_handling="SHIFTED" locale="en">  <!-- same pattern and text as test24a, with alternate_handling set to SHIFTED -->
    181    <pattern>A\u0300</pattern>
    182    <pre>At IDENTICAL, should this match?  </pre>
    183    <m>\u00c0</m>
    184    <post></post>
    185  </test-case>
    186 
    187  <test-case id="test25" strength="SECONDARY" locale="en">  <!-- SECONDARY: upper-case Ű pattern is expected to match the lower-case ű first; an upper-case Ű follows in post -->
    188      <pattern>Ű</pattern>
    189      <pre>12</pre><m>ű</m><post> Ű</post>
    190    </test-case>
    191    
    192    <test-case id="test26" strength="SECONDARY" locale="en">  <!-- SECONDARY: upper-case A pattern is expected to match lower-case a -->
    193      <pattern>A</pattern>
    194      <pre>12</pre><m>a</m><post>...</post>
    195    </test-case>
    196 
    197 
    198    <!--  Test Cases from usrchdat.inc,  VARIABLE -->
    199    <test-case id="test27" strength="TERTIARY" locale="en">  <!-- alternate_handling is left at its NON_IGNORABLE default: the hyphenated "black-bird" in pre is not the match, the unhyphenated word is -->
    200      <pattern>blackbird</pattern>
    201      <pre>black-bird </pre><m>blackbird</m><post>...</post>
    202    </test-case>
    203 
    204    <test-case id="test28" strength="TERTIARY" locale="en">  <!-- no m: "go" is expected not to match in " on" -->
    205      <pattern>go</pattern>
    206      <pre> on</pre>
    207    </test-case>
    208 
    209    <!-- TODO:  this gives a U_ILLEGAL_ARGUMENT error when opening
    210                the UStringSearch.  How did the original test run? -->
    211    <!--
    212    <test-case id="test29" strength="PRIMARY" locale="en">
    213      <pattern>  </pattern>
    214      <pre></pre><m></m><post>abc</post>
    215    </test-case>
    216    -->
    217 
    218    <test-case id="test30" strength="SECONDARY" locale="en">  <!-- no m: the letters a, b, c occur only with spaces between them. NOTE(review): the trailing double quote in pre looks like a leftover from the C source, confirm -->
    219      <pattern>abc</pattern>
    220      <pre>  a bc   ab c    a  bc     ab  c"</pre>
    221    </test-case>
    222 
    223    <test-case id="test31" strength="SECONDARY" locale="en">  <!-- no m: the text consists only of spaces and hyphens -->
    224      <pattern>abc</pattern>
    225      <pre>           ---------------</pre>
    226    </test-case>
    227 
    228 
    229    <!--  Normalization test cases from usrchdat.inc  -->
    230    <test-case id="test32" strength="TERTIARY"  norm="ON">  <!-- norm ON: the text carries the two combining marks in the opposite order from the pattern and is still expected to match -->
    231      <pattern>a\u0325\u0300</pattern>
    232      <pre></pre><m>a\u0300\u0325</m>
    233    </test-case>
    234 
    235 
    236    <test-case id="test32a" strength="TERTIARY"  norm="OFF">  <!-- norm OFF: same pattern and text as test32, but no m, so no match is expected -->
    237      <pattern>a\u0325\u0300</pattern>
    238      <pre>a\u0300\u0325</pre>
    239    </test-case>
    240 
    241 
    242    <!-- COMPOSITEBOUNDARIES from usrchdat.inc
    243         Boundaries are not identical to original test data because
    244         of matching only full combining sequences
    245    -->
    246    <test-case id="test40" strength="TERTIARY">  <!-- no m: plain A is expected not to match precomposed À -->
    247      <pattern>A</pattern>
    248      <pre>À</pre>   <!-- \u00C0 -->
    249    </test-case>
    250    
    251    <test-case id="test41" strength="TERTIARY">  <!-- the expected match is the plain A that follows À -->
    252      <pattern>A</pattern>
    253      <pre>À</pre><m>A</m><post>C</post>
    254    </test-case>
    255    
    256    <test-case id="test42" strength="TERTIARY">  <!-- no m: A + \u030A (combining ring above) is expected not to match in À followed by \u01FA -->
    257      <pattern>A\u030A</pattern>
    258      <pre>À\u01FA</pre>
    259    </test-case>
    260 
    261 
    262 
    263    <!-- SUPPLEMENTARYCANONICAL from usrchdat.inc  -->
    264    <test-case id="test50" strength="TERTIARY">  <!-- pattern is one surrogate pair; pre holds pairs that differ in the lead or the trail unit, post holds the same pair glued to "abc" or next to an extra unpaired surrogate -->
    265      <pattern>\uD800\uDC00</pattern>
    266      <pre>abc \uD802\uDC00 \uD800\uDC01 \uD801\uDC00 </pre><m>\uD800\uDC00</m>
    267      <post>abc abc\uD800\uDC00 \uD800\uD800\uDC00 \uD800\uDC00\uDC00</post>
    268    </test-case>
    269    
    270    <test-case id="test51" strength="TERTIARY">  <!-- NOTE(review): the escapes here use a doubled backslash, unlike test50; if the harness unescapes this text the result would be the literal characters \uD834\uDDB9 rather than a surrogate pair, confirm intent -->
    271      <pattern>\\uD834\\uDDB9</pattern>
    272      <pre>and</pre><m>\\uD834\\uDDB9</m><post>this sentence</post>
    273    </test-case>
    274 
    275    <test-case id="test52" strength="TERTIARY">  <!-- pattern and match padded with a space on each side (same doubled-backslash form as test51) -->
    276      <pattern> \\uD834\\uDDB9 </pattern>
    277      <pre>and</pre><m> \\uD834\\uDDB9 </m><post>this sentence</post>
    278    </test-case>
    279    
    280    <test-case id="test53" strength="TERTIARY">  <!-- pattern and match wrapped in hyphens -->
    281      <pattern>-\\uD834\\uDDB9-</pattern>
    282      <pre>and</pre><m>-\\uD834\\uDDB9-</m><post>this sentence</post>
    283    </test-case>
    284    
    285    <test-case id="test54" strength="TERTIARY">  <!-- pattern and match wrapped in commas -->
    286      <pattern>,\\uD834\\uDDB9,</pattern>
    287      <pre>and</pre><m>,\\uD834\\uDDB9,</m><post>this sentence</post>
    288    </test-case>
    289    
    290    <test-case id="test55" strength="TERTIARY">  <!-- pattern and match wrapped in question marks -->
    291      <pattern>?\\uD834\\uDDB9?</pattern>
    292      <pre>and</pre><m>?\\uD834\\uDDB9?</m><post>this sentence</post>
    293    </test-case>
    294    
    295 
    296    <!-- Long combining sequences  -->
    297    <!-- Backwards search fails because pattern ends w/ ignorables
    298    <test-case id="test60" strength="PRIMARY">
    299      <pattern>A\u0301\u0301\u0301\u0301</pattern>
    300      <m>A\u0301\u0301\u0301\u0301\u0301</m>
    301    </test-case>
    302    -->
    303 
    304    <test-case id="test61" strength="TERTIARY">  <!-- no m: the text has five \u0301 marks after A, one more than the pattern's four -->
    305      <pattern>A\u0301\u0301\u0301\u0301</pattern>
    306          <pre>A\u0301\u0301\u0301\u0301\u0301</pre>
    307    </test-case>
    308    
    309    <test-case id="test62" strength="TERTIARY">  <!-- text is exactly the pattern (A plus four \u0301 marks) and is expected to match -->
    310      <pattern>A\u0301\u0301\u0301\u0301</pattern>
    311            <m>A\u0301\u0301\u0301\u0301</m>
    312    </test-case>
    313 
    314    <!-- stand-alone combining marks don't match attached marks  -->
    315    <test-case id="test63" strength="TERTIARY">  <!-- no m: every \u0301 in the text follows the base letter A -->
    316      <pattern>\u0301</pattern>
    317      <pre>A\u0301\u0301\u0301\u0301</pre>
    318    </test-case>
    319    
    320    <test-case id="test64" strength="TERTIARY">  <!-- only post is given, no m: a single \u0301 is expected not to match within a run of four -->
    321      <pattern>\u0301</pattern>
    322      <post>\u0301\u0301\u0301\u0301</post>
    323    </test-case>
    324 
    325  <!-- stand-alone combining mark does match an un-attached combining mark -->
    326    <test-case id="test65" strength="TERTIARY">  <!-- the leading \u0301 has no base character before it and is the expected match; the two marks in post follow A -->
    327       <pattern>\u0301</pattern>
    328       <m>\u0301</m><post>A\u0301\u0301</post>
    329    </test-case>
    330 
    331    <test-case id="test66" strength="TERTIARY">  <!-- the text consists solely of the single mark -->
    332       <pattern>\u0301</pattern>
    333       <m>\u0301</m>
    334    </test-case>
    335          
    336    <!-- stand-alone combining marks at end of the target text -->
    337    <test-case id="test67" strength="TERTIARY">  <!-- the mark comes after "\r" rather than directly after "d", and is the expected match at the end of the text. NOTE(review): "\r" is presumably unescaped to a carriage return; test73 and test74 use a character reference for the same purpose -->
    338       <pattern>\u0301</pattern>
    339       <pre>abcd\r</pre><m>\u0301</m>
    340    </test-case>
    341 
    342      <!-- attached combining marks at end of the target text, no match -->
    343    <test-case id="test68" strength="TERTIARY">  <!-- the mark directly follows "d", so no m is given -->
    344       <pattern>\u0301</pattern>
    345       <pre>abcd\u0301</pre>
    346    </test-case>
    347 
    348 
    349 
    350   <!-- no match within expansions at the start -->
    351    <test-case id="test70" strength="PRIMARY">  <!-- no m: "Eligature" is expected not to match starting inside Æ -->
    352      <pattern>Eligature</pattern>
    353      <pre>Æligature</pre>
    354    </test-case>
    355 
    356    <test-case id="test71" strength="PRIMARY">  <!-- PRIMARY: the two letters "AE" in the pattern are expected to match the single character Æ -->
    357      <pattern>AEligature</pattern>
    358      <m>Æligature</m>
    359    </test-case>
    360 
    361    <test-case id="test72" strength="PRIMARY">  <!-- NOTE(review): identical to test71 apart from indentation; possibly a duplicate, confirm whether a different variant was intended -->
    362        <pattern>AEligature</pattern>
    363        <m>Æligature</m>
    364    </test-case>
    365    
    366    <!-- unattached combining Tilde will not match a Tilde that is
    367         part of a composed Ñ  (\u00D1)  -->
    368    <test-case id="test73" strength="SECONDARY">  <!-- pre ends with the character reference x0d (carriage return), which separates Ñ from the free-standing tilde marked as the match -->
    369        <pattern>\u0303</pattern>  <!-- combining tilde -->
    370        <pre>Ñ&#x0d;</pre><m>\u0303</m>
    371    </test-case>
    372    
    373    <test-case id="test74" strength="SECONDARY">  <!-- as test73, with a space before the carriage return and the letter "a" after the match -->
    374        <pattern>\u0303</pattern>  <!-- combining tilde -->
    375        <pre>Ñ &#x0d;</pre><m>\u0303</m><post>a</post>
    376    </test-case>
    377 
    378  <test-case id="test75" strength="TERTIARY" locale="fr">  <!-- precomposed \u00EA in the pattern against precomposed \u00EA in the text -->
    379    <pattern>\u00EA</pattern>
    380    <pre>p</pre><m>\u00EA</m><post>che</post>
    381  </test-case>
    382 
    383  <test-case id="test76" strength="TERTIARY" locale="fr">  <!-- precomposed pattern against decomposed text (e + \u0302) -->
    384    <pattern>\u00EA</pattern>
    385    <pre>p</pre><m>e\u0302</m><post>che</post>
    386  </test-case>
    387 
    388  <test-case id="test77" strength="TERTIARY" locale="fr">  <!-- decomposed pattern (e + \u0302) against precomposed text -->
    389    <pattern>e\u0302</pattern>
    390    <pre>p</pre><m>\u00EA</m><post>che</post>
    391  </test-case>
    392 
    393  <!-- Test cases from ticket:5382 -->
    394  <test-case id="test78" strength="SECONDARY" locale="hu_HU">  <!-- SECONDARY, hu_HU: pattern \u0170 against text \u0171, with the match at the start of the text -->
    395    <pattern>\u0170</pattern>
    396    <m>\u0171</m>
    397    <post>12</post>
    398  </test-case>
    399 
    400  <test-case id="test79" strength="SECONDARY" locale="hu_HU">  <!-- same pair, with the match in the middle, between "1" and "2" -->
    401    <pattern>\u0170</pattern>
    402    <pre>1</pre>
    403    <m>\u0171</m>
    404    <post>2</post>
    405  </test-case>
    406 
    407  <test-case id="test80" strength="SECONDARY" locale="hu_HU">  <!-- same pair, with the match at the end of the text, after "12" -->
    408    <pattern>\u0170</pattern>
    409    <pre>12</pre>
    410    <m>\u0171</m>
    411  </test-case>
    412  
    413  <!-- Test cases from ticket:5959 -->
    414  <test-case id="test81" strength="SECONDARY">  <!-- single code point \u2166 (Roman numeral seven) in the pattern is expected to match the three letters VII -->
    415    <pattern>\u2166</pattern>
    416    <m>VII</m>
    417  </test-case>
    418 
    419  <test-case id="test82" strength="SECONDARY">  <!-- reverse direction: the letters VII in the pattern are expected to match \u2166 in the text -->
    420    <pattern>VII</pattern>
    421    <m>\u2166</m>
    422  </test-case>
    423 
    424  <test-case id="test83" strength="IDENTICAL" alternate_handling="SHIFTED" locale="en">  <!-- IDENTICAL + SHIFTED: the phrase is expected to match itself verbatim inside a longer sentence -->
    425    <pattern>Universal Declaration of Human Rights</pattern>
    426    <pre>Proclaims this </pre><m>Universal Declaration of Human Rights</m><post> as a common standard of achievement for all peoples and all nations</post>
    427  </test-case>
    428 
    429  <test-case id="test83b" strength="TERTIARY" alternate_handling="SHIFTED" locale="en">  <!-- TERTIARY + SHIFTED: the pattern with spaces is expected to match text whose words are joined by hyphens -->
    430    <pattern>Universal Declaration of Human Rights</pattern>
    431    <pre>Proclaims this </pre>
    432    <m>Universal-Declaration-of-Human-Rights</m>
    433    <post> as a common standard of achievement for all peoples and all nations</post>
    434  </test-case>
    435 
    436  <test-case id="test84" strength="TERTIARY" locale="en">  <!-- TERTIARY: the text differs from the pattern only in the middle code point (\u0592 instead of \u0591) and is still expected to match -->
    437    <pattern>\u05E9\u0591\u05E9</pattern>
    438    <m>\u05E9\u0592\u05E9</m>
    439  </test-case>
    440 
    441  <test-case id="test84b" strength="IDENTICAL" locale="en">  <!-- IDENTICAL: same pattern and text as test84, but no m, so no match is expected -->
    442    <pattern>\u05E9\u0591\u05E9</pattern>
    443    <pre>\u05E9\u0592\u05E9</pre>
    444  </test-case>
    445 </stringsearch-tests>
    446