tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

non-unicode-case-folding-backreference.js (2392B)


      1 // |jit-test| skip-if: typeof Intl === 'undefined'
      2 
      // Canonicalize for case-insensitive, non-Unicode RegExp matching; see
      // https://tc39.es/ecma262/#sec-runtime-semantics-canonicalize-ch
      //
      // `ch` is a string whose first UTF-16 code unit is the character to fold.
      // The result has one of two shapes:
      //   - `ch` itself (a string) when upper-casing must not be applied, i.e.
      //     the upper-cased form is longer than one code unit, or a non-ASCII
      //     character would turn into an ASCII one;
      //   - otherwise the numeric code unit of the upper-cased character.
      // Callers compare two results with `===`, so a string result never equals
      // a numeric one.
      function Canonicalize(ch) {
        const upper = ch.toUpperCase();

        // A multi-code-unit upper-case mapping (e.g. "SS") leaves `ch` untouched.
        if (upper.length > 1) {
          return ch;
        }

        const upperUnit = upper.charCodeAt(0);
        const originalUnit = ch.charCodeAt(0);

        // Non-ASCII characters are not allowed to fold into the ASCII range.
        const foldsIntoAscii = originalUnit >= 128 && upperUnit < 128;
        return foldsIntoAscii ? ch : upperUnit;
      }
     11 
     // Checks every ordered pair of distinct characters in `eclass` against a
     // case-insensitive back-reference. The two-character string "xy" is
     // expected to match /(.)\1/i exactly when Canonicalize(x) and
     // Canonicalize(y) are strictly equal; the outcome is reported via assertEq.
     function TestEquivalenceClass(eclass) {
       const pattern = /(.)\1/i;

       for (let first = 0; first < eclass.length; first++) {
         for (let second = 0; second < eclass.length; second++) {
           // A character is never paired with itself.
           if (first !== second) {
             const lead = eclass[first];
             const trail = eclass[second];
             const expected = Canonicalize(lead) === Canonicalize(trail);

             assertEq(pattern.test(lead + trail), expected);
           }
         }
       }
     }
     27 
     // Runs TestEquivalenceClass on each entry of the global
     // `equivalence_classes` table, in table order.
     function TestAll() {
       for (let index = 0; index < equivalence_classes.length; index++) {
         TestEquivalenceClass(equivalence_classes[index]);
       }
     }
     33 
     // Case-folding equivalence classes worth exercising, as computed by ICU's
     // UnicodeSet::closeOver. A class is listed when it has more than two
     // members, or when any member belongs to IgnoreSet or SpecialAddSet as
     // defined in new-regexp/special-case.h.
     var equivalence_classes = [
       // A, a (sanity check)
       '\u0041\u0061',
       // K, k, KELVIN SIGN
       '\u004b\u006b\u212a',
       // S, s, LATIN SMALL LETTER LONG S
       '\u0053\u0073\u017f',
       // MICRO SIGN, Greek capital mu, Greek small mu
       '\u00b5\u039c\u03bc',
       // Capital and small A with ring above, ANGSTROM SIGN
       '\u00c5\u00e5\u212b',
       // Small sharp s, capital sharp s
       '\u00df\u1e9e',
       // Greek capital omega, Greek small omega, OHM SIGN
       '\u03a9\u03c9\u2126',
       // Greek small iota with dialytika and tonos / with dialytika and oxia
       '\u0390\u1fd3',
       // Greek capital theta, small theta, theta symbol, capital theta symbol
       '\u0398\u03b8\u03d1\u03f4',
       // Greek small upsilon with dialytika and tonos / with dialytika and oxia
       '\u03b0\u1fe3',
       // Greek alpha with psili: small + ypogegrammeni, capital + prosgegrammeni
       '\u1f80\u1f88',
       // Greek alpha: small + ypogegrammeni, capital + prosgegrammeni
       '\u1fb3\u1fbc',
       // Greek eta: small + ypogegrammeni, capital + prosgegrammeni
       '\u1fc3\u1fcc',
       // Greek omega: small + ypogegrammeni, capital + prosgegrammeni
       '\u1ff3\u1ffc',
       // Latin small ligatures: long s + t (U+FB05), s + t (U+FB06)
       '\ufb05\ufb06',

       // From here on, every class is well behaved: it has more than two
       // characters but only a single canonical case-folded character.

       // Latin digraph letters (upper / title / lower case forms)
       '\u01c4\u01c5\u01c6',
       '\u01c7\u01c8\u01c9',
       '\u01ca\u01cb\u01cc',
       '\u01f1\u01f2\u01f3',

       // Greek letters together with their symbol / variant forms
       '\u0345\u0399\u03b9\u1fbe',
       '\u0392\u03b2\u03d0',
       '\u0395\u03b5\u03f5',
       '\u039a\u03ba\u03f0',
       '\u03a0\u03c0\u03d6',
       '\u03a1\u03c1\u03f1',
       '\u03a3\u03c2\u03c3',
       '\u03a6\u03c6\u03d5',

       // Cyrillic letters together with their historic variant forms
       '\u0412\u0432\u1c80',
       '\u0414\u0434\u1c81',
       '\u041e\u043e\u1c82',
       '\u0421\u0441\u1c83',
       '\u0422\u0442\u1c84\u1c85',
       '\u042a\u044a\u1c86',
       '\u0462\u0463\u1c87',
       '\u1c88\ua64a\ua64b',

       // Latin S with dot above (capital, small) and long s with dot above
       '\u1e60\u1e61\u1e9b'
     ];

     // Run the whole suite as soon as the script is evaluated.
     TestAll();