tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

non-unicode-case-folding.js (2467B)


      1 // |jit-test| skip-if: typeof Intl === 'undefined'
      2 
      3 // See https://tc39.es/ecma262/#sec-runtime-semantics-canonicalize-ch
      4 function Canonicalize(ch) {
      5  var u = ch.toUpperCase();
      6  if (u.length > 1) return ch;
      7  var cu = u.charCodeAt(0);
      8  if (ch.charCodeAt(0) >= 128 && cu < 128) return ch;
      9  return cu;
     10 }
     11 
     12 function TestEquivalenceClass(eclass) {
     13  for (var i = 0; i < eclass.length; i++) {
     14    for (var j = 0; j < eclass.length; j++) {
     15      if (i == j) continue;
     16      var c1 = eclass[i];
     17      var c2 = eclass[j];
     18      var shouldMatch = Canonicalize(c1) === Canonicalize(c2);
     19 
     20      var re1 = new RegExp(c1, 'i');
     21      var re2 = new RegExp('[' + c1 + ']', 'i');
     22 
     23      assertEq(re1.test(c2), shouldMatch);
     24      assertEq(re2.test(c2), shouldMatch);
     25    }
     26  }
     27 }
     28 
     29 function TestAll() {
     30  for (var eclass of equivalence_classes) {
     31    TestEquivalenceClass(eclass);
     32  }
     33 }
     34 
     35 // Interesting case-folding equivalence classes (as determined by
     36 // ICU's UnicodeSet::closeOver). A class is interesting if it contains
     37 // more than two characters, or if it contains any characters in
     38 // IgnoreSet or SpecialAddSet as defined in new-regexp/special-case.h.
     39 var equivalence_classes = [
     40  '\u0041\u0061',              // Aa (sanity check)
     41  '\u004b\u006b\u212a',        // KkK
     42  '\u0053\u0073\u017f',        // Ssſ
     43  '\u00b5\u039c\u03bc',        // µΜμ
     44  '\u00c5\u00e5\u212b',        // ÅåÅ
     45  '\u00df\u1e9e',              // ßẞ
     46  '\u03a9\u03c9\u2126',        // ΩωΩ
     47  '\u0390\u1fd3',              // ΐΐ
     48  '\u0398\u03b8\u03d1\u03f4',  // Θθϑϴ
     49  '\u03b0\u1fe3',              // ΰΰ
     50  '\u1f80\u1f88',              // ᾀᾈ
     51  '\u1fb3\u1fbc',              // ᾳᾼ
     52  '\u1fc3\u1fcc',              // ῃῌ
     53  '\u1ff3\u1ffc',              // ῳῼ
     54  '\ufb05\ufb06',              // ſtst
     55 
     56  // Everything below this line is a well-behaved case-folding
     57  // equivalence class with more than two characters but only one
     58  // canonical case-folded character
     59  '\u01c4\u01c5\u01c6', '\u01c7\u01c8\u01c9', '\u01ca\u01cb\u01cc',
     60  '\u01f1\u01f2\u01f3', '\u0345\u0399\u03b9\u1fbe', '\u0392\u03b2\u03d0',
     61  '\u0395\u03b5\u03f5', '\u039a\u03ba\u03f0', '\u03a0\u03c0\u03d6',
     62  '\u03a1\u03c1\u03f1', '\u03a3\u03c2\u03c3', '\u03a6\u03c6\u03d5',
     63  '\u0412\u0432\u1c80', '\u0414\u0434\u1c81', '\u041e\u043e\u1c82',
     64  '\u0421\u0441\u1c83', '\u0422\u0442\u1c84\u1c85', '\u042a\u044a\u1c86',
     65  '\u0462\u0463\u1c87', '\u1c88\ua64a\ua64b', '\u1e60\u1e61\u1e9b'
     66 ];
     67 
// Run the checks for every listed equivalence class when the script loads.
TestAll();