non-unicode-case-folding.js (2467B)
// |jit-test| skip-if: typeof Intl === 'undefined'

// See https://tc39.es/ecma262/#sec-runtime-semantics-canonicalize-ch
//
// Test-side model of Canonicalize(ch) for case-insensitive matching without
// the unicode flag.
//
// Returns the first UTF-16 code unit (a number) of ch's uppercase form, or
// ch itself (a string) when the uppercase mapping must not be applied.
// Because the two kinds of result have different types, a character that
// falls back to itself never compares === to a character that was mapped.
function Canonicalize(ch) {
  const upper = ch.toUpperCase();
  // An uppercase form longer than one code unit is not usable.
  if (upper.length > 1) return ch;
  const upperUnit = upper.charCodeAt(0);
  // A non-ASCII character must not be folded onto an ASCII one.
  if (ch.charCodeAt(0) >= 128 && upperUnit < 128) return ch;
  return upperUnit;
}

// Formats the first code unit of c as "U+<HEX>" for assertion messages.
function DescribeChar(c) {
  return `U+${c.charCodeAt(0).toString(16).toUpperCase()}`;
}

// Checks every ordered pair of distinct positions in eclass: a
// case-insensitive regexp built from the first character -- once as a bare
// atom and once inside a character class -- must match the second character
// exactly when Canonicalize yields the same value for both.
function TestEquivalenceClass(eclass) {
  // With fewer than two members there are no pairs to check, and no regexp
  // should be constructed at all.
  if (eclass.length < 2) return;

  for (let i = 0; i < eclass.length; i++) {
    const pattern = eclass[i];
    // Everything derived from the pattern character is invariant across the
    // inner loop. Neither regexp is global or sticky, so test() keeps no
    // state between calls and the objects can be reused.
    const atomRe = new RegExp(pattern, 'i');
    const classRe = new RegExp('[' + pattern + ']', 'i');
    const patternCanon = Canonicalize(pattern);

    for (let j = 0; j < eclass.length; j++) {
      if (i === j) continue;
      const input = eclass[j];
      const shouldMatch = patternCanon === Canonicalize(input);

      // The message identifies the failing pair and regexp form.
      const pair = `${DescribeChar(pattern)} against ${DescribeChar(input)}`;
      assertEq(atomRe.test(input), shouldMatch, `atom ${pair}`);
      assertEq(classRe.test(input), shouldMatch, `character class ${pair}`);
    }
  }
}

// Runs TestEquivalenceClass over every entry of equivalence_classes.
function TestAll() {
  for (const eclass of equivalence_classes) {
    TestEquivalenceClass(eclass);
  }
}

// Interesting case-folding equivalence classes (as determined by
// ICU's UnicodeSet::closeOver). A class is interesting if it contains
// more than two characters, or if it contains any characters in
// IgnoreSet or SpecialAddSet as defined in new-regexp/special-case.h.
// One string per equivalence class; TestAll() feeds each string to
// TestEquivalenceClass().
var equivalence_classes = [
  '\u0041\u0061',              // A a (sanity check)
  '\u004b\u006b\u212a',        // K k, U+212A KELVIN SIGN
  '\u0053\u0073\u017f',        // S s ſ
  '\u00b5\u039c\u03bc',        // µ Μ μ
  '\u00c5\u00e5\u212b',        // Å å, U+212B ANGSTROM SIGN
  '\u00df\u1e9e',              // ß ẞ
  '\u03a9\u03c9\u2126',        // Ω ω, U+2126 OHM SIGN
  '\u0390\u1fd3',              // ΐ (U+0390), ΐ (U+1FD3)
  '\u0398\u03b8\u03d1\u03f4',  // Θ θ ϑ ϴ
  '\u03b0\u1fe3',              // ΰ (U+03B0), ΰ (U+1FE3)
  '\u1f80\u1f88',              // ᾀ ᾈ
  '\u1fb3\u1fbc',              // ᾳ ᾼ
  '\u1fc3\u1fcc',              // ῃ ῌ
  '\u1ff3\u1ffc',              // ῳ ῼ
  '\ufb05\ufb06',              // ligatures U+FB05 (long s + t), U+FB06 (st)

  // The remaining entries are well-behaved classes: each has more than
  // two members, but all members share a single canonical case-folded
  // character.

  // Latin digraphs
  '\u01c4\u01c5\u01c6',
  '\u01c7\u01c8\u01c9',
  '\u01ca\u01cb\u01cc',
  '\u01f1\u01f2\u01f3',

  // Greek
  '\u0345\u0399\u03b9\u1fbe',
  '\u0392\u03b2\u03d0',
  '\u0395\u03b5\u03f5',
  '\u039a\u03ba\u03f0',
  '\u03a0\u03c0\u03d6',
  '\u03a1\u03c1\u03f1',
  '\u03a3\u03c2\u03c3',
  '\u03a6\u03c6\u03d5',

  // Cyrillic
  '\u0412\u0432\u1c80',
  '\u0414\u0434\u1c81',
  '\u041e\u043e\u1c82',
  '\u0421\u0441\u1c83',
  '\u0422\u0442\u1c84\u1c85',
  '\u042a\u044a\u1c86',
  '\u0462\u0463\u1c87',
  '\u1c88\ua64a\ua64b',

  // Latin S with dot above, plus long s with dot above
  '\u1e60\u1e61\u1e9b'
];

TestAll();