grapheme.js (3982B)
// |reftest| skip-if(!this.hasOwnProperty('Intl')||!this.Intl.Segmenter)

// Grapheme boundaries are locale independent. Test with various locales to
// ensure we get the same results.
const locales = [
  "en", "de", "fr", "ar", "ja", "zh", "th",
];

// Maps each input string to the grapheme clusters it is expected to be
// segmented into, in order.
const strings = {
  // Empty string
  "": [],

  // Ascii
  "test": "test".split(""),
  "hello world": "hello world".split(""),
  "hello\0world": "hello\0world".split(""),
  "\r\n": ["\r\n"],

  // Latin-1
  "äöü éèê µß \xff": "äöü éèê µß \xff".split(""),

  // Two-Byte
  "中文字": "中文字".split(""),

  // Grapheme Clusters: https://www.unicode.org/reports/tr29/#Table_Sample_Grapheme_Clusters
  "e\u0300": ["e\u0300"],
  "\u1100\u1161\u11A8": ["\u1100\u1161\u11A8"], // Hangul syllable "gag"
  "\u0E01\u0E33": ["\u0E01\u0E33"], // Thai kam
  "\u0937\u093F": ["\u0937\u093F"], // Devanagari ssi

  // Emojis
  "\u263A\uFE0F": ["\u263A\uFE0F"], // Variant selector
  "\u{1F385}\u{1F3FB}": ["\u{1F385}\u{1F3FB}"], // Skin tone selector
  "\u{1F469}\u{1F3FD}\u{200D}\u{1F52C}": ["\u{1F469}\u{1F3FD}\u{200D}\u{1F52C}"], // ZWJ
  "\u{1F469}\u{1F3FD}\u{200D}\u{1F52C}\u{FE0F}": ["\u{1F469}\u{1F3FD}\u{200D}\u{1F52C}\u{FE0F}"], // ZWJ + VS
  "\u{1F926}\u{1F3FC}\u{200D}\u{2642}\u{FE0F}": ["\u{1F926}\u{1F3FC}\u{200D}\u{2642}\u{FE0F}"], // ZWJ + VS with BMP modifier
  "\u{1F1E9}\u{1F1EA}": ["\u{1F1E9}\u{1F1EA}"], // Flags
  "\u{1F3F4}\u{E0067}\u{E0062}\u{E0073}\u{E0063}\u{E0074}\u{E007F}": ["\u{1F3F4}\u{E0067}\u{E0062}\u{E0073}\u{E0063}\u{E0074}\u{E007F}"], // Subdivision flags
};

// Asserts that |obj| has the shape of a Segment Data object produced with
// grapheme granularity: a plain object whose own keys are exactly "segment",
// "index" and "input" (in that order), and whose |segment| is found in
// |input| at position |index|. Fails through the harness' assertEq.
function assertIsSegmentDataObject(obj) {
  // The prototype is %Object.prototype%.
  assertEq(Object.getPrototypeOf(obj), Object.prototype);

  // The Segment Data object has exactly three own properties.
  const keys = Reflect.ownKeys(obj);
  assertEq(keys.length, 3);
  assertEq(keys[0], "segment");
  assertEq(keys[1], "index");
  assertEq(keys[2], "input");

  // Ensure each property has the correct value type.
  assertEq(typeof obj.segment, "string");
  assertEq(typeof obj.index, "number");
  assertEq(typeof obj.input, "string");

  // |index| is an integer index into |input|.
  assertEq(Number.isInteger(obj.index), true);
  assertEq(obj.index >= 0, true);
  assertEq(obj.index < obj.input.length, true);

  // Segments are non-empty.
  assertEq(obj.segment.length > 0, true);

  // Ensure the segment is present in the input at the correct position.
  // |index| was verified above to be a non-negative integer, so slice() with
  // an explicit end offset selects exactly |segment.length| code units.
  assertEq(obj.input.slice(obj.index, obj.index + obj.segment.length), obj.segment);
}

// Collects the segments of |string| by repeatedly calling
// %Segments.prototype%.containing, starting at index 0 and advancing by the
// length of each returned segment. Returns the Segment Data objects in order.
function segmentsFromContaining(segmenter, string) {
  const segments = segmenter.segment(string);

  const result = [];
  // The loop stops as soon as containing() returns a falsy value, which is
  // expected once |index| has moved past the end of the string.
  for (let index = 0, data; (data = segments.containing(index)); index += data.segment.length) {
    result.push(data);
  }
  return result;
}

for (const locale of locales) {
  const segmenter = new Intl.Segmenter(locale, {granularity: "grapheme"});

  const resolved = segmenter.resolvedOptions();
  assertEq(resolved.locale, locale);
  assertEq(resolved.granularity, "grapheme");

  for (const [string, graphemes] of Object.entries(strings)) {
    const segments = [...segmenter.segment(string)];

    // Assert each segment is a valid Segment Data object.
    segments.forEach(assertIsSegmentDataObject);

    // Concatenating all segments should return the input.
    assertEq(segments.reduce((acc, {segment}) => acc + segment, ""), string);

    // The "input" property matches the original input string.
    assertEq(segments.every(({input}) => input === string), true);

    // The indices are sorted in strictly ascending order.
    assertEq(segments.every(({index}, i) => i === 0 || index > segments[i - 1].index), true);

    // The computed segments match the expected value.
    assertEqArray(segments.map(({segment}) => segment), graphemes);

    // Segment iteration and %Segments.prototype%.containing return the same results.
    assertDeepEq(segmentsFromContaining(segmenter, string), segments);
  }
}

if (typeof reportCompare === "function")
  reportCompare(0, 0);