sentence-latin.js (2466B)
// |reftest| skip-if(!this.hasOwnProperty('Intl')||!this.Intl.Segmenter)

// Sentence segmentation of Latin-script text. The cases are grouped by the
// sentence boundary rule (SBn) they exercise; see
// https://www.unicode.org/reports/tr29/#Sentence_Boundary_Rules

// Maps each input string to the list of sentence segments expected for it.
const strings = {
  // SB1, SB2
  "": [],

  // SB3
  "\r\n": ["\r\n"],

  // SB4
  "First paragraph.\nSecond paragraph.": ["First paragraph.\n", "Second paragraph."],
  "First paragraph.\rSecond paragraph.": ["First paragraph.\r", "Second paragraph."],
  "First paragraph.\r\nSecond paragraph.": ["First paragraph.\r\n", "Second paragraph."],
  "First paragraph.\x85Second paragraph.": ["First paragraph.\x85", "Second paragraph."],

  // SB5
  "\xADWo\xADrd\xAD.\xAD": ["\xADWo\xADrd\xAD.\xAD"],
  "Word.\n\xAD": ["Word.\n", "\xAD"],
  "Word.\r\xAD\n": ["Word.\r", "\xAD\n"],

  // SB6
  ".2": [".2"],
  "1.2": ["1.2"],
  "!2": ["!", "2"],
  "1!2": ["1!", "2"],

  // SB7
  "A.B": ["A.B"],
  "a.B": ["a.B"],
  "A. B": ["A. ", "B"],
  "a. B": ["a. ", "B"],

  // SB8
  "#.a": ["#.a"],
  "#. a": ["#. a"],
  "#. # a": ["#. # a"],
  "#. 1 a": ["#. 1 a"],
  "#. , a": ["#. , a"],
  "#. Aa": ["#. ", "Aa"],

  // SB8a
  "Word..": ["Word.."],
  "Word . , ": ["Word . , "],
  "Word.'\t , ": ["Word.'\t , "],

  // SB9, SB10, SB11
  "Word.''": ["Word.''"],
  "Word.'\t ": ["Word.'\t "],
  "Word.'\t \n": ["Word.'\t \n"],
};

// Segmenter used for every entry of |strings|. Declared ahead of
// |assertSegments| so the helper never depends on declaration order.
const segmenter = new Intl.Segmenter("en", {granularity: "sentence"});

/**
 * Segment |string| with the shared English sentence segmenter and verify the
 * result.
 *
 * Checks that:
 *  - the produced segment strings equal |sentences|, in order;
 *  - |containing(i)| returns the enclosing segment for every code unit index
 *    |i| covered by that segment;
 *  - |containing()| returns |undefined| for indices outside of the string.
 *
 * @param {string} string - The input text to segment.
 * @param {string[]} sentences - The expected sentence segments.
 */
function assertSegments(string, sentences) {
  const seg = segmenter.segment(string);
  const segments = [...seg];

  // The computed segments match the expected value.
  assertEqArray(segments.map(({segment}) => segment), sentences);

  // |containing()| should return the same result.
  for (const expected of segments) {
    const {segment, index} = expected;
    for (let i = index; i < index + segment.length; ++i) {
      const actual = seg.containing(i);
      assertDeepEq(actual, expected);
    }
  }

  // |containing()| returns undefined for out-of-range indices. This also
  // covers the empty string, which produces no segments at all.
  assertEq(seg.containing(-1), undefined);
  assertEq(seg.containing(string.length), undefined);
}

for (const [string, sentences] of Object.entries(strings)) {
  assertSegments(string, sentences);
}

// Locale-dependent sentence segmentation.
{
  // https://en.wikipedia.org/wiki/Greek_question_mark#Greek_question_mark
  const string = "A sentence; semicolon separated.";

  const english = new Intl.Segmenter("en", {granularity: "sentence"});
  const greek = new Intl.Segmenter("el", {granularity: "sentence"});

  // A single sentence in English.
  assertEq([...english.segment(string)].length, 1);

  // Two sentences in Greek.
  assertEq([...greek.segment(string)].length, 2);
}

if (typeof reportCompare === "function")
  reportCompare(0, 0);