tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

sentence-latin.js (2466B)


      1 // |reftest| skip-if(!this.hasOwnProperty('Intl')||!this.Intl.Segmenter)
      2 
      3 // https://www.unicode.org/reports/tr29/#Sentence_Boundary_Rules
      4 
      5 const strings = {
      6  // SB1, SB2
      7  "": [],
      8 
      9  // SB3
     10  "\r\n": ["\r\n"],
     11 
     12  // SB4
     13  "First paragraph.\nSecond paragraph.": ["First paragraph.\n", "Second paragraph."],
     14  "First paragraph.\rSecond paragraph.": ["First paragraph.\r", "Second paragraph."],
     15  "First paragraph.\r\nSecond paragraph.": ["First paragraph.\r\n", "Second paragraph."],
     16  "First paragraph.\x85Second paragraph.": ["First paragraph.\x85", "Second paragraph."],
     17 
     18  // SB5
     19  "\xADWo\xADrd\xAD.\xAD": ["\xADWo\xADrd\xAD.\xAD"],
     20  "Word.\n\xAD": ["Word.\n", "\xAD"],
     21  "Word.\r\xAD\n": ["Word.\r", "\xAD\n"],
     22 
     23  // SB6
     24  ".2": [".2"],
     25  "1.2": ["1.2"],
     26  "!2": ["!", "2"],
     27  "1!2": ["1!", "2"],
     28 
     29  // SB7
     30  "A.B": ["A.B"],
     31  "a.B": ["a.B"],
     32  "A. B": ["A. ", "B"],
     33  "a. B": ["a. ", "B"],
     34 
     35  // SB8
     36  "#.a": ["#.a"],
     37  "#. a": ["#. a"],
     38  "#. # a": ["#. # a"],
     39  "#. 1 a": ["#. 1 a"],
     40  "#. , a": ["#. , a"],
     41  "#. Aa": ["#. ", "Aa"],
     42 
     43  // SB8a
     44  "Word..": ["Word.."],
     45  "Word . , ": ["Word . , "],
     46  "Word.'\t , ": ["Word.'\t , "],
     47 
     48  // SB9, SB10, SB11
     49  "Word.''": ["Word.''"],
     50  "Word.'\t ": ["Word.'\t "],
     51  "Word.'\t \n": ["Word.'\t \n"],
     52 };
     53 
     54 function assertSegments(string, sentences) {
     55  let seg = segmenter.segment(string);
     56  let segments = [...seg];
     57 
     58  // The computed segments match the expected value.
     59  assertEqArray(segments.map(({segment}) => segment), sentences);
     60 
     61  // |containing()| should return the same result.
     62  for (let expected of segments) {
     63    let {segment, index} = expected;
     64    for (let i = index; i < index + segment.length; ++i) {
     65      let actual = seg.containing(i);
     66      assertDeepEq(actual, expected);
     67    }
     68  }
     69 }
     70 
     71 let segmenter = new Intl.Segmenter("en", {granularity: "sentence"});
     72 
     73 for (let [string, words] of Object.entries(strings)) {
     74  assertSegments(string, words);
     75 }
     76 
     77 // Locale-dependent sentence segmentation.
     78 {
     79  // https://en.wikipedia.org/wiki/Greek_question_mark#Greek_question_mark
     80  let string = "A sentence; semicolon separated.";
     81 
     82  let english = new Intl.Segmenter("en", {granularity: "sentence"});
     83  let greek = new Intl.Segmenter("el", {granularity: "sentence"});
     84 
     85  // A single sentence in English.
     86  assertEq([...english.segment(string)].length, 1);
     87 
     88  // Two sentences in Greek.
     89  assertEq([...greek.segment(string)].length, 2);
     90 }
     91 
     92 if (typeof reportCompare === "function")
     93  reportCompare(0, 0);