TestSegmenter.cpp (8553B)
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */ 3 /* This Source Code Form is subject to the terms of the Mozilla Public 4 * License, v. 2.0. If a copy of the MPL was not distributed with this file, 5 * You can obtain one at http://mozilla.org/MPL/2.0/. */ 6 7 #include "gtest/gtest.h" 8 9 #include "mozilla/intl/Segmenter.h" 10 #include "mozilla/Preferences.h" 11 12 namespace mozilla::intl { 13 14 TEST(IntlSegmenter, TestLineBreakIteratorUtf16SeekOld) 15 { 16 nsresult rv = Preferences::SetBool("intl.icu4x.segmenter.enabled", false); 17 EXPECT_TRUE(rv == NS_OK); 18 19 const SegmenterOptions options{SegmenterGranularity::Line}; 20 auto result = Segmenter::TryCreate("en", options); 21 ASSERT_TRUE(result.isOk()); 22 auto lineSegmenter = result.unwrap(); 23 24 const char16_t text[] = u"hello world"; 25 UniquePtr<SegmentIteratorUtf16> segIter = 26 lineSegmenter->Segment(MakeStringSpan(text)); 27 28 // Seek to space between "hello" and "world". 29 ASSERT_EQ(segIter->Seek(5u), Some(11u)); 30 31 ASSERT_EQ(segIter->Next(), Nothing()); 32 33 // Same as calling Next(). 34 ASSERT_EQ(segIter->Seek(0u), Nothing()); 35 } 36 37 TEST(IntlSegmenter, TestLineBreakIteratorUtf16Seek) 38 { 39 nsresult rv = Preferences::SetBool("intl.icu4x.segmenter.enabled", true); 40 EXPECT_TRUE(rv == NS_OK); 41 42 const SegmenterOptions options{SegmenterGranularity::Line}; 43 auto result = Segmenter::TryCreate("en", options); 44 ASSERT_TRUE(result.isOk()); 45 auto lineSegmenter = result.unwrap(); 46 47 const char16_t text[] = u"hello world"; 48 UniquePtr<SegmentIteratorUtf16> segIter = 49 lineSegmenter->Segment(MakeStringSpan(text)); 50 51 // Seek to space between "hello" and "world". 52 // UAX#14 rule returns before "w". 53 ASSERT_EQ(segIter->Seek(5u), Some(6u)); 54 55 ASSERT_EQ(segIter->Next(), Some(11u)); 56 57 ASSERT_EQ(segIter->Next(), Nothing()); 58 59 // Same as calling Next(). 60 ASSERT_EQ(segIter->Seek(0u), Nothing()); 61 } 62 63 TEST(IntlSegmenter, TestWordBreakIteratorUtf16Simple) 64 { 65 const SegmenterOptions options{SegmenterGranularity::Word}; 66 auto result = Segmenter::TryCreate("en", options); 67 ASSERT_TRUE(result.isOk()); 68 auto wordSegmenter = result.unwrap(); 69 70 const char16_t text[] = u"hello world"; 71 UniquePtr<SegmentIteratorUtf16> segIter = 72 wordSegmenter->Segment(MakeStringSpan(text)); 73 74 ASSERT_EQ(segIter->Next(), Some(5u)); 75 ASSERT_EQ(segIter->Next(), Some(6u)); 76 ASSERT_EQ(segIter->Next(), Some(11u)); 77 ASSERT_EQ(segIter->Next(), Nothing()); 78 } 79 80 TEST(IntlSegmenter, TestWordBreakIteratorUtf16Seek) 81 { 82 const SegmenterOptions options{SegmenterGranularity::Word}; 83 auto result = Segmenter::TryCreate("en", options); 84 ASSERT_TRUE(result.isOk()); 85 auto wordSegmenter = result.unwrap(); 86 87 const char16_t text[] = u"hello world"; 88 UniquePtr<SegmentIteratorUtf16> segIter = 89 wordSegmenter->Segment(MakeStringSpan(text)); 90 91 // Seek to the space between "hello" and "world" 92 ASSERT_EQ(segIter->Seek(5u), Some(6u)); 93 94 ASSERT_EQ(segIter->Next(), Some(11u)); 95 ASSERT_EQ(segIter->Next(), Nothing()); 96 97 // Same as calling Next(). 98 ASSERT_EQ(segIter->Seek(0u), Nothing()); 99 } 100 101 TEST(IntlSegmenter, TestWordBreakIteratorUtf16ResetAndSeekOld) 102 { 103 nsresult rv = Preferences::SetBool("intl.icu4x.segmenter.enabled", false); 104 EXPECT_TRUE(rv == NS_OK); 105 106 const SegmenterOptions options{SegmenterGranularity::Word}; 107 auto result = Segmenter::TryCreate("en", options); 108 ASSERT_TRUE(result.isOk()); 109 auto wordSegmenter = result.unwrap(); 110 111 const char16_t text[] = u"hello world"; 112 UniquePtr<SegmentIteratorUtf16> segIter = 113 wordSegmenter->Segment(MakeStringSpan(text)); 114 115 ASSERT_EQ(segIter->Next(), Some(5u)); 116 static_cast<WordBreakIteratorUtf16*>(segIter.get()) 117 ->Reset(MakeStringSpan(text)); 118 ASSERT_EQ(segIter->Next(), Some(5u)); 119 ASSERT_EQ(segIter->Next(), Some(6u)); 120 ASSERT_EQ(segIter->Next(), Some(11u)); 121 122 static_cast<WordBreakIteratorUtf16*>(segIter.get()) 123 ->Reset(MakeStringSpan(text)); 124 // Seek to space between "hello" and "world". 125 ASSERT_EQ(segIter->Seek(5u), Some(6u)); 126 } 127 128 TEST(IntlSegmenter, TestWordBreakIteratorUtf16ResetAndSeek) 129 { 130 nsresult rv = Preferences::SetBool("intl.icu4x.segmenter.enabled", true); 131 EXPECT_TRUE(rv == NS_OK); 132 133 const SegmenterOptions options{SegmenterGranularity::Word}; 134 auto result = Segmenter::TryCreate("en", options); 135 ASSERT_TRUE(result.isOk()); 136 auto wordSegmenter = result.unwrap(); 137 138 const char16_t text[] = u"hello world"; 139 UniquePtr<SegmentIteratorUtf16> segIter = 140 wordSegmenter->Segment(MakeStringSpan(text)); 141 142 ASSERT_EQ(segIter->Next(), Some(5u)); 143 static_cast<WordBreakIteratorUtf16*>(segIter.get()) 144 ->Reset(MakeStringSpan(text)); 145 ASSERT_EQ(segIter->Next(), Some(5u)); 146 ASSERT_EQ(segIter->Next(), Some(6u)); 147 ASSERT_EQ(segIter->Next(), Some(11u)); 148 149 static_cast<WordBreakIteratorUtf16*>(segIter.get()) 150 ->Reset(MakeStringSpan(text)); 151 // Seek to space between "hello" and "world". 152 ASSERT_EQ(segIter->Seek(5u), Some(6u)); 153 } 154 155 TEST(IntlSegmenter, TestGraphemeClusterBreakIteratorUtf16Simple) 156 { 157 SegmenterOptions options{SegmenterGranularity::Grapheme}; 158 auto result = Segmenter::TryCreate("en", options); 159 ASSERT_TRUE(result.isOk()); 160 auto graphemeClusterSegmenter = result.unwrap(); 161 162 const char16_t text[] = u"hello world"; 163 UniquePtr<SegmentIteratorUtf16> segIter = 164 graphemeClusterSegmenter->Segment(MakeStringSpan(text)); 165 166 ASSERT_EQ(segIter->Next(), Some(1u)); 167 ASSERT_EQ(segIter->Next(), Some(2u)); 168 ASSERT_EQ(segIter->Next(), Some(3u)); 169 ASSERT_EQ(segIter->Next(), Some(4u)); 170 ASSERT_EQ(segIter->Next(), Some(5u)); 171 ASSERT_EQ(segIter->Next(), Some(6u)); 172 ASSERT_EQ(segIter->Next(), Some(7u)); 173 ASSERT_EQ(segIter->Next(), Some(8u)); 174 ASSERT_EQ(segIter->Next(), Some(9u)); 175 ASSERT_EQ(segIter->Next(), Some(10u)); 176 ASSERT_EQ(segIter->Next(), Some(11u)); 177 ASSERT_EQ(segIter->Next(), Nothing()); 178 } 179 180 TEST(IntlSegmenter, TestGraphemeClusterBreakIteratorUtf16Seek) 181 { 182 SegmenterOptions options{SegmenterGranularity::Grapheme}; 183 auto result = Segmenter::TryCreate("en", options); 184 ASSERT_TRUE(result.isOk()); 185 auto graphemeClusterSegmenter = result.unwrap(); 186 187 const char16_t text[] = u"hello world"; 188 UniquePtr<SegmentIteratorUtf16> segIter = 189 graphemeClusterSegmenter->Segment(MakeStringSpan(text)); 190 191 // Seek to the space between "hello" and "world" 192 ASSERT_EQ(segIter->Seek(5u), Some(6u)); 193 194 ASSERT_EQ(segIter->Next(), Some(7u)); 195 ASSERT_EQ(segIter->Next(), Some(8u)); 196 ASSERT_EQ(segIter->Next(), Some(9u)); 197 ASSERT_EQ(segIter->Next(), Some(10u)); 198 ASSERT_EQ(segIter->Next(), Some(11u)); 199 ASSERT_EQ(segIter->Next(), Nothing()); 200 201 // Same as calling Next(). 202 ASSERT_EQ(segIter->Seek(0u), Nothing()); 203 } 204 205 TEST(IntlSegmenter, TestGraphemeClusterBreakReverseIteratorUtf16) 206 { 207 const char16_t text[] = u"hello world"; 208 GraphemeClusterBreakReverseIteratorUtf16 segIter(MakeStringSpan(text)); 209 210 // Seek to the space between "hello" and "world" 211 ASSERT_EQ(segIter.Seek(6u), Some(5u)); 212 213 ASSERT_EQ(segIter.Next(), Some(4u)); 214 ASSERT_EQ(segIter.Next(), Some(3u)); 215 ASSERT_EQ(segIter.Next(), Some(2u)); 216 ASSERT_EQ(segIter.Next(), Some(1u)); 217 ASSERT_EQ(segIter.Next(), Some(0u)); 218 ASSERT_EQ(segIter.Next(), Nothing()); 219 220 // Same as calling Next(). 221 ASSERT_EQ(segIter.Seek(0u), Nothing()); 222 } 223 224 TEST(IntlSegmenter, TestSentenceBreakIteratorUtf16) 225 { 226 nsresult rv = Preferences::SetBool("intl.icu4x.segmenter.enabled", true); 227 EXPECT_TRUE(rv == NS_OK); 228 229 SegmenterOptions options{SegmenterGranularity::Sentence}; 230 auto result = Segmenter::TryCreate("en", options); 231 ASSERT_TRUE(result.isOk()); 232 auto sentenceSegmenter = result.unwrap(); 233 234 const char16_t text[] = u"Hello world. Hello world."; 235 UniquePtr<SegmentIteratorUtf16> segIter = 236 sentenceSegmenter->Segment(MakeStringSpan(text)); 237 238 ASSERT_EQ(segIter->Next(), Some(13u)); 239 ASSERT_EQ(segIter->Next(), Some(25u)); 240 ASSERT_EQ(segIter->Next(), Nothing()); 241 242 // Same as calling Next(). 243 ASSERT_EQ(segIter->Seek(0u), Nothing()); 244 } 245 246 TEST(IntlSegmenter, TestSentenceBreakIteratorUtf16Seek) 247 { 248 nsresult rv = Preferences::SetBool("intl.icu4x.segmenter.enabled", true); 249 EXPECT_TRUE(rv == NS_OK); 250 251 SegmenterOptions options{SegmenterGranularity::Sentence}; 252 auto result = Segmenter::TryCreate("en", options); 253 ASSERT_TRUE(result.isOk()); 254 auto sentenceSegmenter = result.unwrap(); 255 256 const char16_t text[] = u"Hello world. Hello world."; 257 UniquePtr<SegmentIteratorUtf16> segIter = 258 sentenceSegmenter->Segment(MakeStringSpan(text)); 259 260 ASSERT_EQ(segIter->Seek(5u), Some(13u)); 261 } 262 263 } // namespace mozilla::intl