TestCollator.cpp (11820B)
1 /* This Source Code Form is subject to the terms of the Mozilla Public 2 * License, v. 2.0. If a copy of the MPL was not distributed with this 3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 4 #include "gtest/gtest.h" 5 6 #include <string.h> 7 #include <string_view> 8 #include "mozilla/intl/Collator.h" 9 #include "mozilla/Span.h" 10 #include "TestBuffer.h" 11 12 namespace mozilla::intl { 13 14 TEST(IntlCollator, SetAttributesInternal) 15 { 16 // Run through each settings to make sure MOZ_ASSERT is not triggered for 17 // misconfigured attributes. 18 auto result = Collator::TryCreate("en-US"); 19 ASSERT_TRUE(result.isOk()); 20 auto collator = result.unwrap(); 21 22 collator->SetStrength(Collator::Strength::Primary); 23 collator->SetStrength(Collator::Strength::Secondary); 24 collator->SetStrength(Collator::Strength::Tertiary); 25 collator->SetStrength(Collator::Strength::Quaternary); 26 collator->SetStrength(Collator::Strength::Identical); 27 collator->SetStrength(Collator::Strength::Default); 28 29 collator->SetAlternateHandling(Collator::AlternateHandling::NonIgnorable) 30 .unwrap(); 31 collator->SetAlternateHandling(Collator::AlternateHandling::Shifted).unwrap(); 32 collator->SetAlternateHandling(Collator::AlternateHandling::Default).unwrap(); 33 34 collator->SetCaseFirst(Collator::CaseFirst::False).unwrap(); 35 collator->SetCaseFirst(Collator::CaseFirst::Upper).unwrap(); 36 collator->SetCaseFirst(Collator::CaseFirst::Lower).unwrap(); 37 38 collator->SetCaseLevel(Collator::Feature::On).unwrap(); 39 collator->SetCaseLevel(Collator::Feature::Off).unwrap(); 40 collator->SetCaseLevel(Collator::Feature::Default).unwrap(); 41 42 collator->SetNumericCollation(Collator::Feature::On).unwrap(); 43 collator->SetNumericCollation(Collator::Feature::Off).unwrap(); 44 collator->SetNumericCollation(Collator::Feature::Default).unwrap(); 45 46 collator->SetNormalizationMode(Collator::Feature::On).unwrap(); 47 collator->SetNormalizationMode(Collator::Feature::Off).unwrap(); 48 collator->SetNormalizationMode(Collator::Feature::Default).unwrap(); 49 } 50 51 TEST(IntlCollator, GetSortKey) 52 { 53 // Do some light sort key comparisons to ensure everything is wired up 54 // correctly. This is not doing extensive correctness testing. 55 auto result = Collator::TryCreate("en-US"); 56 ASSERT_TRUE(result.isOk()); 57 auto collator = result.unwrap(); 58 TestBuffer<uint8_t> bufferA; 59 TestBuffer<uint8_t> bufferB; 60 61 auto compareSortKeys = [&](const char16_t* a, const char16_t* b) { 62 collator->GetSortKey(MakeStringSpan(a), bufferA).unwrap(); 63 collator->GetSortKey(MakeStringSpan(b), bufferB).unwrap(); 64 return strcmp(reinterpret_cast<const char*>(bufferA.data()), 65 reinterpret_cast<const char*>(bufferB.data())); 66 }; 67 68 ASSERT_TRUE(compareSortKeys(u"aaa", u"bbb") < 0); 69 ASSERT_TRUE(compareSortKeys(u"bbb", u"aaa") > 0); 70 ASSERT_TRUE(compareSortKeys(u"aaa", u"aaa") == 0); 71 ASSERT_TRUE(compareSortKeys(u"👍", u"👎") < 0); 72 } 73 74 TEST(IntlCollator, CompareStrings) 75 { 76 // Do some light string comparisons to ensure everything is wired up 77 // correctly. This is not doing extensive correctness testing. 78 auto result = Collator::TryCreate("en-US"); 79 ASSERT_TRUE(result.isOk()); 80 auto collator = result.unwrap(); 81 TestBuffer<uint8_t> bufferA; 82 TestBuffer<uint8_t> bufferB; 83 84 ASSERT_EQ(collator->CompareStrings(u"aaa", u"bbb"), -1); 85 ASSERT_EQ(collator->CompareStrings(u"bbb", u"aaa"), 1); 86 ASSERT_EQ(collator->CompareStrings(u"aaa", u"aaa"), 0); 87 ASSERT_EQ(collator->CompareStrings(u"👍", u"👎"), -1); 88 } 89 90 TEST(IntlCollator, SetOptionsSensitivity) 91 { 92 // Test the ECMA 402 sensitivity behavior per: 93 // https://tc39.es/ecma402/#sec-collator-comparestrings 94 auto result = Collator::TryCreate("en-US"); 95 ASSERT_TRUE(result.isOk()); 96 auto collator = result.unwrap(); 97 98 TestBuffer<uint8_t> bufferA; 99 TestBuffer<uint8_t> bufferB; 100 ICUResult optResult = Ok(); 101 Collator::Options options{}; 102 103 options.sensitivity = Collator::Sensitivity::Base; 104 optResult = collator->SetOptions(options); 105 ASSERT_TRUE(optResult.isOk()); 106 ASSERT_EQ(collator->CompareStrings(u"a", u"b"), -1); 107 ASSERT_EQ(collator->CompareStrings(u"a", u"á"), 0); 108 ASSERT_EQ(collator->CompareStrings(u"a", u"A"), 0); 109 110 options.sensitivity = Collator::Sensitivity::Accent; 111 optResult = collator->SetOptions(options); 112 ASSERT_TRUE(optResult.isOk()); 113 ASSERT_EQ(collator->CompareStrings(u"a", u"b"), -1); 114 ASSERT_EQ(collator->CompareStrings(u"a", u"á"), -1); 115 ASSERT_EQ(collator->CompareStrings(u"a", u"A"), 0); 116 117 options.sensitivity = Collator::Sensitivity::Case; 118 optResult = collator->SetOptions(options); 119 ASSERT_TRUE(optResult.isOk()); 120 ASSERT_EQ(collator->CompareStrings(u"a", u"b"), -1); 121 ASSERT_EQ(collator->CompareStrings(u"a", u"á"), 0); 122 ASSERT_EQ(collator->CompareStrings(u"a", u"A"), -1); 123 124 options.sensitivity = Collator::Sensitivity::Variant; 125 optResult = collator->SetOptions(options); 126 ASSERT_TRUE(optResult.isOk()); 127 ASSERT_EQ(collator->CompareStrings(u"a", u"b"), -1); 128 ASSERT_EQ(collator->CompareStrings(u"a", u"á"), -1); 129 ASSERT_EQ(collator->CompareStrings(u"a", u"A"), -1); 130 } 131 132 TEST(IntlCollator, LocaleSensitiveCollations) 133 { 134 UniquePtr<Collator> collator = nullptr; 135 TestBuffer<uint8_t> bufferA; 136 TestBuffer<uint8_t> bufferB; 137 138 auto changeLocale = [&](const char* locale) { 139 auto result = Collator::TryCreate(locale); 140 ASSERT_TRUE(result.isOk()); 141 collator = result.unwrap(); 142 143 Collator::Options options{}; 144 options.sensitivity = Collator::Sensitivity::Base; 145 auto optResult = collator->SetOptions(options); 146 ASSERT_TRUE(optResult.isOk()); 147 }; 148 149 // Swedish treats "Ö" as a separate character, which sorts after "Z". 150 changeLocale("en-US"); 151 ASSERT_EQ(collator->CompareStrings(u"Österreich", u"Västervik"), -1); 152 changeLocale("sv-SE"); 153 ASSERT_EQ(collator->CompareStrings(u"Österreich", u"Västervik"), 1); 154 155 // Country names in their respective scripts. 156 auto china = MakeStringSpan(u"中国"); 157 auto japan = MakeStringSpan(u"日本"); 158 auto korea = MakeStringSpan(u"한국"); 159 160 changeLocale("en-US"); 161 ASSERT_EQ(collator->CompareStrings(china, japan), -1); 162 ASSERT_EQ(collator->CompareStrings(china, korea), 1); 163 changeLocale("zh"); 164 ASSERT_EQ(collator->CompareStrings(china, japan), 1); 165 ASSERT_EQ(collator->CompareStrings(china, korea), -1); 166 changeLocale("ja"); 167 ASSERT_EQ(collator->CompareStrings(china, japan), -1); 168 ASSERT_EQ(collator->CompareStrings(china, korea), -1); 169 changeLocale("ko"); 170 ASSERT_EQ(collator->CompareStrings(china, japan), 1); 171 ASSERT_EQ(collator->CompareStrings(china, korea), -1); 172 } 173 174 TEST(IntlCollator, IgnorePunctuation) 175 { 176 TestBuffer<uint8_t> bufferA; 177 TestBuffer<uint8_t> bufferB; 178 179 auto result = Collator::TryCreate("en-US"); 180 ASSERT_TRUE(result.isOk()); 181 auto collator = result.unwrap(); 182 Collator::Options options{}; 183 options.ignorePunctuation = true; 184 185 auto optResult = collator->SetOptions(options); 186 ASSERT_TRUE(optResult.isOk()); 187 188 ASSERT_EQ(collator->CompareStrings(u"aa", u".bb"), -1); 189 190 options.ignorePunctuation = false; 191 optResult = collator->SetOptions(options); 192 ASSERT_TRUE(optResult.isOk()); 193 194 ASSERT_EQ(collator->CompareStrings(u"aa", u".bb"), 1); 195 } 196 197 TEST(IntlCollator, GetBcp47KeywordValuesForLocale) 198 { 199 auto extsResult = Collator::GetBcp47KeywordValuesForLocale("de"); 200 ASSERT_TRUE(extsResult.isOk()); 201 auto extensions = extsResult.unwrap(); 202 203 // Since this list is dependent on ICU, and may change between upgrades, only 204 // test a subset of the keywords. 205 auto standard = MakeStringSpan("standard"); 206 auto search = MakeStringSpan("search"); 207 auto phonebk = MakeStringSpan("phonebk"); // Valid BCP 47. 208 auto phonebook = MakeStringSpan("phonebook"); // Not valid BCP 47. 209 bool hasStandard = false; 210 bool hasSearch = false; 211 bool hasPhonebk = false; 212 bool hasPhonebook = false; 213 214 for (auto extensionResult : extensions) { 215 ASSERT_TRUE(extensionResult.isOk()); 216 auto extension = extensionResult.unwrap(); 217 hasStandard |= extension == standard; 218 hasSearch |= extension == search; 219 hasPhonebk |= extension == phonebk; 220 hasPhonebook |= extension == phonebook; 221 } 222 223 ASSERT_TRUE(hasStandard); 224 ASSERT_TRUE(hasSearch); 225 ASSERT_TRUE(hasPhonebk); 226 227 ASSERT_FALSE(hasPhonebook); // Not valid BCP 47. 228 } 229 230 TEST(IntlCollator, GetBcp47KeywordValuesForLocaleCommonlyUsed) 231 { 232 auto extsResult = Collator::GetBcp47KeywordValuesForLocale( 233 "fr", Collator::CommonlyUsed::Yes); 234 ASSERT_TRUE(extsResult.isOk()); 235 auto extensions = extsResult.unwrap(); 236 237 // Since this list is dependent on ICU, and may change between upgrades, only 238 // test a subset of the keywords. 239 auto standard = MakeStringSpan("standard"); 240 auto search = MakeStringSpan("search"); 241 auto phonebk = MakeStringSpan("phonebk"); // Valid BCP 47. 242 auto phonebook = MakeStringSpan("phonebook"); // Not valid BCP 47. 243 bool hasStandard = false; 244 bool hasSearch = false; 245 bool hasPhonebk = false; 246 bool hasPhonebook = false; 247 248 for (auto extensionResult : extensions) { 249 ASSERT_TRUE(extensionResult.isOk()); 250 auto extension = extensionResult.unwrap(); 251 hasStandard |= extension == standard; 252 hasSearch |= extension == search; 253 hasPhonebk |= extension == phonebk; 254 hasPhonebook |= extension == phonebook; 255 } 256 257 ASSERT_TRUE(hasStandard); 258 ASSERT_TRUE(hasSearch); 259 260 ASSERT_FALSE(hasPhonebk); // Not commonly used in French. 261 ASSERT_FALSE(hasPhonebook); // Not valid BCP 47. 262 } 263 264 TEST(IntlCollator, GetBcp47KeywordValues) 265 { 266 auto extsResult = Collator::GetBcp47KeywordValues(); 267 ASSERT_TRUE(extsResult.isOk()); 268 auto extensions = extsResult.unwrap(); 269 270 // Since this list is dependent on ICU, and may change between upgrades, only 271 // test a subset of the keywords. 272 auto standard = MakeStringSpan("standard"); 273 auto search = MakeStringSpan("search"); 274 auto phonebk = MakeStringSpan("phonebk"); // Valid BCP 47. 275 auto phonebook = MakeStringSpan("phonebook"); // Not valid BCP 47. 276 bool hasStandard = false; 277 bool hasSearch = false; 278 bool hasPhonebk = false; 279 bool hasPhonebook = false; 280 281 for (auto extensionResult : extensions) { 282 ASSERT_TRUE(extensionResult.isOk()); 283 auto extension = extensionResult.unwrap(); 284 hasStandard |= extension == standard; 285 hasSearch |= extension == search; 286 hasPhonebk |= extension == phonebk; 287 hasPhonebook |= extension == phonebook; 288 } 289 290 ASSERT_TRUE(hasStandard); 291 ASSERT_TRUE(hasSearch); 292 ASSERT_TRUE(hasPhonebk); 293 294 ASSERT_FALSE(hasPhonebook); // Not valid BCP 47. 295 } 296 297 TEST(IntlCollator, GetAvailableLocales) 298 { 299 using namespace std::literals; 300 301 int32_t english = 0; 302 int32_t german = 0; 303 int32_t chinese = 0; 304 305 // Since this list is dependent on ICU, and may change between upgrades, only 306 // test a subset of the available locales. 307 for (const char* locale : Collator::GetAvailableLocales()) { 308 if (locale == "en"sv) { 309 english++; 310 } else if (locale == "de"sv) { 311 german++; 312 } else if (locale == "zh"sv) { 313 chinese++; 314 } 315 } 316 317 // Each locale should be found exactly once. 318 ASSERT_EQ(english, 1); 319 ASSERT_EQ(german, 1); 320 ASSERT_EQ(chinese, 1); 321 } 322 323 TEST(IntlCollator, GetCaseFirst) 324 { 325 auto result = Collator::TryCreate("en-US"); 326 ASSERT_TRUE(result.isOk()); 327 auto collator = result.unwrap(); 328 329 auto caseFirst = collator->GetCaseFirst(); 330 ASSERT_TRUE(caseFirst.isOk()); 331 ASSERT_EQ(caseFirst.unwrap(), Collator::CaseFirst::False); 332 333 for (auto kf : {Collator::CaseFirst::Upper, Collator::CaseFirst::Lower, 334 Collator::CaseFirst::False}) { 335 Collator::Options options{}; 336 options.caseFirst = kf; 337 338 auto optResult = collator->SetOptions(options); 339 ASSERT_TRUE(optResult.isOk()); 340 341 auto caseFirst = collator->GetCaseFirst(); 342 ASSERT_TRUE(caseFirst.isOk()); 343 ASSERT_EQ(caseFirst.unwrap(), kf); 344 } 345 } 346 347 } // namespace mozilla::intl