tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

TestCollator.cpp (11820B)


      1 /* This Source Code Form is subject to the terms of the Mozilla Public
      2 * License, v. 2.0. If a copy of the MPL was not distributed with this
      3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      4 #include "gtest/gtest.h"
      5 
      6 #include <string.h>
      7 #include <string_view>
      8 #include "mozilla/intl/Collator.h"
      9 #include "mozilla/Span.h"
     10 #include "TestBuffer.h"
     11 
     12 namespace mozilla::intl {
     13 
     14 TEST(IntlCollator, SetAttributesInternal)
     15 {
     16  // Run through each settings to make sure MOZ_ASSERT is not triggered for
     17  // misconfigured attributes.
     18  auto result = Collator::TryCreate("en-US");
     19  ASSERT_TRUE(result.isOk());
     20  auto collator = result.unwrap();
     21 
     22  collator->SetStrength(Collator::Strength::Primary);
     23  collator->SetStrength(Collator::Strength::Secondary);
     24  collator->SetStrength(Collator::Strength::Tertiary);
     25  collator->SetStrength(Collator::Strength::Quaternary);
     26  collator->SetStrength(Collator::Strength::Identical);
     27  collator->SetStrength(Collator::Strength::Default);
     28 
     29  collator->SetAlternateHandling(Collator::AlternateHandling::NonIgnorable)
     30      .unwrap();
     31  collator->SetAlternateHandling(Collator::AlternateHandling::Shifted).unwrap();
     32  collator->SetAlternateHandling(Collator::AlternateHandling::Default).unwrap();
     33 
     34  collator->SetCaseFirst(Collator::CaseFirst::False).unwrap();
     35  collator->SetCaseFirst(Collator::CaseFirst::Upper).unwrap();
     36  collator->SetCaseFirst(Collator::CaseFirst::Lower).unwrap();
     37 
     38  collator->SetCaseLevel(Collator::Feature::On).unwrap();
     39  collator->SetCaseLevel(Collator::Feature::Off).unwrap();
     40  collator->SetCaseLevel(Collator::Feature::Default).unwrap();
     41 
     42  collator->SetNumericCollation(Collator::Feature::On).unwrap();
     43  collator->SetNumericCollation(Collator::Feature::Off).unwrap();
     44  collator->SetNumericCollation(Collator::Feature::Default).unwrap();
     45 
     46  collator->SetNormalizationMode(Collator::Feature::On).unwrap();
     47  collator->SetNormalizationMode(Collator::Feature::Off).unwrap();
     48  collator->SetNormalizationMode(Collator::Feature::Default).unwrap();
     49 }
     50 
     51 TEST(IntlCollator, GetSortKey)
     52 {
     53  // Do some light sort key comparisons to ensure everything is wired up
     54  // correctly. This is not doing extensive correctness testing.
     55  auto result = Collator::TryCreate("en-US");
     56  ASSERT_TRUE(result.isOk());
     57  auto collator = result.unwrap();
     58  TestBuffer<uint8_t> bufferA;
     59  TestBuffer<uint8_t> bufferB;
     60 
     61  auto compareSortKeys = [&](const char16_t* a, const char16_t* b) {
     62    collator->GetSortKey(MakeStringSpan(a), bufferA).unwrap();
     63    collator->GetSortKey(MakeStringSpan(b), bufferB).unwrap();
     64    return strcmp(reinterpret_cast<const char*>(bufferA.data()),
     65                  reinterpret_cast<const char*>(bufferB.data()));
     66  };
     67 
     68  ASSERT_TRUE(compareSortKeys(u"aaa", u"bbb") < 0);
     69  ASSERT_TRUE(compareSortKeys(u"bbb", u"aaa") > 0);
     70  ASSERT_TRUE(compareSortKeys(u"aaa", u"aaa") == 0);
     71  ASSERT_TRUE(compareSortKeys(u"👍", u"👎") < 0);
     72 }
     73 
     74 TEST(IntlCollator, CompareStrings)
     75 {
     76  // Do some light string comparisons to ensure everything is wired up
     77  // correctly. This is not doing extensive correctness testing.
     78  auto result = Collator::TryCreate("en-US");
     79  ASSERT_TRUE(result.isOk());
     80  auto collator = result.unwrap();
     81  TestBuffer<uint8_t> bufferA;
     82  TestBuffer<uint8_t> bufferB;
     83 
     84  ASSERT_EQ(collator->CompareStrings(u"aaa", u"bbb"), -1);
     85  ASSERT_EQ(collator->CompareStrings(u"bbb", u"aaa"), 1);
     86  ASSERT_EQ(collator->CompareStrings(u"aaa", u"aaa"), 0);
     87  ASSERT_EQ(collator->CompareStrings(u"👍", u"👎"), -1);
     88 }
     89 
     90 TEST(IntlCollator, SetOptionsSensitivity)
     91 {
     92  // Test the ECMA 402 sensitivity behavior per:
     93  // https://tc39.es/ecma402/#sec-collator-comparestrings
     94  auto result = Collator::TryCreate("en-US");
     95  ASSERT_TRUE(result.isOk());
     96  auto collator = result.unwrap();
     97 
     98  TestBuffer<uint8_t> bufferA;
     99  TestBuffer<uint8_t> bufferB;
    100  ICUResult optResult = Ok();
    101  Collator::Options options{};
    102 
    103  options.sensitivity = Collator::Sensitivity::Base;
    104  optResult = collator->SetOptions(options);
    105  ASSERT_TRUE(optResult.isOk());
    106  ASSERT_EQ(collator->CompareStrings(u"a", u"b"), -1);
    107  ASSERT_EQ(collator->CompareStrings(u"a", u"á"), 0);
    108  ASSERT_EQ(collator->CompareStrings(u"a", u"A"), 0);
    109 
    110  options.sensitivity = Collator::Sensitivity::Accent;
    111  optResult = collator->SetOptions(options);
    112  ASSERT_TRUE(optResult.isOk());
    113  ASSERT_EQ(collator->CompareStrings(u"a", u"b"), -1);
    114  ASSERT_EQ(collator->CompareStrings(u"a", u"á"), -1);
    115  ASSERT_EQ(collator->CompareStrings(u"a", u"A"), 0);
    116 
    117  options.sensitivity = Collator::Sensitivity::Case;
    118  optResult = collator->SetOptions(options);
    119  ASSERT_TRUE(optResult.isOk());
    120  ASSERT_EQ(collator->CompareStrings(u"a", u"b"), -1);
    121  ASSERT_EQ(collator->CompareStrings(u"a", u"á"), 0);
    122  ASSERT_EQ(collator->CompareStrings(u"a", u"A"), -1);
    123 
    124  options.sensitivity = Collator::Sensitivity::Variant;
    125  optResult = collator->SetOptions(options);
    126  ASSERT_TRUE(optResult.isOk());
    127  ASSERT_EQ(collator->CompareStrings(u"a", u"b"), -1);
    128  ASSERT_EQ(collator->CompareStrings(u"a", u"á"), -1);
    129  ASSERT_EQ(collator->CompareStrings(u"a", u"A"), -1);
    130 }
    131 
    132 TEST(IntlCollator, LocaleSensitiveCollations)
    133 {
    134  UniquePtr<Collator> collator = nullptr;
    135  TestBuffer<uint8_t> bufferA;
    136  TestBuffer<uint8_t> bufferB;
    137 
    138  auto changeLocale = [&](const char* locale) {
    139    auto result = Collator::TryCreate(locale);
    140    ASSERT_TRUE(result.isOk());
    141    collator = result.unwrap();
    142 
    143    Collator::Options options{};
    144    options.sensitivity = Collator::Sensitivity::Base;
    145    auto optResult = collator->SetOptions(options);
    146    ASSERT_TRUE(optResult.isOk());
    147  };
    148 
    149  // Swedish treats "Ö" as a separate character, which sorts after "Z".
    150  changeLocale("en-US");
    151  ASSERT_EQ(collator->CompareStrings(u"Österreich", u"Västervik"), -1);
    152  changeLocale("sv-SE");
    153  ASSERT_EQ(collator->CompareStrings(u"Österreich", u"Västervik"), 1);
    154 
    155  // Country names in their respective scripts.
    156  auto china = MakeStringSpan(u"中国");
    157  auto japan = MakeStringSpan(u"日本");
    158  auto korea = MakeStringSpan(u"한국");
    159 
    160  changeLocale("en-US");
    161  ASSERT_EQ(collator->CompareStrings(china, japan), -1);
    162  ASSERT_EQ(collator->CompareStrings(china, korea), 1);
    163  changeLocale("zh");
    164  ASSERT_EQ(collator->CompareStrings(china, japan), 1);
    165  ASSERT_EQ(collator->CompareStrings(china, korea), -1);
    166  changeLocale("ja");
    167  ASSERT_EQ(collator->CompareStrings(china, japan), -1);
    168  ASSERT_EQ(collator->CompareStrings(china, korea), -1);
    169  changeLocale("ko");
    170  ASSERT_EQ(collator->CompareStrings(china, japan), 1);
    171  ASSERT_EQ(collator->CompareStrings(china, korea), -1);
    172 }
    173 
    174 TEST(IntlCollator, IgnorePunctuation)
    175 {
    176  TestBuffer<uint8_t> bufferA;
    177  TestBuffer<uint8_t> bufferB;
    178 
    179  auto result = Collator::TryCreate("en-US");
    180  ASSERT_TRUE(result.isOk());
    181  auto collator = result.unwrap();
    182  Collator::Options options{};
    183  options.ignorePunctuation = true;
    184 
    185  auto optResult = collator->SetOptions(options);
    186  ASSERT_TRUE(optResult.isOk());
    187 
    188  ASSERT_EQ(collator->CompareStrings(u"aa", u".bb"), -1);
    189 
    190  options.ignorePunctuation = false;
    191  optResult = collator->SetOptions(options);
    192  ASSERT_TRUE(optResult.isOk());
    193 
    194  ASSERT_EQ(collator->CompareStrings(u"aa", u".bb"), 1);
    195 }
    196 
    197 TEST(IntlCollator, GetBcp47KeywordValuesForLocale)
    198 {
    199  auto extsResult = Collator::GetBcp47KeywordValuesForLocale("de");
    200  ASSERT_TRUE(extsResult.isOk());
    201  auto extensions = extsResult.unwrap();
    202 
    203  // Since this list is dependent on ICU, and may change between upgrades, only
    204  // test a subset of the keywords.
    205  auto standard = MakeStringSpan("standard");
    206  auto search = MakeStringSpan("search");
    207  auto phonebk = MakeStringSpan("phonebk");      // Valid BCP 47.
    208  auto phonebook = MakeStringSpan("phonebook");  // Not valid BCP 47.
    209  bool hasStandard = false;
    210  bool hasSearch = false;
    211  bool hasPhonebk = false;
    212  bool hasPhonebook = false;
    213 
    214  for (auto extensionResult : extensions) {
    215    ASSERT_TRUE(extensionResult.isOk());
    216    auto extension = extensionResult.unwrap();
    217    hasStandard |= extension == standard;
    218    hasSearch |= extension == search;
    219    hasPhonebk |= extension == phonebk;
    220    hasPhonebook |= extension == phonebook;
    221  }
    222 
    223  ASSERT_TRUE(hasStandard);
    224  ASSERT_TRUE(hasSearch);
    225  ASSERT_TRUE(hasPhonebk);
    226 
    227  ASSERT_FALSE(hasPhonebook);  // Not valid BCP 47.
    228 }
    229 
    230 TEST(IntlCollator, GetBcp47KeywordValuesForLocaleCommonlyUsed)
    231 {
    232  auto extsResult = Collator::GetBcp47KeywordValuesForLocale(
    233      "fr", Collator::CommonlyUsed::Yes);
    234  ASSERT_TRUE(extsResult.isOk());
    235  auto extensions = extsResult.unwrap();
    236 
    237  // Since this list is dependent on ICU, and may change between upgrades, only
    238  // test a subset of the keywords.
    239  auto standard = MakeStringSpan("standard");
    240  auto search = MakeStringSpan("search");
    241  auto phonebk = MakeStringSpan("phonebk");      // Valid BCP 47.
    242  auto phonebook = MakeStringSpan("phonebook");  // Not valid BCP 47.
    243  bool hasStandard = false;
    244  bool hasSearch = false;
    245  bool hasPhonebk = false;
    246  bool hasPhonebook = false;
    247 
    248  for (auto extensionResult : extensions) {
    249    ASSERT_TRUE(extensionResult.isOk());
    250    auto extension = extensionResult.unwrap();
    251    hasStandard |= extension == standard;
    252    hasSearch |= extension == search;
    253    hasPhonebk |= extension == phonebk;
    254    hasPhonebook |= extension == phonebook;
    255  }
    256 
    257  ASSERT_TRUE(hasStandard);
    258  ASSERT_TRUE(hasSearch);
    259 
    260  ASSERT_FALSE(hasPhonebk);    // Not commonly used in French.
    261  ASSERT_FALSE(hasPhonebook);  // Not valid BCP 47.
    262 }
    263 
    264 TEST(IntlCollator, GetBcp47KeywordValues)
    265 {
    266  auto extsResult = Collator::GetBcp47KeywordValues();
    267  ASSERT_TRUE(extsResult.isOk());
    268  auto extensions = extsResult.unwrap();
    269 
    270  // Since this list is dependent on ICU, and may change between upgrades, only
    271  // test a subset of the keywords.
    272  auto standard = MakeStringSpan("standard");
    273  auto search = MakeStringSpan("search");
    274  auto phonebk = MakeStringSpan("phonebk");      // Valid BCP 47.
    275  auto phonebook = MakeStringSpan("phonebook");  // Not valid BCP 47.
    276  bool hasStandard = false;
    277  bool hasSearch = false;
    278  bool hasPhonebk = false;
    279  bool hasPhonebook = false;
    280 
    281  for (auto extensionResult : extensions) {
    282    ASSERT_TRUE(extensionResult.isOk());
    283    auto extension = extensionResult.unwrap();
    284    hasStandard |= extension == standard;
    285    hasSearch |= extension == search;
    286    hasPhonebk |= extension == phonebk;
    287    hasPhonebook |= extension == phonebook;
    288  }
    289 
    290  ASSERT_TRUE(hasStandard);
    291  ASSERT_TRUE(hasSearch);
    292  ASSERT_TRUE(hasPhonebk);
    293 
    294  ASSERT_FALSE(hasPhonebook);  // Not valid BCP 47.
    295 }
    296 
    297 TEST(IntlCollator, GetAvailableLocales)
    298 {
    299  using namespace std::literals;
    300 
    301  int32_t english = 0;
    302  int32_t german = 0;
    303  int32_t chinese = 0;
    304 
    305  // Since this list is dependent on ICU, and may change between upgrades, only
    306  // test a subset of the available locales.
    307  for (const char* locale : Collator::GetAvailableLocales()) {
    308    if (locale == "en"sv) {
    309      english++;
    310    } else if (locale == "de"sv) {
    311      german++;
    312    } else if (locale == "zh"sv) {
    313      chinese++;
    314    }
    315  }
    316 
    317  // Each locale should be found exactly once.
    318  ASSERT_EQ(english, 1);
    319  ASSERT_EQ(german, 1);
    320  ASSERT_EQ(chinese, 1);
    321 }
    322 
    323 TEST(IntlCollator, GetCaseFirst)
    324 {
    325  auto result = Collator::TryCreate("en-US");
    326  ASSERT_TRUE(result.isOk());
    327  auto collator = result.unwrap();
    328 
    329  auto caseFirst = collator->GetCaseFirst();
    330  ASSERT_TRUE(caseFirst.isOk());
    331  ASSERT_EQ(caseFirst.unwrap(), Collator::CaseFirst::False);
    332 
    333  for (auto kf : {Collator::CaseFirst::Upper, Collator::CaseFirst::Lower,
    334                  Collator::CaseFirst::False}) {
    335    Collator::Options options{};
    336    options.caseFirst = kf;
    337 
    338    auto optResult = collator->SetOptions(options);
    339    ASSERT_TRUE(optResult.isOk());
    340 
    341    auto caseFirst = collator->GetCaseFirst();
    342    ASSERT_TRUE(caseFirst.isOk());
    343    ASSERT_EQ(caseFirst.unwrap(), kf);
    344  }
    345 }
    346 
    347 }  // namespace mozilla::intl