tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

Collator.cpp (9015B)


      1 /* This Source Code Form is subject to the terms of the Mozilla Public
      2 * License, v. 2.0. If a copy of the MPL was not distributed with this
      3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      4 
      5 #include <algorithm>
      6 #include <string.h>
      7 #include "mozilla/intl/Collator.h"
      8 
      9 namespace mozilla::intl {
     10 
     11 Collator::Collator(UCollator* aCollator) : mCollator(aCollator) {
     12  MOZ_ASSERT(aCollator);
     13 }
     14 
     15 Collator::~Collator() {
     16  if (mCollator.GetMut()) {
     17    ucol_close(mCollator.GetMut());
     18  }
     19 }
     20 
     21 Result<UniquePtr<Collator>, ICUError> Collator::TryCreate(const char* aLocale) {
     22  UErrorCode status = U_ZERO_ERROR;
     23  UCollator* collator = ucol_open(IcuLocale(aLocale), &status);
     24  if (U_FAILURE(status)) {
     25    return Err(ToICUError(status));
     26  }
     27  return MakeUnique<Collator>(collator);
     28 };
     29 
     30 int32_t Collator::CompareStrings(Span<const char16_t> aSource,
     31                                 Span<const char16_t> aTarget) const {
     32  switch (ucol_strcoll(mCollator.GetConst(), aSource.data(),
     33                       static_cast<int32_t>(aSource.size()), aTarget.data(),
     34                       static_cast<int32_t>(aTarget.size()))) {
     35    case UCOL_LESS:
     36      return -1;
     37    case UCOL_EQUAL:
     38      return 0;
     39    case UCOL_GREATER:
     40      return 1;
     41  }
     42  MOZ_ASSERT_UNREACHABLE("ucol_strcoll returned bad UCollationResult");
     43  return 0;
     44 }
     45 
     46 int32_t Collator::CompareSortKeys(Span<const uint8_t> aKey1,
     47                                  Span<const uint8_t> aKey2) const {
     48  size_t minLength = std::min(aKey1.Length(), aKey2.Length());
     49  int32_t tmpResult = strncmp((const char*)aKey1.Elements(),
     50                              (const char*)aKey2.Elements(), minLength);
     51  if (tmpResult < 0) {
     52    return -1;
     53  }
     54  if (tmpResult > 0) {
     55    return 1;
     56  }
     57  if (aKey1.Length() > minLength) {
     58    // First string contains second one, so comes later, hence return > 0.
     59    return 1;
     60  }
     61  if (aKey2.Length() > minLength) {
     62    // First string is a substring of second one, so comes earlier,
     63    // hence return < 0.
     64    return -1;
     65  }
     66  return 0;
     67 }
     68 
     69 static UColAttributeValue CaseFirstToICU(Collator::CaseFirst caseFirst) {
     70  switch (caseFirst) {
     71    case Collator::CaseFirst::False:
     72      return UCOL_OFF;
     73    case Collator::CaseFirst::Upper:
     74      return UCOL_UPPER_FIRST;
     75    case Collator::CaseFirst::Lower:
     76      return UCOL_LOWER_FIRST;
     77  }
     78 
     79  MOZ_ASSERT_UNREACHABLE();
     80  return UCOL_DEFAULT;
     81 }
     82 
     83 void Collator::SetStrength(Collator::Strength aStrength) {
     84  UColAttributeValue strength;
     85  switch (aStrength) {
     86    case Collator::Strength::Default:
     87      strength = UCOL_DEFAULT_STRENGTH;
     88      break;
     89    case Collator::Strength::Primary:
     90      strength = UCOL_PRIMARY;
     91      break;
     92    case Collator::Strength::Secondary:
     93      strength = UCOL_SECONDARY;
     94      break;
     95    case Collator::Strength::Tertiary:
     96      strength = UCOL_TERTIARY;
     97      break;
     98    case Collator::Strength::Quaternary:
     99      strength = UCOL_QUATERNARY;
    100      break;
    101    case Collator::Strength::Identical:
    102      strength = UCOL_IDENTICAL;
    103      break;
    104  }
    105 
    106  ucol_setStrength(mCollator.GetMut(), strength);
    107 }
    108 
    109 ICUResult Collator::SetCaseLevel(Collator::Feature aFeature) {
    110  UErrorCode status = U_ZERO_ERROR;
    111  ucol_setAttribute(mCollator.GetMut(), UCOL_CASE_LEVEL,
    112                    ToUColAttributeValue(aFeature), &status);
    113  return ToICUResult(status);
    114 }
    115 
    116 ICUResult Collator::SetAlternateHandling(
    117    Collator::AlternateHandling aAlternateHandling) {
    118  UErrorCode status = U_ZERO_ERROR;
    119  UColAttributeValue handling;
    120  switch (aAlternateHandling) {
    121    case Collator::AlternateHandling::NonIgnorable:
    122      handling = UCOL_NON_IGNORABLE;
    123      break;
    124    case Collator::AlternateHandling::Shifted:
    125      handling = UCOL_SHIFTED;
    126      break;
    127    case Collator::AlternateHandling::Default:
    128      handling = UCOL_DEFAULT;
    129      break;
    130  }
    131 
    132  ucol_setAttribute(mCollator.GetMut(), UCOL_ALTERNATE_HANDLING, handling,
    133                    &status);
    134  return ToICUResult(status);
    135 }
    136 
    137 ICUResult Collator::SetNumericCollation(Collator::Feature aFeature) {
    138  UErrorCode status = U_ZERO_ERROR;
    139  ucol_setAttribute(mCollator.GetMut(), UCOL_NUMERIC_COLLATION,
    140                    ToUColAttributeValue(aFeature), &status);
    141  return ToICUResult(status);
    142 }
    143 
    144 ICUResult Collator::SetNormalizationMode(Collator::Feature aFeature) {
    145  UErrorCode status = U_ZERO_ERROR;
    146  ucol_setAttribute(mCollator.GetMut(), UCOL_NORMALIZATION_MODE,
    147                    ToUColAttributeValue(aFeature), &status);
    148  return ToICUResult(status);
    149 }
    150 
    151 ICUResult Collator::SetCaseFirst(Collator::CaseFirst aCaseFirst) {
    152  UErrorCode status = U_ZERO_ERROR;
    153  ucol_setAttribute(mCollator.GetMut(), UCOL_CASE_FIRST,
    154                    CaseFirstToICU(aCaseFirst), &status);
    155  return ToICUResult(status);
    156 }
    157 
    158 ICUResult Collator::SetOptions(const Options& aOptions,
    159                               const Maybe<Options&> aPrevOptions) {
    160  if (aPrevOptions &&
    161      // Check the equality of the previous options.
    162      aPrevOptions->sensitivity == aOptions.sensitivity &&
    163      aPrevOptions->caseFirst == aOptions.caseFirst &&
    164      aPrevOptions->ignorePunctuation == aOptions.ignorePunctuation &&
    165      aPrevOptions->numeric == aOptions.numeric) {
    166    return Ok();
    167  }
    168 
    169  Collator::Strength strength = Collator::Strength::Default;
    170  Collator::Feature caseLevel = Collator::Feature::Off;
    171  switch (aOptions.sensitivity) {
    172    case Collator::Sensitivity::Base:
    173      strength = Collator::Strength::Primary;
    174      break;
    175    case Collator::Sensitivity::Accent:
    176      strength = Collator::Strength::Secondary;
    177      break;
    178    case Collator::Sensitivity::Case:
    179      caseLevel = Collator::Feature::On;
    180      strength = Collator::Strength::Primary;
    181      break;
    182    case Collator::Sensitivity::Variant:
    183      strength = Collator::Strength::Tertiary;
    184      break;
    185  }
    186 
    187  SetStrength(strength);
    188 
    189  ICUResult result = Ok();
    190 
    191  // According to the ICU team, UCOL_SHIFTED causes punctuation to be
    192  // ignored. Looking at Unicode Technical Report 35, Unicode Locale Data
    193  // Markup Language, "shifted" causes whitespace and punctuation to be
    194  // ignored - that's a bit more than asked for, but there's no way to get
    195  // less.
    196  result = this->SetAlternateHandling(
    197      aOptions.ignorePunctuation ? Collator::AlternateHandling::Shifted
    198                                 : Collator::AlternateHandling::NonIgnorable);
    199  if (result.isErr()) {
    200    return result;
    201  }
    202 
    203  result = SetCaseLevel(caseLevel);
    204  if (result.isErr()) {
    205    return result;
    206  }
    207 
    208  result = SetNumericCollation(aOptions.numeric ? Collator::Feature::On
    209                                                : Collator::Feature::Off);
    210  if (result.isErr()) {
    211    return result;
    212  }
    213 
    214  // Normalization is always on to meet the canonical equivalence requirement.
    215  result = SetNormalizationMode(Collator::Feature::On);
    216  if (result.isErr()) {
    217    return result;
    218  }
    219 
    220  result = SetCaseFirst(aOptions.caseFirst);
    221  if (result.isErr()) {
    222    return result;
    223  }
    224  return Ok();
    225 }
    226 
    227 Result<Collator::CaseFirst, ICUError> Collator::GetCaseFirst() const {
    228  UErrorCode status = U_ZERO_ERROR;
    229  UColAttributeValue caseFirst =
    230      ucol_getAttribute(mCollator.GetConst(), UCOL_CASE_FIRST, &status);
    231  if (U_FAILURE(status)) {
    232    return Err(ToICUError(status));
    233  }
    234 
    235  if (caseFirst == UCOL_OFF) {
    236    return CaseFirst::False;
    237  }
    238  if (caseFirst == UCOL_UPPER_FIRST) {
    239    return CaseFirst::Upper;
    240  }
    241  MOZ_ASSERT(caseFirst == UCOL_LOWER_FIRST);
    242  return CaseFirst::Lower;
    243 }
    244 
    245 Result<bool, ICUError> Collator::GetIgnorePunctuation() const {
    246  UErrorCode status = U_ZERO_ERROR;
    247  UColAttributeValue alternateHandling =
    248      ucol_getAttribute(mCollator.GetConst(), UCOL_ALTERNATE_HANDLING, &status);
    249  if (U_FAILURE(status)) {
    250    return Err(ToICUError(status));
    251  }
    252 
    253  MOZ_ASSERT(alternateHandling == UCOL_SHIFTED ||
    254             alternateHandling == UCOL_NON_IGNORABLE);
    255  return alternateHandling == UCOL_SHIFTED;
    256 }
    257 
    258 /* static */
    259 Result<Collator::Bcp47ExtEnumeration, ICUError>
    260 Collator::GetBcp47KeywordValuesForLocale(const char* aLocale,
    261                                         CommonlyUsed aCommonlyUsed) {
    262  UErrorCode status = U_ZERO_ERROR;
    263  UEnumeration* enumeration = ucol_getKeywordValuesForLocale(
    264      "collation", aLocale, static_cast<bool>(aCommonlyUsed), &status);
    265 
    266  if (U_SUCCESS(status)) {
    267    return Bcp47ExtEnumeration(enumeration);
    268  }
    269 
    270  return Err(ToICUError(status));
    271 }
    272 
    273 /* static */
    274 Result<Collator::Bcp47ExtEnumeration, ICUError>
    275 Collator::GetBcp47KeywordValues() {
    276  UErrorCode status = U_ZERO_ERROR;
    277  UEnumeration* enumeration = ucol_getKeywordValues("collation", &status);
    278 
    279  if (U_SUCCESS(status)) {
    280    return Bcp47ExtEnumeration(enumeration);
    281  }
    282 
    283  return Err(ToICUError(status));
    284 }
    285 
    286 /* static */
    287 SpanResult<char> Collator::KeywordValueToBcp47Extension(const char* aKeyword,
    288                                                        int32_t aLength) {
    289  if (aKeyword == nullptr) {
    290    return Err(InternalError{});
    291  }
    292  return MakeStringSpan(uloc_toUnicodeLocaleType("co", aKeyword));
    293 }
    294 
    295 }  // namespace mozilla::intl