Collator.cpp (9015B)
1 /* This Source Code Form is subject to the terms of the Mozilla Public 2 * License, v. 2.0. If a copy of the MPL was not distributed with this 3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 4 5 #include <algorithm> 6 #include <string.h> 7 #include "mozilla/intl/Collator.h" 8 9 namespace mozilla::intl { 10 11 Collator::Collator(UCollator* aCollator) : mCollator(aCollator) { 12 MOZ_ASSERT(aCollator); 13 } 14 15 Collator::~Collator() { 16 if (mCollator.GetMut()) { 17 ucol_close(mCollator.GetMut()); 18 } 19 } 20 21 Result<UniquePtr<Collator>, ICUError> Collator::TryCreate(const char* aLocale) { 22 UErrorCode status = U_ZERO_ERROR; 23 UCollator* collator = ucol_open(IcuLocale(aLocale), &status); 24 if (U_FAILURE(status)) { 25 return Err(ToICUError(status)); 26 } 27 return MakeUnique<Collator>(collator); 28 }; 29 30 int32_t Collator::CompareStrings(Span<const char16_t> aSource, 31 Span<const char16_t> aTarget) const { 32 switch (ucol_strcoll(mCollator.GetConst(), aSource.data(), 33 static_cast<int32_t>(aSource.size()), aTarget.data(), 34 static_cast<int32_t>(aTarget.size()))) { 35 case UCOL_LESS: 36 return -1; 37 case UCOL_EQUAL: 38 return 0; 39 case UCOL_GREATER: 40 return 1; 41 } 42 MOZ_ASSERT_UNREACHABLE("ucol_strcoll returned bad UCollationResult"); 43 return 0; 44 } 45 46 int32_t Collator::CompareSortKeys(Span<const uint8_t> aKey1, 47 Span<const uint8_t> aKey2) const { 48 size_t minLength = std::min(aKey1.Length(), aKey2.Length()); 49 int32_t tmpResult = strncmp((const char*)aKey1.Elements(), 50 (const char*)aKey2.Elements(), minLength); 51 if (tmpResult < 0) { 52 return -1; 53 } 54 if (tmpResult > 0) { 55 return 1; 56 } 57 if (aKey1.Length() > minLength) { 58 // First string contains second one, so comes later, hence return > 0. 59 return 1; 60 } 61 if (aKey2.Length() > minLength) { 62 // First string is a substring of second one, so comes earlier, 63 // hence return < 0. 64 return -1; 65 } 66 return 0; 67 } 68 69 static UColAttributeValue CaseFirstToICU(Collator::CaseFirst caseFirst) { 70 switch (caseFirst) { 71 case Collator::CaseFirst::False: 72 return UCOL_OFF; 73 case Collator::CaseFirst::Upper: 74 return UCOL_UPPER_FIRST; 75 case Collator::CaseFirst::Lower: 76 return UCOL_LOWER_FIRST; 77 } 78 79 MOZ_ASSERT_UNREACHABLE(); 80 return UCOL_DEFAULT; 81 } 82 83 void Collator::SetStrength(Collator::Strength aStrength) { 84 UColAttributeValue strength; 85 switch (aStrength) { 86 case Collator::Strength::Default: 87 strength = UCOL_DEFAULT_STRENGTH; 88 break; 89 case Collator::Strength::Primary: 90 strength = UCOL_PRIMARY; 91 break; 92 case Collator::Strength::Secondary: 93 strength = UCOL_SECONDARY; 94 break; 95 case Collator::Strength::Tertiary: 96 strength = UCOL_TERTIARY; 97 break; 98 case Collator::Strength::Quaternary: 99 strength = UCOL_QUATERNARY; 100 break; 101 case Collator::Strength::Identical: 102 strength = UCOL_IDENTICAL; 103 break; 104 } 105 106 ucol_setStrength(mCollator.GetMut(), strength); 107 } 108 109 ICUResult Collator::SetCaseLevel(Collator::Feature aFeature) { 110 UErrorCode status = U_ZERO_ERROR; 111 ucol_setAttribute(mCollator.GetMut(), UCOL_CASE_LEVEL, 112 ToUColAttributeValue(aFeature), &status); 113 return ToICUResult(status); 114 } 115 116 ICUResult Collator::SetAlternateHandling( 117 Collator::AlternateHandling aAlternateHandling) { 118 UErrorCode status = U_ZERO_ERROR; 119 UColAttributeValue handling; 120 switch (aAlternateHandling) { 121 case Collator::AlternateHandling::NonIgnorable: 122 handling = UCOL_NON_IGNORABLE; 123 break; 124 case Collator::AlternateHandling::Shifted: 125 handling = UCOL_SHIFTED; 126 break; 127 case Collator::AlternateHandling::Default: 128 handling = UCOL_DEFAULT; 129 break; 130 } 131 132 ucol_setAttribute(mCollator.GetMut(), UCOL_ALTERNATE_HANDLING, handling, 133 &status); 134 return ToICUResult(status); 135 } 136 137 ICUResult Collator::SetNumericCollation(Collator::Feature aFeature) { 138 UErrorCode status = U_ZERO_ERROR; 139 ucol_setAttribute(mCollator.GetMut(), UCOL_NUMERIC_COLLATION, 140 ToUColAttributeValue(aFeature), &status); 141 return ToICUResult(status); 142 } 143 144 ICUResult Collator::SetNormalizationMode(Collator::Feature aFeature) { 145 UErrorCode status = U_ZERO_ERROR; 146 ucol_setAttribute(mCollator.GetMut(), UCOL_NORMALIZATION_MODE, 147 ToUColAttributeValue(aFeature), &status); 148 return ToICUResult(status); 149 } 150 151 ICUResult Collator::SetCaseFirst(Collator::CaseFirst aCaseFirst) { 152 UErrorCode status = U_ZERO_ERROR; 153 ucol_setAttribute(mCollator.GetMut(), UCOL_CASE_FIRST, 154 CaseFirstToICU(aCaseFirst), &status); 155 return ToICUResult(status); 156 } 157 158 ICUResult Collator::SetOptions(const Options& aOptions, 159 const Maybe<Options&> aPrevOptions) { 160 if (aPrevOptions && 161 // Check the equality of the previous options. 162 aPrevOptions->sensitivity == aOptions.sensitivity && 163 aPrevOptions->caseFirst == aOptions.caseFirst && 164 aPrevOptions->ignorePunctuation == aOptions.ignorePunctuation && 165 aPrevOptions->numeric == aOptions.numeric) { 166 return Ok(); 167 } 168 169 Collator::Strength strength = Collator::Strength::Default; 170 Collator::Feature caseLevel = Collator::Feature::Off; 171 switch (aOptions.sensitivity) { 172 case Collator::Sensitivity::Base: 173 strength = Collator::Strength::Primary; 174 break; 175 case Collator::Sensitivity::Accent: 176 strength = Collator::Strength::Secondary; 177 break; 178 case Collator::Sensitivity::Case: 179 caseLevel = Collator::Feature::On; 180 strength = Collator::Strength::Primary; 181 break; 182 case Collator::Sensitivity::Variant: 183 strength = Collator::Strength::Tertiary; 184 break; 185 } 186 187 SetStrength(strength); 188 189 ICUResult result = Ok(); 190 191 // According to the ICU team, UCOL_SHIFTED causes punctuation to be 192 // ignored. Looking at Unicode Technical Report 35, Unicode Locale Data 193 // Markup Language, "shifted" causes whitespace and punctuation to be 194 // ignored - that's a bit more than asked for, but there's no way to get 195 // less. 196 result = this->SetAlternateHandling( 197 aOptions.ignorePunctuation ? Collator::AlternateHandling::Shifted 198 : Collator::AlternateHandling::NonIgnorable); 199 if (result.isErr()) { 200 return result; 201 } 202 203 result = SetCaseLevel(caseLevel); 204 if (result.isErr()) { 205 return result; 206 } 207 208 result = SetNumericCollation(aOptions.numeric ? Collator::Feature::On 209 : Collator::Feature::Off); 210 if (result.isErr()) { 211 return result; 212 } 213 214 // Normalization is always on to meet the canonical equivalence requirement. 215 result = SetNormalizationMode(Collator::Feature::On); 216 if (result.isErr()) { 217 return result; 218 } 219 220 result = SetCaseFirst(aOptions.caseFirst); 221 if (result.isErr()) { 222 return result; 223 } 224 return Ok(); 225 } 226 227 Result<Collator::CaseFirst, ICUError> Collator::GetCaseFirst() const { 228 UErrorCode status = U_ZERO_ERROR; 229 UColAttributeValue caseFirst = 230 ucol_getAttribute(mCollator.GetConst(), UCOL_CASE_FIRST, &status); 231 if (U_FAILURE(status)) { 232 return Err(ToICUError(status)); 233 } 234 235 if (caseFirst == UCOL_OFF) { 236 return CaseFirst::False; 237 } 238 if (caseFirst == UCOL_UPPER_FIRST) { 239 return CaseFirst::Upper; 240 } 241 MOZ_ASSERT(caseFirst == UCOL_LOWER_FIRST); 242 return CaseFirst::Lower; 243 } 244 245 Result<bool, ICUError> Collator::GetIgnorePunctuation() const { 246 UErrorCode status = U_ZERO_ERROR; 247 UColAttributeValue alternateHandling = 248 ucol_getAttribute(mCollator.GetConst(), UCOL_ALTERNATE_HANDLING, &status); 249 if (U_FAILURE(status)) { 250 return Err(ToICUError(status)); 251 } 252 253 MOZ_ASSERT(alternateHandling == UCOL_SHIFTED || 254 alternateHandling == UCOL_NON_IGNORABLE); 255 return alternateHandling == UCOL_SHIFTED; 256 } 257 258 /* static */ 259 Result<Collator::Bcp47ExtEnumeration, ICUError> 260 Collator::GetBcp47KeywordValuesForLocale(const char* aLocale, 261 CommonlyUsed aCommonlyUsed) { 262 UErrorCode status = U_ZERO_ERROR; 263 UEnumeration* enumeration = ucol_getKeywordValuesForLocale( 264 "collation", aLocale, static_cast<bool>(aCommonlyUsed), &status); 265 266 if (U_SUCCESS(status)) { 267 return Bcp47ExtEnumeration(enumeration); 268 } 269 270 return Err(ToICUError(status)); 271 } 272 273 /* static */ 274 Result<Collator::Bcp47ExtEnumeration, ICUError> 275 Collator::GetBcp47KeywordValues() { 276 UErrorCode status = U_ZERO_ERROR; 277 UEnumeration* enumeration = ucol_getKeywordValues("collation", &status); 278 279 if (U_SUCCESS(status)) { 280 return Bcp47ExtEnumeration(enumeration); 281 } 282 283 return Err(ToICUError(status)); 284 } 285 286 /* static */ 287 SpanResult<char> Collator::KeywordValueToBcp47Extension(const char* aKeyword, 288 int32_t aLength) { 289 if (aKeyword == nullptr) { 290 return Err(InternalError{}); 291 } 292 return MakeStringSpan(uloc_toUnicodeLocaleType("co", aKeyword)); 293 } 294 295 } // namespace mozilla::intl