tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

rulebasedcollator.cpp (61988B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 * Copyright (C) 1996-2015, International Business Machines
      6 * Corporation and others.  All Rights Reserved.
      7 *******************************************************************************
      8 * rulebasedcollator.cpp
      9 *
     10 * (replaced the former tblcoll.cpp)
     11 *
     12 * created on: 2012feb14 with new and old collation code
     13 * created by: Markus W. Scherer
     14 */
     15 
     16 #include "unicode/utypes.h"
     17 
     18 #if !UCONFIG_NO_COLLATION
     19 
     20 #include "unicode/coll.h"
     21 #include "unicode/coleitr.h"
     22 #include "unicode/localpointer.h"
     23 #include "unicode/locid.h"
     24 #include "unicode/sortkey.h"
     25 #include "unicode/tblcoll.h"
     26 #include "unicode/ucol.h"
     27 #include "unicode/uiter.h"
     28 #include "unicode/uloc.h"
     29 #include "unicode/uniset.h"
     30 #include "unicode/unistr.h"
     31 #include "unicode/usetiter.h"
     32 #include "unicode/utf8.h"
     33 #include "unicode/uversion.h"
     34 #include "bocsu.h"
     35 #include "charstr.h"
     36 #include "cmemory.h"
     37 #include "collation.h"
     38 #include "collationcompare.h"
     39 #include "collationdata.h"
     40 #include "collationdatareader.h"
     41 #include "collationfastlatin.h"
     42 #include "collationiterator.h"
     43 #include "collationkeys.h"
     44 #include "collationroot.h"
     45 #include "collationsets.h"
     46 #include "collationsettings.h"
     47 #include "collationtailoring.h"
     48 #include "cstring.h"
     49 #include "uassert.h"
     50 #include "ucol_imp.h"
     51 #include "uhash.h"
     52 #include "uitercollationiterator.h"
     53 #include "ulocimp.h"
     54 #include "ustr_imp.h"
     55 #include "utf16collationiterator.h"
     56 #include "utf8collationiterator.h"
     57 #include "uvectr64.h"
     58 
     59 U_NAMESPACE_BEGIN
     60 
     61 namespace {
     62 
     63 class FixedSortKeyByteSink : public SortKeyByteSink {
     64 public:
     65    FixedSortKeyByteSink(char *dest, int32_t destCapacity)
     66            : SortKeyByteSink(dest, destCapacity) {}
     67    virtual ~FixedSortKeyByteSink();
     68 
     69 private:
     70    virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length) override;
     71    virtual UBool Resize(int32_t appendCapacity, int32_t length) override;
     72 };
     73 
     74 FixedSortKeyByteSink::~FixedSortKeyByteSink() {}
     75 
     76 void
     77 FixedSortKeyByteSink::AppendBeyondCapacity(const char *bytes, int32_t /*n*/, int32_t length) {
     78    // buffer_ != nullptr && bytes != nullptr && n > 0 && appended_ > capacity_
     79    // Fill the buffer completely.
     80    int32_t available = capacity_ - length;
     81    if (available > 0) {
     82        uprv_memcpy(buffer_ + length, bytes, available);
     83    }
     84 }
     85 
     86 UBool
     87 FixedSortKeyByteSink::Resize(int32_t /*appendCapacity*/, int32_t /*length*/) {
     88    return false;
     89 }
     90 
     91 }  // namespace
     92 
     93 // Not in an anonymous namespace, so that it can be a friend of CollationKey.
     94 class CollationKeyByteSink : public SortKeyByteSink {
     95 public:
     96    CollationKeyByteSink(CollationKey &key)
     97            : SortKeyByteSink(reinterpret_cast<char *>(key.getBytes()), key.getCapacity()),
     98              key_(key) {}
     99    virtual ~CollationKeyByteSink();
    100 
    101 private:
    102    virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length) override;
    103    virtual UBool Resize(int32_t appendCapacity, int32_t length) override;
    104 
    105    CollationKey &key_;
    106 };
    107 
    108 CollationKeyByteSink::~CollationKeyByteSink() {}
    109 
    110 void
    111 CollationKeyByteSink::AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length) {
    112    // buffer_ != nullptr && bytes != nullptr && n > 0 && appended_ > capacity_
    113    if (Resize(n, length)) {
    114        uprv_memcpy(buffer_ + length, bytes, n);
    115    }
    116 }
    117 
    118 UBool
    119 CollationKeyByteSink::Resize(int32_t appendCapacity, int32_t length) {
    120    if (buffer_ == nullptr) {
    121        return false;  // allocation failed before already
    122    }
    123    int32_t newCapacity = 2 * capacity_;
    124    int32_t altCapacity = length + 2 * appendCapacity;
    125    if (newCapacity < altCapacity) {
    126        newCapacity = altCapacity;
    127    }
    128    if (newCapacity < 200) {
    129        newCapacity = 200;
    130    }
    131    uint8_t *newBuffer = key_.reallocate(newCapacity, length);
    132    if (newBuffer == nullptr) {
    133        SetNotOk();
    134        return false;
    135    }
    136    buffer_ = reinterpret_cast<char *>(newBuffer);
    137    capacity_ = newCapacity;
    138    return true;
    139 }
    140 
    141 RuleBasedCollator::RuleBasedCollator(const RuleBasedCollator &other)
    142        : Collator(other),
    143          data(other.data),
    144          settings(other.settings),
    145          tailoring(other.tailoring),
    146          cacheEntry(other.cacheEntry),
    147          validLocale(other.validLocale),
    148          explicitlySetAttributes(other.explicitlySetAttributes),
    149          actualLocaleIsSameAsValid(other.actualLocaleIsSameAsValid) {
    150    settings->addRef();
    151    cacheEntry->addRef();
    152 }
    153 
    154 RuleBasedCollator::RuleBasedCollator(const uint8_t *bin, int32_t length,
    155                                     const RuleBasedCollator *base, UErrorCode &errorCode)
    156        : data(nullptr),
    157          settings(nullptr),
    158          tailoring(nullptr),
    159          cacheEntry(nullptr),
    160          validLocale(""),
    161          explicitlySetAttributes(0),
    162          actualLocaleIsSameAsValid(false) {
    163    if(U_FAILURE(errorCode)) { return; }
    164    if(bin == nullptr || length == 0 || base == nullptr) {
    165        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    166        return;
    167    }
    168    const CollationTailoring *root = CollationRoot::getRoot(errorCode);
    169    if(U_FAILURE(errorCode)) { return; }
    170    if(base->tailoring != root) {
    171        errorCode = U_UNSUPPORTED_ERROR;
    172        return;
    173    }
    174    LocalPointer<CollationTailoring> t(new CollationTailoring(base->tailoring->settings));
    175    if(t.isNull() || t->isBogus()) {
    176        errorCode = U_MEMORY_ALLOCATION_ERROR;
    177        return;
    178    }
    179    CollationDataReader::read(base->tailoring, bin, length, *t, errorCode);
    180    if(U_FAILURE(errorCode)) { return; }
    181    t->actualLocale.setToBogus();
    182    adoptTailoring(t.orphan(), errorCode);
    183 }
    184 
    185 RuleBasedCollator::RuleBasedCollator(const CollationCacheEntry *entry)
    186        : data(entry->tailoring->data),
    187          settings(entry->tailoring->settings),
    188          tailoring(entry->tailoring),
    189          cacheEntry(entry),
    190          validLocale(entry->validLocale),
    191          explicitlySetAttributes(0),
    192          actualLocaleIsSameAsValid(false) {
    193    settings->addRef();
    194    cacheEntry->addRef();
    195 }
    196 
    197 RuleBasedCollator::~RuleBasedCollator() {
    198    SharedObject::clearPtr(settings);
    199    SharedObject::clearPtr(cacheEntry);
    200 }
    201 
    202 void
    203 RuleBasedCollator::adoptTailoring(CollationTailoring *t, UErrorCode &errorCode) {
    204    if(U_FAILURE(errorCode)) {
    205        t->deleteIfZeroRefCount();
    206        return;
    207    }
    208    U_ASSERT(settings == nullptr && data == nullptr && tailoring == nullptr && cacheEntry == nullptr);
    209    cacheEntry = new CollationCacheEntry(t->actualLocale, t);
    210    if(cacheEntry == nullptr) {
    211        errorCode = U_MEMORY_ALLOCATION_ERROR;
    212        t->deleteIfZeroRefCount();
    213        return;
    214    }
    215    data = t->data;
    216    settings = t->settings;
    217    settings->addRef();
    218    tailoring = t;
    219    cacheEntry->addRef();
    220    validLocale = t->actualLocale;
    221    actualLocaleIsSameAsValid = false;
    222 }
    223 
    224 RuleBasedCollator *
    225 RuleBasedCollator::clone() const {
    226    return new RuleBasedCollator(*this);
    227 }
    228 
    229 RuleBasedCollator &RuleBasedCollator::operator=(const RuleBasedCollator &other) {
    230    if(this == &other) { return *this; }
    231    SharedObject::copyPtr(other.settings, settings);
    232    tailoring = other.tailoring;
    233    SharedObject::copyPtr(other.cacheEntry, cacheEntry);
    234    data = tailoring->data;
    235    validLocale = other.validLocale;
    236    explicitlySetAttributes = other.explicitlySetAttributes;
    237    actualLocaleIsSameAsValid = other.actualLocaleIsSameAsValid;
    238    return *this;
    239 }
    240 
    241 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedCollator)
    242 
    243 bool
    244 RuleBasedCollator::operator==(const Collator& other) const {
    245    if(this == &other) { return true; }
    246    if(!Collator::operator==(other)) { return false; }
    247    const RuleBasedCollator &o = static_cast<const RuleBasedCollator &>(other);
    248    if(*settings != *o.settings) { return false; }
    249    if(data == o.data) { return true; }
    250    UBool thisIsRoot = data->base == nullptr;
    251    UBool otherIsRoot = o.data->base == nullptr;
    252    U_ASSERT(!thisIsRoot || !otherIsRoot);  // otherwise their data pointers should be ==
    253    if(thisIsRoot != otherIsRoot) { return false; }
    254    if((thisIsRoot || !tailoring->rules.isEmpty()) &&
    255            (otherIsRoot || !o.tailoring->rules.isEmpty())) {
    256        // Shortcut: If both collators have valid rule strings, then compare those.
    257        if(tailoring->rules == o.tailoring->rules) { return true; }
    258    }
    259    // Different rule strings can result in the same or equivalent tailoring.
    260    // The rule strings are optional in ICU resource bundles, although included by default.
    261    // cloneBinary() drops the rule string.
    262    UErrorCode errorCode = U_ZERO_ERROR;
    263    LocalPointer<UnicodeSet> thisTailored(getTailoredSet(errorCode));
    264    LocalPointer<UnicodeSet> otherTailored(o.getTailoredSet(errorCode));
    265    if(U_FAILURE(errorCode)) { return false; }
    266    if(*thisTailored != *otherTailored) { return false; }
    267    // For completeness, we should compare all of the mappings;
    268    // or we should create a list of strings, sort it with one collator,
    269    // and check if both collators compare adjacent strings the same
    270    // (order & strength, down to quaternary); or similar.
    271    // Testing equality of collators seems unusual.
    272    return true;
    273 }
    274 
    275 int32_t
    276 RuleBasedCollator::hashCode() const {
    277    int32_t h = settings->hashCode();
    278    if(data->base == nullptr) { return h; }  // root collator
    279    // Do not rely on the rule string, see comments in operator==().
    280    UErrorCode errorCode = U_ZERO_ERROR;
    281    LocalPointer<UnicodeSet> set(getTailoredSet(errorCode));
    282    if(U_FAILURE(errorCode)) { return 0; }
    283    UnicodeSetIterator iter(*set);
    284    while(iter.next() && !iter.isString()) {
    285        h ^= data->getCE32(iter.getCodepoint());
    286    }
    287    return h;
    288 }
    289 
    290 void
    291 RuleBasedCollator::setLocales(const Locale &requested, const Locale &valid,
    292                              const Locale &actual) {
    293    if(actual == tailoring->actualLocale) {
    294        actualLocaleIsSameAsValid = false;
    295    } else {
    296        U_ASSERT(actual == valid);
    297        actualLocaleIsSameAsValid = true;
    298    }
    299    // Do not modify tailoring.actualLocale:
    300    // We cannot be sure that that would be thread-safe.
    301    validLocale = valid;
    302    (void)requested;  // Ignore, see also ticket #10477.
    303 }
    304 
    305 Locale
    306 RuleBasedCollator::getLocale(ULocDataLocaleType type, UErrorCode& errorCode) const {
    307    if(U_FAILURE(errorCode)) {
    308        return Locale::getRoot();
    309    }
    310    switch(type) {
    311    case ULOC_ACTUAL_LOCALE:
    312        return actualLocaleIsSameAsValid ? validLocale : tailoring->actualLocale;
    313    case ULOC_VALID_LOCALE:
    314        return validLocale;
    315    case ULOC_REQUESTED_LOCALE:
    316    default:
    317        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    318        return Locale::getRoot();
    319    }
    320 }
    321 
    322 const char *
    323 RuleBasedCollator::internalGetLocaleID(ULocDataLocaleType type, UErrorCode &errorCode) const {
    324    if(U_FAILURE(errorCode)) {
    325        return nullptr;
    326    }
    327    const Locale *result;
    328    switch(type) {
    329    case ULOC_ACTUAL_LOCALE:
    330        result = actualLocaleIsSameAsValid ? &validLocale : &tailoring->actualLocale;
    331        break;
    332    case ULOC_VALID_LOCALE:
    333        result = &validLocale;
    334        break;
    335    case ULOC_REQUESTED_LOCALE:
    336    default:
    337        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    338        return nullptr;
    339    }
    340    if(result->isBogus()) { return nullptr; }
    341    const char *id = result->getName();
    342    return id[0] == 0 ? "root" : id;
    343 }
    344 
    345 const UnicodeString&
    346 RuleBasedCollator::getRules() const {
    347    return tailoring->rules;
    348 }
    349 
    350 void
    351 RuleBasedCollator::getRules(UColRuleOption delta, UnicodeString &buffer) const {
    352    if(delta == UCOL_TAILORING_ONLY) {
    353        buffer = tailoring->rules;
    354        return;
    355    }
    356    // UCOL_FULL_RULES
    357    buffer.remove();
    358    CollationLoader::appendRootRules(buffer);
    359    buffer.append(tailoring->rules).getTerminatedBuffer();
    360 }
    361 
    362 void
    363 RuleBasedCollator::getVersion(UVersionInfo version) const {
    364    uprv_memcpy(version, tailoring->version, U_MAX_VERSION_LENGTH);
    365    version[0] += (UCOL_RUNTIME_VERSION << 4) + (UCOL_RUNTIME_VERSION >> 4);
    366 }
    367 
    368 UnicodeSet *
    369 RuleBasedCollator::getTailoredSet(UErrorCode &errorCode) const {
    370    if(U_FAILURE(errorCode)) { return nullptr; }
    371    UnicodeSet *tailored = new UnicodeSet();
    372    if(tailored == nullptr) {
    373        errorCode = U_MEMORY_ALLOCATION_ERROR;
    374        return nullptr;
    375    }
    376    if(data->base != nullptr) {
    377        TailoredSet(tailored).forData(data, errorCode);
    378        if(U_FAILURE(errorCode)) {
    379            delete tailored;
    380            return nullptr;
    381        }
    382    }
    383    return tailored;
    384 }
    385 
    386 void
    387 RuleBasedCollator::internalGetContractionsAndExpansions(
    388        UnicodeSet *contractions, UnicodeSet *expansions,
    389        UBool addPrefixes, UErrorCode &errorCode) const {
    390    if(U_FAILURE(errorCode)) { return; }
    391    if(contractions != nullptr) {
    392        contractions->clear();
    393    }
    394    if(expansions != nullptr) {
    395        expansions->clear();
    396    }
    397    ContractionsAndExpansions(contractions, expansions, nullptr, addPrefixes).forData(data, errorCode);
    398 }
    399 
    400 void
    401 RuleBasedCollator::internalAddContractions(UChar32 c, UnicodeSet &set, UErrorCode &errorCode) const {
    402    if(U_FAILURE(errorCode)) { return; }
    403    ContractionsAndExpansions(&set, nullptr, nullptr, false).forCodePoint(data, c, errorCode);
    404 }
    405 
    406 const CollationSettings &
    407 RuleBasedCollator::getDefaultSettings() const {
    408    return *tailoring->settings;
    409 }
    410 
    411 UColAttributeValue
    412 RuleBasedCollator::getAttribute(UColAttribute attr, UErrorCode &errorCode) const {
    413    if(U_FAILURE(errorCode)) { return UCOL_DEFAULT; }
    414    int32_t option;
    415    switch(attr) {
    416    case UCOL_FRENCH_COLLATION:
    417        option = CollationSettings::BACKWARD_SECONDARY;
    418        break;
    419    case UCOL_ALTERNATE_HANDLING:
    420        return settings->getAlternateHandling();
    421    case UCOL_CASE_FIRST:
    422        return settings->getCaseFirst();
    423    case UCOL_CASE_LEVEL:
    424        option = CollationSettings::CASE_LEVEL;
    425        break;
    426    case UCOL_NORMALIZATION_MODE:
    427        option = CollationSettings::CHECK_FCD;
    428        break;
    429    case UCOL_STRENGTH:
    430        return static_cast<UColAttributeValue>(settings->getStrength());
    431    case UCOL_HIRAGANA_QUATERNARY_MODE:
    432        // Deprecated attribute, unsettable.
    433        return UCOL_OFF;
    434    case UCOL_NUMERIC_COLLATION:
    435        option = CollationSettings::NUMERIC;
    436        break;
    437    default:
    438        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    439        return UCOL_DEFAULT;
    440    }
    441    return ((settings->options & option) == 0) ? UCOL_OFF : UCOL_ON;
    442 }
    443 
    444 void
    445 RuleBasedCollator::setAttribute(UColAttribute attr, UColAttributeValue value,
    446                                UErrorCode &errorCode) {
    447    UColAttributeValue oldValue = getAttribute(attr, errorCode);
    448    if(U_FAILURE(errorCode)) { return; }
    449    if(value == oldValue) {
    450        setAttributeExplicitly(attr);
    451        return;
    452    }
    453    const CollationSettings &defaultSettings = getDefaultSettings();
    454    if(settings == &defaultSettings) {
    455        if(value == UCOL_DEFAULT) {
    456            setAttributeDefault(attr);
    457            return;
    458        }
    459    }
    460    CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
    461    if(ownedSettings == nullptr) {
    462        errorCode = U_MEMORY_ALLOCATION_ERROR;
    463        return;
    464    }
    465 
    466    switch(attr) {
    467    case UCOL_FRENCH_COLLATION:
    468        ownedSettings->setFlag(CollationSettings::BACKWARD_SECONDARY, value,
    469                               defaultSettings.options, errorCode);
    470        break;
    471    case UCOL_ALTERNATE_HANDLING:
    472        ownedSettings->setAlternateHandling(value, defaultSettings.options, errorCode);
    473        break;
    474    case UCOL_CASE_FIRST:
    475        ownedSettings->setCaseFirst(value, defaultSettings.options, errorCode);
    476        break;
    477    case UCOL_CASE_LEVEL:
    478        ownedSettings->setFlag(CollationSettings::CASE_LEVEL, value,
    479                               defaultSettings.options, errorCode);
    480        break;
    481    case UCOL_NORMALIZATION_MODE:
    482        ownedSettings->setFlag(CollationSettings::CHECK_FCD, value,
    483                               defaultSettings.options, errorCode);
    484        break;
    485    case UCOL_STRENGTH:
    486        ownedSettings->setStrength(value, defaultSettings.options, errorCode);
    487        break;
    488    case UCOL_HIRAGANA_QUATERNARY_MODE:
    489        // Deprecated attribute. Check for valid values but do not change anything.
    490        if(value != UCOL_OFF && value != UCOL_ON && value != UCOL_DEFAULT) {
    491            errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    492        }
    493        break;
    494    case UCOL_NUMERIC_COLLATION:
    495        ownedSettings->setFlag(CollationSettings::NUMERIC, value, defaultSettings.options, errorCode);
    496        break;
    497    default:
    498        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    499        break;
    500    }
    501    if(U_FAILURE(errorCode)) { return; }
    502    setFastLatinOptions(*ownedSettings);
    503    if(value == UCOL_DEFAULT) {
    504        setAttributeDefault(attr);
    505    } else {
    506        setAttributeExplicitly(attr);
    507    }
    508 }
    509 
    510 Collator &
    511 RuleBasedCollator::setMaxVariable(UColReorderCode group, UErrorCode &errorCode) {
    512    if(U_FAILURE(errorCode)) { return *this; }
    513    // Convert the reorder code into a MaxVariable number, or UCOL_DEFAULT=-1.
    514    int32_t value;
    515    if(group == UCOL_REORDER_CODE_DEFAULT) {
    516        value = UCOL_DEFAULT;
    517    } else if(UCOL_REORDER_CODE_FIRST <= group && group <= UCOL_REORDER_CODE_CURRENCY) {
    518        value = group - UCOL_REORDER_CODE_FIRST;
    519    } else {
    520        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    521        return *this;
    522    }
    523    CollationSettings::MaxVariable oldValue = settings->getMaxVariable();
    524    if(value == oldValue) {
    525        setAttributeExplicitly(ATTR_VARIABLE_TOP);
    526        return *this;
    527    }
    528    const CollationSettings &defaultSettings = getDefaultSettings();
    529    if(settings == &defaultSettings) {
    530        if(value == UCOL_DEFAULT) {
    531            setAttributeDefault(ATTR_VARIABLE_TOP);
    532            return *this;
    533        }
    534    }
    535    CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
    536    if(ownedSettings == nullptr) {
    537        errorCode = U_MEMORY_ALLOCATION_ERROR;
    538        return *this;
    539    }
    540 
    541    if(group == UCOL_REORDER_CODE_DEFAULT) {
    542        group = static_cast<UColReorderCode>(
    543            UCOL_REORDER_CODE_FIRST + int32_t{defaultSettings.getMaxVariable()});
    544    }
    545    uint32_t varTop = data->getLastPrimaryForGroup(group);
    546    U_ASSERT(varTop != 0);
    547    ownedSettings->setMaxVariable(value, defaultSettings.options, errorCode);
    548    if(U_FAILURE(errorCode)) { return *this; }
    549    ownedSettings->variableTop = varTop;
    550    setFastLatinOptions(*ownedSettings);
    551    if(value == UCOL_DEFAULT) {
    552        setAttributeDefault(ATTR_VARIABLE_TOP);
    553    } else {
    554        setAttributeExplicitly(ATTR_VARIABLE_TOP);
    555    }
    556    return *this;
    557 }
    558 
    559 UColReorderCode
    560 RuleBasedCollator::getMaxVariable() const {
    561    return static_cast<UColReorderCode>(UCOL_REORDER_CODE_FIRST + int32_t{settings->getMaxVariable()});
    562 }
    563 
    564 uint32_t
    565 RuleBasedCollator::getVariableTop(UErrorCode & /*errorCode*/) const {
    566    return settings->variableTop;
    567 }
    568 
    569 uint32_t
    570 RuleBasedCollator::setVariableTop(const char16_t *varTop, int32_t len, UErrorCode &errorCode) {
    571    if(U_FAILURE(errorCode)) { return 0; }
    572    if(varTop == nullptr && len !=0) {
    573        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    574        return 0;
    575    }
    576    if(len < 0) { len = u_strlen(varTop); }
    577    if(len == 0) {
    578        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    579        return 0;
    580    }
    581    UBool numeric = settings->isNumeric();
    582    int64_t ce1, ce2;
    583    if(settings->dontCheckFCD()) {
    584        UTF16CollationIterator ci(data, numeric, varTop, varTop, varTop + len);
    585        ce1 = ci.nextCE(errorCode);
    586        ce2 = ci.nextCE(errorCode);
    587    } else {
    588        FCDUTF16CollationIterator ci(data, numeric, varTop, varTop, varTop + len);
    589        ce1 = ci.nextCE(errorCode);
    590        ce2 = ci.nextCE(errorCode);
    591    }
    592    if(ce1 == Collation::NO_CE || ce2 != Collation::NO_CE) {
    593        errorCode = U_CE_NOT_FOUND_ERROR;
    594        return 0;
    595    }
    596    setVariableTop(static_cast<uint32_t>(ce1 >> 32), errorCode);
    597    return settings->variableTop;
    598 }
    599 
    600 uint32_t
    601 RuleBasedCollator::setVariableTop(const UnicodeString &varTop, UErrorCode &errorCode) {
    602    return setVariableTop(varTop.getBuffer(), varTop.length(), errorCode);
    603 }
    604 
    605 void
    606 RuleBasedCollator::setVariableTop(uint32_t varTop, UErrorCode &errorCode) {
    607    if(U_FAILURE(errorCode)) { return; }
    608    if(varTop != settings->variableTop) {
    609        // Pin the variable top to the end of the reordering group which contains it.
    610        // Only a few special groups are supported.
    611        int32_t group = data->getGroupForPrimary(varTop);
    612        if(group < UCOL_REORDER_CODE_FIRST || UCOL_REORDER_CODE_CURRENCY < group) {
    613            errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    614            return;
    615        }
    616        uint32_t v = data->getLastPrimaryForGroup(group);
    617        U_ASSERT(v != 0 && v >= varTop);
    618        varTop = v;
    619        if(varTop != settings->variableTop) {
    620            CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
    621            if(ownedSettings == nullptr) {
    622                errorCode = U_MEMORY_ALLOCATION_ERROR;
    623                return;
    624            }
    625            ownedSettings->setMaxVariable(group - UCOL_REORDER_CODE_FIRST,
    626                                          getDefaultSettings().options, errorCode);
    627            if(U_FAILURE(errorCode)) { return; }
    628            ownedSettings->variableTop = varTop;
    629            setFastLatinOptions(*ownedSettings);
    630        }
    631    }
    632    if(varTop == getDefaultSettings().variableTop) {
    633        setAttributeDefault(ATTR_VARIABLE_TOP);
    634    } else {
    635        setAttributeExplicitly(ATTR_VARIABLE_TOP);
    636    }
    637 }
    638 
    639 int32_t
    640 RuleBasedCollator::getReorderCodes(int32_t *dest, int32_t capacity,
    641                                   UErrorCode &errorCode) const {
    642    if(U_FAILURE(errorCode)) { return 0; }
    643    if(capacity < 0 || (dest == nullptr && capacity > 0)) {
    644        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    645        return 0;
    646    }
    647    int32_t length = settings->reorderCodesLength;
    648    if(length == 0) { return 0; }
    649    if(length > capacity) {
    650        errorCode = U_BUFFER_OVERFLOW_ERROR;
    651        return length;
    652    }
    653    uprv_memcpy(dest, settings->reorderCodes, length * 4);
    654    return length;
    655 }
    656 
    657 void
    658 RuleBasedCollator::setReorderCodes(const int32_t *reorderCodes, int32_t length,
    659                                   UErrorCode &errorCode) {
    660    if(U_FAILURE(errorCode)) { return; }
    661    if(length < 0 || (reorderCodes == nullptr && length > 0)) {
    662        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    663        return;
    664    }
    665    if(length == 1 && reorderCodes[0] == UCOL_REORDER_CODE_NONE) {
    666        length = 0;
    667    }
    668    if(length == settings->reorderCodesLength &&
    669            uprv_memcmp(reorderCodes, settings->reorderCodes, length * 4) == 0) {
    670        return;
    671    }
    672    const CollationSettings &defaultSettings = getDefaultSettings();
    673    if(length == 1 && reorderCodes[0] == UCOL_REORDER_CODE_DEFAULT) {
    674        if(settings != &defaultSettings) {
    675            CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
    676            if(ownedSettings == nullptr) {
    677                errorCode = U_MEMORY_ALLOCATION_ERROR;
    678                return;
    679            }
    680            ownedSettings->copyReorderingFrom(defaultSettings, errorCode);
    681            setFastLatinOptions(*ownedSettings);
    682        }
    683        return;
    684    }
    685    CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
    686    if(ownedSettings == nullptr) {
    687        errorCode = U_MEMORY_ALLOCATION_ERROR;
    688        return;
    689    }
    690    ownedSettings->setReordering(*data, reorderCodes, length, errorCode);
    691    setFastLatinOptions(*ownedSettings);
    692 }
    693 
    694 void
    695 RuleBasedCollator::setFastLatinOptions(CollationSettings &ownedSettings) const {
    696    ownedSettings.fastLatinOptions = CollationFastLatin::getOptions(
    697            data, ownedSettings,
    698            ownedSettings.fastLatinPrimaries, UPRV_LENGTHOF(ownedSettings.fastLatinPrimaries));
    699 }
    700 
    701 UCollationResult
    702 RuleBasedCollator::compare(const UnicodeString &left, const UnicodeString &right,
    703                           UErrorCode &errorCode) const {
    704    if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
    705    return doCompare(left.getBuffer(), left.length(),
    706                     right.getBuffer(), right.length(), errorCode);
    707 }
    708 
    709 UCollationResult
    710 RuleBasedCollator::compare(const UnicodeString &left, const UnicodeString &right,
    711                           int32_t length, UErrorCode &errorCode) const {
    712    if(U_FAILURE(errorCode) || length == 0) { return UCOL_EQUAL; }
    713    if(length < 0) {
    714        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    715        return UCOL_EQUAL;
    716    }
    717    int32_t leftLength = left.length();
    718    int32_t rightLength = right.length();
    719    if(leftLength > length) { leftLength = length; }
    720    if(rightLength > length) { rightLength = length; }
    721    return doCompare(left.getBuffer(), leftLength,
    722                     right.getBuffer(), rightLength, errorCode);
    723 }
    724 
    725 UCollationResult
    726 RuleBasedCollator::compare(const char16_t *left, int32_t leftLength,
    727                           const char16_t *right, int32_t rightLength,
    728                           UErrorCode &errorCode) const {
    729    if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
    730    if((left == nullptr && leftLength != 0) || (right == nullptr && rightLength != 0)) {
    731        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    732        return UCOL_EQUAL;
    733    }
    734    // Make sure both or neither strings have a known length.
    735    // We do not optimize for mixed length/termination.
    736    if(leftLength >= 0) {
    737        if(rightLength < 0) { rightLength = u_strlen(right); }
    738    } else {
    739        if(rightLength >= 0) { leftLength = u_strlen(left); }
    740    }
    741    return doCompare(left, leftLength, right, rightLength, errorCode);
    742 }
    743 
    744 UCollationResult
    745 RuleBasedCollator::compareUTF8(const StringPiece &left, const StringPiece &right,
    746                               UErrorCode &errorCode) const {
    747    if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
    748    const uint8_t *leftBytes = reinterpret_cast<const uint8_t *>(left.data());
    749    const uint8_t *rightBytes = reinterpret_cast<const uint8_t *>(right.data());
    750    if((leftBytes == nullptr && !left.empty()) || (rightBytes == nullptr && !right.empty())) {
    751        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    752        return UCOL_EQUAL;
    753    }
    754    return doCompare(leftBytes, left.length(), rightBytes, right.length(), errorCode);
    755 }
    756 
    757 UCollationResult
    758 RuleBasedCollator::internalCompareUTF8(const char *left, int32_t leftLength,
    759                                       const char *right, int32_t rightLength,
    760                                       UErrorCode &errorCode) const {
    761    if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
    762    if((left == nullptr && leftLength != 0) || (right == nullptr && rightLength != 0)) {
    763        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    764        return UCOL_EQUAL;
    765    }
    766    // Make sure both or neither strings have a known length.
    767    // We do not optimize for mixed length/termination.
    768    if(leftLength >= 0) {
    769        if(rightLength < 0) { rightLength = static_cast<int32_t>(uprv_strlen(right)); }
    770    } else {
    771        if(rightLength >= 0) { leftLength = static_cast<int32_t>(uprv_strlen(left)); }
    772    }
    773    return doCompare(reinterpret_cast<const uint8_t *>(left), leftLength,
    774                     reinterpret_cast<const uint8_t *>(right), rightLength, errorCode);
    775 }
    776 
    777 namespace {
    778 
    779 /**
    780 * Abstract iterator for identical-level string comparisons.
    781 * Returns FCD code points and handles temporary switching to NFD.
    782 */
    783 class NFDIterator : public UObject {
    784 public:
    785    NFDIterator() : index(-1), length(0) {}
    786    virtual ~NFDIterator() {}
    787    /**
    788     * Returns the next code point from the internal normalization buffer,
    789     * or else the next text code point.
    790     * Returns -1 at the end of the text.
    791     */
    792    UChar32 nextCodePoint() {
    793        if(index >= 0) {
    794            if(index == length) {
    795                index = -1;
    796            } else {
    797                UChar32 c;
    798                U16_NEXT_UNSAFE(decomp, index, c);
    799                return c;
    800            }
    801        }
    802        return nextRawCodePoint();
    803    }
    804    /**
    805     * @param nfcImpl
    806     * @param c the last code point returned by nextCodePoint() or nextDecomposedCodePoint()
    807     * @return the first code point in c's decomposition,
    808     *         or c itself if it was decomposed already or if it does not decompose
    809     */
    810    UChar32 nextDecomposedCodePoint(const Normalizer2Impl &nfcImpl, UChar32 c) {
    811        if(index >= 0) { return c; }
    812        decomp = nfcImpl.getDecomposition(c, buffer, length);
    813        if(decomp == nullptr) { return c; }
    814        index = 0;
    815        U16_NEXT_UNSAFE(decomp, index, c);
    816        return c;
    817    }
    818 protected:
    819    /**
    820     * Returns the next text code point in FCD order.
    821     * Returns -1 at the end of the text.
    822     */
    823    virtual UChar32 nextRawCodePoint() = 0;
    824 private:
    825    const char16_t *decomp;
    826    char16_t buffer[4];
    827    int32_t index;
    828    int32_t length;
    829 };
    830 
    831 class UTF16NFDIterator : public NFDIterator {
    832 public:
    833    UTF16NFDIterator(const char16_t *text, const char16_t *textLimit) : s(text), limit(textLimit) {}
    834 protected:
    835    virtual UChar32 nextRawCodePoint() override {
    836        if(s == limit) { return U_SENTINEL; }
    837        UChar32 c = *s++;
    838        if(limit == nullptr && c == 0) {
    839            s = nullptr;
    840            return U_SENTINEL;
    841        }
    842        char16_t trail;
    843        if(U16_IS_LEAD(c) && s != limit && U16_IS_TRAIL(trail = *s)) {
    844            ++s;
    845            c = U16_GET_SUPPLEMENTARY(c, trail);
    846        }
    847        return c;
    848    }
    849 
    850    const char16_t *s;
    851    const char16_t *limit;
    852 };
    853 
    854 class FCDUTF16NFDIterator : public UTF16NFDIterator {
    855 public:
    856    FCDUTF16NFDIterator(const Normalizer2Impl &nfcImpl, const char16_t *text, const char16_t *textLimit)
    857            : UTF16NFDIterator(nullptr, nullptr) {
    858        UErrorCode errorCode = U_ZERO_ERROR;
    859        const char16_t *spanLimit = nfcImpl.makeFCD(text, textLimit, nullptr, errorCode);
    860        if(U_FAILURE(errorCode)) { return; }
    861        if(spanLimit == textLimit || (textLimit == nullptr && *spanLimit == 0)) {
    862            s = text;
    863            limit = spanLimit;
    864        } else {
    865            str.setTo(text, static_cast<int32_t>(spanLimit - text));
    866            {
    867                ReorderingBuffer r_buffer(nfcImpl, str);
    868                if(r_buffer.init(str.length(), errorCode)) {
    869                    nfcImpl.makeFCD(spanLimit, textLimit, &r_buffer, errorCode);
    870                }
    871            }
    872            if(U_SUCCESS(errorCode)) {
    873                s = str.getBuffer();
    874                limit = s + str.length();
    875            }
    876        }
    877    }
    878 private:
    879    UnicodeString str;
    880 };
    881 
    882 class UTF8NFDIterator : public NFDIterator {
    883 public:
    884    UTF8NFDIterator(const uint8_t *text, int32_t textLength)
    885        : s(text), pos(0), length(textLength) {}
    886 protected:
    887    virtual UChar32 nextRawCodePoint() override {
    888        if(pos == length || (s[pos] == 0 && length < 0)) { return U_SENTINEL; }
    889        UChar32 c;
    890        U8_NEXT_OR_FFFD(s, pos, length, c);
    891        return c;
    892    }
    893 
    894    const uint8_t *s;
    895    int32_t pos;
    896    int32_t length;
    897 };
    898 
    899 class FCDUTF8NFDIterator : public NFDIterator {
    900 public:
    901    FCDUTF8NFDIterator(const CollationData *data, const uint8_t *text, int32_t textLength)
    902            : u8ci(data, false, text, 0, textLength) {}
    903 protected:
    904    virtual UChar32 nextRawCodePoint() override {
    905        UErrorCode errorCode = U_ZERO_ERROR;
    906        return u8ci.nextCodePoint(errorCode);
    907    }
    908 private:
    909    FCDUTF8CollationIterator u8ci;
    910 };
    911 
    912 class UIterNFDIterator : public NFDIterator {
    913 public:
    914    UIterNFDIterator(UCharIterator &it) : iter(it) {}
    915 protected:
    916    virtual UChar32 nextRawCodePoint() override {
    917        return uiter_next32(&iter);
    918    }
    919 private:
    920    UCharIterator &iter;
    921 };
    922 
    923 class FCDUIterNFDIterator : public NFDIterator {
    924 public:
    925    FCDUIterNFDIterator(const CollationData *data, UCharIterator &it, int32_t startIndex)
    926            : uici(data, false, it, startIndex) {}
    927 protected:
    928    virtual UChar32 nextRawCodePoint() override {
    929        UErrorCode errorCode = U_ZERO_ERROR;
    930        return uici.nextCodePoint(errorCode);
    931    }
    932 private:
    933    FCDUIterCollationIterator uici;
    934 };
    935 
    936 UCollationResult compareNFDIter(const Normalizer2Impl &nfcImpl,
    937                                NFDIterator &left, NFDIterator &right) {
    938    for(;;) {
    939        // Fetch the next FCD code point from each string.
    940        UChar32 leftCp = left.nextCodePoint();
    941        UChar32 rightCp = right.nextCodePoint();
    942        if(leftCp == rightCp) {
    943            if(leftCp < 0) { break; }
    944            continue;
    945        }
    946        // If they are different, then decompose each and compare again.
    947        if(leftCp < 0) {
    948            leftCp = -2;  // end of string
    949        } else if(leftCp == 0xfffe) {
    950            leftCp = -1;  // U+FFFE: merge separator
    951        } else {
    952            leftCp = left.nextDecomposedCodePoint(nfcImpl, leftCp);
    953        }
    954        if(rightCp < 0) {
    955            rightCp = -2;  // end of string
    956        } else if(rightCp == 0xfffe) {
    957            rightCp = -1;  // U+FFFE: merge separator
    958        } else {
    959            rightCp = right.nextDecomposedCodePoint(nfcImpl, rightCp);
    960        }
    961        if(leftCp < rightCp) { return UCOL_LESS; }
    962        if(leftCp > rightCp) { return UCOL_GREATER; }
    963    }
    964    return UCOL_EQUAL;
    965 }
    966 
    967 }  // namespace
    968 
    969 UCollationResult
    970 RuleBasedCollator::doCompare(const char16_t *left, int32_t leftLength,
    971                             const char16_t *right, int32_t rightLength,
    972                             UErrorCode &errorCode) const {
    973    // U_FAILURE(errorCode) checked by caller.
    974    if(left == right && leftLength == rightLength) {
    975        return UCOL_EQUAL;
    976    }
    977 
    978    // Identical-prefix test.
    979    const char16_t *leftLimit;
    980    const char16_t *rightLimit;
    981    int32_t equalPrefixLength = 0;
    982    if(leftLength < 0) {
    983        leftLimit = nullptr;
    984        rightLimit = nullptr;
    985        char16_t c;
    986        while((c = left[equalPrefixLength]) == right[equalPrefixLength]) {
    987            if(c == 0) { return UCOL_EQUAL; }
    988            ++equalPrefixLength;
    989        }
    990    } else {
    991        leftLimit = left + leftLength;
    992        rightLimit = right + rightLength;
    993        for(;;) {
    994            if(equalPrefixLength == leftLength) {
    995                if(equalPrefixLength == rightLength) { return UCOL_EQUAL; }
    996                break;
    997            } else if(equalPrefixLength == rightLength ||
    998                      left[equalPrefixLength] != right[equalPrefixLength]) {
    999                break;
   1000            }
   1001            ++equalPrefixLength;
   1002        }
   1003    }
   1004 
   1005    UBool numeric = settings->isNumeric();
   1006    if(equalPrefixLength > 0) {
   1007        if((equalPrefixLength != leftLength &&
   1008                    data->isUnsafeBackward(left[equalPrefixLength], numeric)) ||
   1009                (equalPrefixLength != rightLength &&
   1010                    data->isUnsafeBackward(right[equalPrefixLength], numeric))) {
   1011            // Identical prefix: Back up to the start of a contraction or reordering sequence.
   1012            while(--equalPrefixLength > 0 &&
   1013                    data->isUnsafeBackward(left[equalPrefixLength], numeric)) {}
   1014        }
   1015        // Notes:
   1016        // - A longer string can compare equal to a prefix of it if only ignorables follow.
   1017        // - With a backward level, a longer string can compare less-than a prefix of it.
   1018 
   1019        // Pass the actual start of each string into the CollationIterators,
   1020        // plus the equalPrefixLength position,
   1021        // so that prefix matches back into the equal prefix work.
   1022    }
   1023 
   1024    int32_t result;
   1025    int32_t fastLatinOptions = settings->fastLatinOptions;
   1026    if(fastLatinOptions >= 0 &&
   1027            (equalPrefixLength == leftLength ||
   1028                left[equalPrefixLength] <= CollationFastLatin::LATIN_MAX) &&
   1029            (equalPrefixLength == rightLength ||
   1030                right[equalPrefixLength] <= CollationFastLatin::LATIN_MAX)) {
   1031        if(leftLength >= 0) {
   1032            result = CollationFastLatin::compareUTF16(data->fastLatinTable,
   1033                                                      settings->fastLatinPrimaries,
   1034                                                      fastLatinOptions,
   1035                                                      left + equalPrefixLength,
   1036                                                      leftLength - equalPrefixLength,
   1037                                                      right + equalPrefixLength,
   1038                                                      rightLength - equalPrefixLength);
   1039        } else {
   1040            result = CollationFastLatin::compareUTF16(data->fastLatinTable,
   1041                                                      settings->fastLatinPrimaries,
   1042                                                      fastLatinOptions,
   1043                                                      left + equalPrefixLength, -1,
   1044                                                      right + equalPrefixLength, -1);
   1045        }
   1046    } else {
   1047        result = CollationFastLatin::BAIL_OUT_RESULT;
   1048    }
   1049 
   1050    if(result == CollationFastLatin::BAIL_OUT_RESULT) {
   1051        if(settings->dontCheckFCD()) {
   1052            UTF16CollationIterator leftIter(data, numeric,
   1053                                            left, left + equalPrefixLength, leftLimit);
   1054            UTF16CollationIterator rightIter(data, numeric,
   1055                                            right, right + equalPrefixLength, rightLimit);
   1056            result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
   1057        } else {
   1058            FCDUTF16CollationIterator leftIter(data, numeric,
   1059                                              left, left + equalPrefixLength, leftLimit);
   1060            FCDUTF16CollationIterator rightIter(data, numeric,
   1061                                                right, right + equalPrefixLength, rightLimit);
   1062            result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
   1063        }
   1064    }
   1065    if(result != UCOL_EQUAL || settings->getStrength() < UCOL_IDENTICAL || U_FAILURE(errorCode)) {
   1066        return static_cast<UCollationResult>(result);
   1067    }
   1068 
   1069    // Note: If NUL-terminated, we could get the actual limits from the iterators now.
   1070    // That would complicate the iterators a bit, NUL-terminated strings are only a C convenience,
   1071    // and the benefit seems unlikely to be measurable.
   1072 
   1073    // Compare identical level.
   1074    const Normalizer2Impl &nfcImpl = data->nfcImpl;
   1075    left += equalPrefixLength;
   1076    right += equalPrefixLength;
   1077    if(settings->dontCheckFCD()) {
   1078        UTF16NFDIterator leftIter(left, leftLimit);
   1079        UTF16NFDIterator rightIter(right, rightLimit);
   1080        return compareNFDIter(nfcImpl, leftIter, rightIter);
   1081    } else {
   1082        FCDUTF16NFDIterator leftIter(nfcImpl, left, leftLimit);
   1083        FCDUTF16NFDIterator rightIter(nfcImpl, right, rightLimit);
   1084        return compareNFDIter(nfcImpl, leftIter, rightIter);
   1085    }
   1086 }
   1087 
   1088 UCollationResult
   1089 RuleBasedCollator::doCompare(const uint8_t *left, int32_t leftLength,
   1090                             const uint8_t *right, int32_t rightLength,
   1091                             UErrorCode &errorCode) const {
   1092    // U_FAILURE(errorCode) checked by caller.
   1093    if(left == right && leftLength == rightLength) {
   1094        return UCOL_EQUAL;
   1095    }
   1096 
   1097    // Identical-prefix test.
   1098    int32_t equalPrefixLength = 0;
   1099    if(leftLength < 0) {
   1100        uint8_t c;
   1101        while((c = left[equalPrefixLength]) == right[equalPrefixLength]) {
   1102            if(c == 0) { return UCOL_EQUAL; }
   1103            ++equalPrefixLength;
   1104        }
   1105    } else {
   1106        for(;;) {
   1107            if(equalPrefixLength == leftLength) {
   1108                if(equalPrefixLength == rightLength) { return UCOL_EQUAL; }
   1109                break;
   1110            } else if(equalPrefixLength == rightLength ||
   1111                      left[equalPrefixLength] != right[equalPrefixLength]) {
   1112                break;
   1113            }
   1114            ++equalPrefixLength;
   1115        }
   1116    }
   1117    // Back up to the start of a partially-equal code point.
   1118    if(equalPrefixLength > 0 &&
   1119            ((equalPrefixLength != leftLength && U8_IS_TRAIL(left[equalPrefixLength])) ||
   1120            (equalPrefixLength != rightLength && U8_IS_TRAIL(right[equalPrefixLength])))) {
   1121        while(--equalPrefixLength > 0 && U8_IS_TRAIL(left[equalPrefixLength])) {}
   1122    }
   1123 
   1124    UBool numeric = settings->isNumeric();
   1125    if(equalPrefixLength > 0) {
   1126        UBool unsafe = false;
   1127        if(equalPrefixLength != leftLength) {
   1128            int32_t i = equalPrefixLength;
   1129            UChar32 c;
   1130            U8_NEXT_OR_FFFD(left, i, leftLength, c);
   1131            unsafe = data->isUnsafeBackward(c, numeric);
   1132        }
   1133        if(!unsafe && equalPrefixLength != rightLength) {
   1134            int32_t i = equalPrefixLength;
   1135            UChar32 c;
   1136            U8_NEXT_OR_FFFD(right, i, rightLength, c);
   1137            unsafe = data->isUnsafeBackward(c, numeric);
   1138        }
   1139        if(unsafe) {
   1140            // Identical prefix: Back up to the start of a contraction or reordering sequence.
   1141            UChar32 c;
   1142            do {
   1143                U8_PREV_OR_FFFD(left, 0, equalPrefixLength, c);
   1144            } while(equalPrefixLength > 0 && data->isUnsafeBackward(c, numeric));
   1145        }
   1146        // See the notes in the UTF-16 version.
   1147 
   1148        // Pass the actual start of each string into the CollationIterators,
   1149        // plus the equalPrefixLength position,
   1150        // so that prefix matches back into the equal prefix work.
   1151    }
   1152 
   1153    int32_t result;
   1154    int32_t fastLatinOptions = settings->fastLatinOptions;
   1155    if(fastLatinOptions >= 0 &&
   1156            (equalPrefixLength == leftLength ||
   1157                left[equalPrefixLength] <= CollationFastLatin::LATIN_MAX_UTF8_LEAD) &&
   1158            (equalPrefixLength == rightLength ||
   1159                right[equalPrefixLength] <= CollationFastLatin::LATIN_MAX_UTF8_LEAD)) {
   1160        if(leftLength >= 0) {
   1161            result = CollationFastLatin::compareUTF8(data->fastLatinTable,
   1162                                                     settings->fastLatinPrimaries,
   1163                                                     fastLatinOptions,
   1164                                                     left + equalPrefixLength,
   1165                                                     leftLength - equalPrefixLength,
   1166                                                     right + equalPrefixLength,
   1167                                                     rightLength - equalPrefixLength);
   1168        } else {
   1169            result = CollationFastLatin::compareUTF8(data->fastLatinTable,
   1170                                                     settings->fastLatinPrimaries,
   1171                                                     fastLatinOptions,
   1172                                                     left + equalPrefixLength, -1,
   1173                                                     right + equalPrefixLength, -1);
   1174        }
   1175    } else {
   1176        result = CollationFastLatin::BAIL_OUT_RESULT;
   1177    }
   1178 
   1179    if(result == CollationFastLatin::BAIL_OUT_RESULT) {
   1180        if(settings->dontCheckFCD()) {
   1181            UTF8CollationIterator leftIter(data, numeric, left, equalPrefixLength, leftLength);
   1182            UTF8CollationIterator rightIter(data, numeric, right, equalPrefixLength, rightLength);
   1183            result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
   1184        } else {
   1185            FCDUTF8CollationIterator leftIter(data, numeric, left, equalPrefixLength, leftLength);
   1186            FCDUTF8CollationIterator rightIter(data, numeric, right, equalPrefixLength, rightLength);
   1187            result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
   1188        }
   1189    }
   1190    if(result != UCOL_EQUAL || settings->getStrength() < UCOL_IDENTICAL || U_FAILURE(errorCode)) {
   1191        return static_cast<UCollationResult>(result);
   1192    }
   1193 
   1194    // Note: If NUL-terminated, we could get the actual limits from the iterators now.
   1195    // That would complicate the iterators a bit, NUL-terminated strings are only a C convenience,
   1196    // and the benefit seems unlikely to be measurable.
   1197 
   1198    // Compare identical level.
   1199    const Normalizer2Impl &nfcImpl = data->nfcImpl;
   1200    left += equalPrefixLength;
   1201    right += equalPrefixLength;
   1202    if(leftLength > 0) {
   1203        leftLength -= equalPrefixLength;
   1204        rightLength -= equalPrefixLength;
   1205    }
   1206    if(settings->dontCheckFCD()) {
   1207        UTF8NFDIterator leftIter(left, leftLength);
   1208        UTF8NFDIterator rightIter(right, rightLength);
   1209        return compareNFDIter(nfcImpl, leftIter, rightIter);
   1210    } else {
   1211        FCDUTF8NFDIterator leftIter(data, left, leftLength);
   1212        FCDUTF8NFDIterator rightIter(data, right, rightLength);
   1213        return compareNFDIter(nfcImpl, leftIter, rightIter);
   1214    }
   1215 }
   1216 
   1217 UCollationResult
   1218 RuleBasedCollator::compare(UCharIterator &left, UCharIterator &right,
   1219                           UErrorCode &errorCode) const {
   1220    if(U_FAILURE(errorCode) || &left == &right) { return UCOL_EQUAL; }
   1221    UBool numeric = settings->isNumeric();
   1222 
   1223    // Identical-prefix test.
   1224    int32_t equalPrefixLength = 0;
   1225    {
   1226        UChar32 leftUnit;
   1227        UChar32 rightUnit;
   1228        while((leftUnit = left.next(&left)) == (rightUnit = right.next(&right))) {
   1229            if(leftUnit < 0) { return UCOL_EQUAL; }
   1230            ++equalPrefixLength;
   1231        }
   1232 
   1233        // Back out the code units that differed, for the real collation comparison.
   1234        if(leftUnit >= 0) { left.previous(&left); }
   1235        if(rightUnit >= 0) { right.previous(&right); }
   1236 
   1237        if(equalPrefixLength > 0) {
   1238            if((leftUnit >= 0 && data->isUnsafeBackward(leftUnit, numeric)) ||
   1239                    (rightUnit >= 0 && data->isUnsafeBackward(rightUnit, numeric))) {
   1240                // Identical prefix: Back up to the start of a contraction or reordering sequence.
   1241                do {
   1242                    --equalPrefixLength;
   1243                    leftUnit = left.previous(&left);
   1244                    right.previous(&right);
   1245                } while(equalPrefixLength > 0 && data->isUnsafeBackward(leftUnit, numeric));
   1246            }
   1247            // See the notes in the UTF-16 version.
   1248        }
   1249    }
   1250 
   1251    UCollationResult result;
   1252    if(settings->dontCheckFCD()) {
   1253        UIterCollationIterator leftIter(data, numeric, left);
   1254        UIterCollationIterator rightIter(data, numeric, right);
   1255        result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
   1256    } else {
   1257        FCDUIterCollationIterator leftIter(data, numeric, left, equalPrefixLength);
   1258        FCDUIterCollationIterator rightIter(data, numeric, right, equalPrefixLength);
   1259        result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
   1260    }
   1261    if(result != UCOL_EQUAL || settings->getStrength() < UCOL_IDENTICAL || U_FAILURE(errorCode)) {
   1262        return result;
   1263    }
   1264 
   1265    // Compare identical level.
   1266    left.move(&left, equalPrefixLength, UITER_ZERO);
   1267    right.move(&right, equalPrefixLength, UITER_ZERO);
   1268    const Normalizer2Impl &nfcImpl = data->nfcImpl;
   1269    if(settings->dontCheckFCD()) {
   1270        UIterNFDIterator leftIter(left);
   1271        UIterNFDIterator rightIter(right);
   1272        return compareNFDIter(nfcImpl, leftIter, rightIter);
   1273    } else {
   1274        FCDUIterNFDIterator leftIter(data, left, equalPrefixLength);
   1275        FCDUIterNFDIterator rightIter(data, right, equalPrefixLength);
   1276        return compareNFDIter(nfcImpl, leftIter, rightIter);
   1277    }
   1278 }
   1279 
   1280 CollationKey &
   1281 RuleBasedCollator::getCollationKey(const UnicodeString &s, CollationKey &key,
   1282                                   UErrorCode &errorCode) const {
   1283    return getCollationKey(s.getBuffer(), s.length(), key, errorCode);
   1284 }
   1285 
   1286 CollationKey &
   1287 RuleBasedCollator::getCollationKey(const char16_t *s, int32_t length, CollationKey& key,
   1288                                   UErrorCode &errorCode) const {
   1289    if(U_FAILURE(errorCode)) {
   1290        return key.setToBogus();
   1291    }
   1292    if(s == nullptr && length != 0) {
   1293        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
   1294        return key.setToBogus();
   1295    }
   1296    key.reset();  // resets the "bogus" state
   1297    CollationKeyByteSink sink(key);
   1298    writeSortKey(s, length, sink, errorCode);
   1299    if(U_FAILURE(errorCode)) {
   1300        key.setToBogus();
   1301    } else if(key.isBogus()) {
   1302        errorCode = U_MEMORY_ALLOCATION_ERROR;
   1303    } else {
   1304        key.setLength(sink.NumberOfBytesAppended());
   1305    }
   1306    return key;
   1307 }
   1308 
   1309 int32_t
   1310 RuleBasedCollator::getSortKey(const UnicodeString &s,
   1311                              uint8_t *dest, int32_t capacity) const {
   1312    return getSortKey(s.getBuffer(), s.length(), dest, capacity);
   1313 }
   1314 
   1315 int32_t
   1316 RuleBasedCollator::getSortKey(const char16_t *s, int32_t length,
   1317                              uint8_t *dest, int32_t capacity) const {
   1318    if((s == nullptr && length != 0) || capacity < 0 || (dest == nullptr && capacity > 0)) {
   1319        return 0;
   1320    }
   1321    uint8_t noDest[1] = { 0 };
   1322    if(dest == nullptr) {
   1323        // Distinguish pure preflighting from an allocation error.
   1324        dest = noDest;
   1325        capacity = 0;
   1326    }
   1327    FixedSortKeyByteSink sink(reinterpret_cast<char *>(dest), capacity);
   1328    UErrorCode errorCode = U_ZERO_ERROR;
   1329    writeSortKey(s, length, sink, errorCode);
   1330    return U_SUCCESS(errorCode) ? sink.NumberOfBytesAppended() : 0;
   1331 }
   1332 
   1333 void
   1334 RuleBasedCollator::writeSortKey(const char16_t *s, int32_t length,
   1335                                SortKeyByteSink &sink, UErrorCode &errorCode) const {
   1336    if(U_FAILURE(errorCode)) { return; }
   1337    const char16_t *limit = (length >= 0) ? s + length : nullptr;
   1338    UBool numeric = settings->isNumeric();
   1339    CollationKeys::LevelCallback callback;
   1340    if(settings->dontCheckFCD()) {
   1341        UTF16CollationIterator iter(data, numeric, s, s, limit);
   1342        CollationKeys::writeSortKeyUpToQuaternary(iter, data->compressibleBytes, *settings,
   1343                                                  sink, Collation::PRIMARY_LEVEL,
   1344                                                  callback, true, errorCode);
   1345    } else {
   1346        FCDUTF16CollationIterator iter(data, numeric, s, s, limit);
   1347        CollationKeys::writeSortKeyUpToQuaternary(iter, data->compressibleBytes, *settings,
   1348                                                  sink, Collation::PRIMARY_LEVEL,
   1349                                                  callback, true, errorCode);
   1350    }
   1351    if(settings->getStrength() == UCOL_IDENTICAL) {
   1352        writeIdenticalLevel(s, limit, sink, errorCode);
   1353    }
   1354    static const char terminator = 0;  // TERMINATOR_BYTE
   1355    sink.Append(&terminator, 1);
   1356 }
   1357 
   1358 void
   1359 RuleBasedCollator::writeIdenticalLevel(const char16_t *s, const char16_t *limit,
   1360                                       SortKeyByteSink &sink, UErrorCode &errorCode) const {
   1361    // NFD quick check
   1362    const char16_t *nfdQCYesLimit = data->nfcImpl.decompose(s, limit, nullptr, errorCode);
   1363    if(U_FAILURE(errorCode)) { return; }
   1364    sink.Append(Collation::LEVEL_SEPARATOR_BYTE);
   1365    UChar32 prev = 0;
   1366    if(nfdQCYesLimit != s) {
   1367        prev = u_writeIdenticalLevelRun(prev, s, static_cast<int32_t>(nfdQCYesLimit - s), sink);
   1368    }
   1369    // Is there non-NFD text?
   1370    int32_t destLengthEstimate;
   1371    if(limit != nullptr) {
   1372        if(nfdQCYesLimit == limit) { return; }
   1373        destLengthEstimate = static_cast<int32_t>(limit - nfdQCYesLimit);
   1374    } else {
   1375        // s is NUL-terminated
   1376        if(*nfdQCYesLimit == 0) { return; }
   1377        destLengthEstimate = -1;
   1378    }
   1379    UnicodeString nfd;
   1380    data->nfcImpl.decompose(nfdQCYesLimit, limit, nfd, destLengthEstimate, errorCode);
   1381    u_writeIdenticalLevelRun(prev, nfd.getBuffer(), nfd.length(), sink);
   1382 }
   1383 
   1384 namespace {
   1385 
   1386 /**
   1387 * internalNextSortKeyPart() calls CollationKeys::writeSortKeyUpToQuaternary()
   1388 * with an instance of this callback class.
   1389 * When another level is about to be written, the callback
   1390 * records the level and the number of bytes that will be written until
   1391 * the sink (which is actually a FixedSortKeyByteSink) fills up.
   1392 *
   1393 * When internalNextSortKeyPart() is called again, it restarts with the last level
   1394 * and ignores as many bytes as were written previously for that level.
   1395 */
   1396 class PartLevelCallback : public CollationKeys::LevelCallback {
   1397 public:
   1398    PartLevelCallback(const SortKeyByteSink &s)
   1399            : sink(s), level(Collation::PRIMARY_LEVEL) {
   1400        levelCapacity = sink.GetRemainingCapacity();
   1401    }
   1402    virtual ~PartLevelCallback() {}
   1403    virtual UBool needToWrite(Collation::Level l) override {
   1404        if(!sink.Overflowed()) {
   1405            // Remember a level that will be at least partially written.
   1406            level = l;
   1407            levelCapacity = sink.GetRemainingCapacity();
   1408            return true;
   1409        } else {
   1410            return false;
   1411        }
   1412    }
   1413    Collation::Level getLevel() const { return level; }
   1414    int32_t getLevelCapacity() const { return levelCapacity; }
   1415 
   1416 private:
   1417    const SortKeyByteSink &sink;
   1418    Collation::Level level;
   1419    int32_t levelCapacity;
   1420 };
   1421 
   1422 }  // namespace
   1423 
   1424 int32_t
   1425 RuleBasedCollator::internalNextSortKeyPart(UCharIterator *iter, uint32_t state[2],
   1426                                           uint8_t *dest, int32_t count, UErrorCode &errorCode) const {
   1427    if(U_FAILURE(errorCode)) { return 0; }
   1428    if(iter == nullptr || state == nullptr || count < 0 || (count > 0 && dest == nullptr)) {
   1429        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
   1430        return 0;
   1431    }
   1432    if(count == 0) { return 0; }
   1433 
   1434    FixedSortKeyByteSink sink(reinterpret_cast<char *>(dest), count);
   1435    sink.IgnoreBytes(static_cast<int32_t>(state[1]));
   1436    iter->move(iter, 0, UITER_START);
   1437 
   1438    Collation::Level level = static_cast<Collation::Level>(state[0]);
   1439    if(level <= Collation::QUATERNARY_LEVEL) {
   1440        UBool numeric = settings->isNumeric();
   1441        PartLevelCallback callback(sink);
   1442        if(settings->dontCheckFCD()) {
   1443            UIterCollationIterator ci(data, numeric, *iter);
   1444            CollationKeys::writeSortKeyUpToQuaternary(ci, data->compressibleBytes, *settings,
   1445                                                      sink, level, callback, false, errorCode);
   1446        } else {
   1447            FCDUIterCollationIterator ci(data, numeric, *iter, 0);
   1448            CollationKeys::writeSortKeyUpToQuaternary(ci, data->compressibleBytes, *settings,
   1449                                                      sink, level, callback, false, errorCode);
   1450        }
   1451        if(U_FAILURE(errorCode)) { return 0; }
   1452        if(sink.NumberOfBytesAppended() > count) {
   1453            state[0] = static_cast<uint32_t>(callback.getLevel());
   1454            state[1] = static_cast<uint32_t>(callback.getLevelCapacity());
   1455            return count;
   1456        }
   1457        // All of the normal levels are done.
   1458        if(settings->getStrength() == UCOL_IDENTICAL) {
   1459            level = Collation::IDENTICAL_LEVEL;
   1460            iter->move(iter, 0, UITER_START);
   1461        }
   1462        // else fall through to setting ZERO_LEVEL
   1463    }
   1464 
   1465    if(level == Collation::IDENTICAL_LEVEL) {
   1466        int32_t levelCapacity = sink.GetRemainingCapacity();
   1467        UnicodeString s;
   1468        for(;;) {
   1469            UChar32 c = iter->next(iter);
   1470            if(c < 0) { break; }
   1471            s.append(static_cast<char16_t>(c));
   1472        }
   1473        const char16_t *sArray = s.getBuffer();
   1474        writeIdenticalLevel(sArray, sArray + s.length(), sink, errorCode);
   1475        if(U_FAILURE(errorCode)) { return 0; }
   1476        if(sink.NumberOfBytesAppended() > count) {
   1477            state[0] = static_cast<uint32_t>(level);
   1478            state[1] = static_cast<uint32_t>(levelCapacity);
   1479            return count;
   1480        }
   1481    }
   1482 
   1483    // ZERO_LEVEL: Fill the remainder of dest with 00 bytes.
   1484    state[0] = static_cast<uint32_t>(Collation::ZERO_LEVEL);
   1485    state[1] = 0;
   1486    int32_t length = sink.NumberOfBytesAppended();
   1487    int32_t i = length;
   1488    while(i < count) { dest[i++] = 0; }
   1489    return length;
   1490 }
   1491 
   1492 void
   1493 RuleBasedCollator::internalGetCEs(const UnicodeString &str, UVector64 &ces,
   1494                                  UErrorCode &errorCode) const {
   1495    if(U_FAILURE(errorCode)) { return; }
   1496    const char16_t *s = str.getBuffer();
   1497    const char16_t *limit = s + str.length();
   1498    UBool numeric = settings->isNumeric();
   1499    if(settings->dontCheckFCD()) {
   1500        UTF16CollationIterator iter(data, numeric, s, s, limit);
   1501        int64_t ce;
   1502        while((ce = iter.nextCE(errorCode)) != Collation::NO_CE) {
   1503            ces.addElement(ce, errorCode);
   1504        }
   1505    } else {
   1506        FCDUTF16CollationIterator iter(data, numeric, s, s, limit);
   1507        int64_t ce;
   1508        while((ce = iter.nextCE(errorCode)) != Collation::NO_CE) {
   1509            ces.addElement(ce, errorCode);
   1510        }
   1511    }
   1512 }
   1513 
   1514 namespace {
   1515 
   1516 void appendSubtag(CharString &s, char letter, const char *subtag, int32_t length,
   1517                  UErrorCode &errorCode) {
   1518    if(U_FAILURE(errorCode) || length == 0) { return; }
   1519    if(!s.isEmpty()) {
   1520        s.append('_', errorCode);
   1521    }
   1522    s.append(letter, errorCode);
   1523    for(int32_t i = 0; i < length; ++i) {
   1524        s.append(uprv_toupper(subtag[i]), errorCode);
   1525    }
   1526 }
   1527 
   1528 void appendAttribute(CharString &s, char letter, UColAttributeValue value,
   1529                     UErrorCode &errorCode) {
   1530    if(U_FAILURE(errorCode)) { return; }
   1531    if(!s.isEmpty()) {
   1532        s.append('_', errorCode);
   1533    }
   1534    static const char *valueChars = "1234...........IXO..SN..LU......";
   1535    s.append(letter, errorCode);
   1536    s.append(valueChars[value], errorCode);
   1537 }
   1538 
   1539 }  // namespace
   1540 
   1541 int32_t
   1542 RuleBasedCollator::internalGetShortDefinitionString(const char *locale,
   1543                                                    char *buffer, int32_t capacity,
   1544                                                    UErrorCode &errorCode) const {
   1545    if(U_FAILURE(errorCode)) { return 0; }
   1546    if(buffer == nullptr ? capacity != 0 : capacity < 0) {
   1547        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
   1548        return 0;
   1549    }
   1550    if(locale == nullptr) {
   1551        locale = internalGetLocaleID(ULOC_VALID_LOCALE, errorCode);
   1552    }
   1553 
   1554    char resultLocale[ULOC_FULLNAME_CAPACITY + 1];
   1555    int32_t length = ucol_getFunctionalEquivalent(resultLocale, ULOC_FULLNAME_CAPACITY,
   1556                                                  "collation", locale,
   1557                                                  nullptr, &errorCode);
   1558    if(U_FAILURE(errorCode)) { return 0; }
   1559    resultLocale[length] = 0;
   1560 
   1561    // Append items in alphabetic order of their short definition letters.
   1562    CharString result;
   1563 
   1564    if(attributeHasBeenSetExplicitly(UCOL_ALTERNATE_HANDLING)) {
   1565        appendAttribute(result, 'A', getAttribute(UCOL_ALTERNATE_HANDLING, errorCode), errorCode);
   1566    }
   1567    // ATTR_VARIABLE_TOP not supported because 'B' was broken.
   1568    // See ICU tickets #10372 and #10386.
   1569    if(attributeHasBeenSetExplicitly(UCOL_CASE_FIRST)) {
   1570        appendAttribute(result, 'C', getAttribute(UCOL_CASE_FIRST, errorCode), errorCode);
   1571    }
   1572    if(attributeHasBeenSetExplicitly(UCOL_NUMERIC_COLLATION)) {
   1573        appendAttribute(result, 'D', getAttribute(UCOL_NUMERIC_COLLATION, errorCode), errorCode);
   1574    }
   1575    if(attributeHasBeenSetExplicitly(UCOL_CASE_LEVEL)) {
   1576        appendAttribute(result, 'E', getAttribute(UCOL_CASE_LEVEL, errorCode), errorCode);
   1577    }
   1578    if(attributeHasBeenSetExplicitly(UCOL_FRENCH_COLLATION)) {
   1579        appendAttribute(result, 'F', getAttribute(UCOL_FRENCH_COLLATION, errorCode), errorCode);
   1580    }
   1581    // Note: UCOL_HIRAGANA_QUATERNARY_MODE is deprecated and never changes away from default.
   1582    CharString collation = ulocimp_getKeywordValue(resultLocale, "collation", errorCode);
   1583    appendSubtag(result, 'K', collation.data(), collation.length(), errorCode);
   1584    CharString language;
   1585    CharString script;
   1586    CharString region;
   1587    CharString variant;
   1588    ulocimp_getSubtags(resultLocale, &language, &script, &region, &variant, nullptr, errorCode);
   1589    if (language.isEmpty()) {
   1590        appendSubtag(result, 'L', "root", 4, errorCode);
   1591    } else {
   1592        appendSubtag(result, 'L', language.data(), language.length(), errorCode);
   1593    }
   1594    if(attributeHasBeenSetExplicitly(UCOL_NORMALIZATION_MODE)) {
   1595        appendAttribute(result, 'N', getAttribute(UCOL_NORMALIZATION_MODE, errorCode), errorCode);
   1596    }
   1597    appendSubtag(result, 'R', region.data(), region.length(), errorCode);
   1598    if(attributeHasBeenSetExplicitly(UCOL_STRENGTH)) {
   1599        appendAttribute(result, 'S', getAttribute(UCOL_STRENGTH, errorCode), errorCode);
   1600    }
   1601    appendSubtag(result, 'V', variant.data(), variant.length(), errorCode);
   1602    appendSubtag(result, 'Z', script.data(), script.length(), errorCode);
   1603 
   1604    if(U_FAILURE(errorCode)) { return 0; }
   1605    return result.extract(buffer, capacity, errorCode);
   1606 }
   1607 
   1608 UBool
   1609 RuleBasedCollator::isUnsafe(UChar32 c) const {
   1610    return data->isUnsafeBackward(c, settings->isNumeric());
   1611 }
   1612 
   1613 void U_CALLCONV
   1614 RuleBasedCollator::computeMaxExpansions(const CollationTailoring *t, UErrorCode &errorCode) {
   1615    t->maxExpansions = CollationElementIterator::computeMaxExpansions(t->data, errorCode);
   1616 }
   1617 
   1618 UBool
   1619 RuleBasedCollator::initMaxExpansions(UErrorCode &errorCode) const {
   1620    umtx_initOnce(tailoring->maxExpansionsInitOnce, computeMaxExpansions, tailoring, errorCode);
   1621    return U_SUCCESS(errorCode);
   1622 }
   1623 
   1624 CollationElementIterator *
   1625 RuleBasedCollator::createCollationElementIterator(const UnicodeString& source) const {
   1626    UErrorCode errorCode = U_ZERO_ERROR;
   1627    if(!initMaxExpansions(errorCode)) { return nullptr; }
   1628    CollationElementIterator *cei = new CollationElementIterator(source, this, errorCode);
   1629    if(U_FAILURE(errorCode)) {
   1630        delete cei;
   1631        return nullptr;
   1632    }
   1633    return cei;
   1634 }
   1635 
   1636 CollationElementIterator *
   1637 RuleBasedCollator::createCollationElementIterator(const CharacterIterator& source) const {
   1638    UErrorCode errorCode = U_ZERO_ERROR;
   1639    if(!initMaxExpansions(errorCode)) { return nullptr; }
   1640    CollationElementIterator *cei = new CollationElementIterator(source, this, errorCode);
   1641    if(U_FAILURE(errorCode)) {
   1642        delete cei;
   1643        return nullptr;
   1644    }
   1645    return cei;
   1646 }
   1647 
   1648 int32_t
   1649 RuleBasedCollator::getMaxExpansion(int32_t order) const {
   1650    UErrorCode errorCode = U_ZERO_ERROR;
   1651    (void)initMaxExpansions(errorCode);
   1652    return CollationElementIterator::getMaxExpansion(tailoring->maxExpansions, order);
   1653 }
   1654 
   1655 U_NAMESPACE_END
   1656 
   1657 #endif  // !UCONFIG_NO_COLLATION