tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

collationsets.cpp (21825B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 * Copyright (C) 2013-2014, International Business Machines
      6 * Corporation and others.  All Rights Reserved.
      7 *******************************************************************************
      8 * collationsets.cpp
      9 *
     10 * created on: 2013feb09
     11 * created by: Markus W. Scherer
     12 */
     13 
     14 #include "unicode/utypes.h"
     15 
     16 #if !UCONFIG_NO_COLLATION
     17 
     18 #include "unicode/ucharstrie.h"
     19 #include "unicode/uniset.h"
     20 #include "unicode/unistr.h"
     21 #include "unicode/ustringtrie.h"
     22 #include "collation.h"
     23 #include "collationdata.h"
     24 #include "collationsets.h"
     25 #include "normalizer2impl.h"
     26 #include "uassert.h"
     27 #include "utf16collationiterator.h"
     28 #include "utrie2.h"
     29 
     30 U_NAMESPACE_BEGIN
     31 
     32 U_CDECL_BEGIN
     33 
     34 static UBool U_CALLCONV
     35 enumTailoredRange(const void *context, UChar32 start, UChar32 end, uint32_t ce32) {
     36    if(ce32 == Collation::FALLBACK_CE32) {
     37        return true;  // fallback to base, not tailored
     38    }
     39    TailoredSet *ts = (TailoredSet *)context;
     40    return ts->handleCE32(start, end, ce32);
     41 }
     42 
     43 U_CDECL_END
     44 
     45 void
     46 TailoredSet::forData(const CollationData *d, UErrorCode &ec) {
     47    if(U_FAILURE(ec)) { return; }
     48    errorCode = ec;  // Preserve info & warning codes.
     49    data = d;
     50    baseData = d->base;
     51    U_ASSERT(baseData != nullptr);
     52    utrie2_enum(data->trie, nullptr, enumTailoredRange, this);
     53    ec = errorCode;
     54 }
     55 
     56 UBool
     57 TailoredSet::handleCE32(UChar32 start, UChar32 end, uint32_t ce32) {
     58    U_ASSERT(ce32 != Collation::FALLBACK_CE32);
     59    if(Collation::isSpecialCE32(ce32)) {
     60        ce32 = data->getIndirectCE32(ce32);
     61        if(ce32 == Collation::FALLBACK_CE32) {
     62            return U_SUCCESS(errorCode);
     63        }
     64    }
     65    do {
     66        uint32_t baseCE32 = baseData->getFinalCE32(baseData->getCE32(start));
     67        // Do not just continue if ce32 == baseCE32 because
     68        // contractions and expansions in different data objects
     69        // normally differ even if they have the same data offsets.
     70        if(Collation::isSelfContainedCE32(ce32) && Collation::isSelfContainedCE32(baseCE32)) {
     71            // fastpath
     72            if(ce32 != baseCE32) {
     73                tailored->add(start);
     74            }
     75        } else {
     76            compare(start, ce32, baseCE32);
     77        }
     78    } while(++start <= end);
     79    return U_SUCCESS(errorCode);
     80 }
     81 
     82 void
     83 TailoredSet::compare(UChar32 c, uint32_t ce32, uint32_t baseCE32) {
     84    if(Collation::isPrefixCE32(ce32)) {
     85        const char16_t *p = data->contexts + Collation::indexFromCE32(ce32);
     86        ce32 = data->getFinalCE32(CollationData::readCE32(p));
     87        if(Collation::isPrefixCE32(baseCE32)) {
     88            const char16_t *q = baseData->contexts + Collation::indexFromCE32(baseCE32);
     89            baseCE32 = baseData->getFinalCE32(CollationData::readCE32(q));
     90            comparePrefixes(c, p + 2, q + 2);
     91        } else {
     92            addPrefixes(data, c, p + 2);
     93        }
     94    } else if(Collation::isPrefixCE32(baseCE32)) {
     95        const char16_t *q = baseData->contexts + Collation::indexFromCE32(baseCE32);
     96        baseCE32 = baseData->getFinalCE32(CollationData::readCE32(q));
     97        addPrefixes(baseData, c, q + 2);
     98    }
     99 
    100    if(Collation::isContractionCE32(ce32)) {
    101        const char16_t *p = data->contexts + Collation::indexFromCE32(ce32);
    102        if((ce32 & Collation::CONTRACT_SINGLE_CP_NO_MATCH) != 0) {
    103            ce32 = Collation::NO_CE32;
    104        } else {
    105            ce32 = data->getFinalCE32(CollationData::readCE32(p));
    106        }
    107        if(Collation::isContractionCE32(baseCE32)) {
    108            const char16_t *q = baseData->contexts + Collation::indexFromCE32(baseCE32);
    109            if((baseCE32 & Collation::CONTRACT_SINGLE_CP_NO_MATCH) != 0) {
    110                baseCE32 = Collation::NO_CE32;
    111            } else {
    112                baseCE32 = baseData->getFinalCE32(CollationData::readCE32(q));
    113            }
    114            compareContractions(c, p + 2, q + 2);
    115        } else {
    116            addContractions(c, p + 2);
    117        }
    118    } else if(Collation::isContractionCE32(baseCE32)) {
    119        const char16_t *q = baseData->contexts + Collation::indexFromCE32(baseCE32);
    120        baseCE32 = baseData->getFinalCE32(CollationData::readCE32(q));
    121        addContractions(c, q + 2);
    122    }
    123 
    124    int32_t tag;
    125    if(Collation::isSpecialCE32(ce32)) {
    126        tag = Collation::tagFromCE32(ce32);
    127        U_ASSERT(tag != Collation::PREFIX_TAG);
    128        U_ASSERT(tag != Collation::CONTRACTION_TAG);
    129        // Currently, the tailoring data builder does not write offset tags.
    130        // They might be useful for saving space,
    131        // but they would complicate the builder,
    132        // and in tailorings we assume that performance of tailored characters is more important.
    133        U_ASSERT(tag != Collation::OFFSET_TAG);
    134    } else {
    135        tag = -1;
    136    }
    137    int32_t baseTag;
    138    if(Collation::isSpecialCE32(baseCE32)) {
    139        baseTag = Collation::tagFromCE32(baseCE32);
    140        U_ASSERT(baseTag != Collation::PREFIX_TAG);
    141        U_ASSERT(baseTag != Collation::CONTRACTION_TAG);
    142    } else {
    143        baseTag = -1;
    144    }
    145 
    146    // Non-contextual mappings, expansions, etc.
    147    if(baseTag == Collation::OFFSET_TAG) {
    148        // We might be comparing a tailoring CE which is a copy of
    149        // a base offset-tag CE, via the [optimize [set]] syntax
    150        // or when a single-character mapping was copied for tailored contractions.
    151        // Offset tags always result in long-primary CEs,
    152        // with common secondary/tertiary weights.
    153        if(!Collation::isLongPrimaryCE32(ce32)) {
    154            add(c);
    155            return;
    156        }
    157        int64_t dataCE = baseData->ces[Collation::indexFromCE32(baseCE32)];
    158        uint32_t p = Collation::getThreeBytePrimaryForOffsetData(c, dataCE);
    159        if(Collation::primaryFromLongPrimaryCE32(ce32) != p) {
    160            add(c);
    161            return;
    162        }
    163    }
    164 
    165    if(tag != baseTag) {
    166        add(c);
    167        return;
    168    }
    169 
    170    if(tag == Collation::EXPANSION32_TAG) {
    171        const uint32_t *ce32s = data->ce32s + Collation::indexFromCE32(ce32);
    172        int32_t length = Collation::lengthFromCE32(ce32);
    173 
    174        const uint32_t *baseCE32s = baseData->ce32s + Collation::indexFromCE32(baseCE32);
    175        int32_t baseLength = Collation::lengthFromCE32(baseCE32);
    176 
    177        if(length != baseLength) {
    178            add(c);
    179            return;
    180        }
    181        for(int32_t i = 0; i < length; ++i) {
    182            if(ce32s[i] != baseCE32s[i]) {
    183                add(c);
    184                break;
    185            }
    186        }
    187    } else if(tag == Collation::EXPANSION_TAG) {
    188        const int64_t *ces = data->ces + Collation::indexFromCE32(ce32);
    189        int32_t length = Collation::lengthFromCE32(ce32);
    190 
    191        const int64_t *baseCEs = baseData->ces + Collation::indexFromCE32(baseCE32);
    192        int32_t baseLength = Collation::lengthFromCE32(baseCE32);
    193 
    194        if(length != baseLength) {
    195            add(c);
    196            return;
    197        }
    198        for(int32_t i = 0; i < length; ++i) {
    199            if(ces[i] != baseCEs[i]) {
    200                add(c);
    201                break;
    202            }
    203        }
    204    } else if(tag == Collation::HANGUL_TAG) {
    205        char16_t jamos[3];
    206        int32_t length = Hangul::decompose(c, jamos);
    207        if(tailored->contains(jamos[0]) || tailored->contains(jamos[1]) ||
    208                (length == 3 && tailored->contains(jamos[2]))) {
    209            add(c);
    210        }
    211    } else if(ce32 != baseCE32) {
    212        add(c);
    213    }
    214 }
    215 
    216 void
    217 TailoredSet::comparePrefixes(UChar32 c, const char16_t *p, const char16_t *q) {
    218    // Parallel iteration over prefixes of both tables.
    219    UCharsTrie::Iterator prefixes(p, 0, errorCode);
    220    UCharsTrie::Iterator basePrefixes(q, 0, errorCode);
    221    const UnicodeString *tp = nullptr;  // Tailoring prefix.
    222    const UnicodeString *bp = nullptr;  // Base prefix.
    223    // Use a string with a U+FFFF as the limit sentinel.
    224    // U+FFFF is untailorable and will not occur in prefixes.
    225    UnicodeString none(static_cast<char16_t>(0xffff));
    226    for(;;) {
    227        if(tp == nullptr) {
    228            if(prefixes.next(errorCode)) {
    229                tp = &prefixes.getString();
    230            } else {
    231                tp = &none;
    232            }
    233        }
    234        if(bp == nullptr) {
    235            if(basePrefixes.next(errorCode)) {
    236                bp = &basePrefixes.getString();
    237            } else {
    238                bp = &none;
    239            }
    240        }
    241        if(tp == &none && bp == &none) { break; }
    242        int32_t cmp = tp->compare(*bp);
    243        if(cmp < 0) {
    244            // tp occurs in the tailoring but not in the base.
    245            addPrefix(data, *tp, c, static_cast<uint32_t>(prefixes.getValue()));
    246            tp = nullptr;
    247        } else if(cmp > 0) {
    248            // bp occurs in the base but not in the tailoring.
    249            addPrefix(baseData, *bp, c, static_cast<uint32_t>(basePrefixes.getValue()));
    250            bp = nullptr;
    251        } else {
    252            setPrefix(*tp);
    253            compare(c, static_cast<uint32_t>(prefixes.getValue()), static_cast<uint32_t>(basePrefixes.getValue()));
    254            resetPrefix();
    255            tp = nullptr;
    256            bp = nullptr;
    257        }
    258    }
    259 }
    260 
    261 void
    262 TailoredSet::compareContractions(UChar32 c, const char16_t *p, const char16_t *q) {
    263    // Parallel iteration over suffixes of both tables.
    264    UCharsTrie::Iterator suffixes(p, 0, errorCode);
    265    UCharsTrie::Iterator baseSuffixes(q, 0, errorCode);
    266    const UnicodeString *ts = nullptr;  // Tailoring suffix.
    267    const UnicodeString *bs = nullptr;  // Base suffix.
    268    // Use a string with two U+FFFF as the limit sentinel.
    269    // U+FFFF is untailorable and will not occur in contractions except maybe
    270    // as a single suffix character for a root-collator boundary contraction.
    271    UnicodeString none(static_cast<char16_t>(0xffff));
    272    none.append(static_cast<char16_t>(0xffff));
    273    for(;;) {
    274        if(ts == nullptr) {
    275            if(suffixes.next(errorCode)) {
    276                ts = &suffixes.getString();
    277            } else {
    278                ts = &none;
    279            }
    280        }
    281        if(bs == nullptr) {
    282            if(baseSuffixes.next(errorCode)) {
    283                bs = &baseSuffixes.getString();
    284            } else {
    285                bs = &none;
    286            }
    287        }
    288        if(ts == &none && bs == &none) { break; }
    289        int32_t cmp = ts->compare(*bs);
    290        if(cmp < 0) {
    291            // ts occurs in the tailoring but not in the base.
    292            addSuffix(c, *ts);
    293            ts = nullptr;
    294        } else if(cmp > 0) {
    295            // bs occurs in the base but not in the tailoring.
    296            addSuffix(c, *bs);
    297            bs = nullptr;
    298        } else {
    299            suffix = ts;
    300            compare(c, static_cast<uint32_t>(suffixes.getValue()), static_cast<uint32_t>(baseSuffixes.getValue()));
    301            suffix = nullptr;
    302            ts = nullptr;
    303            bs = nullptr;
    304        }
    305    }
    306 }
    307 
    308 void
    309 TailoredSet::addPrefixes(const CollationData *d, UChar32 c, const char16_t *p) {
    310    UCharsTrie::Iterator prefixes(p, 0, errorCode);
    311    while(prefixes.next(errorCode)) {
    312        addPrefix(d, prefixes.getString(), c, static_cast<uint32_t>(prefixes.getValue()));
    313    }
    314 }
    315 
    316 void
    317 TailoredSet::addPrefix(const CollationData *d, const UnicodeString &pfx, UChar32 c, uint32_t ce32) {
    318    setPrefix(pfx);
    319    ce32 = d->getFinalCE32(ce32);
    320    if(Collation::isContractionCE32(ce32)) {
    321        const char16_t *p = d->contexts + Collation::indexFromCE32(ce32);
    322        addContractions(c, p + 2);
    323    }
    324    tailored->add(UnicodeString(unreversedPrefix).append(c));
    325    resetPrefix();
    326 }
    327 
    328 void
    329 TailoredSet::addContractions(UChar32 c, const char16_t *p) {
    330    UCharsTrie::Iterator suffixes(p, 0, errorCode);
    331    while(suffixes.next(errorCode)) {
    332        addSuffix(c, suffixes.getString());
    333    }
    334 }
    335 
    336 void
    337 TailoredSet::addSuffix(UChar32 c, const UnicodeString &sfx) {
    338    tailored->add(UnicodeString(unreversedPrefix).append(c).append(sfx));
    339 }
    340 
    341 void
    342 TailoredSet::add(UChar32 c) {
    343    if(unreversedPrefix.isEmpty() && suffix == nullptr) {
    344        tailored->add(c);
    345    } else {
    346        UnicodeString s(unreversedPrefix);
    347        s.append(c);
    348        if(suffix != nullptr) {
    349            s.append(*suffix);
    350        }
    351        tailored->add(s);
    352    }
    353 }
    354 
    355 ContractionsAndExpansions::CESink::~CESink() {}
    356 
    357 U_CDECL_BEGIN
    358 
    359 static UBool U_CALLCONV
    360 enumCnERange(const void *context, UChar32 start, UChar32 end, uint32_t ce32) {
    361    ContractionsAndExpansions *cne = (ContractionsAndExpansions *)context;
    362    if(cne->checkTailored == 0) {
    363        // There is no tailoring.
    364        // No need to collect nor check the tailored set.
    365    } else if(cne->checkTailored < 0) {
    366        // Collect the set of code points with mappings in the tailoring data.
    367        if(ce32 == Collation::FALLBACK_CE32) {
    368            return true;  // fallback to base, not tailored
    369        } else {
    370            cne->tailored.add(start, end);
    371        }
    372        // checkTailored > 0: Exclude tailored ranges from the base data enumeration.
    373    } else if(start == end) {
    374        if(cne->tailored.contains(start)) {
    375            return true;
    376        }
    377    } else if(cne->tailored.containsSome(start, end)) {
    378        cne->ranges.set(start, end).removeAll(cne->tailored);
    379        int32_t count = cne->ranges.getRangeCount();
    380        for(int32_t i = 0; i < count; ++i) {
    381            cne->handleCE32(cne->ranges.getRangeStart(i), cne->ranges.getRangeEnd(i), ce32);
    382        }
    383        return U_SUCCESS(cne->errorCode);
    384    }
    385    cne->handleCE32(start, end, ce32);
    386    return U_SUCCESS(cne->errorCode);
    387 }
    388 
    389 U_CDECL_END
    390 
    391 void
    392 ContractionsAndExpansions::forData(const CollationData *d, UErrorCode &ec) {
    393    if(U_FAILURE(ec)) { return; }
    394    errorCode = ec;  // Preserve info & warning codes.
    395    // Add all from the data, can be tailoring or base.
    396    if(d->base != nullptr) {
    397        checkTailored = -1;
    398    }
    399    data = d;
    400    utrie2_enum(data->trie, nullptr, enumCnERange, this);
    401    if(d->base == nullptr || U_FAILURE(errorCode)) {
    402        ec = errorCode;
    403        return;
    404    }
    405    // Add all from the base data but only for un-tailored code points.
    406    tailored.freeze();
    407    checkTailored = 1;
    408    data = d->base;
    409    utrie2_enum(data->trie, nullptr, enumCnERange, this);
    410    ec = errorCode;
    411 }
    412 
    413 void
    414 ContractionsAndExpansions::forCodePoint(const CollationData *d, UChar32 c, UErrorCode &ec) {
    415    if(U_FAILURE(ec)) { return; }
    416    errorCode = ec;  // Preserve info & warning codes.
    417    uint32_t ce32 = d->getCE32(c);
    418    if(ce32 == Collation::FALLBACK_CE32) {
    419        d = d->base;
    420        ce32 = d->getCE32(c);
    421    }
    422    data = d;
    423    handleCE32(c, c, ce32);
    424    ec = errorCode;
    425 }
    426 
    427 void
    428 ContractionsAndExpansions::handleCE32(UChar32 start, UChar32 end, uint32_t ce32) {
    429    for(;;) {
    430        if((ce32 & 0xff) < Collation::SPECIAL_CE32_LOW_BYTE) {
    431            // !isSpecialCE32()
    432            if(sink != nullptr) {
    433                sink->handleCE(Collation::ceFromSimpleCE32(ce32));
    434            }
    435            return;
    436        }
    437        switch(Collation::tagFromCE32(ce32)) {
    438        case Collation::FALLBACK_TAG:
    439            return;
    440        case Collation::RESERVED_TAG_3:
    441        case Collation::BUILDER_DATA_TAG:
    442        case Collation::LEAD_SURROGATE_TAG:
    443            if(U_SUCCESS(errorCode)) { errorCode = U_INTERNAL_PROGRAM_ERROR; }
    444            return;
    445        case Collation::LONG_PRIMARY_TAG:
    446            if(sink != nullptr) {
    447                sink->handleCE(Collation::ceFromLongPrimaryCE32(ce32));
    448            }
    449            return;
    450        case Collation::LONG_SECONDARY_TAG:
    451            if(sink != nullptr) {
    452                sink->handleCE(Collation::ceFromLongSecondaryCE32(ce32));
    453            }
    454            return;
    455        case Collation::LATIN_EXPANSION_TAG:
    456            if(sink != nullptr) {
    457                ces[0] = Collation::latinCE0FromCE32(ce32);
    458                ces[1] = Collation::latinCE1FromCE32(ce32);
    459                sink->handleExpansion(ces, 2);
    460            }
    461            // Optimization: If we have a prefix,
    462            // then the relevant strings have been added already.
    463            if(unreversedPrefix.isEmpty()) {
    464                addExpansions(start, end);
    465            }
    466            return;
    467        case Collation::EXPANSION32_TAG:
    468            if(sink != nullptr) {
    469                const uint32_t *ce32s = data->ce32s + Collation::indexFromCE32(ce32);
    470                int32_t length = Collation::lengthFromCE32(ce32);
    471                for(int32_t i = 0; i < length; ++i) {
    472                    ces[i] = Collation::ceFromCE32(*ce32s++);
    473                }
    474                sink->handleExpansion(ces, length);
    475            }
    476            // Optimization: If we have a prefix,
    477            // then the relevant strings have been added already.
    478            if(unreversedPrefix.isEmpty()) {
    479                addExpansions(start, end);
    480            }
    481            return;
    482        case Collation::EXPANSION_TAG:
    483            if(sink != nullptr) {
    484                int32_t length = Collation::lengthFromCE32(ce32);
    485                sink->handleExpansion(data->ces + Collation::indexFromCE32(ce32), length);
    486            }
    487            // Optimization: If we have a prefix,
    488            // then the relevant strings have been added already.
    489            if(unreversedPrefix.isEmpty()) {
    490                addExpansions(start, end);
    491            }
    492            return;
    493        case Collation::PREFIX_TAG:
    494            handlePrefixes(start, end, ce32);
    495            return;
    496        case Collation::CONTRACTION_TAG:
    497            handleContractions(start, end, ce32);
    498            return;
    499        case Collation::DIGIT_TAG:
    500            // Fetch the non-numeric-collation CE32 and continue.
    501            ce32 = data->ce32s[Collation::indexFromCE32(ce32)];
    502            break;
    503        case Collation::U0000_TAG:
    504            U_ASSERT(start == 0 && end == 0);
    505            // Fetch the normal ce32 for U+0000 and continue.
    506            ce32 = data->ce32s[0];
    507            break;
    508        case Collation::HANGUL_TAG:
    509            if(sink != nullptr) {
    510                // TODO: This should be optimized,
    511                // especially if [start..end] is the complete Hangul range. (assert that)
    512                UTF16CollationIterator iter(data, false, nullptr, nullptr, nullptr);
    513                char16_t hangul[1] = { 0 };
    514                for(UChar32 c = start; c <= end; ++c) {
    515                    hangul[0] = static_cast<char16_t>(c);
    516                    iter.setText(hangul, hangul + 1);
    517                    int32_t length = iter.fetchCEs(errorCode);
    518                    if(U_FAILURE(errorCode)) { return; }
    519                    // Ignore the terminating non-CE.
    520                    U_ASSERT(length >= 2 && iter.getCE(length - 1) == Collation::NO_CE);
    521                    sink->handleExpansion(iter.getCEs(), length - 1);
    522                }
    523            }
    524            // Optimization: If we have a prefix,
    525            // then the relevant strings have been added already.
    526            if(unreversedPrefix.isEmpty()) {
    527                addExpansions(start, end);
    528            }
    529            return;
    530        case Collation::OFFSET_TAG:
    531            // Currently no need to send offset CEs to the sink.
    532            return;
    533        case Collation::IMPLICIT_TAG:
    534            // Currently no need to send implicit CEs to the sink.
    535            return;
    536        }
    537    }
    538 }
    539 
    540 void
    541 ContractionsAndExpansions::handlePrefixes(
    542        UChar32 start, UChar32 end, uint32_t ce32) {
    543    const char16_t *p = data->contexts + Collation::indexFromCE32(ce32);
    544    ce32 = CollationData::readCE32(p);  // Default if no prefix match.
    545    handleCE32(start, end, ce32);
    546    if(!addPrefixes) { return; }
    547    UCharsTrie::Iterator prefixes(p + 2, 0, errorCode);
    548    while(prefixes.next(errorCode)) {
    549        setPrefix(prefixes.getString());
    550        // Prefix/pre-context mappings are special kinds of contractions
    551        // that always yield expansions.
    552        addStrings(start, end, contractions);
    553        addStrings(start, end, expansions);
    554        handleCE32(start, end, static_cast<uint32_t>(prefixes.getValue()));
    555    }
    556    resetPrefix();
    557 }
    558 
    559 void
    560 ContractionsAndExpansions::handleContractions(
    561        UChar32 start, UChar32 end, uint32_t ce32) {
    562    const char16_t *p = data->contexts + Collation::indexFromCE32(ce32);
    563    if((ce32 & Collation::CONTRACT_SINGLE_CP_NO_MATCH) != 0) {
    564        // No match on the single code point.
    565        // We are underneath a prefix, and the default mapping is just
    566        // a fallback to the mappings for a shorter prefix.
    567        U_ASSERT(!unreversedPrefix.isEmpty());
    568    } else {
    569        ce32 = CollationData::readCE32(p);  // Default if no suffix match.
    570        U_ASSERT(!Collation::isContractionCE32(ce32));
    571        handleCE32(start, end, ce32);
    572    }
    573    UCharsTrie::Iterator suffixes(p + 2, 0, errorCode);
    574    while(suffixes.next(errorCode)) {
    575        suffix = &suffixes.getString();
    576        addStrings(start, end, contractions);
    577        if(!unreversedPrefix.isEmpty()) {
    578            addStrings(start, end, expansions);
    579        }
    580        handleCE32(start, end, static_cast<uint32_t>(suffixes.getValue()));
    581    }
    582    suffix = nullptr;
    583 }
    584 
    585 void
    586 ContractionsAndExpansions::addExpansions(UChar32 start, UChar32 end) {
    587    if(unreversedPrefix.isEmpty() && suffix == nullptr) {
    588        if(expansions != nullptr) {
    589            expansions->add(start, end);
    590        }
    591    } else {
    592        addStrings(start, end, expansions);
    593    }
    594 }
    595 
    596 void
    597 ContractionsAndExpansions::addStrings(UChar32 start, UChar32 end, UnicodeSet *set) {
    598    if(set == nullptr) { return; }
    599    UnicodeString s(unreversedPrefix);
    600    do {
    601        s.append(start);
    602        if(suffix != nullptr) {
    603            s.append(*suffix);
    604        }
    605        set->add(s);
    606        s.truncate(unreversedPrefix.length());
    607    } while(++start <= end);
    608 }
    609 
    610 U_NAMESPACE_END
    611 
    612 #endif  // !UCONFIG_NO_COLLATION