tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

collationsettings.cpp (13073B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 * Copyright (C) 2013-2015, International Business Machines
      6 * Corporation and others.  All Rights Reserved.
      7 *******************************************************************************
      8 * collationsettings.cpp
      9 *
     10 * created on: 2013feb07
     11 * created by: Markus W. Scherer
     12 */
     13 
     14 #include "unicode/utypes.h"
     15 
     16 #if !UCONFIG_NO_COLLATION
     17 
     18 #include "unicode/ucol.h"
     19 #include "cmemory.h"
     20 #include "collation.h"
     21 #include "collationdata.h"
     22 #include "collationsettings.h"
     23 #include "sharedobject.h"
     24 #include "uassert.h"
     25 #include "umutex.h"
     26 #include "uvectr32.h"
     27 
     28 U_NAMESPACE_BEGIN
     29 
     30 CollationSettings::CollationSettings(const CollationSettings &other)
     31        : SharedObject(other),
     32          options(other.options), variableTop(other.variableTop),
     33          reorderTable(nullptr),
     34          minHighNoReorder(other.minHighNoReorder),
     35          reorderRanges(nullptr), reorderRangesLength(0),
     36          reorderCodes(nullptr), reorderCodesLength(0), reorderCodesCapacity(0),
     37          fastLatinOptions(other.fastLatinOptions) {
     38    UErrorCode errorCode = U_ZERO_ERROR;
     39    copyReorderingFrom(other, errorCode);
     40    if(fastLatinOptions >= 0) {
     41        uprv_memcpy(fastLatinPrimaries, other.fastLatinPrimaries, sizeof(fastLatinPrimaries));
     42    }
     43 }
     44 
     45 CollationSettings::~CollationSettings() {
     46    if(reorderCodesCapacity != 0) {
     47        uprv_free(const_cast<int32_t *>(reorderCodes));
     48    }
     49 }
     50 
     51 bool
     52 CollationSettings::operator==(const CollationSettings &other) const {
     53    if(options != other.options) { return false; }
     54    if((options & ALTERNATE_MASK) != 0 && variableTop != other.variableTop) { return false; }
     55    if(reorderCodesLength != other.reorderCodesLength) { return false; }
     56    for(int32_t i = 0; i < reorderCodesLength; ++i) {
     57        if(reorderCodes[i] != other.reorderCodes[i]) { return false; }
     58    }
     59    return true;
     60 }
     61 
     62 int32_t
     63 CollationSettings::hashCode() const {
     64    int32_t h = options << 8;
     65    if((options & ALTERNATE_MASK) != 0) { h ^= variableTop; }
     66    h ^= reorderCodesLength;
     67    for(int32_t i = 0; i < reorderCodesLength; ++i) {
     68        h ^= (reorderCodes[i] << i);
     69    }
     70    return h;
     71 }
     72 
     73 void
     74 CollationSettings::resetReordering() {
     75    // When we turn off reordering, we want to set a nullptr permutation
     76    // rather than a no-op permutation.
     77    // Keep the memory via reorderCodes and its capacity.
     78    reorderTable = nullptr;
     79    minHighNoReorder = 0;
     80    reorderRangesLength = 0;
     81    reorderCodesLength = 0;
     82 }
     83 
     84 void
     85 CollationSettings::aliasReordering(const CollationData &data, const int32_t *codes, int32_t length,
     86                                   const uint32_t *ranges, int32_t rangesLength,
     87                                   const uint8_t *table, UErrorCode &errorCode) {
     88    if(U_FAILURE(errorCode)) { return; }
     89    if(table != nullptr &&
     90            (rangesLength == 0 ?
     91                    !reorderTableHasSplitBytes(table) :
     92                    rangesLength >= 2 &&
     93                    // The first offset must be 0. The last offset must not be 0.
     94                    (ranges[0] & 0xffff) == 0 && (ranges[rangesLength - 1] & 0xffff) != 0)) {
     95        // We need to release the memory before setting the alias pointer.
     96        if(reorderCodesCapacity != 0) {
     97            uprv_free(const_cast<int32_t *>(reorderCodes));
     98            reorderCodesCapacity = 0;
     99        }
    100        reorderTable = table;
    101        reorderCodes = codes;
    102        reorderCodesLength = length;
    103        // Drop ranges before the first split byte. They are reordered by the table.
    104        // This then speeds up reordering of the remaining ranges.
    105        int32_t firstSplitByteRangeIndex = 0;
    106        while(firstSplitByteRangeIndex < rangesLength &&
    107                (ranges[firstSplitByteRangeIndex] & 0xff0000) == 0) {
    108            // The second byte of the primary limit is 0.
    109            ++firstSplitByteRangeIndex;
    110        }
    111        if(firstSplitByteRangeIndex == rangesLength) {
    112            U_ASSERT(!reorderTableHasSplitBytes(table));
    113            minHighNoReorder = 0;
    114            reorderRanges = nullptr;
    115            reorderRangesLength = 0;
    116        } else {
    117            U_ASSERT(table[ranges[firstSplitByteRangeIndex] >> 24] == 0);
    118            minHighNoReorder = ranges[rangesLength - 1] & 0xffff0000;
    119            reorderRanges = ranges + firstSplitByteRangeIndex;
    120            reorderRangesLength = rangesLength - firstSplitByteRangeIndex;
    121        }
    122        return;
    123    }
    124    // Regenerate missing data.
    125    setReordering(data, codes, length, errorCode);
    126 }
    127 
    128 void
    129 CollationSettings::setReordering(const CollationData &data,
    130                                 const int32_t *codes, int32_t codesLength,
    131                                 UErrorCode &errorCode) {
    132    if(U_FAILURE(errorCode)) { return; }
    133    if(codesLength == 0 || (codesLength == 1 && codes[0] == UCOL_REORDER_CODE_NONE)) {
    134        resetReordering();
    135        return;
    136    }
    137    UVector32 rangesList(errorCode);
    138    data.makeReorderRanges(codes, codesLength, rangesList, errorCode);
    139    if(U_FAILURE(errorCode)) { return; }
    140    int32_t rangesLength = rangesList.size();
    141    if(rangesLength == 0) {
    142        resetReordering();
    143        return;
    144    }
    145    const uint32_t *ranges = reinterpret_cast<uint32_t *>(rangesList.getBuffer());
    146    // ranges[] contains at least two (limit, offset) pairs.
    147    // The first offset must be 0. The last offset must not be 0.
    148    // Separators (at the low end) and trailing weights (at the high end)
    149    // are never reordered.
    150    U_ASSERT(rangesLength >= 2);
    151    U_ASSERT((ranges[0] & 0xffff) == 0 && (ranges[rangesLength - 1] & 0xffff) != 0);
    152    minHighNoReorder = ranges[rangesLength - 1] & 0xffff0000;
    153 
    154    // Write the lead byte permutation table.
    155    // Set a 0 for each lead byte that has a range boundary in the middle.
    156    uint8_t table[256];
    157    int32_t b = 0;
    158    int32_t firstSplitByteRangeIndex = -1;
    159    for(int32_t i = 0; i < rangesLength; ++i) {
    160        uint32_t pair = ranges[i];
    161        int32_t limit1 = static_cast<int32_t>(pair >> 24);
    162        while(b < limit1) {
    163            table[b] = static_cast<uint8_t>(b + pair);
    164            ++b;
    165        }
    166        // Check the second byte of the limit.
    167        if((pair & 0xff0000) != 0) {
    168            table[limit1] = 0;
    169            b = limit1 + 1;
    170            if(firstSplitByteRangeIndex < 0) {
    171                firstSplitByteRangeIndex = i;
    172            }
    173        }
    174    }
    175    while(b <= 0xff) {
    176        table[b] = static_cast<uint8_t>(b);
    177        ++b;
    178    }
    179    if(firstSplitByteRangeIndex < 0) {
    180        // The lead byte permutation table alone suffices for reordering.
    181        rangesLength = 0;
    182    } else {
    183        // Remove the ranges below the first split byte.
    184        ranges += firstSplitByteRangeIndex;
    185        rangesLength -= firstSplitByteRangeIndex;
    186    }
    187    setReorderArrays(codes, codesLength, ranges, rangesLength, table, errorCode);
    188 }
    189 
    190 void
    191 CollationSettings::setReorderArrays(const int32_t *codes, int32_t codesLength,
    192                                    const uint32_t *ranges, int32_t rangesLength,
    193                                    const uint8_t *table, UErrorCode &errorCode) {
    194    if(U_FAILURE(errorCode)) { return; }
    195    int32_t *ownedCodes;
    196    int32_t totalLength = codesLength + rangesLength;
    197    U_ASSERT(totalLength > 0);
    198    if(totalLength <= reorderCodesCapacity) {
    199        ownedCodes = const_cast<int32_t *>(reorderCodes);
    200    } else {
    201        // Allocate one memory block for the codes, the ranges, and the 16-aligned table.
    202        int32_t capacity = (totalLength + 3) & ~3;  // round up to a multiple of 4 ints
    203        ownedCodes = static_cast<int32_t*>(uprv_malloc(capacity * 4 + 256));
    204        if(ownedCodes == nullptr) {
    205            resetReordering();
    206            errorCode = U_MEMORY_ALLOCATION_ERROR;
    207            return;
    208        }
    209        if(reorderCodesCapacity != 0) {
    210            uprv_free(const_cast<int32_t *>(reorderCodes));
    211        }
    212        reorderCodes = ownedCodes;
    213        reorderCodesCapacity = capacity;
    214    }
    215    uprv_memcpy(ownedCodes + reorderCodesCapacity, table, 256);
    216    uprv_memcpy(ownedCodes, codes, codesLength * 4);
    217    uprv_memcpy(ownedCodes + codesLength, ranges, rangesLength * 4);
    218    reorderTable = reinterpret_cast<const uint8_t *>(reorderCodes + reorderCodesCapacity);
    219    reorderCodesLength = codesLength;
    220    reorderRanges = reinterpret_cast<uint32_t *>(ownedCodes) + codesLength;
    221    reorderRangesLength = rangesLength;
    222 }
    223 
    224 void
    225 CollationSettings::copyReorderingFrom(const CollationSettings &other, UErrorCode &errorCode) {
    226    if(U_FAILURE(errorCode)) { return; }
    227    if(!other.hasReordering()) {
    228        resetReordering();
    229        return;
    230    }
    231    minHighNoReorder = other.minHighNoReorder;
    232    if(other.reorderCodesCapacity == 0) {
    233        // The reorder arrays are aliased to memory-mapped data.
    234        reorderTable = other.reorderTable;
    235        reorderRanges = other.reorderRanges;
    236        reorderRangesLength = other.reorderRangesLength;
    237        reorderCodes = other.reorderCodes;
    238        reorderCodesLength = other.reorderCodesLength;
    239    } else {
    240        setReorderArrays(other.reorderCodes, other.reorderCodesLength,
    241                         other.reorderRanges, other.reorderRangesLength,
    242                         other.reorderTable, errorCode);
    243    }
    244 }
    245 
    246 UBool
    247 CollationSettings::reorderTableHasSplitBytes(const uint8_t table[256]) {
    248    U_ASSERT(table[0] == 0);
    249    for(int32_t i = 1; i < 256; ++i) {
    250        if(table[i] == 0) {
    251            return true;
    252        }
    253    }
    254    return false;
    255 }
    256 
    257 uint32_t
    258 CollationSettings::reorderEx(uint32_t p) const {
    259    if(p >= minHighNoReorder) { return p; }
    260    // Round up p so that its lower 16 bits are >= any offset bits.
    261    // Then compare q directly with (limit, offset) pairs.
    262    uint32_t q = p | 0xffff;
    263    uint32_t r;
    264    const uint32_t *ranges = reorderRanges;
    265    while(q >= (r = *ranges)) { ++ranges; }
    266    return p + (r << 24);
    267 }
    268 
    269 void
    270 CollationSettings::setStrength(int32_t value, int32_t defaultOptions, UErrorCode &errorCode) {
    271    if(U_FAILURE(errorCode)) { return; }
    272    int32_t noStrength = options & ~STRENGTH_MASK;
    273    switch(value) {
    274    case UCOL_PRIMARY:
    275    case UCOL_SECONDARY:
    276    case UCOL_TERTIARY:
    277    case UCOL_QUATERNARY:
    278    case UCOL_IDENTICAL:
    279        options = noStrength | (value << STRENGTH_SHIFT);
    280        break;
    281    case UCOL_DEFAULT:
    282        options = noStrength | (defaultOptions & STRENGTH_MASK);
    283        break;
    284    default:
    285        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    286        break;
    287    }
    288 }
    289 
    290 void
    291 CollationSettings::setFlag(int32_t bit, UColAttributeValue value,
    292                           int32_t defaultOptions, UErrorCode &errorCode) {
    293    if(U_FAILURE(errorCode)) { return; }
    294    switch(value) {
    295    case UCOL_ON:
    296        options |= bit;
    297        break;
    298    case UCOL_OFF:
    299        options &= ~bit;
    300        break;
    301    case UCOL_DEFAULT:
    302        options = (options & ~bit) | (defaultOptions & bit);
    303        break;
    304    default:
    305        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    306        break;
    307    }
    308 }
    309 
    310 void
    311 CollationSettings::setCaseFirst(UColAttributeValue value,
    312                                int32_t defaultOptions, UErrorCode &errorCode) {
    313    if(U_FAILURE(errorCode)) { return; }
    314    int32_t noCaseFirst = options & ~CASE_FIRST_AND_UPPER_MASK;
    315    switch(value) {
    316    case UCOL_OFF:
    317        options = noCaseFirst;
    318        break;
    319    case UCOL_LOWER_FIRST:
    320        options = noCaseFirst | CASE_FIRST;
    321        break;
    322    case UCOL_UPPER_FIRST:
    323        options = noCaseFirst | CASE_FIRST_AND_UPPER_MASK;
    324        break;
    325    case UCOL_DEFAULT:
    326        options = noCaseFirst | (defaultOptions & CASE_FIRST_AND_UPPER_MASK);
    327        break;
    328    default:
    329        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    330        break;
    331    }
    332 }
    333 
    334 void
    335 CollationSettings::setAlternateHandling(UColAttributeValue value,
    336                                        int32_t defaultOptions, UErrorCode &errorCode) {
    337    if(U_FAILURE(errorCode)) { return; }
    338    int32_t noAlternate = options & ~ALTERNATE_MASK;
    339    switch(value) {
    340    case UCOL_NON_IGNORABLE:
    341        options = noAlternate;
    342        break;
    343    case UCOL_SHIFTED:
    344        options = noAlternate | SHIFTED;
    345        break;
    346    case UCOL_DEFAULT:
    347        options = noAlternate | (defaultOptions & ALTERNATE_MASK);
    348        break;
    349    default:
    350        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    351        break;
    352    }
    353 }
    354 
    355 void
    356 CollationSettings::setMaxVariable(int32_t value, int32_t defaultOptions, UErrorCode &errorCode) {
    357    if(U_FAILURE(errorCode)) { return; }
    358    int32_t noMax = options & ~MAX_VARIABLE_MASK;
    359    switch(value) {
    360    case MAX_VAR_SPACE:
    361    case MAX_VAR_PUNCT:
    362    case MAX_VAR_SYMBOL:
    363    case MAX_VAR_CURRENCY:
    364        options = noMax | (value << MAX_VARIABLE_SHIFT);
    365        break;
    366    case UCOL_DEFAULT:
    367        options = noMax | (defaultOptions & MAX_VARIABLE_MASK);
    368        break;
    369    default:
    370        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    371        break;
    372    }
    373 }
    374 
    375 U_NAMESPACE_END
    376 
    377 #endif  // !UCONFIG_NO_COLLATION