tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

ucoleitr.cpp (13032B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 ******************************************************************************
      5 *   Copyright (C) 2001-2016, International Business Machines
      6 *   Corporation and others.  All Rights Reserved.
      7 ******************************************************************************
      8 *
      9 * File ucoleitr.cpp
     10 *
     11 * Modification History:
     12 *
     13 * Date        Name        Description
     14 * 02/15/2001  synwee      Modified all methods to process its own function 
     15 *                         instead of calling the equivalent c++ api (coleitr.h)
     16 * 2012-2014   markus      Rewritten in C++ again.
     17 ******************************************************************************/
     18 
     19 #include "unicode/utypes.h"
     20 
     21 #if !UCONFIG_NO_COLLATION
     22 
     23 #include "unicode/coleitr.h"
     24 #include "unicode/tblcoll.h"
     25 #include "unicode/ucoleitr.h"
     26 #include "unicode/ustring.h"
     27 #include "unicode/sortkey.h"
     28 #include "unicode/uobject.h"
     29 #include "cmemory.h"
     30 #include "usrchimp.h"
     31 
     32 U_NAMESPACE_USE
     33 
     34 #define BUFFER_LENGTH             100
     35 
     36 #define DEFAULT_BUFFER_SIZE 16
     37 #define BUFFER_GROW 8
     38 
     39 #define ARRAY_COPY(dst, src, count) uprv_memcpy((void *) (dst), (void *) (src), (size_t)(count) * sizeof (src)[0])
     40 
     41 #define NEW_ARRAY(type, count) (type *) uprv_malloc((size_t)(count) * sizeof(type))
     42 
     43 #define DELETE_ARRAY(array) uprv_free((void *) (array))
     44 
     45 struct RCEI
     46 {
     47    uint32_t ce;
     48    int32_t  low;
     49    int32_t  high;
     50 };
     51 
     52 U_NAMESPACE_BEGIN
     53 
     54 struct RCEBuffer
     55 {
     56    RCEI    defaultBuffer[DEFAULT_BUFFER_SIZE];
     57    RCEI   *buffer;
     58    int32_t bufferIndex;
     59    int32_t bufferSize;
     60 
     61    RCEBuffer();
     62    ~RCEBuffer();
     63 
     64    UBool isEmpty() const;
     65    void  put(uint32_t ce, int32_t ixLow, int32_t ixHigh, UErrorCode &errorCode);
     66    const RCEI *get();
     67 };
     68 
     69 RCEBuffer::RCEBuffer()
     70 {
     71    buffer = defaultBuffer;
     72    bufferIndex = 0;
     73    bufferSize = UPRV_LENGTHOF(defaultBuffer);
     74 }
     75 
     76 RCEBuffer::~RCEBuffer()
     77 {
     78    if (buffer != defaultBuffer) {
     79        DELETE_ARRAY(buffer);
     80    }
     81 }
     82 
     83 UBool RCEBuffer::isEmpty() const
     84 {
     85    return bufferIndex <= 0;
     86 }
     87 
     88 void RCEBuffer::put(uint32_t ce, int32_t ixLow, int32_t ixHigh, UErrorCode &errorCode)
     89 {
     90    if (U_FAILURE(errorCode)) {
     91        return;
     92    }
     93    if (bufferIndex >= bufferSize) {
     94        RCEI *newBuffer = NEW_ARRAY(RCEI, bufferSize + BUFFER_GROW);
     95        if (newBuffer == nullptr) {
     96            errorCode = U_MEMORY_ALLOCATION_ERROR;
     97            return;
     98        }
     99 
    100        ARRAY_COPY(newBuffer, buffer, bufferSize);
    101 
    102        if (buffer != defaultBuffer) {
    103            DELETE_ARRAY(buffer);
    104        }
    105 
    106        buffer = newBuffer;
    107        bufferSize += BUFFER_GROW;
    108    }
    109 
    110    buffer[bufferIndex].ce   = ce;
    111    buffer[bufferIndex].low  = ixLow;
    112    buffer[bufferIndex].high = ixHigh;
    113 
    114    bufferIndex += 1;
    115 }
    116 
    117 const RCEI *RCEBuffer::get()
    118 {
    119    if (bufferIndex > 0) {
    120     return &buffer[--bufferIndex];
    121    }
    122 
    123    return nullptr;
    124 }
    125 
    126 PCEBuffer::PCEBuffer()
    127 {
    128    buffer = defaultBuffer;
    129    bufferIndex = 0;
    130    bufferSize = UPRV_LENGTHOF(defaultBuffer);
    131 }
    132 
    133 PCEBuffer::~PCEBuffer()
    134 {
    135    if (buffer != defaultBuffer) {
    136        DELETE_ARRAY(buffer);
    137    }
    138 }
    139 
    140 void PCEBuffer::reset()
    141 {
    142    bufferIndex = 0;
    143 }
    144 
    145 UBool PCEBuffer::isEmpty() const
    146 {
    147    return bufferIndex <= 0;
    148 }
    149 
    150 void PCEBuffer::put(uint64_t ce, int32_t ixLow, int32_t ixHigh, UErrorCode &errorCode)
    151 {
    152    if (U_FAILURE(errorCode)) {
    153        return;
    154    }
    155    if (bufferIndex >= bufferSize) {
    156        PCEI *newBuffer = NEW_ARRAY(PCEI, bufferSize + BUFFER_GROW);
    157        if (newBuffer == nullptr) {
    158            errorCode = U_MEMORY_ALLOCATION_ERROR;
    159            return;
    160        }
    161 
    162        ARRAY_COPY(newBuffer, buffer, bufferSize);
    163 
    164        if (buffer != defaultBuffer) {
    165            DELETE_ARRAY(buffer);
    166        }
    167 
    168        buffer = newBuffer;
    169        bufferSize += BUFFER_GROW;
    170    }
    171 
    172    buffer[bufferIndex].ce   = ce;
    173    buffer[bufferIndex].low  = ixLow;
    174    buffer[bufferIndex].high = ixHigh;
    175 
    176    bufferIndex += 1;
    177 }
    178 
    179 const PCEI *PCEBuffer::get()
    180 {
    181    if (bufferIndex > 0) {
    182     return &buffer[--bufferIndex];
    183    }
    184 
    185    return nullptr;
    186 }
    187 
    188 UCollationPCE::UCollationPCE(UCollationElements *elems) { init(elems); }
    189 
    190 UCollationPCE::UCollationPCE(CollationElementIterator *iter) { init(iter); }
    191 
    192 void UCollationPCE::init(UCollationElements *elems) {
    193    init(CollationElementIterator::fromUCollationElements(elems));
    194 }
    195 
    196 void UCollationPCE::init(CollationElementIterator *iter)
    197 {
    198    cei = iter;
    199    init(*iter->rbc_);
    200 }
    201 
    202 void UCollationPCE::init(const Collator &coll)
    203 {
    204    UErrorCode status = U_ZERO_ERROR;
    205 
    206    strength    = coll.getAttribute(UCOL_STRENGTH, status);
    207    toShift     = coll.getAttribute(UCOL_ALTERNATE_HANDLING, status) == UCOL_SHIFTED;
    208    isShifted   = false;
    209    variableTop = coll.getVariableTop(status);
    210 }
    211 
    212 UCollationPCE::~UCollationPCE()
    213 {
    214    // nothing to do
    215 }
    216 
    217 uint64_t UCollationPCE::processCE(uint32_t ce)
    218 {
    219    uint64_t primary = 0, secondary = 0, tertiary = 0, quaternary = 0;
    220 
    221    // This is clean, but somewhat slow...
    222    // We could apply the mask to ce and then
    223    // just get all three orders...
    224    switch(strength) {
    225    default:
    226        tertiary = ucol_tertiaryOrder(ce);
    227        U_FALLTHROUGH;
    228 
    229    case UCOL_SECONDARY:
    230        secondary = ucol_secondaryOrder(ce);
    231        U_FALLTHROUGH;
    232 
    233    case UCOL_PRIMARY:
    234        primary = ucol_primaryOrder(ce);
    235    }
    236 
    237    // **** This should probably handle continuations too.  ****
    238    // **** That means that we need 24 bits for the primary ****
    239    // **** instead of the 16 that we're currently using.   ****
    240    // **** So we can lay out the 64 bits as: 24.12.12.16.  ****
    241    // **** Another complication with continuations is that ****
    242    // **** the *second* CE is marked as a continuation, so ****
    243    // **** we always have to peek ahead to know how long   ****
    244    // **** the primary is...                               ****
    245    if ((toShift && variableTop > ce && primary != 0)
    246                || (isShifted && primary == 0)) {
    247 
    248        if (primary == 0) {
    249            return UCOL_IGNORABLE;
    250        }
    251 
    252        if (strength >= UCOL_QUATERNARY) {
    253            quaternary = primary;
    254        }
    255 
    256        primary = secondary = tertiary = 0;
    257        isShifted = true;
    258    } else {
    259        if (strength >= UCOL_QUATERNARY) {
    260            quaternary = 0xFFFF;
    261        }
    262 
    263        isShifted = false;
    264    }
    265 
    266    return primary << 48 | secondary << 32 | tertiary << 16 | quaternary;
    267 }
    268 
    269 U_NAMESPACE_END
    270 
    271 /* public methods ---------------------------------------------------- */
    272 
    273 U_CAPI UCollationElements* U_EXPORT2
    274 ucol_openElements(const UCollator  *coll,
    275                  const char16_t   *text,
    276                        int32_t    textLength,
    277                        UErrorCode *status)
    278 {
    279    if (U_FAILURE(*status)) {
    280        return nullptr;
    281    }
    282    if (coll == nullptr || (text == nullptr && textLength != 0)) {
    283        *status = U_ILLEGAL_ARGUMENT_ERROR;
    284        return nullptr;
    285    }
    286    const RuleBasedCollator *rbc = RuleBasedCollator::rbcFromUCollator(coll);
    287    if (rbc == nullptr) {
    288        *status = U_UNSUPPORTED_ERROR;  // coll is a Collator but not a RuleBasedCollator
    289        return nullptr;
    290    }
    291 
    292    UnicodeString s(textLength < 0, text, textLength);
    293    CollationElementIterator *cei = rbc->createCollationElementIterator(s);
    294    if (cei == nullptr) {
    295        *status = U_MEMORY_ALLOCATION_ERROR;
    296        return nullptr;
    297    }
    298 
    299    return cei->toUCollationElements();
    300 }
    301 
    302 
    303 U_CAPI void U_EXPORT2
    304 ucol_closeElements(UCollationElements *elems)
    305 {
    306    delete CollationElementIterator::fromUCollationElements(elems);
    307 }
    308 
    309 U_CAPI void U_EXPORT2
    310 ucol_reset(UCollationElements *elems)
    311 {
    312    CollationElementIterator::fromUCollationElements(elems)->reset();
    313 }
    314 
    315 U_CAPI int32_t U_EXPORT2
    316 ucol_next(UCollationElements *elems, 
    317          UErrorCode         *status)
    318 {
    319    if (U_FAILURE(*status)) {
    320        return UCOL_NULLORDER;
    321    }
    322 
    323    return CollationElementIterator::fromUCollationElements(elems)->next(*status);
    324 }
    325 
    326 U_NAMESPACE_BEGIN
    327 
    328 int64_t
    329 UCollationPCE::nextProcessed(
    330                   int32_t            *ixLow,
    331                   int32_t            *ixHigh,
    332                   UErrorCode         *status)
    333 {
    334    int64_t result = UCOL_IGNORABLE;
    335    uint32_t low = 0, high = 0;
    336 
    337    if (U_FAILURE(*status)) {
    338        return UCOL_PROCESSED_NULLORDER;
    339    }
    340 
    341    pceBuffer.reset();
    342 
    343    do {
    344        low = cei->getOffset();
    345        int32_t ce = cei->next(*status);
    346        high = cei->getOffset();
    347 
    348        if (ce == UCOL_NULLORDER) {
    349             result = UCOL_PROCESSED_NULLORDER;
    350             break;
    351        }
    352 
    353        result = processCE(static_cast<uint32_t>(ce));
    354    } while (result == UCOL_IGNORABLE);
    355 
    356    if (ixLow != nullptr) {
    357        *ixLow = low;
    358    }
    359 
    360    if (ixHigh != nullptr) {
    361        *ixHigh = high;
    362    }
    363 
    364    return result;
    365 }
    366 
    367 U_NAMESPACE_END
    368 
    369 U_CAPI int32_t U_EXPORT2
    370 ucol_previous(UCollationElements *elems,
    371              UErrorCode         *status)
    372 {
    373    if(U_FAILURE(*status)) {
    374        return UCOL_NULLORDER;
    375    }
    376    return CollationElementIterator::fromUCollationElements(elems)->previous(*status);
    377 }
    378 
    379 U_NAMESPACE_BEGIN
    380 
    381 int64_t
    382 UCollationPCE::previousProcessed(
    383                   int32_t            *ixLow,
    384                   int32_t            *ixHigh,
    385                   UErrorCode         *status)
    386 {
    387    int64_t result = UCOL_IGNORABLE;
    388    int32_t  low = 0, high = 0;
    389 
    390    if (U_FAILURE(*status)) {
    391        return UCOL_PROCESSED_NULLORDER;
    392    }
    393 
    394    // pceBuffer.reset();
    395 
    396    while (pceBuffer.isEmpty()) {
    397        // buffer raw CEs up to non-ignorable primary
    398        RCEBuffer rceb;
    399        int32_t ce;
    400        
    401        // **** do we need to reset rceb, or will it always be empty at this point ****
    402        do {
    403            high = cei->getOffset();
    404            ce   = cei->previous(*status);
    405            low  = cei->getOffset();
    406 
    407            if (ce == UCOL_NULLORDER) {
    408                if (!rceb.isEmpty()) {
    409                    break;
    410                }
    411 
    412                goto finish;
    413            }
    414 
    415            rceb.put(static_cast<uint32_t>(ce), low, high, *status);
    416        } while (U_SUCCESS(*status) && ((ce & UCOL_PRIMARYORDERMASK) == 0 || isContinuation(ce)));
    417 
    418        // process the raw CEs
    419        while (U_SUCCESS(*status) && !rceb.isEmpty()) {
    420            const RCEI *rcei = rceb.get();
    421 
    422            result = processCE(rcei->ce);
    423 
    424            if (result != UCOL_IGNORABLE) {
    425                pceBuffer.put(result, rcei->low, rcei->high, *status);
    426            }
    427        }
    428        if (U_FAILURE(*status)) {
    429            return UCOL_PROCESSED_NULLORDER;
    430        }
    431    }
    432 
    433 finish:
    434    if (pceBuffer.isEmpty()) {
    435        // **** Is -1 the right value for ixLow, ixHigh? ****
    436    	if (ixLow != nullptr) {
    437    		*ixLow = -1;
    438    	}
    439    	
    440    	if (ixHigh != nullptr) {
    441    		*ixHigh = -1
    442    		;
    443    	}
    444        return UCOL_PROCESSED_NULLORDER;
    445    }
    446 
    447    const PCEI *pcei = pceBuffer.get();
    448 
    449    if (ixLow != nullptr) {
    450        *ixLow = pcei->low;
    451    }
    452 
    453    if (ixHigh != nullptr) {
    454        *ixHigh = pcei->high;
    455    }
    456 
    457    return pcei->ce;
    458 }
    459 
    460 U_NAMESPACE_END
    461 
    462 U_CAPI int32_t U_EXPORT2
    463 ucol_getMaxExpansion(const UCollationElements *elems,
    464                           int32_t            order)
    465 {
    466    return CollationElementIterator::fromUCollationElements(elems)->getMaxExpansion(order);
    467 
    468    // TODO: The old code masked the order according to strength and then did a binary search.
    469    // However this was probably at least partially broken because of the following comment.
    470    // Still, it might have found a match when this version may not.
    471 
    472    // FIXME: with a masked search, there might be more than one hit,
    473    // so we need to look forward and backward from the match to find all
    474    // of the hits...
    475 }
    476 
    477 U_CAPI void U_EXPORT2
    478 ucol_setText(      UCollationElements *elems,
    479             const char16_t           *text,
    480                   int32_t            textLength,
    481                   UErrorCode         *status)
    482 {
    483    if (U_FAILURE(*status)) {
    484        return;
    485    }
    486 
    487    if ((text == nullptr && textLength != 0)) {
    488        *status = U_ILLEGAL_ARGUMENT_ERROR;
    489        return;
    490    }
    491    UnicodeString s(textLength < 0, text, textLength);
    492    return CollationElementIterator::fromUCollationElements(elems)->setText(s, *status);
    493 }
    494 
    495 U_CAPI int32_t U_EXPORT2
    496 ucol_getOffset(const UCollationElements *elems)
    497 {
    498    return CollationElementIterator::fromUCollationElements(elems)->getOffset();
    499 }
    500 
    501 U_CAPI void U_EXPORT2
    502 ucol_setOffset(UCollationElements    *elems,
    503               int32_t           offset,
    504               UErrorCode            *status)
    505 {
    506    if (U_FAILURE(*status)) {
    507        return;
    508    }
    509 
    510    CollationElementIterator::fromUCollationElements(elems)->setOffset(offset, *status);
    511 }
    512 
    513 U_CAPI int32_t U_EXPORT2
    514 ucol_primaryOrder (int32_t order) 
    515 {
    516    return (order >> 16) & 0xffff;
    517 }
    518 
    519 U_CAPI int32_t U_EXPORT2
    520 ucol_secondaryOrder (int32_t order) 
    521 {
    522    return (order >> 8) & 0xff;
    523 }
    524 
    525 U_CAPI int32_t U_EXPORT2
    526 ucol_tertiaryOrder (int32_t order) 
    527 {
    528    return order & 0xff;
    529 }
    530 
    531 #endif /* #if !UCONFIG_NO_COLLATION */