tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

collationrootelements.cpp (11817B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 * Copyright (C) 2013-2014, International Business Machines
      6 * Corporation and others.  All Rights Reserved.
      7 *******************************************************************************
      8 * collationrootelements.cpp
      9 *
     10 * created on: 2013mar05
     11 * created by: Markus W. Scherer
     12 */
     13 
     14 #include "unicode/utypes.h"
     15 
     16 #if !UCONFIG_NO_COLLATION
     17 
     18 #include "collation.h"
     19 #include "collationrootelements.h"
     20 #include "uassert.h"
     21 
     22 U_NAMESPACE_BEGIN
     23 
     24 int64_t
     25 CollationRootElements::lastCEWithPrimaryBefore(uint32_t p) const {
     26    if(p == 0) { return 0; }
     27    U_ASSERT(p > elements[elements[IX_FIRST_PRIMARY_INDEX]]);
     28    int32_t index = findP(p);
     29    uint32_t q = elements[index];
     30    uint32_t secTer;
     31    if(p == (q & 0xffffff00)) {
     32        // p == elements[index] is a root primary. Find the CE before it.
     33        // We must not be in a primary range.
     34        U_ASSERT((q & PRIMARY_STEP_MASK) == 0);
     35        secTer = elements[index - 1];
     36        if((secTer & SEC_TER_DELTA_FLAG) == 0) {
     37            // Primary CE just before p.
     38            p = secTer & 0xffffff00;
     39            secTer = Collation::COMMON_SEC_AND_TER_CE;
     40        } else {
     41            // secTer = last secondary & tertiary for the previous primary
     42            index -= 2;
     43            for(;;) {
     44                p = elements[index];
     45                if((p & SEC_TER_DELTA_FLAG) == 0) {
     46                    p &= 0xffffff00;
     47                    break;
     48                }
     49                --index;
     50            }
     51        }
     52    } else {
     53        // p > elements[index] which is the previous primary.
     54        // Find the last secondary & tertiary weights for it.
     55        p = q & 0xffffff00;
     56        secTer = Collation::COMMON_SEC_AND_TER_CE;
     57        for(;;) {
     58            q = elements[++index];
     59            if((q & SEC_TER_DELTA_FLAG) == 0) {
     60                // We must not be in a primary range.
     61                U_ASSERT((q & PRIMARY_STEP_MASK) == 0);
     62                break;
     63            }
     64            secTer = q;
     65        }
     66    }
     67    return (static_cast<int64_t>(p) << 32) | (secTer & ~SEC_TER_DELTA_FLAG);
     68 }
     69 
     70 int64_t
     71 CollationRootElements::firstCEWithPrimaryAtLeast(uint32_t p) const {
     72    if(p == 0) { return 0; }
     73    int32_t index = findP(p);
     74    if(p != (elements[index] & 0xffffff00)) {
     75        for(;;) {
     76            p = elements[++index];
     77            if((p & SEC_TER_DELTA_FLAG) == 0) {
     78                // First primary after p. We must not be in a primary range.
     79                U_ASSERT((p & PRIMARY_STEP_MASK) == 0);
     80                break;
     81            }
     82        }
     83    }
     84    // The code above guarantees that p has at most 3 bytes: (p & 0xff) == 0.
     85    return (static_cast<int64_t>(p) << 32) | Collation::COMMON_SEC_AND_TER_CE;
     86 }
     87 
     88 uint32_t
     89 CollationRootElements::getPrimaryBefore(uint32_t p, UBool isCompressible) const {
     90    int32_t index = findPrimary(p);
     91    int32_t step;
     92    uint32_t q = elements[index];
     93    if(p == (q & 0xffffff00)) {
     94        // Found p itself. Return the previous primary.
     95        // See if p is at the end of a previous range.
     96        step = static_cast<int32_t>(q) & PRIMARY_STEP_MASK;
     97        if(step == 0) {
     98            // p is not at the end of a range. Look for the previous primary.
     99            do {
    100                p = elements[--index];
    101            } while((p & SEC_TER_DELTA_FLAG) != 0);
    102            return p & 0xffffff00;
    103        }
    104    } else {
    105        // p is in a range, and not at the start.
    106        uint32_t nextElement = elements[index + 1];
    107        U_ASSERT(isEndOfPrimaryRange(nextElement));
    108        step = static_cast<int32_t>(nextElement) & PRIMARY_STEP_MASK;
    109    }
    110    // Return the previous range primary.
    111    if((p & 0xffff) == 0) {
    112        return Collation::decTwoBytePrimaryByOneStep(p, isCompressible, step);
    113    } else {
    114        return Collation::decThreeBytePrimaryByOneStep(p, isCompressible, step);
    115    }
    116 }
    117 
    118 uint32_t
    119 CollationRootElements::getSecondaryBefore(uint32_t p, uint32_t s) const {
    120    int32_t index;
    121    uint32_t previousSec, sec;
    122    if(p == 0) {
    123        index = static_cast<int32_t>(elements[IX_FIRST_SECONDARY_INDEX]);
    124        // Gap at the beginning of the secondary CE range.
    125        previousSec = 0;
    126        sec = elements[index] >> 16;
    127    } else {
    128        index = findPrimary(p) + 1;
    129        previousSec = Collation::BEFORE_WEIGHT16;
    130        sec = getFirstSecTerForPrimary(index) >> 16;
    131    }
    132    U_ASSERT(s >= sec);
    133    while(s > sec) {
    134        previousSec = sec;
    135        U_ASSERT((elements[index] & SEC_TER_DELTA_FLAG) != 0);
    136        sec = elements[index++] >> 16;
    137    }
    138    U_ASSERT(sec == s);
    139    return previousSec;
    140 }
    141 
    142 uint32_t
    143 CollationRootElements::getTertiaryBefore(uint32_t p, uint32_t s, uint32_t t) const {
    144    U_ASSERT((t & ~Collation::ONLY_TERTIARY_MASK) == 0);
    145    int32_t index;
    146    uint32_t previousTer, secTer;
    147    if(p == 0) {
    148        if(s == 0) {
    149            index = static_cast<int32_t>(elements[IX_FIRST_TERTIARY_INDEX]);
    150            // Gap at the beginning of the tertiary CE range.
    151            previousTer = 0;
    152        } else {
    153            index = static_cast<int32_t>(elements[IX_FIRST_SECONDARY_INDEX]);
    154            previousTer = Collation::BEFORE_WEIGHT16;
    155        }
    156        secTer = elements[index] & ~SEC_TER_DELTA_FLAG;
    157    } else {
    158        index = findPrimary(p) + 1;
    159        previousTer = Collation::BEFORE_WEIGHT16;
    160        secTer = getFirstSecTerForPrimary(index);
    161    }
    162    uint32_t st = (s << 16) | t;
    163    while(st > secTer) {
    164        if((secTer >> 16) == s) { previousTer = secTer; }
    165        U_ASSERT((elements[index] & SEC_TER_DELTA_FLAG) != 0);
    166        secTer = elements[index++] & ~SEC_TER_DELTA_FLAG;
    167    }
    168    U_ASSERT(secTer == st);
    169    return previousTer & 0xffff;
    170 }
    171 
    172 uint32_t
    173 CollationRootElements::getPrimaryAfter(uint32_t p, int32_t index, UBool isCompressible) const {
    174    U_ASSERT(p == (elements[index] & 0xffffff00) || isEndOfPrimaryRange(elements[index + 1]));
    175    uint32_t q = elements[++index];
    176    int32_t step;
    177    if ((q & SEC_TER_DELTA_FLAG) == 0 && (step = static_cast<int32_t>(q) & PRIMARY_STEP_MASK) != 0) {
    178        // Return the next primary in this range.
    179        if((p & 0xffff) == 0) {
    180            return Collation::incTwoBytePrimaryByOffset(p, isCompressible, step);
    181        } else {
    182            return Collation::incThreeBytePrimaryByOffset(p, isCompressible, step);
    183        }
    184    } else {
    185        // Return the next primary in the list.
    186        while((q & SEC_TER_DELTA_FLAG) != 0) {
    187            q = elements[++index];
    188        }
    189        U_ASSERT((q & PRIMARY_STEP_MASK) == 0);
    190        return q;
    191    }
    192 }
    193 
    194 uint32_t
    195 CollationRootElements::getSecondaryAfter(int32_t index, uint32_t s) const {
    196    uint32_t secTer;
    197    uint32_t secLimit;
    198    if(index == 0) {
    199        // primary = 0
    200        U_ASSERT(s != 0);
    201        index = static_cast<int32_t>(elements[IX_FIRST_SECONDARY_INDEX]);
    202        secTer = elements[index];
    203        // Gap at the end of the secondary CE range.
    204        secLimit = 0x10000;
    205    } else {
    206        U_ASSERT(index >= (int32_t)elements[IX_FIRST_PRIMARY_INDEX]);
    207        secTer = getFirstSecTerForPrimary(index + 1);
    208        // If this is an explicit sec/ter unit, then it will be read once more.
    209        // Gap for secondaries of primary CEs.
    210        secLimit = getSecondaryBoundary();
    211    }
    212    for(;;) {
    213        uint32_t sec = secTer >> 16;
    214        if(sec > s) { return sec; }
    215        secTer = elements[++index];
    216        if((secTer & SEC_TER_DELTA_FLAG) == 0) { return secLimit; }
    217    }
    218 }
    219 
    220 uint32_t
    221 CollationRootElements::getTertiaryAfter(int32_t index, uint32_t s, uint32_t t) const {
    222    uint32_t secTer;
    223    uint32_t terLimit;
    224    if(index == 0) {
    225        // primary = 0
    226        if(s == 0) {
    227            U_ASSERT(t != 0);
    228            index = static_cast<int32_t>(elements[IX_FIRST_TERTIARY_INDEX]);
    229            // Gap at the end of the tertiary CE range.
    230            terLimit = 0x4000;
    231        } else {
    232            index = static_cast<int32_t>(elements[IX_FIRST_SECONDARY_INDEX]);
    233            // Gap for tertiaries of primary/secondary CEs.
    234            terLimit = getTertiaryBoundary();
    235        }
    236        secTer = elements[index] & ~SEC_TER_DELTA_FLAG;
    237    } else {
    238        U_ASSERT(index >= (int32_t)elements[IX_FIRST_PRIMARY_INDEX]);
    239        secTer = getFirstSecTerForPrimary(index + 1);
    240        // If this is an explicit sec/ter unit, then it will be read once more.
    241        terLimit = getTertiaryBoundary();
    242    }
    243    uint32_t st = (s << 16) | t;
    244    for(;;) {
    245        if(secTer > st) {
    246            U_ASSERT((secTer >> 16) == s);
    247            return secTer & 0xffff;
    248        }
    249        secTer = elements[++index];
    250        // No tertiary greater than t for this primary+secondary.
    251        if((secTer & SEC_TER_DELTA_FLAG) == 0 || (secTer >> 16) > s) { return terLimit; }
    252        secTer &= ~SEC_TER_DELTA_FLAG;
    253    }
    254 }
    255 
    256 uint32_t
    257 CollationRootElements::getFirstSecTerForPrimary(int32_t index) const {
    258    uint32_t secTer = elements[index];
    259    if((secTer & SEC_TER_DELTA_FLAG) == 0) {
    260        // No sec/ter delta.
    261        return Collation::COMMON_SEC_AND_TER_CE;
    262    }
    263    secTer &= ~SEC_TER_DELTA_FLAG;
    264    if(secTer > Collation::COMMON_SEC_AND_TER_CE) {
    265        // Implied sec/ter.
    266        return Collation::COMMON_SEC_AND_TER_CE;
    267    }
    268    // Explicit sec/ter below common/common.
    269    return secTer;
    270 }
    271 
    272 int32_t
    273 CollationRootElements::findPrimary(uint32_t p) const {
    274    // Requirement: p must occur as a root primary.
    275    U_ASSERT((p & 0xff) == 0);  // at most a 3-byte primary
    276    int32_t index = findP(p);
    277    // If p is in a range, then we just assume that p is an actual primary in this range.
    278    // (Too cumbersome/expensive to check.)
    279    // Otherwise, it must be an exact match.
    280    U_ASSERT(isEndOfPrimaryRange(elements[index + 1]) || p == (elements[index] & 0xffffff00));
    281    return index;
    282 }
    283 
    284 int32_t
    285 CollationRootElements::findP(uint32_t p) const {
    286    // p need not occur as a root primary.
    287    // For example, it might be a reordering group boundary.
    288    U_ASSERT((p >> 24) != Collation::UNASSIGNED_IMPLICIT_BYTE);
    289    // modified binary search
    290    int32_t start = static_cast<int32_t>(elements[IX_FIRST_PRIMARY_INDEX]);
    291    U_ASSERT(p >= elements[start]);
    292    int32_t limit = length - 1;
    293    U_ASSERT(elements[limit] >= PRIMARY_SENTINEL);
    294    U_ASSERT(p < elements[limit]);
    295    while((start + 1) < limit) {
    296        // Invariant: elements[start] and elements[limit] are primaries,
    297        // and elements[start]<=p<=elements[limit].
    298        int32_t i = (start + limit) / 2;
    299        uint32_t q = elements[i];
    300        if((q & SEC_TER_DELTA_FLAG) != 0) {
    301            // Find the next primary.
    302            int32_t j = i + 1;
    303            for(;;) {
    304                if(j == limit) { break; }
    305                q = elements[j];
    306                if((q & SEC_TER_DELTA_FLAG) == 0) {
    307                    i = j;
    308                    break;
    309                }
    310                ++j;
    311            }
    312            if((q & SEC_TER_DELTA_FLAG) != 0) {
    313                // Find the preceding primary.
    314                j = i - 1;
    315                for(;;) {
    316                    if(j == start) { break; }
    317                    q = elements[j];
    318                    if((q & SEC_TER_DELTA_FLAG) == 0) {
    319                        i = j;
    320                        break;
    321                    }
    322                    --j;
    323                }
    324                if((q & SEC_TER_DELTA_FLAG) != 0) {
    325                    // No primary between start and limit.
    326                    break;
    327                }
    328            }
    329        }
    330        if(p < (q & 0xffffff00)) {  // Reset the "step" bits of a range end primary.
    331            limit = i;
    332        } else {
    333            start = i;
    334        }
    335    }
    336    return start;
    337 }
    338 
    339 U_NAMESPACE_END
    340 
    341 #endif  // !UCONFIG_NO_COLLATION