tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

collation.cpp (5524B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 * Copyright (C) 2010-2014, International Business Machines
      6 * Corporation and others.  All Rights Reserved.
      7 *******************************************************************************
      8 * collation.cpp
      9 *
     10 * created on: 2010oct27
     11 * created by: Markus W. Scherer
     12 */
     13 
     14 #include "unicode/utypes.h"
     15 
     16 #if !UCONFIG_NO_COLLATION
     17 
     18 #include "collation.h"
     19 #include "uassert.h"
     20 
     21 U_NAMESPACE_BEGIN
     22 
     23 uint32_t
     24 Collation::incTwoBytePrimaryByOffset(uint32_t basePrimary, UBool isCompressible, int32_t offset) {
     25    // Extract the second byte, minus the minimum byte value,
     26    // plus the offset, modulo the number of usable byte values, plus the minimum.
     27    // Reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary.
     28    uint32_t primary;
     29    if(isCompressible) {
     30        offset += (static_cast<int32_t>(basePrimary >> 16) & 0xff) - 4;
     31        primary = static_cast<uint32_t>((offset % 251) + 4) << 16;
     32        offset /= 251;
     33    } else {
     34        offset += (static_cast<int32_t>(basePrimary >> 16) & 0xff) - 2;
     35        primary = static_cast<uint32_t>((offset % 254) + 2) << 16;
     36        offset /= 254;
     37    }
     38    // First byte, assume no further overflow.
     39    return primary | ((basePrimary & 0xff000000) + static_cast<uint32_t>(offset << 24));
     40 }
     41 
     42 uint32_t
     43 Collation::incThreeBytePrimaryByOffset(uint32_t basePrimary, UBool isCompressible, int32_t offset) {
     44    // Extract the third byte, minus the minimum byte value,
     45    // plus the offset, modulo the number of usable byte values, plus the minimum.
     46    offset += (static_cast<int32_t>(basePrimary >> 8) & 0xff) - 2;
     47    uint32_t primary = static_cast<uint32_t>((offset % 254) + 2) << 8;
     48    offset /= 254;
     49    // Same with the second byte,
     50    // but reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary.
     51    if(isCompressible) {
     52        offset += (static_cast<int32_t>(basePrimary >> 16) & 0xff) - 4;
     53        primary |= static_cast<uint32_t>((offset % 251) + 4) << 16;
     54        offset /= 251;
     55    } else {
     56        offset += (static_cast<int32_t>(basePrimary >> 16) & 0xff) - 2;
     57        primary |= static_cast<uint32_t>((offset % 254) + 2) << 16;
     58        offset /= 254;
     59    }
     60    // First byte, assume no further overflow.
     61    return primary | ((basePrimary & 0xff000000) + static_cast<uint32_t>(offset << 24));
     62 }
     63 
     64 uint32_t
     65 Collation::decTwoBytePrimaryByOneStep(uint32_t basePrimary, UBool isCompressible, int32_t step) {
     66    // Extract the second byte, minus the minimum byte value,
     67    // minus the step, modulo the number of usable byte values, plus the minimum.
     68    // Reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary.
     69    // Assume no further underflow for the first byte.
     70    U_ASSERT(0 < step && step <= 0x7f);
     71    int32_t byte2 = (static_cast<int32_t>(basePrimary >> 16) & 0xff) - step;
     72    if(isCompressible) {
     73        if(byte2 < 4) {
     74            byte2 += 251;
     75            basePrimary -= 0x1000000;
     76        }
     77    } else {
     78        if(byte2 < 2) {
     79            byte2 += 254;
     80            basePrimary -= 0x1000000;
     81        }
     82    }
     83    return (basePrimary & 0xff000000) | (static_cast<uint32_t>(byte2) << 16);
     84 }
     85 
     86 uint32_t
     87 Collation::decThreeBytePrimaryByOneStep(uint32_t basePrimary, UBool isCompressible, int32_t step) {
     88    // Extract the third byte, minus the minimum byte value,
     89    // minus the step, modulo the number of usable byte values, plus the minimum.
     90    U_ASSERT(0 < step && step <= 0x7f);
     91    int32_t byte3 = (static_cast<int32_t>(basePrimary >> 8) & 0xff) - step;
     92    if(byte3 >= 2) {
     93        return (basePrimary & 0xffff0000) | (static_cast<uint32_t>(byte3) << 8);
     94    }
     95    byte3 += 254;
     96    // Same with the second byte,
     97    // but reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary.
     98    int32_t byte2 = (static_cast<int32_t>(basePrimary >> 16) & 0xff) - 1;
     99    if(isCompressible) {
    100        if(byte2 < 4) {
    101            byte2 = 0xfe;
    102            basePrimary -= 0x1000000;
    103        }
    104    } else {
    105        if(byte2 < 2) {
    106            byte2 = 0xff;
    107            basePrimary -= 0x1000000;
    108        }
    109    }
    110    // First byte, assume no further underflow.
    111    return (basePrimary & 0xff000000) | (static_cast<uint32_t>(byte2) << 16) | (static_cast<uint32_t>(byte3) << 8);
    112 }
    113 
    114 uint32_t
    115 Collation::getThreeBytePrimaryForOffsetData(UChar32 c, int64_t dataCE) {
    116    uint32_t p = static_cast<uint32_t>(dataCE >> 32); // three-byte primary pppppp00
    117    int32_t lower32 = static_cast<int32_t>(dataCE); // base code point b & step s: bbbbbbss (bit 7: isCompressible)
    118    int32_t offset = (c - (lower32 >> 8)) * (lower32 & 0x7f);  // delta * increment
    119    UBool isCompressible = (lower32 & 0x80) != 0;
    120    return Collation::incThreeBytePrimaryByOffset(p, isCompressible, offset);
    121 }
    122 
    123 uint32_t
    124 Collation::unassignedPrimaryFromCodePoint(UChar32 c) {
    125    // Create a gap before U+0000. Use c=-1 for [first unassigned].
    126    ++c;
    127    // Fourth byte: 18 values, every 14th byte value (gap of 13).
    128    uint32_t primary = 2 + (c % 18) * 14;
    129    c /= 18;
    130    // Third byte: 254 values.
    131    primary |= (2 + (c % 254)) << 8;
    132    c /= 254;
    133    // Second byte: 251 values 04..FE excluding the primary compression bytes.
    134    primary |= (4 + (c % 251)) << 16;
    135    // One lead byte covers all code points (c < 0x1182B4 = 1*251*254*18).
    136    return primary | (UNASSIGNED_IMPLICIT_BYTE << 24);
    137 }
    138 
    139 U_NAMESPACE_END
    140 
    141 #endif  // !UCONFIG_NO_COLLATION