tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

sortkey.cpp (7589B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 * Copyright (C) 1996-2012, International Business Machines Corporation and
      6 * others. All Rights Reserved.
      7 *******************************************************************************
      8 */
      9 //===============================================================================
     10 //
     11 // File sortkey.cpp
     12 //
     13 //
     14 //
     15 // Created by: Helena Shih
     16 //
     17 // Modification History:
     18 //
     19 //  Date         Name          Description
     20 //
     21 //  6/20/97      helena        Java class name change.
     22 //  6/23/97      helena        Added comments to make code more readable.
     23 //  6/26/98      erm           Changed to use byte arrays instead of UnicodeString
     24 //  7/31/98      erm           hashCode: minimum inc should be 2 not 1,
     25 //                             Cleaned up operator=
     26 // 07/12/99      helena        HPUX 11 CC port.
     27 // 03/06/01      synwee        Modified compareTo, to handle the result of
     28 //                             2 string similar in contents, but one is longer
     29 //                             than the other
     30 //===============================================================================
     31 
     32 #include "unicode/utypes.h"
     33 
     34 #if !UCONFIG_NO_COLLATION
     35 
     36 #include "unicode/sortkey.h"
     37 #include "cmemory.h"
     38 #include "uelement.h"
     39 #include "ustr_imp.h"
     40 
     41 U_NAMESPACE_BEGIN
     42 
     43 // A hash code of kInvalidHashCode indicates that the hash code needs
     44 // to be computed. A hash code of kEmptyHashCode is used for empty keys
     45 // and for any key whose computed hash code is kInvalidHashCode.
     46 static const int32_t kInvalidHashCode = 0;
     47 static const int32_t kEmptyHashCode = 1;
     48 // The "bogus hash code" replaces a separate fBogus flag.
     49 static const int32_t kBogusHashCode = 2;
     50 
     51 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationKey)
     52 
     53 CollationKey::CollationKey()
     54    : UObject(), fFlagAndLength(0),
     55      fHashCode(kEmptyHashCode)
     56 {
     57 }
     58 
     59 // Create a collation key from a bit array.
     60 CollationKey::CollationKey(const uint8_t* newValues, int32_t count)
     61    : UObject(), fFlagAndLength(count),
     62      fHashCode(kInvalidHashCode)
     63 {
     64    if (count < 0 || (newValues == nullptr && count != 0) ||
     65            (count > getCapacity() && reallocate(count, 0) == nullptr)) {
     66        setToBogus();
     67        return;
     68    }
     69 
     70    if (count > 0) {
     71        uprv_memcpy(getBytes(), newValues, count);
     72    }
     73 }
     74 
     75 CollationKey::CollationKey(const CollationKey& other)
     76    : UObject(other), fFlagAndLength(other.getLength()),
     77      fHashCode(other.fHashCode)
     78 {
     79    if (other.isBogus())
     80    {
     81        setToBogus();
     82        return;
     83    }
     84 
     85    int32_t length = fFlagAndLength;
     86    if (length > getCapacity() && reallocate(length, 0) == nullptr) {
     87        setToBogus();
     88        return;
     89    }
     90 
     91    if (length > 0) {
     92        uprv_memcpy(getBytes(), other.getBytes(), length);
     93    }
     94 }
     95 
     96 CollationKey::~CollationKey()
     97 {
     98    if(fFlagAndLength < 0) { uprv_free(fUnion.fFields.fBytes); }
     99 }
    100 
    101 uint8_t *CollationKey::reallocate(int32_t newCapacity, int32_t length) {
    102    uint8_t *newBytes = static_cast<uint8_t *>(uprv_malloc(newCapacity));
    103    if(newBytes == nullptr) { return nullptr; }
    104    if(length > 0) {
    105        uprv_memcpy(newBytes, getBytes(), length);
    106    }
    107    if(fFlagAndLength < 0) { uprv_free(fUnion.fFields.fBytes); }
    108    fUnion.fFields.fBytes = newBytes;
    109    fUnion.fFields.fCapacity = newCapacity;
    110    fFlagAndLength |= 0x80000000;
    111    return newBytes;
    112 }
    113 
    114 void CollationKey::setLength(int32_t newLength) {
    115    // U_ASSERT(newLength >= 0 && newLength <= getCapacity());
    116    fFlagAndLength = (fFlagAndLength & 0x80000000) | newLength;
    117    fHashCode = kInvalidHashCode;
    118 }
    119 
    120 // set the key to an empty state
    121 CollationKey&
    122 CollationKey::reset()
    123 {
    124    fFlagAndLength &= 0x80000000;
    125    fHashCode = kEmptyHashCode;
    126 
    127    return *this;
    128 }
    129 
    130 // set the key to a "bogus" or invalid state
    131 CollationKey&
    132 CollationKey::setToBogus()
    133 {
    134    fFlagAndLength &= 0x80000000;
    135    fHashCode = kBogusHashCode;
    136 
    137    return *this;
    138 }
    139 
    140 bool
    141 CollationKey::operator==(const CollationKey& source) const
    142 {
    143    return getLength() == source.getLength() &&
    144            (this == &source ||
    145             uprv_memcmp(getBytes(), source.getBytes(), getLength()) == 0);
    146 }
    147 
    148 const CollationKey&
    149 CollationKey::operator=(const CollationKey& other)
    150 {
    151    if (this != &other)
    152    {
    153        if (other.isBogus())
    154        {
    155            return setToBogus();
    156        }
    157 
    158        int32_t length = other.getLength();
    159        if (length > getCapacity() && reallocate(length, 0) == nullptr) {
    160            return setToBogus();
    161        }
    162        if (length > 0) {
    163            uprv_memcpy(getBytes(), other.getBytes(), length);
    164        }
    165        fFlagAndLength = (fFlagAndLength & 0x80000000) | length;
    166        fHashCode = other.fHashCode;
    167    }
    168 
    169    return *this;
    170 }
    171 
    172 // Bitwise comparison for the collation keys.
    173 Collator::EComparisonResult
    174 CollationKey::compareTo(const CollationKey& target) const
    175 {
    176    UErrorCode errorCode = U_ZERO_ERROR;
    177    return static_cast<Collator::EComparisonResult>(compareTo(target, errorCode));
    178 }
    179 
    180 // Bitwise comparison for the collation keys.
    181 UCollationResult
    182 CollationKey::compareTo(const CollationKey& target, UErrorCode &status) const
    183 {
    184  if(U_SUCCESS(status)) {
    185    const uint8_t *src = getBytes();
    186    const uint8_t *tgt = target.getBytes();
    187 
    188    // are we comparing the same string
    189    if (src == tgt)
    190        return  UCOL_EQUAL;
    191 
    192    UCollationResult result;
    193 
    194    // are we comparing different lengths?
    195    int32_t minLength = getLength();
    196    int32_t targetLength = target.getLength();
    197    if (minLength < targetLength) {
    198        result = UCOL_LESS;
    199    } else if (minLength == targetLength) {
    200        result = UCOL_EQUAL;
    201    } else {
    202        minLength = targetLength;
    203        result = UCOL_GREATER;
    204    }
    205 
    206    if (minLength > 0) {
    207        int diff = uprv_memcmp(src, tgt, minLength);
    208        if (diff > 0) {
    209            return UCOL_GREATER;
    210        }
    211        else
    212            if (diff < 0) {
    213                return UCOL_LESS;
    214            }
    215    }
    216 
    217    return result;
    218  } else {
    219    return UCOL_EQUAL;
    220  }
    221 }
    222 
    223 #ifdef U_USE_COLLATION_KEY_DEPRECATES
    224 // Create a copy of the byte array.
    225 uint8_t*
    226 CollationKey::toByteArray(int32_t& count) const
    227 {
    228    uint8_t *result = (uint8_t*) uprv_malloc( sizeof(uint8_t) * fCount );
    229 
    230    if (result == nullptr)
    231    {
    232        count = 0;
    233    }
    234    else
    235    {
    236        count = fCount;
    237        if (count > 0) {
    238            uprv_memcpy(result, fBytes, fCount);
    239        }
    240    }
    241 
    242    return result;
    243 }
    244 #endif
    245 
    246 static int32_t
    247 computeHashCode(const uint8_t *key, int32_t  length) {
    248    const char *s = reinterpret_cast<const char *>(key);
    249    int32_t hash;
    250    if (s == nullptr || length == 0) {
    251        hash = kEmptyHashCode;
    252    } else {
    253        hash = ustr_hashCharsN(s, length);
    254        if (hash == kInvalidHashCode || hash == kBogusHashCode) {
    255            hash = kEmptyHashCode;
    256        }
    257    }
    258    return hash;
    259 }
    260 
    261 int32_t
    262 CollationKey::hashCode() const
    263 {
    264    // (Cribbed from UnicodeString)
    265    // We cache the hashCode; when it becomes invalid, due to any change to the
    266    // string, we note this by setting it to kInvalidHashCode. [LIU]
    267 
    268    // Note: This method is semantically const, but physically non-const.
    269 
    270    if (fHashCode == kInvalidHashCode)
    271    {
    272        fHashCode = computeHashCode(getBytes(), getLength());
    273    }
    274 
    275    return fHashCode;
    276 }
    277 
    278 U_NAMESPACE_END
    279 
    280 U_CAPI int32_t U_EXPORT2
    281 ucol_keyHashCode(const uint8_t *key, 
    282                       int32_t  length)
    283 {
    284    return icu::computeHashCode(key, length);
    285 }
    286 
    287 #endif /* #if !UCONFIG_NO_COLLATION */