tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

collationkeys.h (6864B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 * Copyright (C) 2012-2014, International Business Machines
      6 * Corporation and others.  All Rights Reserved.
      7 *******************************************************************************
      8 * collationkeys.h
      9 *
     10 * created on: 2012sep02
     11 * created by: Markus W. Scherer
     12 */
     13 
     14 #ifndef __COLLATIONKEYS_H__
     15 #define __COLLATIONKEYS_H__
     16 
     17 #include "unicode/utypes.h"
     18 
     19 #if !UCONFIG_NO_COLLATION
     20 
     21 #include "unicode/bytestream.h"
     22 #include "unicode/ucol.h"
     23 #include "charstr.h"
     24 #include "collation.h"
     25 
     26 U_NAMESPACE_BEGIN
     27 
     28 class CollationIterator;
     29 struct CollationDataReader;
     30 struct CollationSettings;
     31 
     32 class SortKeyByteSink : public ByteSink {
     33 public:
     34    SortKeyByteSink(char *dest, int32_t destCapacity)
     35            : buffer_(dest), capacity_(destCapacity),
     36              appended_(0), ignore_(0) {}
     37    virtual ~SortKeyByteSink();
     38 
     39    void IgnoreBytes(int32_t numIgnore) { ignore_ = numIgnore; }
     40 
     41    virtual void Append(const char *bytes, int32_t n) override;
     42    void Append(uint32_t b) {
     43        if (ignore_ > 0) {
     44            --ignore_;
     45        } else {
     46            if (appended_ < capacity_ || Resize(1, appended_)) {
     47                buffer_[appended_] = static_cast<char>(b);
     48            }
     49            ++appended_;
     50        }
     51    }
     52    virtual char *GetAppendBuffer(int32_t min_capacity,
     53                                  int32_t desired_capacity_hint,
     54                                  char *scratch, int32_t scratch_capacity,
     55                                  int32_t *result_capacity) override;
     56    int32_t NumberOfBytesAppended() const { return appended_; }
     57 
     58    /**
     59     * @return how many bytes can be appended (including ignored ones)
     60     *         without reallocation
     61     */
     62    int32_t GetRemainingCapacity() const {
     63        // Either ignore_ or appended_ should be 0.
     64        return ignore_ + capacity_ - appended_;
     65    }
     66 
     67    UBool Overflowed() const { return appended_ > capacity_; }
     68    /** @return false if memory allocation failed */
     69    UBool IsOk() const { return buffer_ != nullptr; }
     70 
     71 protected:
     72    virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length) = 0;
     73    virtual UBool Resize(int32_t appendCapacity, int32_t length) = 0;
     74 
     75    void SetNotOk() {
     76        buffer_ = nullptr;
     77        capacity_ = 0;
     78    }
     79 
     80    char *buffer_;
     81    int32_t capacity_;
     82    int32_t appended_;
     83    int32_t ignore_;
     84 
     85 private:
     86    SortKeyByteSink(const SortKeyByteSink &); // copy constructor not implemented
     87    SortKeyByteSink &operator=(const SortKeyByteSink &); // assignment operator not implemented
     88 };
     89 
     90 class U_I18N_API CollationKeys /* not : public UObject because all methods are static */ {
     91 public:
     92    class LevelCallback : public UMemory {
     93    public:
     94        virtual ~LevelCallback();
     95        /**
     96         * @param level The next level about to be written to the ByteSink.
     97         * @return true if the level is to be written
     98         *         (the base class implementation always returns true)
     99         */
    100        virtual UBool needToWrite(Collation::Level level);
    101    };
    102 
    103    /**
    104     * Writes the sort key bytes for minLevel up to the iterator data's strength.
    105     * Optionally writes the case level.
    106     * Stops writing levels when callback.needToWrite(level) returns false.
    107     * Separates levels with the LEVEL_SEPARATOR_BYTE
    108     * but does not write a TERMINATOR_BYTE.
    109     */
    110    static void writeSortKeyUpToQuaternary(CollationIterator &iter,
    111                                           const UBool *compressibleBytes,
    112                                           const CollationSettings &settings,
    113                                           SortKeyByteSink &sink,
    114                                           Collation::Level minLevel, LevelCallback &callback,
    115                                           UBool preflight, UErrorCode &errorCode);
    116 private:
    117    friend struct CollationDataReader;
    118 
    119    CollationKeys() = delete;  // no instantiation
    120 
    121    // Secondary level: Compress up to 33 common weights as 05..25 or 25..45.
    122    static const uint32_t SEC_COMMON_LOW = Collation::COMMON_BYTE;
    123    static const uint32_t SEC_COMMON_MIDDLE = SEC_COMMON_LOW + 0x20;
    124    static const uint32_t SEC_COMMON_HIGH = SEC_COMMON_LOW + 0x40;
    125    static const int32_t SEC_COMMON_MAX_COUNT = 0x21;
    126 
    127    // Case level, lowerFirst: Compress up to 7 common weights as 1..7 or 7..13.
    128    static const uint32_t CASE_LOWER_FIRST_COMMON_LOW = 1;
    129    static const uint32_t CASE_LOWER_FIRST_COMMON_MIDDLE = 7;
    130    static const uint32_t CASE_LOWER_FIRST_COMMON_HIGH = 13;
    131    static const int32_t CASE_LOWER_FIRST_COMMON_MAX_COUNT = 7;
    132 
    133    // Case level, upperFirst: Compress up to 13 common weights as 3..15.
    134    static const uint32_t CASE_UPPER_FIRST_COMMON_LOW = 3;
    135    static const uint32_t CASE_UPPER_FIRST_COMMON_HIGH = 15;
    136    static const int32_t CASE_UPPER_FIRST_COMMON_MAX_COUNT = 13;
    137 
    138    // Tertiary level only (no case): Compress up to 97 common weights as 05..65 or 65..C5.
    139    static const uint32_t TER_ONLY_COMMON_LOW = Collation::COMMON_BYTE;
    140    static const uint32_t TER_ONLY_COMMON_MIDDLE = TER_ONLY_COMMON_LOW + 0x60;
    141    static const uint32_t TER_ONLY_COMMON_HIGH = TER_ONLY_COMMON_LOW + 0xc0;
    142    static const int32_t TER_ONLY_COMMON_MAX_COUNT = 0x61;
    143 
    144    // Tertiary with case, lowerFirst: Compress up to 33 common weights as 05..25 or 25..45.
    145    static const uint32_t TER_LOWER_FIRST_COMMON_LOW = Collation::COMMON_BYTE;
    146    static const uint32_t TER_LOWER_FIRST_COMMON_MIDDLE = TER_LOWER_FIRST_COMMON_LOW + 0x20;
    147    static const uint32_t TER_LOWER_FIRST_COMMON_HIGH = TER_LOWER_FIRST_COMMON_LOW + 0x40;
    148    static const int32_t TER_LOWER_FIRST_COMMON_MAX_COUNT = 0x21;
    149 
    150    // Tertiary with case, upperFirst: Compress up to 33 common weights as 85..A5 or A5..C5.
    151    static const uint32_t TER_UPPER_FIRST_COMMON_LOW = Collation::COMMON_BYTE + 0x80;
    152    static const uint32_t TER_UPPER_FIRST_COMMON_MIDDLE = TER_UPPER_FIRST_COMMON_LOW + 0x20;
    153    static const uint32_t TER_UPPER_FIRST_COMMON_HIGH = TER_UPPER_FIRST_COMMON_LOW + 0x40;
    154    static const int32_t TER_UPPER_FIRST_COMMON_MAX_COUNT = 0x21;
    155 
    156    // Quaternary level: Compress up to 113 common weights as 1C..8C or 8C..FC.
    157    static const uint32_t QUAT_COMMON_LOW = 0x1c;
    158    static const uint32_t QUAT_COMMON_MIDDLE = QUAT_COMMON_LOW + 0x70;
    159    static const uint32_t QUAT_COMMON_HIGH = QUAT_COMMON_LOW + 0xE0;
    160    static const int32_t QUAT_COMMON_MAX_COUNT = 0x71;
    161    // Primary weights shifted to quaternary level must be encoded with
    162    // a lead byte below the common-weight compression range.
    163    static const uint32_t QUAT_SHIFTED_LIMIT_BYTE = QUAT_COMMON_LOW - 1;  // 0x1b
    164 };
    165 
    166 U_NAMESPACE_END
    167 
    168 #endif  // !UCONFIG_NO_COLLATION
    169 #endif  // __COLLATIONKEYS_H__