tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

uniquecharstr.h (4003B)


      1 // © 2020 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 
      4 // uniquecharstr.h
      5 // created: 2020sep01 Frank Yung-Fong Tang
      6 
      7 #ifndef __UNIQUECHARSTR_H__
      8 #define __UNIQUECHARSTR_H__
      9 
     10 #include "charstr.h"
     11 #include "uassert.h"
     12 #include "uhash.h"
     13 #include "cmemory.h"
     14 
     15 U_NAMESPACE_BEGIN
     16 
     17 /**
     18 * Stores NUL-terminated strings with duplicate elimination.
     19 * Checks for unique UTF-16 string pointers and converts to invariant characters.
     20 *
     21 * Intended to be stack-allocated. Add strings, get a unique number for each,
     22 * freeze the object, get a char * pointer for each string,
     23 * call orphanCharStrings() to capture the string storage, and let this object go out of scope.
     24 */
     25 class UniqueCharStrings {
     26 public:
     27    UniqueCharStrings(UErrorCode &errorCode) : strings(nullptr) {
     28        // Note: We hash on string contents but store stable char16_t * pointers.
     29        // If the strings are stored in resource bundles which should be built with
     30        // duplicate elimination, then we should be able to hash on just the pointer values.
     31        uhash_init(&map, uhash_hashUChars, uhash_compareUChars, uhash_compareLong, &errorCode);
     32        if (U_FAILURE(errorCode)) { return; }
     33        strings = new CharString();
     34        if (strings == nullptr) {
     35            errorCode = U_MEMORY_ALLOCATION_ERROR;
     36        }
     37    }
     38    ~UniqueCharStrings() {
     39        uhash_close(&map);
     40        delete strings;
     41    }
     42 
     43    /** Returns/orphans the CharString that contains all strings. */
     44    CharString *orphanCharStrings() {
     45        CharString *result = strings;
     46        strings = nullptr;
     47        return result;
     48    }
     49 
     50    /**
     51     * Adds a NUL-terminated string and returns a unique number for it.
     52     * The string must not change, nor move around in memory,
     53     * while this UniqueCharStrings is in use.
     54     *
     55     * Best used with string data in a stable storage, such as strings returned
     56     * by resource bundle functions.
     57     */
     58    int32_t add(const char16_t*p, UErrorCode &errorCode) {
     59        if (U_FAILURE(errorCode)) { return -1; }
     60        if (isFrozen) {
     61            errorCode = U_NO_WRITE_PERMISSION;
     62            return -1;
     63        }
     64        // The string points into the resource bundle.
     65        int32_t oldIndex = uhash_geti(&map, p);
     66        if (oldIndex != 0) {  // found duplicate
     67            return oldIndex;
     68        }
     69        // Explicit NUL terminator for the previous string.
     70        // The strings object is also terminated with one implicit NUL.
     71        strings->append(0, errorCode);
     72        int32_t newIndex = strings->length();
     73        strings->appendInvariantChars(p, u_strlen(p), errorCode);
     74        uhash_puti(&map, const_cast<char16_t *>(p), newIndex, &errorCode);
     75        return newIndex;
     76    }
     77 
     78    /**
     79     * Adds a unicode string by value and returns a unique number for it.
     80     */
     81    int32_t addByValue(UnicodeString s, UErrorCode &errorCode) {
     82        if (U_FAILURE(errorCode)) { return -1; }
     83        if (isFrozen) {
     84            errorCode = U_NO_WRITE_PERMISSION;
     85            return -1;
     86        }
     87        int32_t oldIndex = uhash_geti(&map, s.getTerminatedBuffer());
     88        if (oldIndex != 0) {  // found duplicate
     89            return oldIndex;
     90        }
     91        // We need to store the string content of the UnicodeString.
     92        UnicodeString *key = keyStore.create(s);
     93        if (key == nullptr) {
     94            errorCode = U_MEMORY_ALLOCATION_ERROR;
     95            return -1;
     96        }
     97        return add(key->getTerminatedBuffer(), errorCode);
     98    }
     99 
    100    void freeze() { isFrozen = true; }
    101 
    102    /**
    103     * Returns a string pointer for its unique number, if this object is frozen.
    104     * Otherwise nullptr.
    105     */
    106    const char *get(int32_t i) const {
    107        U_ASSERT(isFrozen);
    108        return isFrozen && i > 0 ? strings->data() + i : nullptr;
    109    }
    110 
    111 private:
    112    UHashtable map;
    113    CharString *strings;
    114    MemoryPool<UnicodeString> keyStore;
    115    bool isFrozen = false;
    116 };
    117 
    118 U_NAMESPACE_END
    119 
    120 #endif  // __UNIQUECHARSTR_H__