tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

scriptset.cpp (8672B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 **********************************************************************
      5 *   Copyright (C) 2014, International Business Machines
      6 *   Corporation and others.  All Rights Reserved.
      7 **********************************************************************
      8 *
      9 * scriptset.cpp
     10 *
     11 * created on: 2013 Jan 7
     12 * created by: Andy Heninger
     13 */
     14 
     15 #include "unicode/utypes.h"
     16 
     17 #include "unicode/uchar.h"
     18 #include "unicode/unistr.h"
     19 
     20 #include "scriptset.h"
     21 #include "uassert.h"
     22 #include "cmemory.h"
     23 
     24 U_NAMESPACE_BEGIN
     25 
     26 //----------------------------------------------------------------------------
     27 //
     28 //  ScriptSet implementation
     29 //
     30 //----------------------------------------------------------------------------
     31 ScriptSet::ScriptSet() {
     32    uprv_memset(bits, 0, sizeof(bits));
     33 }
     34 
     35 ScriptSet::~ScriptSet() {
     36 }
     37 
     38 ScriptSet::ScriptSet(const ScriptSet &other) {
     39    *this = other;
     40 }
     41 
     42 ScriptSet & ScriptSet::operator =(const ScriptSet &other) {
     43    if (this != &other) {
     44        uprv_memcpy(bits, other.bits, sizeof(bits));
     45    }
     46    return *this;
     47 }
     48 
     49 bool ScriptSet::operator == (const ScriptSet &other) const {
     50    for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
     51        if (bits[i] != other.bits[i]) {
     52            return false;
     53        }
     54    }
     55    return true;
     56 }
     57 
     58 UBool ScriptSet::test(UScriptCode script, UErrorCode &status) const {
     59    if (U_FAILURE(status)) {
     60        return false;
     61    }
     62    if (script < 0 || static_cast<int32_t>(script) >= SCRIPT_LIMIT) {
     63        status = U_ILLEGAL_ARGUMENT_ERROR;
     64        return false;
     65    }
     66    uint32_t index = script / 32;
     67    uint32_t bit   = 1 << (script & 31);
     68    return ((bits[index] & bit) != 0);
     69 }
     70 
     71 
     72 ScriptSet &ScriptSet::set(UScriptCode script, UErrorCode &status) {
     73    if (U_FAILURE(status)) {
     74        return *this;
     75    }
     76    if (script < 0 || static_cast<int32_t>(script) >= SCRIPT_LIMIT) {
     77        status = U_ILLEGAL_ARGUMENT_ERROR;
     78        return *this;
     79    }
     80    uint32_t index = script / 32;
     81    uint32_t bit   = 1 << (script & 31);
     82    bits[index] |= bit;
     83    return *this;
     84 }
     85 
     86 ScriptSet &ScriptSet::reset(UScriptCode script, UErrorCode &status) {
     87    if (U_FAILURE(status)) {
     88        return *this;
     89    }
     90    if (script < 0 || static_cast<int32_t>(script) >= SCRIPT_LIMIT) {
     91        status = U_ILLEGAL_ARGUMENT_ERROR;
     92        return *this;
     93    }
     94    uint32_t index = script / 32;
     95    uint32_t bit   = 1 << (script & 31);
     96    bits[index] &= ~bit;
     97    return *this;
     98 }
     99 
    100 
    101 
    102 ScriptSet &ScriptSet::Union(const ScriptSet &other) {
    103    for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
    104        bits[i] |= other.bits[i];
    105    }
    106    return *this;
    107 }
    108 
    109 ScriptSet &ScriptSet::intersect(const ScriptSet &other) {
    110    for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
    111        bits[i] &= other.bits[i];
    112    }
    113    return *this;
    114 }
    115 
    116 ScriptSet &ScriptSet::intersect(UScriptCode script, UErrorCode &status) {
    117    ScriptSet t;
    118    t.set(script, status);
    119    if (U_SUCCESS(status)) {
    120        this->intersect(t);
    121    }
    122    return *this;
    123 }
    124 
    125 UBool ScriptSet::intersects(const ScriptSet &other) const {
    126    for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
    127        if ((bits[i] & other.bits[i]) != 0) {
    128            return true;
    129        }
    130    }
    131    return false;
    132 }
    133 
    134 UBool ScriptSet::contains(const ScriptSet &other) const {
    135    ScriptSet t(*this);
    136    t.intersect(other);
    137    return (t == other);
    138 }
    139 
    140 
    141 ScriptSet &ScriptSet::setAll() {
    142    for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
    143        bits[i] = 0xffffffffu;
    144    }
    145    return *this;
    146 }
    147 
    148 
    149 ScriptSet &ScriptSet::resetAll() {
    150    uprv_memset(bits, 0, sizeof(bits));
    151    return *this;
    152 }
    153 
    154 int32_t ScriptSet::countMembers() const {
    155    // This bit counter is good for sparse numbers of '1's, which is
    156    //  very much the case that we will usually have.
    157    int32_t count = 0;
    158    for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
    159        uint32_t x = bits[i];
    160        while (x > 0) {
    161            count++;
    162            x &= (x - 1);    // and off the least significant one bit.
    163        }
    164    }
    165    return count;
    166 }
    167 
    168 int32_t ScriptSet::hashCode() const {
    169    int32_t hash = 0;
    170    for (int32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
    171        hash ^= bits[i];
    172    }
    173    return hash;
    174 }
    175 
    176 int32_t ScriptSet::nextSetBit(int32_t fromIndex) const {
    177    // TODO: Wants a better implementation.
    178    if (fromIndex < 0) {
    179        return -1;
    180    }
    181    UErrorCode status = U_ZERO_ERROR;
    182    for (int32_t scriptIndex = fromIndex; scriptIndex < SCRIPT_LIMIT; scriptIndex++) {
    183        if (test(static_cast<UScriptCode>(scriptIndex), status)) {
    184            return scriptIndex;
    185        }
    186    }
    187    return -1;
    188 }
    189 
    190 UBool ScriptSet::isEmpty() const {
    191    for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) {
    192        if (bits[i] != 0) {
    193            return false;
    194        }
    195    }
    196    return true;
    197 }
    198 
    199 UnicodeString &ScriptSet::displayScripts(UnicodeString &dest) const {
    200    UBool firstTime = true;
    201    for (int32_t i = nextSetBit(0); i >= 0; i = nextSetBit(i + 1)) {
    202        if (!firstTime) {
    203            dest.append(static_cast<char16_t>(0x20));
    204        }
    205        firstTime = false;
    206        const char* scriptName = uscript_getShortName(static_cast<UScriptCode>(i));
    207        dest.append(UnicodeString(scriptName, -1, US_INV));
    208    }
    209    return dest;
    210 }
    211 
    212 ScriptSet &ScriptSet::parseScripts(const UnicodeString &scriptString, UErrorCode &status) {
    213    resetAll();
    214    if (U_FAILURE(status)) {
    215        return *this;
    216    }
    217    UnicodeString oneScriptName;
    218    for (int32_t i=0; i<scriptString.length();) {
    219        UChar32 c = scriptString.char32At(i);
    220        i = scriptString.moveIndex32(i, 1);
    221        if (!u_isUWhiteSpace(c)) {
    222            oneScriptName.append(c);
    223            if (i < scriptString.length()) {
    224                continue;
    225            }
    226        }
    227        if (oneScriptName.length() > 0) {
    228            char buf[40];
    229            oneScriptName.extract(0, oneScriptName.length(), buf, sizeof(buf)-1, US_INV);
    230            buf[sizeof(buf)-1] = 0;
    231            int32_t sc = u_getPropertyValueEnum(UCHAR_SCRIPT, buf);
    232            if (sc == UCHAR_INVALID_CODE) {
    233                status = U_ILLEGAL_ARGUMENT_ERROR;
    234            } else {
    235                this->set(static_cast<UScriptCode>(sc), status);
    236            }
    237            if (U_FAILURE(status)) {
    238                return *this;
    239            }
    240            oneScriptName.remove();
    241        }
    242    }
    243    return *this;
    244 }
    245 
    246 void ScriptSet::setScriptExtensions(UChar32 codePoint, UErrorCode& status) {
    247    if (U_FAILURE(status)) { return; }
    248    static const int32_t FIRST_GUESS_SCRIPT_CAPACITY = 20;
    249    MaybeStackArray<UScriptCode,FIRST_GUESS_SCRIPT_CAPACITY> scripts;
    250    UErrorCode internalStatus = U_ZERO_ERROR;
    251    int32_t script_count = -1;
    252 
    253    while (true) {
    254        script_count = uscript_getScriptExtensions(
    255            codePoint, scripts.getAlias(), scripts.getCapacity(), &internalStatus);
    256        if (internalStatus == U_BUFFER_OVERFLOW_ERROR) {
    257            // Need to allocate more space
    258            if (scripts.resize(script_count) == nullptr) {
    259                status = U_MEMORY_ALLOCATION_ERROR;
    260                return;
    261            }
    262            internalStatus = U_ZERO_ERROR;
    263        } else {
    264            break;
    265        }
    266    }
    267 
    268    // Check if we failed for some reason other than buffer overflow
    269    if (U_FAILURE(internalStatus)) {
    270        status = internalStatus;
    271        return;
    272    }
    273 
    274    // Load the scripts into the ScriptSet and return
    275    for (int32_t i = 0; i < script_count; i++) {
    276        this->set(scripts[i], status);
    277        if (U_FAILURE(status)) { return; }
    278    }
    279 }
    280 
    281 U_NAMESPACE_END
    282 
    283 U_CAPI UBool U_EXPORT2
    284 uhash_equalsScriptSet(const UElement key1, const UElement key2) {
    285    icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer);
    286    icu::ScriptSet *s2 = static_cast<icu::ScriptSet *>(key2.pointer);
    287    return (*s1 == *s2);
    288 }
    289 
    290 U_CAPI int32_t U_EXPORT2
    291 uhash_compareScriptSet(UElement key0, UElement key1) {
    292    icu::ScriptSet *s0 = static_cast<icu::ScriptSet *>(key0.pointer);
    293    icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer);
    294    int32_t diff = s0->countMembers() - s1->countMembers();
    295    if (diff != 0) return diff;
    296    int32_t i0 = s0->nextSetBit(0);
    297    int32_t i1 = s1->nextSetBit(0);
    298    while ((diff = i0-i1) == 0 && i0 > 0) {
    299        i0 = s0->nextSetBit(i0+1);
    300        i1 = s1->nextSetBit(i1+1);
    301    }
    302    return diff;
    303 }
    304 
    305 U_CAPI int32_t U_EXPORT2
    306 uhash_hashScriptSet(const UElement key) {
    307    icu::ScriptSet *s = static_cast<icu::ScriptSet *>(key.pointer);
    308    return s->hashCode();
    309 }
    310 
    311 U_CAPI void U_EXPORT2
    312 uhash_deleteScriptSet(void *obj) {
    313    icu::ScriptSet *s = static_cast<icu::ScriptSet *>(obj);
    314    delete s;
    315 }