tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

loadednormalizer2impl.cpp (14836B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 * Copyright (C) 2014, International Business Machines
      6 * Corporation and others.  All Rights Reserved.
      7 *******************************************************************************
      8 * loadednormalizer2impl.cpp
      9 *
     10 * created on: 2014sep03
     11 * created by: Markus W. Scherer
     12 */
     13 
     14 #include "unicode/utypes.h"
     15 
     16 #if !UCONFIG_NO_NORMALIZATION
     17 
     18 #include "unicode/udata.h"
     19 #include "unicode/localpointer.h"
     20 #include "unicode/normalizer2.h"
     21 #include "unicode/ucptrie.h"
     22 #include "unicode/unistr.h"
     23 #include "unicode/unorm.h"
     24 #include "cstring.h"
     25 #include "mutex.h"
     26 #include "norm2allmodes.h"
     27 #include "normalizer2impl.h"
     28 #include "uassert.h"
     29 #include "ucln_cmn.h"
     30 #include "uhash.h"
     31 
     32 U_NAMESPACE_BEGIN
     33 
     34 class LoadedNormalizer2Impl : public Normalizer2Impl {
     35 public:
     36    LoadedNormalizer2Impl() : memory(nullptr), ownedTrie(nullptr) {}
     37    virtual ~LoadedNormalizer2Impl();
     38 
     39    void load(const char *packageName, const char *name, UErrorCode &errorCode);
     40 
     41 private:
     42    static UBool U_CALLCONV
     43    isAcceptable(void *context, const char *type, const char *name, const UDataInfo *pInfo);
     44 
     45    UDataMemory *memory;
     46    UCPTrie *ownedTrie;
     47 };
     48 
     49 LoadedNormalizer2Impl::~LoadedNormalizer2Impl() {
     50    udata_close(memory);
     51    ucptrie_close(ownedTrie);
     52 }
     53 
     54 UBool U_CALLCONV
     55 LoadedNormalizer2Impl::isAcceptable(void * /*context*/,
     56                                    const char * /* type */, const char * /*name*/,
     57                                    const UDataInfo *pInfo) {
     58    if(
     59        pInfo->size>=20 &&
     60        pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
     61        pInfo->charsetFamily==U_CHARSET_FAMILY &&
     62        pInfo->dataFormat[0]==0x4e &&    /* dataFormat="Nrm2" */
     63        pInfo->dataFormat[1]==0x72 &&
     64        pInfo->dataFormat[2]==0x6d &&
     65        pInfo->dataFormat[3]==0x32 &&
     66        pInfo->formatVersion[0]==5
     67    ) {
     68        // Normalizer2Impl *me=(Normalizer2Impl *)context;
     69        // uprv_memcpy(me->dataVersion, pInfo->dataVersion, 4);
     70        return true;
     71    } else {
     72        return false;
     73    }
     74 }
     75 
     76 void
     77 LoadedNormalizer2Impl::load(const char *packageName, const char *name, UErrorCode &errorCode) {
     78    if(U_FAILURE(errorCode)) {
     79        return;
     80    }
     81    memory=udata_openChoice(packageName, "nrm", name, isAcceptable, this, &errorCode);
     82    if(U_FAILURE(errorCode)) {
     83        return;
     84    }
     85    const uint8_t* inBytes = static_cast<const uint8_t*>(udata_getMemory(memory));
     86    const int32_t* inIndexes = reinterpret_cast<const int32_t*>(inBytes);
     87    int32_t indexesLength=inIndexes[IX_NORM_TRIE_OFFSET]/4;
     88    if(indexesLength<=IX_MIN_LCCC_CP) {
     89        errorCode=U_INVALID_FORMAT_ERROR;  // Not enough indexes.
     90        return;
     91    }
     92 
     93    int32_t offset=inIndexes[IX_NORM_TRIE_OFFSET];
     94    int32_t nextOffset=inIndexes[IX_EXTRA_DATA_OFFSET];
     95    ownedTrie=ucptrie_openFromBinary(UCPTRIE_TYPE_FAST, UCPTRIE_VALUE_BITS_16,
     96                                     inBytes+offset, nextOffset-offset, nullptr,
     97                                     &errorCode);
     98    if(U_FAILURE(errorCode)) {
     99        return;
    100    }
    101 
    102    offset=nextOffset;
    103    nextOffset=inIndexes[IX_SMALL_FCD_OFFSET];
    104    const uint16_t* inExtraData = reinterpret_cast<const uint16_t*>(inBytes + offset);
    105 
    106    // smallFCD: new in formatVersion 2
    107    offset=nextOffset;
    108    const uint8_t *inSmallFCD=inBytes+offset;
    109 
    110    init(inIndexes, ownedTrie, inExtraData, inSmallFCD);
    111 }
    112 
    113 // instance cache ---------------------------------------------------------- ***
    114 
    115 Norm2AllModes *
    116 Norm2AllModes::createInstance(const char *packageName,
    117                              const char *name,
    118                              UErrorCode &errorCode) {
    119    if(U_FAILURE(errorCode)) {
    120        return nullptr;
    121    }
    122    LoadedNormalizer2Impl *impl=new LoadedNormalizer2Impl;
    123    if(impl==nullptr) {
    124        errorCode=U_MEMORY_ALLOCATION_ERROR;
    125        return nullptr;
    126    }
    127    impl->load(packageName, name, errorCode);
    128    return createInstance(impl, errorCode);
    129 }
    130 
    131 U_CDECL_BEGIN
    132 static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup();
    133 U_CDECL_END
    134 
    135 #if !NORM2_HARDCODE_NFC_DATA
    136 static Norm2AllModes *nfcSingleton;
    137 static icu::UInitOnce nfcInitOnce {};
    138 #endif
    139 
    140 static Norm2AllModes *nfkcSingleton;
    141 static icu::UInitOnce nfkcInitOnce {};
    142 
    143 static Norm2AllModes *nfkc_cfSingleton;
    144 static icu::UInitOnce nfkc_cfInitOnce {};
    145 
    146 static Norm2AllModes *nfkc_scfSingleton;
    147 static icu::UInitOnce nfkc_scfInitOnce {};
    148 
    149 static UHashtable    *cache=nullptr;
    150 
    151 // UInitOnce singleton initialization function
    152 static void U_CALLCONV initSingletons(const char *what, UErrorCode &errorCode) {
    153 #if !NORM2_HARDCODE_NFC_DATA
    154    if (uprv_strcmp(what, "nfc") == 0) {
    155        nfcSingleton    = Norm2AllModes::createInstance(nullptr, "nfc", errorCode);
    156    } else
    157 #endif
    158    if (uprv_strcmp(what, "nfkc") == 0) {
    159        nfkcSingleton    = Norm2AllModes::createInstance(nullptr, "nfkc", errorCode);
    160    } else if (uprv_strcmp(what, "nfkc_cf") == 0) {
    161        nfkc_cfSingleton = Norm2AllModes::createInstance(nullptr, "nfkc_cf", errorCode);
    162    } else if (uprv_strcmp(what, "nfkc_scf") == 0) {
    163        nfkc_scfSingleton = Norm2AllModes::createInstance(nullptr, "nfkc_scf", errorCode);
    164    } else {
    165        UPRV_UNREACHABLE_EXIT;   // Unknown singleton
    166    }
    167    ucln_common_registerCleanup(UCLN_COMMON_LOADED_NORMALIZER2, uprv_loaded_normalizer2_cleanup);
    168 }
    169 
    170 U_CDECL_BEGIN
    171 
    172 static void U_CALLCONV deleteNorm2AllModes(void *allModes) {
    173    delete (Norm2AllModes *)allModes;
    174 }
    175 
    176 static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup() {
    177 #if !NORM2_HARDCODE_NFC_DATA
    178    delete nfcSingleton;
    179    nfcSingleton = nullptr;
    180    nfcInitOnce.reset();
    181 #endif
    182 
    183    delete nfkcSingleton;
    184    nfkcSingleton = nullptr;
    185    nfkcInitOnce.reset();
    186 
    187    delete nfkc_cfSingleton;
    188    nfkc_cfSingleton = nullptr;
    189    nfkc_cfInitOnce.reset();
    190 
    191    delete nfkc_scfSingleton;
    192    nfkc_scfSingleton = nullptr;
    193    nfkc_scfInitOnce.reset();
    194 
    195    uhash_close(cache);
    196    cache=nullptr;
    197    return true;
    198 }
    199 
    200 U_CDECL_END
    201 
    202 #if !NORM2_HARDCODE_NFC_DATA
    203 const Norm2AllModes *
    204 Norm2AllModes::getNFCInstance(UErrorCode &errorCode) {
    205    if(U_FAILURE(errorCode)) { return nullptr; }
    206    umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);
    207    return nfcSingleton;
    208 }
    209 #endif
    210 
    211 const Norm2AllModes *
    212 Norm2AllModes::getNFKCInstance(UErrorCode &errorCode) {
    213    if(U_FAILURE(errorCode)) { return nullptr; }
    214    umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode);
    215    return nfkcSingleton;
    216 }
    217 
    218 const Norm2AllModes *
    219 Norm2AllModes::getNFKC_CFInstance(UErrorCode &errorCode) {
    220    if(U_FAILURE(errorCode)) { return nullptr; }
    221    umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode);
    222    return nfkc_cfSingleton;
    223 }
    224 
    225 const Norm2AllModes *
    226 Norm2AllModes::getNFKC_SCFInstance(UErrorCode &errorCode) {
    227    if(U_FAILURE(errorCode)) { return nullptr; }
    228    umtx_initOnce(nfkc_scfInitOnce, &initSingletons, "nfkc_scf", errorCode);
    229    return nfkc_scfSingleton;
    230 }
    231 
    232 #if !NORM2_HARDCODE_NFC_DATA
    233 const Normalizer2 *
    234 Normalizer2::getNFCInstance(UErrorCode &errorCode) {
    235    const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
    236    return allModes!=nullptr ? &allModes->comp : nullptr;
    237 }
    238 
    239 const Normalizer2 *
    240 Normalizer2::getNFDInstance(UErrorCode &errorCode) {
    241    const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
    242    return allModes!=nullptr ? &allModes->decomp : nullptr;
    243 }
    244 
    245 const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) {
    246    const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
    247    return allModes!=nullptr ? &allModes->fcd : nullptr;
    248 }
    249 
    250 const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) {
    251    const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
    252    return allModes!=nullptr ? &allModes->fcc : nullptr;
    253 }
    254 
    255 const Normalizer2Impl *
    256 Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) {
    257    const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
    258    return allModes!=nullptr ? allModes->impl : nullptr;
    259 }
    260 #endif
    261 
    262 const Normalizer2 *
    263 Normalizer2::getNFKCInstance(UErrorCode &errorCode) {
    264    const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode);
    265    return allModes!=nullptr ? &allModes->comp : nullptr;
    266 }
    267 
    268 const Normalizer2 *
    269 Normalizer2::getNFKDInstance(UErrorCode &errorCode) {
    270    const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode);
    271    return allModes!=nullptr ? &allModes->decomp : nullptr;
    272 }
    273 
    274 const Normalizer2 *
    275 Normalizer2::getNFKCCasefoldInstance(UErrorCode &errorCode) {
    276    const Norm2AllModes *allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
    277    return allModes!=nullptr ? &allModes->comp : nullptr;
    278 }
    279 
    280 const Normalizer2 *
    281 Normalizer2::getNFKCSimpleCasefoldInstance(UErrorCode &errorCode) {
    282    const Norm2AllModes *allModes=Norm2AllModes::getNFKC_SCFInstance(errorCode);
    283    return allModes!=nullptr ? &allModes->comp : nullptr;
    284 }
    285 
    286 const Normalizer2 *
    287 Normalizer2::getInstance(const char *packageName,
    288                         const char *name,
    289                         UNormalization2Mode mode,
    290                         UErrorCode &errorCode) {
    291    if(U_FAILURE(errorCode)) {
    292        return nullptr;
    293    }
    294    if(name==nullptr || *name==0) {
    295        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    296        return nullptr;
    297    }
    298    const Norm2AllModes *allModes=nullptr;
    299    if(packageName==nullptr) {
    300        if(0==uprv_strcmp(name, "nfc")) {
    301            allModes=Norm2AllModes::getNFCInstance(errorCode);
    302        } else if(0==uprv_strcmp(name, "nfkc")) {
    303            allModes=Norm2AllModes::getNFKCInstance(errorCode);
    304        } else if(0==uprv_strcmp(name, "nfkc_cf")) {
    305            allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
    306        } else if(0==uprv_strcmp(name, "nfkc_scf")) {
    307            allModes=Norm2AllModes::getNFKC_SCFInstance(errorCode);
    308        }
    309    }
    310    if(allModes==nullptr && U_SUCCESS(errorCode)) {
    311        {
    312            Mutex lock;
    313            if(cache!=nullptr) {
    314                allModes = static_cast<Norm2AllModes*>(uhash_get(cache, name));
    315            }
    316        }
    317        if(allModes==nullptr) {
    318            ucln_common_registerCleanup(UCLN_COMMON_LOADED_NORMALIZER2, uprv_loaded_normalizer2_cleanup);
    319            LocalPointer<Norm2AllModes> localAllModes(
    320                Norm2AllModes::createInstance(packageName, name, errorCode));
    321            if(U_SUCCESS(errorCode)) {
    322                Mutex lock;
    323                if(cache==nullptr) {
    324                    cache=uhash_open(uhash_hashChars, uhash_compareChars, nullptr, &errorCode);
    325                    if(U_FAILURE(errorCode)) {
    326                        return nullptr;
    327                    }
    328                    uhash_setKeyDeleter(cache, uprv_free);
    329                    uhash_setValueDeleter(cache, deleteNorm2AllModes);
    330                }
    331                void *temp=uhash_get(cache, name);
    332                if(temp==nullptr) {
    333                    int32_t keyLength= static_cast<int32_t>(uprv_strlen(name)+1);
    334                    char* nameCopy = static_cast<char*>(uprv_malloc(keyLength));
    335                    if(nameCopy==nullptr) {
    336                        errorCode=U_MEMORY_ALLOCATION_ERROR;
    337                        return nullptr;
    338                    }
    339                    uprv_memcpy(nameCopy, name, keyLength);
    340                    allModes=localAllModes.getAlias();
    341                    uhash_put(cache, nameCopy, localAllModes.orphan(), &errorCode);
    342                } else {
    343                    // race condition
    344                    allModes = static_cast<Norm2AllModes*>(temp);
    345                }
    346            }
    347        }
    348    }
    349    if(allModes!=nullptr && U_SUCCESS(errorCode)) {
    350        switch(mode) {
    351        case UNORM2_COMPOSE:
    352            return &allModes->comp;
    353        case UNORM2_DECOMPOSE:
    354            return &allModes->decomp;
    355        case UNORM2_FCD:
    356            return &allModes->fcd;
    357        case UNORM2_COMPOSE_CONTIGUOUS:
    358            return &allModes->fcc;
    359        default:
    360            break;  // do nothing
    361        }
    362    }
    363    return nullptr;
    364 }
    365 
    366 const Normalizer2 *
    367 Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) {
    368    if(U_FAILURE(errorCode)) {
    369        return nullptr;
    370    }
    371    switch(mode) {
    372    case UNORM_NFD:
    373        return Normalizer2::getNFDInstance(errorCode);
    374    case UNORM_NFKD:
    375        return Normalizer2::getNFKDInstance(errorCode);
    376    case UNORM_NFC:
    377        return Normalizer2::getNFCInstance(errorCode);
    378    case UNORM_NFKC:
    379        return Normalizer2::getNFKCInstance(errorCode);
    380    case UNORM_FCD:
    381        return getFCDInstance(errorCode);
    382    default:  // UNORM_NONE
    383        return getNoopInstance(errorCode);
    384    }
    385 }
    386 
    387 const Normalizer2Impl *
    388 Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) {
    389    const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode);
    390    return allModes!=nullptr ? allModes->impl : nullptr;
    391 }
    392 
    393 const Normalizer2Impl *
    394 Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) {
    395    const Norm2AllModes *allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
    396    return allModes!=nullptr ? allModes->impl : nullptr;
    397 }
    398 
    399 U_NAMESPACE_END
    400 
    401 // C API ------------------------------------------------------------------- ***
    402 
    403 U_NAMESPACE_USE
    404 
    405 U_CAPI const UNormalizer2 * U_EXPORT2
    406 unorm2_getNFKCInstance(UErrorCode *pErrorCode) {
    407    return (const UNormalizer2 *)Normalizer2::getNFKCInstance(*pErrorCode);
    408 }
    409 
    410 U_CAPI const UNormalizer2 * U_EXPORT2
    411 unorm2_getNFKDInstance(UErrorCode *pErrorCode) {
    412    return (const UNormalizer2 *)Normalizer2::getNFKDInstance(*pErrorCode);
    413 }
    414 
    415 U_CAPI const UNormalizer2 * U_EXPORT2
    416 unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode) {
    417    return (const UNormalizer2 *)Normalizer2::getNFKCCasefoldInstance(*pErrorCode);
    418 }
    419 
    420 U_CAPI const UNormalizer2 * U_EXPORT2
    421 unorm2_getNFKCSimpleCasefoldInstance(UErrorCode *pErrorCode) {
    422    return (const UNormalizer2 *)Normalizer2::getNFKCSimpleCasefoldInstance(*pErrorCode);
    423 }
    424 
    425 U_CAPI const UNormalizer2 * U_EXPORT2
    426 unorm2_getInstance(const char *packageName,
    427                   const char *name,
    428                   UNormalization2Mode mode,
    429                   UErrorCode *pErrorCode) {
    430    return (const UNormalizer2 *)Normalizer2::getInstance(packageName, name, mode, *pErrorCode);
    431 }
    432 
    433 U_CFUNC UNormalizationCheckResult
    434 unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) {
    435    if(mode<=UNORM_NONE || UNORM_FCD<=mode) {
    436        return UNORM_YES;
    437    }
    438    UErrorCode errorCode=U_ZERO_ERROR;
    439    const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, errorCode);
    440    if(U_SUCCESS(errorCode)) {
    441        return ((const Normalizer2WithImpl *)norm2)->getQuickCheck(c);
    442    } else {
    443        return UNORM_MAYBE;
    444    }
    445 }
    446 
    447 #endif  // !UCONFIG_NO_NORMALIZATION