loadednormalizer2impl.cpp (14836B)
1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * Copyright (C) 2014, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ******************************************************************************* 8 * loadednormalizer2impl.cpp 9 * 10 * created on: 2014sep03 11 * created by: Markus W. Scherer 12 */ 13 14 #include "unicode/utypes.h" 15 16 #if !UCONFIG_NO_NORMALIZATION 17 18 #include "unicode/udata.h" 19 #include "unicode/localpointer.h" 20 #include "unicode/normalizer2.h" 21 #include "unicode/ucptrie.h" 22 #include "unicode/unistr.h" 23 #include "unicode/unorm.h" 24 #include "cstring.h" 25 #include "mutex.h" 26 #include "norm2allmodes.h" 27 #include "normalizer2impl.h" 28 #include "uassert.h" 29 #include "ucln_cmn.h" 30 #include "uhash.h" 31 32 U_NAMESPACE_BEGIN 33 34 class LoadedNormalizer2Impl : public Normalizer2Impl { 35 public: 36 LoadedNormalizer2Impl() : memory(nullptr), ownedTrie(nullptr) {} 37 virtual ~LoadedNormalizer2Impl(); 38 39 void load(const char *packageName, const char *name, UErrorCode &errorCode); 40 41 private: 42 static UBool U_CALLCONV 43 isAcceptable(void *context, const char *type, const char *name, const UDataInfo *pInfo); 44 45 UDataMemory *memory; 46 UCPTrie *ownedTrie; 47 }; 48 49 LoadedNormalizer2Impl::~LoadedNormalizer2Impl() { 50 udata_close(memory); 51 ucptrie_close(ownedTrie); 52 } 53 54 UBool U_CALLCONV 55 LoadedNormalizer2Impl::isAcceptable(void * /*context*/, 56 const char * /* type */, const char * /*name*/, 57 const UDataInfo *pInfo) { 58 if( 59 pInfo->size>=20 && 60 pInfo->isBigEndian==U_IS_BIG_ENDIAN && 61 pInfo->charsetFamily==U_CHARSET_FAMILY && 62 pInfo->dataFormat[0]==0x4e && /* dataFormat="Nrm2" */ 63 pInfo->dataFormat[1]==0x72 && 64 pInfo->dataFormat[2]==0x6d && 65 pInfo->dataFormat[3]==0x32 && 66 pInfo->formatVersion[0]==5 67 ) { 68 // Normalizer2Impl *me=(Normalizer2Impl *)context; 69 // uprv_memcpy(me->dataVersion, pInfo->dataVersion, 4); 70 return true; 71 } else { 72 return false; 73 } 74 } 75 76 void 77 LoadedNormalizer2Impl::load(const char *packageName, const char *name, UErrorCode &errorCode) { 78 if(U_FAILURE(errorCode)) { 79 return; 80 } 81 memory=udata_openChoice(packageName, "nrm", name, isAcceptable, this, &errorCode); 82 if(U_FAILURE(errorCode)) { 83 return; 84 } 85 const uint8_t* inBytes = static_cast<const uint8_t*>(udata_getMemory(memory)); 86 const int32_t* inIndexes = reinterpret_cast<const int32_t*>(inBytes); 87 int32_t indexesLength=inIndexes[IX_NORM_TRIE_OFFSET]/4; 88 if(indexesLength<=IX_MIN_LCCC_CP) { 89 errorCode=U_INVALID_FORMAT_ERROR; // Not enough indexes. 90 return; 91 } 92 93 int32_t offset=inIndexes[IX_NORM_TRIE_OFFSET]; 94 int32_t nextOffset=inIndexes[IX_EXTRA_DATA_OFFSET]; 95 ownedTrie=ucptrie_openFromBinary(UCPTRIE_TYPE_FAST, UCPTRIE_VALUE_BITS_16, 96 inBytes+offset, nextOffset-offset, nullptr, 97 &errorCode); 98 if(U_FAILURE(errorCode)) { 99 return; 100 } 101 102 offset=nextOffset; 103 nextOffset=inIndexes[IX_SMALL_FCD_OFFSET]; 104 const uint16_t* inExtraData = reinterpret_cast<const uint16_t*>(inBytes + offset); 105 106 // smallFCD: new in formatVersion 2 107 offset=nextOffset; 108 const uint8_t *inSmallFCD=inBytes+offset; 109 110 init(inIndexes, ownedTrie, inExtraData, inSmallFCD); 111 } 112 113 // instance cache ---------------------------------------------------------- *** 114 115 Norm2AllModes * 116 Norm2AllModes::createInstance(const char *packageName, 117 const char *name, 118 UErrorCode &errorCode) { 119 if(U_FAILURE(errorCode)) { 120 return nullptr; 121 } 122 LoadedNormalizer2Impl *impl=new LoadedNormalizer2Impl; 123 if(impl==nullptr) { 124 errorCode=U_MEMORY_ALLOCATION_ERROR; 125 return nullptr; 126 } 127 impl->load(packageName, name, errorCode); 128 return createInstance(impl, errorCode); 129 } 130 131 U_CDECL_BEGIN 132 static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup(); 133 U_CDECL_END 134 135 #if !NORM2_HARDCODE_NFC_DATA 136 static Norm2AllModes *nfcSingleton; 137 static icu::UInitOnce nfcInitOnce {}; 138 #endif 139 140 static Norm2AllModes *nfkcSingleton; 141 static icu::UInitOnce nfkcInitOnce {}; 142 143 static Norm2AllModes *nfkc_cfSingleton; 144 static icu::UInitOnce nfkc_cfInitOnce {}; 145 146 static Norm2AllModes *nfkc_scfSingleton; 147 static icu::UInitOnce nfkc_scfInitOnce {}; 148 149 static UHashtable *cache=nullptr; 150 151 // UInitOnce singleton initialization function 152 static void U_CALLCONV initSingletons(const char *what, UErrorCode &errorCode) { 153 #if !NORM2_HARDCODE_NFC_DATA 154 if (uprv_strcmp(what, "nfc") == 0) { 155 nfcSingleton = Norm2AllModes::createInstance(nullptr, "nfc", errorCode); 156 } else 157 #endif 158 if (uprv_strcmp(what, "nfkc") == 0) { 159 nfkcSingleton = Norm2AllModes::createInstance(nullptr, "nfkc", errorCode); 160 } else if (uprv_strcmp(what, "nfkc_cf") == 0) { 161 nfkc_cfSingleton = Norm2AllModes::createInstance(nullptr, "nfkc_cf", errorCode); 162 } else if (uprv_strcmp(what, "nfkc_scf") == 0) { 163 nfkc_scfSingleton = Norm2AllModes::createInstance(nullptr, "nfkc_scf", errorCode); 164 } else { 165 UPRV_UNREACHABLE_EXIT; // Unknown singleton 166 } 167 ucln_common_registerCleanup(UCLN_COMMON_LOADED_NORMALIZER2, uprv_loaded_normalizer2_cleanup); 168 } 169 170 U_CDECL_BEGIN 171 172 static void U_CALLCONV deleteNorm2AllModes(void *allModes) { 173 delete (Norm2AllModes *)allModes; 174 } 175 176 static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup() { 177 #if !NORM2_HARDCODE_NFC_DATA 178 delete nfcSingleton; 179 nfcSingleton = nullptr; 180 nfcInitOnce.reset(); 181 #endif 182 183 delete nfkcSingleton; 184 nfkcSingleton = nullptr; 185 nfkcInitOnce.reset(); 186 187 delete nfkc_cfSingleton; 188 nfkc_cfSingleton = nullptr; 189 nfkc_cfInitOnce.reset(); 190 191 delete nfkc_scfSingleton; 192 nfkc_scfSingleton = nullptr; 193 nfkc_scfInitOnce.reset(); 194 195 uhash_close(cache); 196 cache=nullptr; 197 return true; 198 } 199 200 U_CDECL_END 201 202 #if !NORM2_HARDCODE_NFC_DATA 203 const Norm2AllModes * 204 Norm2AllModes::getNFCInstance(UErrorCode &errorCode) { 205 if(U_FAILURE(errorCode)) { return nullptr; } 206 umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode); 207 return nfcSingleton; 208 } 209 #endif 210 211 const Norm2AllModes * 212 Norm2AllModes::getNFKCInstance(UErrorCode &errorCode) { 213 if(U_FAILURE(errorCode)) { return nullptr; } 214 umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode); 215 return nfkcSingleton; 216 } 217 218 const Norm2AllModes * 219 Norm2AllModes::getNFKC_CFInstance(UErrorCode &errorCode) { 220 if(U_FAILURE(errorCode)) { return nullptr; } 221 umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode); 222 return nfkc_cfSingleton; 223 } 224 225 const Norm2AllModes * 226 Norm2AllModes::getNFKC_SCFInstance(UErrorCode &errorCode) { 227 if(U_FAILURE(errorCode)) { return nullptr; } 228 umtx_initOnce(nfkc_scfInitOnce, &initSingletons, "nfkc_scf", errorCode); 229 return nfkc_scfSingleton; 230 } 231 232 #if !NORM2_HARDCODE_NFC_DATA 233 const Normalizer2 * 234 Normalizer2::getNFCInstance(UErrorCode &errorCode) { 235 const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode); 236 return allModes!=nullptr ? &allModes->comp : nullptr; 237 } 238 239 const Normalizer2 * 240 Normalizer2::getNFDInstance(UErrorCode &errorCode) { 241 const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode); 242 return allModes!=nullptr ? &allModes->decomp : nullptr; 243 } 244 245 const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) { 246 const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode); 247 return allModes!=nullptr ? &allModes->fcd : nullptr; 248 } 249 250 const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) { 251 const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode); 252 return allModes!=nullptr ? &allModes->fcc : nullptr; 253 } 254 255 const Normalizer2Impl * 256 Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) { 257 const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode); 258 return allModes!=nullptr ? allModes->impl : nullptr; 259 } 260 #endif 261 262 const Normalizer2 * 263 Normalizer2::getNFKCInstance(UErrorCode &errorCode) { 264 const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode); 265 return allModes!=nullptr ? &allModes->comp : nullptr; 266 } 267 268 const Normalizer2 * 269 Normalizer2::getNFKDInstance(UErrorCode &errorCode) { 270 const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode); 271 return allModes!=nullptr ? &allModes->decomp : nullptr; 272 } 273 274 const Normalizer2 * 275 Normalizer2::getNFKCCasefoldInstance(UErrorCode &errorCode) { 276 const Norm2AllModes *allModes=Norm2AllModes::getNFKC_CFInstance(errorCode); 277 return allModes!=nullptr ? &allModes->comp : nullptr; 278 } 279 280 const Normalizer2 * 281 Normalizer2::getNFKCSimpleCasefoldInstance(UErrorCode &errorCode) { 282 const Norm2AllModes *allModes=Norm2AllModes::getNFKC_SCFInstance(errorCode); 283 return allModes!=nullptr ? &allModes->comp : nullptr; 284 } 285 286 const Normalizer2 * 287 Normalizer2::getInstance(const char *packageName, 288 const char *name, 289 UNormalization2Mode mode, 290 UErrorCode &errorCode) { 291 if(U_FAILURE(errorCode)) { 292 return nullptr; 293 } 294 if(name==nullptr || *name==0) { 295 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 296 return nullptr; 297 } 298 const Norm2AllModes *allModes=nullptr; 299 if(packageName==nullptr) { 300 if(0==uprv_strcmp(name, "nfc")) { 301 allModes=Norm2AllModes::getNFCInstance(errorCode); 302 } else if(0==uprv_strcmp(name, "nfkc")) { 303 allModes=Norm2AllModes::getNFKCInstance(errorCode); 304 } else if(0==uprv_strcmp(name, "nfkc_cf")) { 305 allModes=Norm2AllModes::getNFKC_CFInstance(errorCode); 306 } else if(0==uprv_strcmp(name, "nfkc_scf")) { 307 allModes=Norm2AllModes::getNFKC_SCFInstance(errorCode); 308 } 309 } 310 if(allModes==nullptr && U_SUCCESS(errorCode)) { 311 { 312 Mutex lock; 313 if(cache!=nullptr) { 314 allModes = static_cast<Norm2AllModes*>(uhash_get(cache, name)); 315 } 316 } 317 if(allModes==nullptr) { 318 ucln_common_registerCleanup(UCLN_COMMON_LOADED_NORMALIZER2, uprv_loaded_normalizer2_cleanup); 319 LocalPointer<Norm2AllModes> localAllModes( 320 Norm2AllModes::createInstance(packageName, name, errorCode)); 321 if(U_SUCCESS(errorCode)) { 322 Mutex lock; 323 if(cache==nullptr) { 324 cache=uhash_open(uhash_hashChars, uhash_compareChars, nullptr, &errorCode); 325 if(U_FAILURE(errorCode)) { 326 return nullptr; 327 } 328 uhash_setKeyDeleter(cache, uprv_free); 329 uhash_setValueDeleter(cache, deleteNorm2AllModes); 330 } 331 void *temp=uhash_get(cache, name); 332 if(temp==nullptr) { 333 int32_t keyLength= static_cast<int32_t>(uprv_strlen(name)+1); 334 char* nameCopy = static_cast<char*>(uprv_malloc(keyLength)); 335 if(nameCopy==nullptr) { 336 errorCode=U_MEMORY_ALLOCATION_ERROR; 337 return nullptr; 338 } 339 uprv_memcpy(nameCopy, name, keyLength); 340 allModes=localAllModes.getAlias(); 341 uhash_put(cache, nameCopy, localAllModes.orphan(), &errorCode); 342 } else { 343 // race condition 344 allModes = static_cast<Norm2AllModes*>(temp); 345 } 346 } 347 } 348 } 349 if(allModes!=nullptr && U_SUCCESS(errorCode)) { 350 switch(mode) { 351 case UNORM2_COMPOSE: 352 return &allModes->comp; 353 case UNORM2_DECOMPOSE: 354 return &allModes->decomp; 355 case UNORM2_FCD: 356 return &allModes->fcd; 357 case UNORM2_COMPOSE_CONTIGUOUS: 358 return &allModes->fcc; 359 default: 360 break; // do nothing 361 } 362 } 363 return nullptr; 364 } 365 366 const Normalizer2 * 367 Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) { 368 if(U_FAILURE(errorCode)) { 369 return nullptr; 370 } 371 switch(mode) { 372 case UNORM_NFD: 373 return Normalizer2::getNFDInstance(errorCode); 374 case UNORM_NFKD: 375 return Normalizer2::getNFKDInstance(errorCode); 376 case UNORM_NFC: 377 return Normalizer2::getNFCInstance(errorCode); 378 case UNORM_NFKC: 379 return Normalizer2::getNFKCInstance(errorCode); 380 case UNORM_FCD: 381 return getFCDInstance(errorCode); 382 default: // UNORM_NONE 383 return getNoopInstance(errorCode); 384 } 385 } 386 387 const Normalizer2Impl * 388 Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) { 389 const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode); 390 return allModes!=nullptr ? allModes->impl : nullptr; 391 } 392 393 const Normalizer2Impl * 394 Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) { 395 const Norm2AllModes *allModes=Norm2AllModes::getNFKC_CFInstance(errorCode); 396 return allModes!=nullptr ? allModes->impl : nullptr; 397 } 398 399 U_NAMESPACE_END 400 401 // C API ------------------------------------------------------------------- *** 402 403 U_NAMESPACE_USE 404 405 U_CAPI const UNormalizer2 * U_EXPORT2 406 unorm2_getNFKCInstance(UErrorCode *pErrorCode) { 407 return (const UNormalizer2 *)Normalizer2::getNFKCInstance(*pErrorCode); 408 } 409 410 U_CAPI const UNormalizer2 * U_EXPORT2 411 unorm2_getNFKDInstance(UErrorCode *pErrorCode) { 412 return (const UNormalizer2 *)Normalizer2::getNFKDInstance(*pErrorCode); 413 } 414 415 U_CAPI const UNormalizer2 * U_EXPORT2 416 unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode) { 417 return (const UNormalizer2 *)Normalizer2::getNFKCCasefoldInstance(*pErrorCode); 418 } 419 420 U_CAPI const UNormalizer2 * U_EXPORT2 421 unorm2_getNFKCSimpleCasefoldInstance(UErrorCode *pErrorCode) { 422 return (const UNormalizer2 *)Normalizer2::getNFKCSimpleCasefoldInstance(*pErrorCode); 423 } 424 425 U_CAPI const UNormalizer2 * U_EXPORT2 426 unorm2_getInstance(const char *packageName, 427 const char *name, 428 UNormalization2Mode mode, 429 UErrorCode *pErrorCode) { 430 return (const UNormalizer2 *)Normalizer2::getInstance(packageName, name, mode, *pErrorCode); 431 } 432 433 U_CFUNC UNormalizationCheckResult 434 unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) { 435 if(mode<=UNORM_NONE || UNORM_FCD<=mode) { 436 return UNORM_YES; 437 } 438 UErrorCode errorCode=U_ZERO_ERROR; 439 const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, errorCode); 440 if(U_SUCCESS(errorCode)) { 441 return ((const Normalizer2WithImpl *)norm2)->getQuickCheck(c); 442 } else { 443 return UNORM_MAYBE; 444 } 445 } 446 447 #endif // !UCONFIG_NO_NORMALIZATION