usprep.cpp (28361B)
1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * 6 * Copyright (C) 2003-2016, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 * 9 ******************************************************************************* 10 * file name: usprep.cpp 11 * encoding: UTF-8 12 * tab size: 8 (not used) 13 * indentation:4 14 * 15 * created on: 2003jul2 16 * created by: Ram Viswanadha 17 */ 18 19 #include "unicode/utypes.h" 20 21 #if !UCONFIG_NO_IDNA 22 23 #include "unicode/usprep.h" 24 25 #include "unicode/normalizer2.h" 26 #include "unicode/ustring.h" 27 #include "unicode/uchar.h" 28 #include "unicode/uversion.h" 29 #include "umutex.h" 30 #include "cmemory.h" 31 #include "sprpimpl.h" 32 #include "ustr_imp.h" 33 #include "uhash.h" 34 #include "cstring.h" 35 #include "udataswp.h" 36 #include "ucln_cmn.h" 37 #include "ubidi_props.h" 38 #include "uprops.h" 39 40 U_NAMESPACE_USE 41 42 U_CDECL_BEGIN 43 44 /* 45 Static cache for already opened StringPrep profiles 46 */ 47 static UHashtable *SHARED_DATA_HASHTABLE = nullptr; 48 static icu::UInitOnce gSharedDataInitOnce {}; 49 50 static UMutex usprepMutex; 51 /* format version of spp file */ 52 //static uint8_t formatVersion[4]={ 0, 0, 0, 0 }; 53 54 /* the Unicode version of the sprep data */ 55 static UVersionInfo dataVersion={ 0, 0, 0, 0 }; 56 57 /* Profile names must be aligned to UStringPrepProfileType */ 58 static const char * const PROFILE_NAMES[] = { 59 "rfc3491", /* USPREP_RFC3491_NAMEPREP */ 60 "rfc3530cs", /* USPREP_RFC3530_NFS4_CS_PREP */ 61 "rfc3530csci", /* USPREP_RFC3530_NFS4_CS_PREP_CI */ 62 "rfc3491", /* USPREP_RFC3530_NSF4_CIS_PREP */ 63 "rfc3530mixp", /* USPREP_RFC3530_NSF4_MIXED_PREP_PREFIX */ 64 "rfc3491", /* USPREP_RFC3530_NSF4_MIXED_PREP_SUFFIX */ 65 "rfc3722", /* USPREP_RFC3722_ISCSI */ 66 "rfc3920node", /* USPREP_RFC3920_NODEPREP */ 67 "rfc3920res", /* USPREP_RFC3920_RESOURCEPREP */ 68 "rfc4011", /* USPREP_RFC4011_MIB */ 69 "rfc4013", /* USPREP_RFC4013_SASLPREP */ 70 "rfc4505", /* USPREP_RFC4505_TRACE */ 71 "rfc4518", /* USPREP_RFC4518_LDAP */ 72 "rfc4518ci", /* USPREP_RFC4518_LDAP_CI */ 73 }; 74 75 static UBool U_CALLCONV 76 isSPrepAcceptable(void * /* context */, 77 const char * /* type */, 78 const char * /* name */, 79 const UDataInfo *pInfo) { 80 if( 81 pInfo->size>=20 && 82 pInfo->isBigEndian==U_IS_BIG_ENDIAN && 83 pInfo->charsetFamily==U_CHARSET_FAMILY && 84 pInfo->dataFormat[0]==0x53 && /* dataFormat="SPRP" */ 85 pInfo->dataFormat[1]==0x50 && 86 pInfo->dataFormat[2]==0x52 && 87 pInfo->dataFormat[3]==0x50 && 88 pInfo->formatVersion[0]==3 && 89 pInfo->formatVersion[2]==UTRIE_SHIFT && 90 pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT 91 ) { 92 //uprv_memcpy(formatVersion, pInfo->formatVersion, 4); 93 uprv_memcpy(dataVersion, pInfo->dataVersion, 4); 94 return true; 95 } else { 96 return false; 97 } 98 } 99 100 static int32_t U_CALLCONV 101 getSPrepFoldingOffset(uint32_t data) { 102 103 return (int32_t)data; 104 105 } 106 107 /* hashes an entry */ 108 static int32_t U_CALLCONV 109 hashEntry(const UHashTok parm) { 110 UStringPrepKey *b = (UStringPrepKey *)parm.pointer; 111 UHashTok namekey, pathkey; 112 namekey.pointer = b->name; 113 pathkey.pointer = b->path; 114 uint32_t unsignedHash = static_cast<uint32_t>(uhash_hashChars(namekey)) + 115 37u * static_cast<uint32_t>(uhash_hashChars(pathkey)); 116 return static_cast<int32_t>(unsignedHash); 117 } 118 119 /* compares two entries */ 120 static UBool U_CALLCONV 121 compareEntries(const UHashTok p1, const UHashTok p2) { 122 UStringPrepKey *b1 = (UStringPrepKey *)p1.pointer; 123 UStringPrepKey *b2 = (UStringPrepKey *)p2.pointer; 124 UHashTok name1, name2, path1, path2; 125 name1.pointer = b1->name; 126 name2.pointer = b2->name; 127 path1.pointer = b1->path; 128 path2.pointer = b2->path; 129 return uhash_compareChars(name1, name2) && uhash_compareChars(path1, path2); 130 } 131 132 static void 133 usprep_unload(UStringPrepProfile* data){ 134 udata_close(data->sprepData); 135 } 136 137 static int32_t 138 usprep_internal_flushCache(UBool noRefCount){ 139 UStringPrepProfile *profile = nullptr; 140 UStringPrepKey *key = nullptr; 141 int32_t pos = UHASH_FIRST; 142 int32_t deletedNum = 0; 143 const UHashElement *e; 144 145 /* 146 * if shared data hasn't even been lazy evaluated yet 147 * return 0 148 */ 149 umtx_lock(&usprepMutex); 150 if (SHARED_DATA_HASHTABLE == nullptr) { 151 umtx_unlock(&usprepMutex); 152 return 0; 153 } 154 155 /*creates an enumeration to iterate through every element in the table */ 156 while ((e = uhash_nextElement(SHARED_DATA_HASHTABLE, &pos)) != nullptr) 157 { 158 profile = (UStringPrepProfile *) e->value.pointer; 159 key = (UStringPrepKey *) e->key.pointer; 160 161 if ((noRefCount== false && profile->refCount == 0) || 162 noRefCount) { 163 deletedNum++; 164 uhash_removeElement(SHARED_DATA_HASHTABLE, e); 165 166 /* unload the data */ 167 usprep_unload(profile); 168 169 if(key->name != nullptr) { 170 uprv_free(key->name); 171 key->name=nullptr; 172 } 173 if(key->path != nullptr) { 174 uprv_free(key->path); 175 key->path=nullptr; 176 } 177 uprv_free(profile); 178 uprv_free(key); 179 } 180 181 } 182 umtx_unlock(&usprepMutex); 183 184 return deletedNum; 185 } 186 187 /* Works just like ucnv_flushCache() 188 static int32_t 189 usprep_flushCache(){ 190 return usprep_internal_flushCache(false); 191 } 192 */ 193 194 static UBool U_CALLCONV usprep_cleanup(){ 195 if (SHARED_DATA_HASHTABLE != nullptr) { 196 usprep_internal_flushCache(true); 197 if (SHARED_DATA_HASHTABLE != nullptr && uhash_count(SHARED_DATA_HASHTABLE) == 0) { 198 uhash_close(SHARED_DATA_HASHTABLE); 199 SHARED_DATA_HASHTABLE = nullptr; 200 } 201 } 202 gSharedDataInitOnce.reset(); 203 return (SHARED_DATA_HASHTABLE == nullptr); 204 } 205 U_CDECL_END 206 207 208 /** Initializes the cache for resources */ 209 static void U_CALLCONV 210 createCache(UErrorCode &status) { 211 SHARED_DATA_HASHTABLE = uhash_open(hashEntry, compareEntries, nullptr, &status); 212 if (U_FAILURE(status)) { 213 SHARED_DATA_HASHTABLE = nullptr; 214 } 215 ucln_common_registerCleanup(UCLN_COMMON_USPREP, usprep_cleanup); 216 } 217 218 static void 219 initCache(UErrorCode *status) { 220 umtx_initOnce(gSharedDataInitOnce, &createCache, *status); 221 } 222 223 static UBool U_CALLCONV 224 loadData(UStringPrepProfile* profile, 225 const char* path, 226 const char* name, 227 const char* type, 228 UErrorCode* errorCode) { 229 /* load Unicode SPREP data from file */ 230 UTrie _sprepTrie = {nullptr, nullptr, nullptr, 0, 0, 0, 0}; 231 UDataMemory *dataMemory; 232 const int32_t *p=nullptr; 233 const uint8_t *pb; 234 UVersionInfo normUnicodeVersion; 235 int32_t normUniVer, sprepUniVer, normCorrVer; 236 237 if(errorCode==nullptr || U_FAILURE(*errorCode)) { 238 return 0; 239 } 240 241 /* open the data outside the mutex block */ 242 //TODO: change the path 243 dataMemory=udata_openChoice(path, type, name, isSPrepAcceptable, nullptr, errorCode); 244 if(U_FAILURE(*errorCode)) { 245 return false; 246 } 247 248 p = static_cast<const int32_t*>(udata_getMemory(dataMemory)); 249 pb = reinterpret_cast<const uint8_t*>(p + _SPREP_INDEX_TOP); 250 utrie_unserialize(&_sprepTrie, pb, p[_SPREP_INDEX_TRIE_SIZE], errorCode); 251 _sprepTrie.getFoldingOffset=getSPrepFoldingOffset; 252 253 254 if(U_FAILURE(*errorCode)) { 255 udata_close(dataMemory); 256 return false; 257 } 258 259 /* in the mutex block, set the data for this process */ 260 umtx_lock(&usprepMutex); 261 if(profile->sprepData==nullptr) { 262 profile->sprepData=dataMemory; 263 dataMemory=nullptr; 264 uprv_memcpy(&profile->indexes, p, sizeof(profile->indexes)); 265 uprv_memcpy(&profile->sprepTrie, &_sprepTrie, sizeof(UTrie)); 266 } else { 267 p = static_cast<const int32_t*>(udata_getMemory(profile->sprepData)); 268 } 269 umtx_unlock(&usprepMutex); 270 /* initialize some variables */ 271 profile->mappingData = reinterpret_cast<const uint16_t*>(reinterpret_cast<const uint8_t*>(p + _SPREP_INDEX_TOP) + profile->indexes[_SPREP_INDEX_TRIE_SIZE]); 272 273 u_getUnicodeVersion(normUnicodeVersion); 274 normUniVer = (normUnicodeVersion[0] << 24) + (normUnicodeVersion[1] << 16) + 275 (normUnicodeVersion[2] << 8 ) + (normUnicodeVersion[3]); 276 sprepUniVer = (dataVersion[0] << 24) + (dataVersion[1] << 16) + 277 (dataVersion[2] << 8 ) + (dataVersion[3]); 278 normCorrVer = profile->indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION]; 279 280 if(U_FAILURE(*errorCode)){ 281 udata_close(dataMemory); 282 return false; 283 } 284 if( normUniVer < sprepUniVer && /* the Unicode version of SPREP file must be less than the Unicode Version of the normalization data */ 285 normUniVer < normCorrVer && /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Version of the normalization data */ 286 ((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0) /* normalization turned on*/ 287 ){ 288 *errorCode = U_INVALID_FORMAT_ERROR; 289 udata_close(dataMemory); 290 return false; 291 } 292 profile->isDataLoaded = true; 293 294 /* if a different thread set it first, then close the extra data */ 295 if(dataMemory!=nullptr) { 296 udata_close(dataMemory); /* nullptr if it was set correctly */ 297 } 298 299 300 return profile->isDataLoaded; 301 } 302 303 static UStringPrepProfile* 304 usprep_getProfile(const char* path, 305 const char* name, 306 UErrorCode *status){ 307 308 UStringPrepProfile* profile = nullptr; 309 310 initCache(status); 311 312 if(U_FAILURE(*status)){ 313 return nullptr; 314 } 315 316 UStringPrepKey stackKey; 317 /* 318 * const is cast way to save malloc, strcpy and free calls 319 * we use the passed in pointers for fetching the data from the 320 * hash table which is safe 321 */ 322 stackKey.name = const_cast<char*>(name); 323 stackKey.path = const_cast<char*>(path); 324 325 /* fetch the data from the cache */ 326 umtx_lock(&usprepMutex); 327 profile = static_cast<UStringPrepProfile*>(uhash_get(SHARED_DATA_HASHTABLE, &stackKey)); 328 if(profile != nullptr) { 329 profile->refCount++; 330 } 331 umtx_unlock(&usprepMutex); 332 333 if(profile == nullptr) { 334 /* else load the data and put the data in the cache */ 335 LocalMemory<UStringPrepProfile> newProfile; 336 if(newProfile.allocateInsteadAndReset() == nullptr) { 337 *status = U_MEMORY_ALLOCATION_ERROR; 338 return nullptr; 339 } 340 341 /* load the data */ 342 if(!loadData(newProfile.getAlias(), path, name, _SPREP_DATA_TYPE, status) || U_FAILURE(*status) ){ 343 return nullptr; 344 } 345 346 /* get the options */ 347 newProfile->doNFKC = static_cast<UBool>((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0); 348 newProfile->checkBiDi = static_cast<UBool>((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0); 349 350 LocalMemory<UStringPrepKey> key; 351 LocalMemory<char> keyName; 352 LocalMemory<char> keyPath; 353 if( key.allocateInsteadAndReset() == nullptr || 354 keyName.allocateInsteadAndCopy(static_cast<int32_t>(uprv_strlen(name)+1)) == nullptr || 355 (path != nullptr && 356 keyPath.allocateInsteadAndCopy(static_cast<int32_t>(uprv_strlen(path)+1)) == nullptr) 357 ) { 358 *status = U_MEMORY_ALLOCATION_ERROR; 359 usprep_unload(newProfile.getAlias()); 360 return nullptr; 361 } 362 363 umtx_lock(&usprepMutex); 364 // If another thread already inserted the same key/value, refcount and cleanup our thread data 365 profile = static_cast<UStringPrepProfile*>(uhash_get(SHARED_DATA_HASHTABLE, &stackKey)); 366 if(profile != nullptr) { 367 profile->refCount++; 368 usprep_unload(newProfile.getAlias()); 369 } 370 else { 371 /* initialize the key members */ 372 key->name = keyName.orphan(); 373 uprv_strcpy(key->name, name); 374 if(path != nullptr){ 375 key->path = keyPath.orphan(); 376 uprv_strcpy(key->path, path); 377 } 378 profile = newProfile.orphan(); 379 380 /* add the data object to the cache */ 381 profile->refCount = 1; 382 uhash_put(SHARED_DATA_HASHTABLE, key.orphan(), profile, status); 383 } 384 umtx_unlock(&usprepMutex); 385 } 386 387 return profile; 388 } 389 390 U_CAPI UStringPrepProfile* U_EXPORT2 391 usprep_open(const char* path, 392 const char* name, 393 UErrorCode* status){ 394 395 if(status == nullptr || U_FAILURE(*status)){ 396 return nullptr; 397 } 398 399 /* initialize the profile struct members */ 400 return usprep_getProfile(path,name,status); 401 } 402 403 U_CAPI UStringPrepProfile* U_EXPORT2 404 usprep_openByType(UStringPrepProfileType type, 405 UErrorCode* status) { 406 if(status == nullptr || U_FAILURE(*status)){ 407 return nullptr; 408 } 409 int32_t index = (int32_t)type; 410 if (index < 0 || index >= UPRV_LENGTHOF(PROFILE_NAMES)) { 411 *status = U_ILLEGAL_ARGUMENT_ERROR; 412 return nullptr; 413 } 414 return usprep_open(nullptr, PROFILE_NAMES[index], status); 415 } 416 417 U_CAPI void U_EXPORT2 418 usprep_close(UStringPrepProfile* profile){ 419 if(profile==nullptr){ 420 return; 421 } 422 423 umtx_lock(&usprepMutex); 424 /* decrement the ref count*/ 425 if(profile->refCount > 0){ 426 profile->refCount--; 427 } 428 umtx_unlock(&usprepMutex); 429 430 } 431 432 U_CFUNC void 433 uprv_syntaxError(const char16_t* rules, 434 int32_t pos, 435 int32_t rulesLen, 436 UParseError* parseError){ 437 if(parseError == nullptr){ 438 return; 439 } 440 parseError->offset = pos; 441 parseError->line = 0 ; // we are not using line numbers 442 443 // for pre-context 444 int32_t start = (pos < U_PARSE_CONTEXT_LEN)? 0 : (pos - (U_PARSE_CONTEXT_LEN-1)); 445 int32_t limit = pos; 446 447 u_memcpy(parseError->preContext,rules+start,limit-start); 448 //null terminate the buffer 449 parseError->preContext[limit-start] = 0; 450 451 // for post-context; include error rules[pos] 452 start = pos; 453 limit = start + (U_PARSE_CONTEXT_LEN-1); 454 if (limit > rulesLen) { 455 limit = rulesLen; 456 } 457 if (start < rulesLen) { 458 u_memcpy(parseError->postContext,rules+start,limit-start); 459 } 460 //null terminate the buffer 461 parseError->postContext[limit-start]= 0; 462 } 463 464 465 static inline UStringPrepType 466 getValues(uint16_t trieWord, int16_t& value, UBool& isIndex){ 467 468 UStringPrepType type; 469 if(trieWord == 0){ 470 /* 471 * Initial value stored in the mapping table 472 * just return USPREP_TYPE_LIMIT .. so that 473 * the source codepoint is copied to the destination 474 */ 475 type = USPREP_TYPE_LIMIT; 476 isIndex =false; 477 value = 0; 478 }else if(trieWord >= _SPREP_TYPE_THRESHOLD){ 479 type = static_cast<UStringPrepType>(trieWord - _SPREP_TYPE_THRESHOLD); 480 isIndex =false; 481 value = 0; 482 }else{ 483 /* get the type */ 484 type = USPREP_MAP; 485 /* ascertain if the value is index or delta */ 486 if(trieWord & 0x02){ 487 isIndex = true; 488 value = trieWord >> 2; //mask off the lower 2 bits and shift 489 }else{ 490 isIndex = false; 491 value = static_cast<int16_t>(trieWord); 492 value = (value >> 2); 493 } 494 495 if((trieWord>>2) == _SPREP_MAX_INDEX_VALUE){ 496 type = USPREP_DELETE; 497 isIndex =false; 498 value = 0; 499 } 500 } 501 return type; 502 } 503 504 // TODO: change to writing to UnicodeString not char16_t * 505 static int32_t 506 usprep_map( const UStringPrepProfile* profile, 507 const char16_t* src, int32_t srcLength, 508 char16_t* dest, int32_t destCapacity, 509 int32_t options, 510 UParseError* parseError, 511 UErrorCode* status ){ 512 513 uint16_t result; 514 int32_t destIndex=0; 515 int32_t srcIndex; 516 UBool allowUnassigned = static_cast<UBool>((options & USPREP_ALLOW_UNASSIGNED) > 0); 517 UStringPrepType type; 518 int16_t value; 519 UBool isIndex; 520 const int32_t* indexes = profile->indexes; 521 522 // no error checking the caller check for error and arguments 523 // no string length check the caller finds out the string length 524 525 for(srcIndex=0;srcIndex<srcLength;){ 526 UChar32 ch; 527 528 U16_NEXT(src,srcIndex,srcLength,ch); 529 530 result=0; 531 532 UTRIE_GET16(&profile->sprepTrie,ch,result); 533 534 type = getValues(result, value, isIndex); 535 536 // check if the source codepoint is unassigned 537 if(type == USPREP_UNASSIGNED && allowUnassigned == false){ 538 539 uprv_syntaxError(src,srcIndex-U16_LENGTH(ch), srcLength,parseError); 540 *status = U_STRINGPREP_UNASSIGNED_ERROR; 541 return 0; 542 543 }else if(type == USPREP_MAP){ 544 545 int32_t index, length; 546 547 if(isIndex){ 548 index = value; 549 if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] && 550 index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){ 551 length = 1; 552 }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] && 553 index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){ 554 length = 2; 555 }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] && 556 index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){ 557 length = 3; 558 }else{ 559 length = profile->mappingData[index++]; 560 561 } 562 563 /* copy mapping to destination */ 564 for(int32_t i=0; i< length; i++){ 565 if(destIndex < destCapacity ){ 566 dest[destIndex] = profile->mappingData[index+i]; 567 } 568 destIndex++; /* for pre-flighting */ 569 } 570 continue; 571 }else{ 572 // subtract the delta to arrive at the code point 573 ch -= value; 574 } 575 576 }else if(type==USPREP_DELETE){ 577 // just consume the codepoint and continue 578 continue; 579 } 580 //copy the code point into destination 581 if(ch <= 0xFFFF){ 582 if(destIndex < destCapacity ){ 583 dest[destIndex] = static_cast<char16_t>(ch); 584 } 585 destIndex++; 586 }else{ 587 if(destIndex+1 < destCapacity ){ 588 dest[destIndex] = U16_LEAD(ch); 589 dest[destIndex+1] = U16_TRAIL(ch); 590 } 591 destIndex +=2; 592 } 593 594 } 595 596 return u_terminateUChars(dest, destCapacity, destIndex, status); 597 } 598 599 /* 600 1) Map -- For each character in the input, check if it has a mapping 601 and, if so, replace it with its mapping. 602 603 2) Normalize -- Possibly normalize the result of step 1 using Unicode 604 normalization. 605 606 3) Prohibit -- Check for any characters that are not allowed in the 607 output. If any are found, return an error. 608 609 4) Check bidi -- Possibly check for right-to-left characters, and if 610 any are found, make sure that the whole string satisfies the 611 requirements for bidirectional strings. If the string does not 612 satisfy the requirements for bidirectional strings, return an 613 error. 614 [Unicode3.2] defines several bidirectional categories; each character 615 has one bidirectional category assigned to it. For the purposes of 616 the requirements below, an "RandALCat character" is a character that 617 has Unicode bidirectional categories "R" or "AL"; an "LCat character" 618 is a character that has Unicode bidirectional category "L". Note 619 620 621 that there are many characters which fall in neither of the above 622 definitions; Latin digits (<U+0030> through <U+0039>) are examples of 623 this because they have bidirectional category "EN". 624 625 In any profile that specifies bidirectional character handling, all 626 three of the following requirements MUST be met: 627 628 1) The characters in section 5.8 MUST be prohibited. 629 630 2) If a string contains any RandALCat character, the string MUST NOT 631 contain any LCat character. 632 633 3) If a string contains any RandALCat character, a RandALCat 634 character MUST be the first character of the string, and a 635 RandALCat character MUST be the last character of the string. 636 */ 637 U_CAPI int32_t U_EXPORT2 638 usprep_prepare( const UStringPrepProfile* profile, 639 const char16_t* src, int32_t srcLength, 640 char16_t* dest, int32_t destCapacity, 641 int32_t options, 642 UParseError* parseError, 643 UErrorCode* status ){ 644 645 // check error status 646 if(U_FAILURE(*status)){ 647 return 0; 648 } 649 650 //check arguments 651 if(profile==nullptr || 652 (src==nullptr ? srcLength!=0 : srcLength<-1) || 653 (dest==nullptr ? destCapacity!=0 : destCapacity<0)) { 654 *status=U_ILLEGAL_ARGUMENT_ERROR; 655 return 0; 656 } 657 658 //get the string length 659 if(srcLength < 0){ 660 srcLength = u_strlen(src); 661 } 662 // map 663 UnicodeString s1; 664 char16_t *b1 = s1.getBuffer(srcLength); 665 if(b1==nullptr){ 666 *status = U_MEMORY_ALLOCATION_ERROR; 667 return 0; 668 } 669 UErrorCode bufferStatus = U_ZERO_ERROR; 670 int32_t b1Len = usprep_map(profile, src, srcLength, 671 b1, s1.getCapacity(), options, parseError, &bufferStatus); 672 s1.releaseBuffer(U_SUCCESS(bufferStatus) ? b1Len : 0); 673 674 if(bufferStatus == U_BUFFER_OVERFLOW_ERROR){ 675 // redo processing of string 676 /* we do not have enough room so grow the buffer*/ 677 b1 = s1.getBuffer(b1Len); 678 if(b1==nullptr){ 679 *status = U_MEMORY_ALLOCATION_ERROR; 680 return 0; 681 } 682 683 bufferStatus = U_ZERO_ERROR; // reset error 684 b1Len = usprep_map(profile, src, srcLength, 685 b1, s1.getCapacity(), options, parseError, &bufferStatus); 686 s1.releaseBuffer(U_SUCCESS(bufferStatus) ? b1Len : 0); 687 } 688 if(U_FAILURE(bufferStatus)){ 689 *status = bufferStatus; 690 return 0; 691 } 692 693 // normalize 694 UnicodeString s2; 695 if(profile->doNFKC){ 696 const Normalizer2 *n2 = Normalizer2::getNFKCInstance(*status); 697 FilteredNormalizer2 fn2(*n2, *uniset_getUnicode32Instance(*status)); 698 if(U_FAILURE(*status)){ 699 return 0; 700 } 701 fn2.normalize(s1, s2, *status); 702 }else{ 703 s2.fastCopyFrom(s1); 704 } 705 if(U_FAILURE(*status)){ 706 return 0; 707 } 708 709 // Prohibit and checkBiDi in one pass 710 const char16_t *b2 = s2.getBuffer(); 711 int32_t b2Len = s2.length(); 712 UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT; 713 UBool leftToRight=false, rightToLeft=false; 714 int32_t rtlPos =-1, ltrPos =-1; 715 716 for(int32_t b2Index=0; b2Index<b2Len;){ 717 UChar32 ch = 0; 718 U16_NEXT(b2, b2Index, b2Len, ch); 719 720 uint16_t result; 721 UTRIE_GET16(&profile->sprepTrie,ch,result); 722 723 int16_t value; 724 UBool isIndex; 725 UStringPrepType type = getValues(result, value, isIndex); 726 727 if( type == USPREP_PROHIBITED || 728 ((result < _SPREP_TYPE_THRESHOLD) && (result & 0x01) /* first bit says it the code point is prohibited*/) 729 ){ 730 *status = U_STRINGPREP_PROHIBITED_ERROR; 731 uprv_syntaxError(b2, b2Index-U16_LENGTH(ch), b2Len, parseError); 732 return 0; 733 } 734 735 if(profile->checkBiDi) { 736 direction = ubidi_getClass(ch); 737 if(firstCharDir == U_CHAR_DIRECTION_COUNT){ 738 firstCharDir = direction; 739 } 740 if(direction == U_LEFT_TO_RIGHT){ 741 leftToRight = true; 742 ltrPos = b2Index-1; 743 } 744 if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){ 745 rightToLeft = true; 746 rtlPos = b2Index-1; 747 } 748 } 749 } 750 if(profile->checkBiDi){ 751 // satisfy 2 752 if( leftToRight && rightToLeft){ 753 *status = U_STRINGPREP_CHECK_BIDI_ERROR; 754 uprv_syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseError); 755 return 0; 756 } 757 758 //satisfy 3 759 if( rightToLeft && 760 !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) && 761 (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC)) 762 ){ 763 *status = U_STRINGPREP_CHECK_BIDI_ERROR; 764 uprv_syntaxError(b2, rtlPos, b2Len, parseError); 765 return false; 766 } 767 } 768 return s2.extract(dest, destCapacity, *status); 769 } 770 771 772 /* data swapping ------------------------------------------------------------ */ 773 774 U_CAPI int32_t U_EXPORT2 775 usprep_swap(const UDataSwapper *ds, 776 const void *inData, int32_t length, void *outData, 777 UErrorCode *pErrorCode) { 778 const UDataInfo *pInfo; 779 int32_t headerSize; 780 781 const uint8_t *inBytes; 782 uint8_t *outBytes; 783 784 const int32_t *inIndexes; 785 int32_t indexes[16]; 786 787 int32_t i, offset, count, size; 788 789 /* udata_swapDataHeader checks the arguments */ 790 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); 791 if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) { 792 return 0; 793 } 794 795 /* check data format and format version */ 796 pInfo=(const UDataInfo *)((const char *)inData+4); 797 if(!( 798 pInfo->dataFormat[0]==0x53 && /* dataFormat="SPRP" */ 799 pInfo->dataFormat[1]==0x50 && 800 pInfo->dataFormat[2]==0x52 && 801 pInfo->dataFormat[3]==0x50 && 802 pInfo->formatVersion[0]==3 803 )) { 804 udata_printError(ds, "usprep_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as StringPrep .spp data\n", 805 pInfo->dataFormat[0], pInfo->dataFormat[1], 806 pInfo->dataFormat[2], pInfo->dataFormat[3], 807 pInfo->formatVersion[0]); 808 *pErrorCode=U_UNSUPPORTED_ERROR; 809 return 0; 810 } 811 812 inBytes=(const uint8_t *)inData+headerSize; 813 outBytes= (outData == nullptr ) ? nullptr : (uint8_t *)outData+headerSize; 814 815 inIndexes=(const int32_t *)inBytes; 816 817 if(length>=0) { 818 length-=headerSize; 819 if(length<16*4) { 820 udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for StringPrep .spp data\n", 821 length); 822 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 823 return 0; 824 } 825 } 826 827 /* read the first 16 indexes (ICU 2.8/format version 3: _SPREP_INDEX_TOP==16, might grow) */ 828 for(i=0; i<16; ++i) { 829 indexes[i]=udata_readInt32(ds, inIndexes[i]); 830 } 831 832 /* calculate the total length of the data */ 833 size= 834 16*4+ /* size of indexes[] */ 835 indexes[_SPREP_INDEX_TRIE_SIZE]+ 836 indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]; 837 838 if(length>=0) { 839 if(length<size) { 840 udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for all of StringPrep .spp data\n", 841 length); 842 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 843 return 0; 844 } 845 846 /* copy the data for inaccessible bytes */ 847 if(inBytes!=outBytes) { 848 uprv_memcpy(outBytes, inBytes, size); 849 } 850 851 offset=0; 852 853 /* swap the int32_t indexes[] */ 854 count=16*4; 855 ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode); 856 offset+=count; 857 858 /* swap the UTrie */ 859 count=indexes[_SPREP_INDEX_TRIE_SIZE]; 860 utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode); 861 offset+=count; 862 863 /* swap the uint16_t mappingTable[] */ 864 count=indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]; 865 ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode); 866 //offset+=count; 867 } 868 869 return headerSize+size; 870 } 871 872 #endif /* #if !UCONFIG_NO_IDNA */