ucol.cpp (20051B)
1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * Copyright (C) 1996-2015, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ******************************************************************************* 8 * file name: ucol.cpp 9 * encoding: UTF-8 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * Modification history 14 * Date Name Comments 15 * 1996-1999 various members of ICU team maintained C API for collation framework 16 * 02/16/2001 synwee Added internal method getPrevSpecialCE 17 * 03/01/2001 synwee Added maxexpansion functionality. 18 * 03/16/2001 weiv Collation framework is rewritten in C and made UCA compliant 19 * 2012-2014 markus Rewritten in C++ again. 20 */ 21 22 #include "unicode/utypes.h" 23 24 #if !UCONFIG_NO_COLLATION 25 26 #include "unicode/coll.h" 27 #include "unicode/tblcoll.h" 28 #include "unicode/bytestream.h" 29 #include "unicode/coleitr.h" 30 #include "unicode/ucoleitr.h" 31 #include "unicode/ustring.h" 32 #include "cmemory.h" 33 #include "collation.h" 34 #include "cstring.h" 35 #include "putilimp.h" 36 #include "uassert.h" 37 #include "utracimp.h" 38 39 U_NAMESPACE_USE 40 41 U_CAPI UCollator* U_EXPORT2 42 ucol_openBinary(const uint8_t *bin, int32_t length, 43 const UCollator *base, 44 UErrorCode *status) 45 { 46 if(U_FAILURE(*status)) { return nullptr; } 47 RuleBasedCollator *coll = new RuleBasedCollator( 48 bin, length, 49 RuleBasedCollator::rbcFromUCollator(base), 50 *status); 51 if(coll == nullptr) { 52 *status = U_MEMORY_ALLOCATION_ERROR; 53 return nullptr; 54 } 55 if(U_FAILURE(*status)) { 56 delete coll; 57 return nullptr; 58 } 59 return coll->toUCollator(); 60 } 61 62 U_CAPI int32_t U_EXPORT2 63 ucol_cloneBinary(const UCollator *coll, 64 uint8_t *buffer, int32_t capacity, 65 UErrorCode *status) 66 { 67 if(U_FAILURE(*status)) { 68 return 0; 69 } 70 const RuleBasedCollator *rbc = RuleBasedCollator::rbcFromUCollator(coll); 71 if(rbc == nullptr && coll != nullptr) { 72 *status = U_UNSUPPORTED_ERROR; 73 return 0; 74 } 75 return rbc->cloneBinary(buffer, capacity, *status); 76 } 77 78 U_CAPI UCollator* U_EXPORT2 79 ucol_safeClone(const UCollator *coll, void * /*stackBuffer*/, int32_t * pBufferSize, UErrorCode *status) 80 { 81 if (status == nullptr || U_FAILURE(*status)){ 82 return nullptr; 83 } 84 if (coll == nullptr) { 85 *status = U_ILLEGAL_ARGUMENT_ERROR; 86 return nullptr; 87 } 88 if (pBufferSize != nullptr) { 89 int32_t inputSize = *pBufferSize; 90 *pBufferSize = 1; 91 if (inputSize == 0) { 92 return nullptr; // preflighting for deprecated functionality 93 } 94 } 95 Collator *newColl = Collator::fromUCollator(coll)->clone(); 96 if (newColl == nullptr) { 97 *status = U_MEMORY_ALLOCATION_ERROR; 98 return nullptr; 99 } else if (pBufferSize != nullptr) { 100 *status = U_SAFECLONE_ALLOCATED_WARNING; 101 } 102 return newColl->toUCollator(); 103 } 104 105 U_CAPI UCollator* U_EXPORT2 106 ucol_clone(const UCollator *coll, UErrorCode *status) 107 { 108 return ucol_safeClone(coll, nullptr, nullptr, status); 109 } 110 111 U_CAPI void U_EXPORT2 112 ucol_close(UCollator *coll) 113 { 114 UTRACE_ENTRY_OC(UTRACE_UCOL_CLOSE); 115 UTRACE_DATA1(UTRACE_INFO, "coll = %p", coll); 116 if(coll != nullptr) { 117 delete Collator::fromUCollator(coll); 118 } 119 UTRACE_EXIT(); 120 } 121 122 U_CAPI int32_t U_EXPORT2 123 ucol_mergeSortkeys(const uint8_t *src1, int32_t src1Length, 124 const uint8_t *src2, int32_t src2Length, 125 uint8_t *dest, int32_t destCapacity) { 126 /* check arguments */ 127 if( src1==nullptr || src1Length<-1 || src1Length==0 || (src1Length>0 && src1[src1Length-1]!=0) || 128 src2==nullptr || src2Length<-1 || src2Length==0 || (src2Length>0 && src2[src2Length-1]!=0) || 129 destCapacity<0 || (destCapacity>0 && dest==nullptr) 130 ) { 131 /* error, attempt to write a zero byte and return 0 */ 132 if(dest!=nullptr && destCapacity>0) { 133 *dest=0; 134 } 135 return 0; 136 } 137 138 /* check lengths and capacity */ 139 if(src1Length<0) { 140 src1Length=(int32_t)uprv_strlen((const char *)src1)+1; 141 } 142 if(src2Length<0) { 143 src2Length=(int32_t)uprv_strlen((const char *)src2)+1; 144 } 145 146 int32_t destLength=src1Length+src2Length; 147 if(destLength>destCapacity) { 148 /* the merged sort key does not fit into the destination */ 149 return destLength; 150 } 151 152 /* merge the sort keys with the same number of levels */ 153 uint8_t *p=dest; 154 for(;;) { 155 /* copy level from src1 not including 00 or 01 */ 156 uint8_t b; 157 while((b=*src1)>=2) { 158 ++src1; 159 *p++=b; 160 } 161 162 /* add a 02 merge separator */ 163 *p++=2; 164 165 /* copy level from src2 not including 00 or 01 */ 166 while((b=*src2)>=2) { 167 ++src2; 168 *p++=b; 169 } 170 171 /* if both sort keys have another level, then add a 01 level separator and continue */ 172 if(*src1==1 && *src2==1) { 173 ++src1; 174 ++src2; 175 *p++=1; 176 } else { 177 break; 178 } 179 } 180 181 /* 182 * here, at least one sort key is finished now, but the other one 183 * might have some contents left from containing more levels; 184 * that contents is just appended to the result 185 */ 186 if(*src1!=0) { 187 /* src1 is not finished, therefore *src2==0, and src1 is appended */ 188 src2=src1; 189 } 190 /* append src2, "the other, unfinished sort key" */ 191 while((*p++=*src2++)!=0) {} 192 193 /* the actual length might be less than destLength if either sort key contained illegally embedded zero bytes */ 194 return (int32_t)(p-dest); 195 } 196 197 U_CAPI int32_t U_EXPORT2 198 ucol_getSortKey(const UCollator *coll, 199 const char16_t *source, 200 int32_t sourceLength, 201 uint8_t *result, 202 int32_t resultLength) 203 { 204 UTRACE_ENTRY(UTRACE_UCOL_GET_SORTKEY); 205 if (UTRACE_LEVEL(UTRACE_VERBOSE)) { 206 UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, source string = %vh ", coll, source, 207 ((sourceLength==-1 && source!=nullptr) ? u_strlen(source) : sourceLength)); 208 } 209 210 int32_t keySize = Collator::fromUCollator(coll)-> 211 getSortKey(source, sourceLength, result, resultLength); 212 213 UTRACE_DATA2(UTRACE_VERBOSE, "Sort Key = %vb", result, keySize); 214 UTRACE_EXIT_VALUE(keySize); 215 return keySize; 216 } 217 218 U_CAPI int32_t U_EXPORT2 219 ucol_nextSortKeyPart(const UCollator *coll, 220 UCharIterator *iter, 221 uint32_t state[2], 222 uint8_t *dest, int32_t count, 223 UErrorCode *status) 224 { 225 /* error checking */ 226 if(status==nullptr || U_FAILURE(*status)) { 227 return 0; 228 } 229 UTRACE_ENTRY(UTRACE_UCOL_NEXTSORTKEYPART); 230 UTRACE_DATA6(UTRACE_VERBOSE, "coll=%p, iter=%p, state=%d %d, dest=%p, count=%d", 231 coll, iter, state[0], state[1], dest, count); 232 233 int32_t i = Collator::fromUCollator(coll)-> 234 internalNextSortKeyPart(iter, state, dest, count, *status); 235 236 // Return number of meaningful sortkey bytes. 237 UTRACE_DATA4(UTRACE_VERBOSE, "dest = %vb, state=%d %d", 238 dest,i, state[0], state[1]); 239 UTRACE_EXIT_VALUE_STATUS(i, *status); 240 return i; 241 } 242 243 /** 244 * Produce a bound for a given sortkey and a number of levels. 245 */ 246 U_CAPI int32_t U_EXPORT2 247 ucol_getBound(const uint8_t *source, 248 int32_t sourceLength, 249 UColBoundMode boundType, 250 uint32_t noOfLevels, 251 uint8_t *result, 252 int32_t resultLength, 253 UErrorCode *status) 254 { 255 // consistency checks 256 if(status == nullptr || U_FAILURE(*status)) { 257 return 0; 258 } 259 if(source == nullptr) { 260 *status = U_ILLEGAL_ARGUMENT_ERROR; 261 return 0; 262 } 263 264 int32_t sourceIndex = 0; 265 // Scan the string until we skip enough of the key OR reach the end of the key 266 do { 267 sourceIndex++; 268 if(source[sourceIndex] == Collation::LEVEL_SEPARATOR_BYTE) { 269 noOfLevels--; 270 } 271 } while (noOfLevels > 0 272 && (source[sourceIndex] != 0 || sourceIndex < sourceLength)); 273 274 if((source[sourceIndex] == 0 || sourceIndex == sourceLength) 275 && noOfLevels > 0) { 276 *status = U_SORT_KEY_TOO_SHORT_WARNING; 277 } 278 279 280 // READ ME: this code assumes that the values for boundType 281 // enum will not changes. They are set so that the enum value 282 // corresponds to the number of extra bytes each bound type 283 // needs. 284 if(result != nullptr && resultLength >= sourceIndex+boundType) { 285 uprv_memcpy(result, source, sourceIndex); 286 switch(boundType) { 287 // Lower bound just gets terminated. No extra bytes 288 case UCOL_BOUND_LOWER: // = 0 289 break; 290 // Upper bound needs one extra byte 291 case UCOL_BOUND_UPPER: // = 1 292 result[sourceIndex++] = 2; 293 break; 294 // Upper long bound needs two extra bytes 295 case UCOL_BOUND_UPPER_LONG: // = 2 296 result[sourceIndex++] = 0xFF; 297 result[sourceIndex++] = 0xFF; 298 break; 299 default: 300 *status = U_ILLEGAL_ARGUMENT_ERROR; 301 return 0; 302 } 303 result[sourceIndex++] = 0; 304 305 return sourceIndex; 306 } else { 307 return sourceIndex+boundType+1; 308 } 309 } 310 311 U_CAPI void U_EXPORT2 312 ucol_setMaxVariable(UCollator *coll, UColReorderCode group, UErrorCode *pErrorCode) { 313 if(U_FAILURE(*pErrorCode)) { return; } 314 Collator::fromUCollator(coll)->setMaxVariable(group, *pErrorCode); 315 } 316 317 U_CAPI UColReorderCode U_EXPORT2 318 ucol_getMaxVariable(const UCollator *coll) { 319 return Collator::fromUCollator(coll)->getMaxVariable(); 320 } 321 322 U_CAPI uint32_t U_EXPORT2 323 ucol_setVariableTop(UCollator *coll, const char16_t *varTop, int32_t len, UErrorCode *status) { 324 if(U_FAILURE(*status) || coll == nullptr) { 325 return 0; 326 } 327 return Collator::fromUCollator(coll)->setVariableTop(varTop, len, *status); 328 } 329 330 U_CAPI uint32_t U_EXPORT2 ucol_getVariableTop(const UCollator *coll, UErrorCode *status) { 331 if(U_FAILURE(*status) || coll == nullptr) { 332 return 0; 333 } 334 return Collator::fromUCollator(coll)->getVariableTop(*status); 335 } 336 337 U_CAPI void U_EXPORT2 338 ucol_restoreVariableTop(UCollator *coll, const uint32_t varTop, UErrorCode *status) { 339 if(U_FAILURE(*status) || coll == nullptr) { 340 return; 341 } 342 Collator::fromUCollator(coll)->setVariableTop(varTop, *status); 343 } 344 345 U_CAPI void U_EXPORT2 346 ucol_setAttribute(UCollator *coll, UColAttribute attr, UColAttributeValue value, UErrorCode *status) { 347 if(U_FAILURE(*status) || coll == nullptr) { 348 return; 349 } 350 351 Collator::fromUCollator(coll)->setAttribute(attr, value, *status); 352 } 353 354 U_CAPI UColAttributeValue U_EXPORT2 355 ucol_getAttribute(const UCollator *coll, UColAttribute attr, UErrorCode *status) { 356 if(U_FAILURE(*status) || coll == nullptr) { 357 return UCOL_DEFAULT; 358 } 359 360 return Collator::fromUCollator(coll)->getAttribute(attr, *status); 361 } 362 363 U_CAPI void U_EXPORT2 364 ucol_setStrength( UCollator *coll, 365 UCollationStrength strength) 366 { 367 UErrorCode status = U_ZERO_ERROR; 368 ucol_setAttribute(coll, UCOL_STRENGTH, strength, &status); 369 } 370 371 U_CAPI UCollationStrength U_EXPORT2 372 ucol_getStrength(const UCollator *coll) 373 { 374 UErrorCode status = U_ZERO_ERROR; 375 return ucol_getAttribute(coll, UCOL_STRENGTH, &status); 376 } 377 378 U_CAPI int32_t U_EXPORT2 379 ucol_getReorderCodes(const UCollator *coll, 380 int32_t *dest, 381 int32_t destCapacity, 382 UErrorCode *status) { 383 if (U_FAILURE(*status)) { 384 return 0; 385 } 386 387 return Collator::fromUCollator(coll)->getReorderCodes(dest, destCapacity, *status); 388 } 389 390 U_CAPI void U_EXPORT2 391 ucol_setReorderCodes(UCollator* coll, 392 const int32_t* reorderCodes, 393 int32_t reorderCodesLength, 394 UErrorCode *status) { 395 if (U_FAILURE(*status)) { 396 return; 397 } 398 399 Collator::fromUCollator(coll)->setReorderCodes(reorderCodes, reorderCodesLength, *status); 400 } 401 402 U_CAPI int32_t U_EXPORT2 403 ucol_getEquivalentReorderCodes(int32_t reorderCode, 404 int32_t* dest, 405 int32_t destCapacity, 406 UErrorCode *pErrorCode) { 407 return Collator::getEquivalentReorderCodes(reorderCode, dest, destCapacity, *pErrorCode); 408 } 409 410 U_CAPI void U_EXPORT2 411 ucol_getVersion(const UCollator* coll, 412 UVersionInfo versionInfo) 413 { 414 Collator::fromUCollator(coll)->getVersion(versionInfo); 415 } 416 417 U_CAPI UCollationResult U_EXPORT2 418 ucol_strcollIter( const UCollator *coll, 419 UCharIterator *sIter, 420 UCharIterator *tIter, 421 UErrorCode *status) 422 { 423 if(!status || U_FAILURE(*status)) { 424 return UCOL_EQUAL; 425 } 426 427 UTRACE_ENTRY(UTRACE_UCOL_STRCOLLITER); 428 UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, sIter=%p, tIter=%p", coll, sIter, tIter); 429 430 if(sIter == nullptr || tIter == nullptr || coll == nullptr) { 431 *status = U_ILLEGAL_ARGUMENT_ERROR; 432 UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL, *status); 433 return UCOL_EQUAL; 434 } 435 436 UCollationResult result = Collator::fromUCollator(coll)->compare(*sIter, *tIter, *status); 437 438 UTRACE_EXIT_VALUE_STATUS(result, *status); 439 return result; 440 } 441 442 443 /* */ 444 /* ucol_strcoll Main public API string comparison function */ 445 /* */ 446 U_CAPI UCollationResult U_EXPORT2 447 ucol_strcoll( const UCollator *coll, 448 const char16_t *source, 449 int32_t sourceLength, 450 const char16_t *target, 451 int32_t targetLength) 452 { 453 UTRACE_ENTRY(UTRACE_UCOL_STRCOLL); 454 if (UTRACE_LEVEL(UTRACE_VERBOSE)) { 455 UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, source=%p, target=%p", coll, source, target); 456 UTRACE_DATA2(UTRACE_VERBOSE, "source string = %vh ", source, sourceLength); 457 UTRACE_DATA2(UTRACE_VERBOSE, "target string = %vh ", target, targetLength); 458 } 459 460 UErrorCode status = U_ZERO_ERROR; 461 UCollationResult returnVal = Collator::fromUCollator(coll)-> 462 compare(source, sourceLength, target, targetLength, status); 463 UTRACE_EXIT_VALUE_STATUS(returnVal, status); 464 return returnVal; 465 } 466 467 U_CAPI UCollationResult U_EXPORT2 468 ucol_strcollUTF8( 469 const UCollator *coll, 470 const char *source, 471 int32_t sourceLength, 472 const char *target, 473 int32_t targetLength, 474 UErrorCode *status) 475 { 476 UTRACE_ENTRY(UTRACE_UCOL_STRCOLLUTF8); 477 if (UTRACE_LEVEL(UTRACE_VERBOSE)) { 478 UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, source=%p, target=%p", coll, source, target); 479 UTRACE_DATA2(UTRACE_VERBOSE, "source string = %vb ", source, sourceLength); 480 UTRACE_DATA2(UTRACE_VERBOSE, "target string = %vb ", target, targetLength); 481 } 482 483 if (U_FAILURE(*status)) { 484 /* do nothing */ 485 UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL, *status); 486 return UCOL_EQUAL; 487 } 488 489 UCollationResult returnVal = Collator::fromUCollator(coll)->internalCompareUTF8( 490 source, sourceLength, target, targetLength, *status); 491 UTRACE_EXIT_VALUE_STATUS(returnVal, *status); 492 return returnVal; 493 } 494 495 496 /* convenience function for comparing strings */ 497 U_CAPI UBool U_EXPORT2 498 ucol_greater( const UCollator *coll, 499 const char16_t *source, 500 int32_t sourceLength, 501 const char16_t *target, 502 int32_t targetLength) 503 { 504 return (ucol_strcoll(coll, source, sourceLength, target, targetLength) 505 == UCOL_GREATER); 506 } 507 508 /* convenience function for comparing strings */ 509 U_CAPI UBool U_EXPORT2 510 ucol_greaterOrEqual( const UCollator *coll, 511 const char16_t *source, 512 int32_t sourceLength, 513 const char16_t *target, 514 int32_t targetLength) 515 { 516 return (ucol_strcoll(coll, source, sourceLength, target, targetLength) 517 != UCOL_LESS); 518 } 519 520 /* convenience function for comparing strings */ 521 U_CAPI UBool U_EXPORT2 522 ucol_equal( const UCollator *coll, 523 const char16_t *source, 524 int32_t sourceLength, 525 const char16_t *target, 526 int32_t targetLength) 527 { 528 return (ucol_strcoll(coll, source, sourceLength, target, targetLength) 529 == UCOL_EQUAL); 530 } 531 532 U_CAPI void U_EXPORT2 533 ucol_getUCAVersion(const UCollator* coll, UVersionInfo info) { 534 const Collator *c = Collator::fromUCollator(coll); 535 if(c != nullptr) { 536 UVersionInfo v; 537 c->getVersion(v); 538 // Note: This is tied to how the current implementation encodes the UCA version 539 // in the overall getVersion(). 540 // Alternatively, we could load the root collator and get at lower-level data from there. 541 // Either way, it will reflect the input collator's UCA version only 542 // if it is a known implementation. 543 // It would be cleaner to make this a virtual Collator method. 544 info[0] = v[1] >> 3; 545 info[1] = v[1] & 7; 546 info[2] = v[2] >> 6; 547 info[3] = 0; 548 } 549 } 550 551 U_CAPI const char16_t * U_EXPORT2 552 ucol_getRules(const UCollator *coll, int32_t *length) { 553 const RuleBasedCollator *rbc = RuleBasedCollator::rbcFromUCollator(coll); 554 // OK to crash if coll==nullptr: We do not want to check "this" pointers. 555 if(rbc != nullptr || coll == nullptr) { 556 const UnicodeString &rules = rbc->getRules(); 557 U_ASSERT(rules.getBuffer()[rules.length()] == 0); 558 *length = rules.length(); 559 return rules.getBuffer(); 560 } 561 static const char16_t _NUL = 0; 562 *length = 0; 563 return &_NUL; 564 } 565 566 U_CAPI int32_t U_EXPORT2 567 ucol_getRulesEx(const UCollator *coll, UColRuleOption delta, char16_t *buffer, int32_t bufferLen) { 568 UnicodeString rules; 569 const RuleBasedCollator *rbc = RuleBasedCollator::rbcFromUCollator(coll); 570 if(rbc != nullptr || coll == nullptr) { 571 rbc->getRules(delta, rules); 572 } 573 if(buffer != nullptr && bufferLen > 0) { 574 UErrorCode errorCode = U_ZERO_ERROR; 575 return rules.extract(buffer, bufferLen, errorCode); 576 } else { 577 return rules.length(); 578 } 579 } 580 581 U_CAPI const char * U_EXPORT2 582 ucol_getLocale(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status) { 583 return ucol_getLocaleByType(coll, type, status); 584 } 585 586 U_CAPI const char * U_EXPORT2 587 ucol_getLocaleByType(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status) { 588 if(U_FAILURE(*status)) { 589 return nullptr; 590 } 591 UTRACE_ENTRY(UTRACE_UCOL_GETLOCALE); 592 UTRACE_DATA1(UTRACE_INFO, "coll=%p", coll); 593 594 const char *result; 595 const RuleBasedCollator *rbc = RuleBasedCollator::rbcFromUCollator(coll); 596 if(rbc == nullptr && coll != nullptr) { 597 *status = U_UNSUPPORTED_ERROR; 598 result = nullptr; 599 } else { 600 result = rbc->internalGetLocaleID(type, *status); 601 } 602 603 UTRACE_DATA1(UTRACE_INFO, "result = %s", result); 604 UTRACE_EXIT_STATUS(*status); 605 return result; 606 } 607 608 U_CAPI USet * U_EXPORT2 609 ucol_getTailoredSet(const UCollator *coll, UErrorCode *status) { 610 if(U_FAILURE(*status)) { 611 return nullptr; 612 } 613 UnicodeSet *set = Collator::fromUCollator(coll)->getTailoredSet(*status); 614 if(U_FAILURE(*status)) { 615 delete set; 616 return nullptr; 617 } 618 return set->toUSet(); 619 } 620 621 U_CAPI UBool U_EXPORT2 622 ucol_equals(const UCollator *source, const UCollator *target) { 623 return source == target || 624 (*Collator::fromUCollator(source)) == (*Collator::fromUCollator(target)); 625 } 626 627 #endif /* #if !UCONFIG_NO_COLLATION */