ustring.cpp (45213B)
1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ****************************************************************************** 5 * 6 * Copyright (C) 1998-2016, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 * 9 ****************************************************************************** 10 * 11 * File ustring.cpp 12 * 13 * Modification History: 14 * 15 * Date Name Description 16 * 12/07/98 bertrand Creation. 17 ****************************************************************************** 18 */ 19 20 #include "unicode/utypes.h" 21 #include "unicode/putil.h" 22 #include "unicode/uchar.h" 23 #include "unicode/ustring.h" 24 #include "unicode/utf16.h" 25 #include "cstring.h" 26 #include "cwchar.h" 27 #include "cmemory.h" 28 #include "ustr_imp.h" 29 30 /* ANSI string.h - style functions ------------------------------------------ */ 31 32 /* U+ffff is the highest BMP code point, the highest one that fits into a 16-bit char16_t */ 33 #define U_BMP_MAX 0xffff 34 35 /* Forward binary string search functions ----------------------------------- */ 36 37 /* 38 * Test if a substring match inside a string is at code point boundaries. 39 * All pointers refer to the same buffer. 40 * The limit pointer may be nullptr, all others must be real pointers. 41 */ 42 static inline UBool 43 isMatchAtCPBoundary(const char16_t *start, const char16_t *match, const char16_t *matchLimit, const char16_t *limit) { 44 if(U16_IS_TRAIL(*match) && start!=match && U16_IS_LEAD(*(match-1))) { 45 /* the leading edge of the match is in the middle of a surrogate pair */ 46 return false; 47 } 48 if(U16_IS_LEAD(*(matchLimit-1)) && matchLimit!=limit && U16_IS_TRAIL(*matchLimit)) { 49 /* the trailing edge of the match is in the middle of a surrogate pair */ 50 return false; 51 } 52 return true; 53 } 54 55 U_CAPI char16_t * U_EXPORT2 56 u_strFindFirst(const char16_t *s, int32_t length, 57 const char16_t *sub, int32_t subLength) { 58 const char16_t *start, *p, *q, *subLimit; 59 char16_t c, cs, cq; 60 61 if(sub==nullptr || subLength<-1) { 62 return (char16_t *)s; 63 } 64 if(s==nullptr || length<-1) { 65 return nullptr; 66 } 67 68 start=s; 69 70 if(length<0 && subLength<0) { 71 /* both strings are NUL-terminated */ 72 if((cs=*sub++)==0) { 73 return (char16_t *)s; 74 } 75 if(*sub==0 && !U16_IS_SURROGATE(cs)) { 76 /* the substring consists of a single, non-surrogate BMP code point */ 77 return u_strchr(s, cs); 78 } 79 80 while((c=*s++)!=0) { 81 if(c==cs) { 82 /* found first substring char16_t, compare rest */ 83 p=s; 84 q=sub; 85 for(;;) { 86 if((cq=*q)==0) { 87 if(isMatchAtCPBoundary(start, s-1, p, nullptr)) { 88 return (char16_t *)(s-1); /* well-formed match */ 89 } else { 90 break; /* no match because surrogate pair is split */ 91 } 92 } 93 if((c=*p)==0) { 94 return nullptr; /* no match, and none possible after s */ 95 } 96 if(c!=cq) { 97 break; /* no match */ 98 } 99 ++p; 100 ++q; 101 } 102 } 103 } 104 105 /* not found */ 106 return nullptr; 107 } 108 109 if(subLength<0) { 110 subLength=u_strlen(sub); 111 } 112 if(subLength==0) { 113 return (char16_t *)s; 114 } 115 116 /* get sub[0] to search for it fast */ 117 cs=*sub++; 118 --subLength; 119 subLimit=sub+subLength; 120 121 if(subLength==0 && !U16_IS_SURROGATE(cs)) { 122 /* the substring consists of a single, non-surrogate BMP code point */ 123 return length<0 ? u_strchr(s, cs) : u_memchr(s, cs, length); 124 } 125 126 if(length<0) { 127 /* s is NUL-terminated */ 128 while((c=*s++)!=0) { 129 if(c==cs) { 130 /* found first substring char16_t, compare rest */ 131 p=s; 132 q=sub; 133 for(;;) { 134 if(q==subLimit) { 135 if(isMatchAtCPBoundary(start, s-1, p, nullptr)) { 136 return (char16_t *)(s-1); /* well-formed match */ 137 } else { 138 break; /* no match because surrogate pair is split */ 139 } 140 } 141 if((c=*p)==0) { 142 return nullptr; /* no match, and none possible after s */ 143 } 144 if(c!=*q) { 145 break; /* no match */ 146 } 147 ++p; 148 ++q; 149 } 150 } 151 } 152 } else { 153 const char16_t *limit, *preLimit; 154 155 /* subLength was decremented above */ 156 if(length<=subLength) { 157 return nullptr; /* s is shorter than sub */ 158 } 159 160 limit=s+length; 161 162 /* the substring must start before preLimit */ 163 preLimit=limit-subLength; 164 165 while(s!=preLimit) { 166 c=*s++; 167 if(c==cs) { 168 /* found first substring char16_t, compare rest */ 169 p=s; 170 q=sub; 171 for(;;) { 172 if(q==subLimit) { 173 if(isMatchAtCPBoundary(start, s-1, p, limit)) { 174 return (char16_t *)(s-1); /* well-formed match */ 175 } else { 176 break; /* no match because surrogate pair is split */ 177 } 178 } 179 if(*p!=*q) { 180 break; /* no match */ 181 } 182 ++p; 183 ++q; 184 } 185 } 186 } 187 } 188 189 /* not found */ 190 return nullptr; 191 } 192 193 U_CAPI char16_t * U_EXPORT2 194 u_strstr(const char16_t *s, const char16_t *substring) { 195 return u_strFindFirst(s, -1, substring, -1); 196 } 197 198 U_CAPI char16_t * U_EXPORT2 199 u_strchr(const char16_t *s, char16_t c) { 200 if(U16_IS_SURROGATE(c)) { 201 /* make sure to not find half of a surrogate pair */ 202 return u_strFindFirst(s, -1, &c, 1); 203 } else { 204 char16_t cs; 205 206 /* trivial search for a BMP code point */ 207 for(;;) { 208 if((cs=*s)==c) { 209 return (char16_t *)s; 210 } 211 if(cs==0) { 212 return nullptr; 213 } 214 ++s; 215 } 216 } 217 } 218 219 U_CAPI char16_t * U_EXPORT2 220 u_strchr32(const char16_t *s, UChar32 c) { 221 if((uint32_t)c<=U_BMP_MAX) { 222 /* find BMP code point */ 223 return u_strchr(s, (char16_t)c); 224 } else if((uint32_t)c<=UCHAR_MAX_VALUE) { 225 /* find supplementary code point as surrogate pair */ 226 char16_t cs, lead=U16_LEAD(c), trail=U16_TRAIL(c); 227 228 while((cs=*s++)!=0) { 229 if(cs==lead && *s==trail) { 230 return (char16_t *)(s-1); 231 } 232 } 233 return nullptr; 234 } else { 235 /* not a Unicode code point, not findable */ 236 return nullptr; 237 } 238 } 239 240 U_CAPI char16_t * U_EXPORT2 241 u_memchr(const char16_t *s, char16_t c, int32_t count) { 242 if(count<=0) { 243 return nullptr; /* no string */ 244 } else if(U16_IS_SURROGATE(c)) { 245 /* make sure to not find half of a surrogate pair */ 246 return u_strFindFirst(s, count, &c, 1); 247 } else { 248 /* trivial search for a BMP code point */ 249 const char16_t *limit=s+count; 250 do { 251 if(*s==c) { 252 return (char16_t *)s; 253 } 254 } while(++s!=limit); 255 return nullptr; 256 } 257 } 258 259 U_CAPI char16_t * U_EXPORT2 260 u_memchr32(const char16_t *s, UChar32 c, int32_t count) { 261 if((uint32_t)c<=U_BMP_MAX) { 262 /* find BMP code point */ 263 return u_memchr(s, (char16_t)c, count); 264 } else if(count<2) { 265 /* too short for a surrogate pair */ 266 return nullptr; 267 } else if((uint32_t)c<=UCHAR_MAX_VALUE) { 268 /* find supplementary code point as surrogate pair */ 269 const char16_t *limit=s+count-1; /* -1 so that we do not need a separate check for the trail unit */ 270 char16_t lead=U16_LEAD(c), trail=U16_TRAIL(c); 271 272 do { 273 if(*s==lead && *(s+1)==trail) { 274 return (char16_t *)s; 275 } 276 } while(++s!=limit); 277 return nullptr; 278 } else { 279 /* not a Unicode code point, not findable */ 280 return nullptr; 281 } 282 } 283 284 /* Backward binary string search functions ---------------------------------- */ 285 286 U_CAPI char16_t * U_EXPORT2 287 u_strFindLast(const char16_t *s, int32_t length, 288 const char16_t *sub, int32_t subLength) { 289 const char16_t *start, *limit, *p, *q, *subLimit; 290 char16_t c, cs; 291 292 if(sub==nullptr || subLength<-1) { 293 return (char16_t *)s; 294 } 295 if(s==nullptr || length<-1) { 296 return nullptr; 297 } 298 299 /* 300 * This implementation is more lazy than the one for u_strFindFirst(): 301 * There is no special search code for NUL-terminated strings. 302 * It does not seem to be worth it for searching substrings to 303 * search forward and find all matches like in u_strrchr() and similar. 304 * Therefore, we simply get both string lengths and search backward. 305 * 306 * markus 2002oct23 307 */ 308 309 if(subLength<0) { 310 subLength=u_strlen(sub); 311 } 312 if(subLength==0) { 313 return (char16_t *)s; 314 } 315 316 /* get sub[subLength-1] to search for it fast */ 317 subLimit=sub+subLength; 318 cs=*(--subLimit); 319 --subLength; 320 321 if(subLength==0 && !U16_IS_SURROGATE(cs)) { 322 /* the substring consists of a single, non-surrogate BMP code point */ 323 return length<0 ? u_strrchr(s, cs) : u_memrchr(s, cs, length); 324 } 325 326 if(length<0) { 327 length=u_strlen(s); 328 } 329 330 /* subLength was decremented above */ 331 if(length<=subLength) { 332 return nullptr; /* s is shorter than sub */ 333 } 334 335 start=s; 336 limit=s+length; 337 338 /* the substring must start no later than s+subLength */ 339 s+=subLength; 340 341 while(s!=limit) { 342 c=*(--limit); 343 if(c==cs) { 344 /* found last substring char16_t, compare rest */ 345 p=limit; 346 q=subLimit; 347 for(;;) { 348 if(q==sub) { 349 if(isMatchAtCPBoundary(start, p, limit+1, start+length)) { 350 return (char16_t *)p; /* well-formed match */ 351 } else { 352 break; /* no match because surrogate pair is split */ 353 } 354 } 355 if(*(--p)!=*(--q)) { 356 break; /* no match */ 357 } 358 } 359 } 360 } 361 362 /* not found */ 363 return nullptr; 364 } 365 366 U_CAPI char16_t * U_EXPORT2 367 u_strrstr(const char16_t *s, const char16_t *substring) { 368 return u_strFindLast(s, -1, substring, -1); 369 } 370 371 U_CAPI char16_t * U_EXPORT2 372 u_strrchr(const char16_t *s, char16_t c) { 373 if(U16_IS_SURROGATE(c)) { 374 /* make sure to not find half of a surrogate pair */ 375 return u_strFindLast(s, -1, &c, 1); 376 } else { 377 const char16_t *result=nullptr; 378 char16_t cs; 379 380 /* trivial search for a BMP code point */ 381 for(;;) { 382 if((cs=*s)==c) { 383 result=s; 384 } 385 if(cs==0) { 386 return (char16_t *)result; 387 } 388 ++s; 389 } 390 } 391 } 392 393 U_CAPI char16_t * U_EXPORT2 394 u_strrchr32(const char16_t *s, UChar32 c) { 395 if((uint32_t)c<=U_BMP_MAX) { 396 /* find BMP code point */ 397 return u_strrchr(s, (char16_t)c); 398 } else if((uint32_t)c<=UCHAR_MAX_VALUE) { 399 /* find supplementary code point as surrogate pair */ 400 const char16_t *result=nullptr; 401 char16_t cs, lead=U16_LEAD(c), trail=U16_TRAIL(c); 402 403 while((cs=*s++)!=0) { 404 if(cs==lead && *s==trail) { 405 result=s-1; 406 } 407 } 408 return (char16_t *)result; 409 } else { 410 /* not a Unicode code point, not findable */ 411 return nullptr; 412 } 413 } 414 415 U_CAPI char16_t * U_EXPORT2 416 u_memrchr(const char16_t *s, char16_t c, int32_t count) { 417 if(count<=0) { 418 return nullptr; /* no string */ 419 } else if(U16_IS_SURROGATE(c)) { 420 /* make sure to not find half of a surrogate pair */ 421 return u_strFindLast(s, count, &c, 1); 422 } else { 423 /* trivial search for a BMP code point */ 424 const char16_t *limit=s+count; 425 do { 426 if(*(--limit)==c) { 427 return (char16_t *)limit; 428 } 429 } while(s!=limit); 430 return nullptr; 431 } 432 } 433 434 U_CAPI char16_t * U_EXPORT2 435 u_memrchr32(const char16_t *s, UChar32 c, int32_t count) { 436 if((uint32_t)c<=U_BMP_MAX) { 437 /* find BMP code point */ 438 return u_memrchr(s, (char16_t)c, count); 439 } else if(count<2) { 440 /* too short for a surrogate pair */ 441 return nullptr; 442 } else if((uint32_t)c<=UCHAR_MAX_VALUE) { 443 /* find supplementary code point as surrogate pair */ 444 const char16_t *limit=s+count-1; 445 char16_t lead=U16_LEAD(c), trail=U16_TRAIL(c); 446 447 do { 448 if(*limit==trail && *(limit-1)==lead) { 449 return (char16_t *)(limit-1); 450 } 451 } while(s!=--limit); 452 return nullptr; 453 } else { 454 /* not a Unicode code point, not findable */ 455 return nullptr; 456 } 457 } 458 459 /* Tokenization functions --------------------------------------------------- */ 460 461 /* 462 * Match each code point in a string against each code point in the matchSet. 463 * Return the index of the first string code point that 464 * is (polarity==true) or is not (false) contained in the matchSet. 465 * Return -(string length)-1 if there is no such code point. 466 */ 467 static int32_t 468 _matchFromSet(const char16_t *string, const char16_t *matchSet, UBool polarity) { 469 int32_t matchLen, matchBMPLen, strItr, matchItr; 470 UChar32 stringCh, matchCh; 471 char16_t c, c2; 472 473 /* first part of matchSet contains only BMP code points */ 474 matchBMPLen = 0; 475 while((c = matchSet[matchBMPLen]) != 0 && U16_IS_SINGLE(c)) { 476 ++matchBMPLen; 477 } 478 479 /* second part of matchSet contains BMP and supplementary code points */ 480 matchLen = matchBMPLen; 481 while(matchSet[matchLen] != 0) { 482 ++matchLen; 483 } 484 485 for(strItr = 0; (c = string[strItr]) != 0;) { 486 ++strItr; 487 if(U16_IS_SINGLE(c)) { 488 if(polarity) { 489 for(matchItr = 0; matchItr < matchLen; ++matchItr) { 490 if(c == matchSet[matchItr]) { 491 return strItr - 1; /* one matches */ 492 } 493 } 494 } else { 495 for(matchItr = 0; matchItr < matchLen; ++matchItr) { 496 if(c == matchSet[matchItr]) { 497 goto endloop; 498 } 499 } 500 return strItr - 1; /* none matches */ 501 } 502 } else { 503 /* 504 * No need to check for string length before U16_IS_TRAIL 505 * because c2 could at worst be the terminating NUL. 506 */ 507 if(U16_IS_SURROGATE_LEAD(c) && U16_IS_TRAIL(c2 = string[strItr])) { 508 ++strItr; 509 stringCh = U16_GET_SUPPLEMENTARY(c, c2); 510 } else { 511 stringCh = c; /* unpaired trail surrogate */ 512 } 513 514 if(polarity) { 515 for(matchItr = matchBMPLen; matchItr < matchLen;) { 516 U16_NEXT(matchSet, matchItr, matchLen, matchCh); 517 if(stringCh == matchCh) { 518 return strItr - U16_LENGTH(stringCh); /* one matches */ 519 } 520 } 521 } else { 522 for(matchItr = matchBMPLen; matchItr < matchLen;) { 523 U16_NEXT(matchSet, matchItr, matchLen, matchCh); 524 if(stringCh == matchCh) { 525 goto endloop; 526 } 527 } 528 return strItr - U16_LENGTH(stringCh); /* none matches */ 529 } 530 } 531 endloop: 532 /* wish C had continue with labels like Java... */; 533 } 534 535 /* Didn't find it. */ 536 return -strItr-1; 537 } 538 539 /* Search for a codepoint in a string that matches one of the matchSet codepoints. */ 540 U_CAPI char16_t * U_EXPORT2 541 u_strpbrk(const char16_t *string, const char16_t *matchSet) 542 { 543 int32_t idx = _matchFromSet(string, matchSet, true); 544 if(idx >= 0) { 545 return (char16_t *)string + idx; 546 } else { 547 return nullptr; 548 } 549 } 550 551 /* Search for a codepoint in a string that matches one of the matchSet codepoints. */ 552 U_CAPI int32_t U_EXPORT2 553 u_strcspn(const char16_t *string, const char16_t *matchSet) 554 { 555 int32_t idx = _matchFromSet(string, matchSet, true); 556 if(idx >= 0) { 557 return idx; 558 } else { 559 return -idx - 1; /* == u_strlen(string) */ 560 } 561 } 562 563 /* Search for a codepoint in a string that does not match one of the matchSet codepoints. */ 564 U_CAPI int32_t U_EXPORT2 565 u_strspn(const char16_t *string, const char16_t *matchSet) 566 { 567 int32_t idx = _matchFromSet(string, matchSet, false); 568 if(idx >= 0) { 569 return idx; 570 } else { 571 return -idx - 1; /* == u_strlen(string) */ 572 } 573 } 574 575 /* ----- Text manipulation functions --- */ 576 577 U_CAPI char16_t* U_EXPORT2 578 u_strtok_r(char16_t *src, 579 const char16_t *delim, 580 char16_t **saveState) 581 { 582 char16_t *tokSource; 583 char16_t *nextToken; 584 uint32_t nonDelimIdx; 585 586 /* If saveState is nullptr, the user messed up. */ 587 if (src != nullptr) { 588 tokSource = src; 589 *saveState = src; /* Set to "src" in case there are no delimiters */ 590 } 591 else if (*saveState) { 592 tokSource = *saveState; 593 } 594 else { 595 /* src == nullptr && *saveState == nullptr */ 596 /* This shouldn't happen. We already finished tokenizing. */ 597 return nullptr; 598 } 599 600 /* Skip initial delimiters */ 601 nonDelimIdx = u_strspn(tokSource, delim); 602 tokSource = &tokSource[nonDelimIdx]; 603 604 if (*tokSource) { 605 nextToken = u_strpbrk(tokSource, delim); 606 if (nextToken != nullptr) { 607 /* Create a token */ 608 *(nextToken++) = 0; 609 *saveState = nextToken; 610 return tokSource; 611 } 612 else if (*saveState) { 613 /* Return the last token */ 614 *saveState = nullptr; 615 return tokSource; 616 } 617 } 618 else { 619 /* No tokens were found. Only delimiters were left. */ 620 *saveState = nullptr; 621 } 622 return nullptr; 623 } 624 625 /* Miscellaneous functions -------------------------------------------------- */ 626 627 U_CAPI char16_t* U_EXPORT2 628 u_strcat(char16_t *dst, 629 const char16_t *src) 630 { 631 char16_t *anchor = dst; /* save a pointer to start of dst */ 632 633 while(*dst != 0) { /* To end of first string */ 634 ++dst; 635 } 636 while((*(dst++) = *(src++)) != 0) { /* copy string 2 over */ 637 } 638 639 return anchor; 640 } 641 642 U_CAPI char16_t* U_EXPORT2 643 u_strncat(char16_t *dst, 644 const char16_t *src, 645 int32_t n ) 646 { 647 if(n > 0) { 648 char16_t *anchor = dst; /* save a pointer to start of dst */ 649 650 while(*dst != 0) { /* To end of first string */ 651 ++dst; 652 } 653 while((*dst = *src) != 0) { /* copy string 2 over */ 654 ++dst; 655 if(--n == 0) { 656 *dst = 0; 657 break; 658 } 659 ++src; 660 } 661 662 return anchor; 663 } else { 664 return dst; 665 } 666 } 667 668 /* ----- Text property functions --- */ 669 670 U_CAPI int32_t U_EXPORT2 671 u_strcmp(const char16_t *s1, 672 const char16_t *s2) 673 { 674 char16_t c1, c2; 675 676 for(;;) { 677 c1=*s1++; 678 c2=*s2++; 679 if (c1 != c2 || c1 == 0) { 680 break; 681 } 682 } 683 return (int32_t)c1 - (int32_t)c2; 684 } 685 686 U_CFUNC int32_t U_EXPORT2 687 uprv_strCompare(const char16_t *s1, int32_t length1, 688 const char16_t *s2, int32_t length2, 689 UBool strncmpStyle, UBool codePointOrder) { 690 const char16_t *start1, *start2, *limit1, *limit2; 691 char16_t c1, c2; 692 693 /* setup for fix-up */ 694 start1=s1; 695 start2=s2; 696 697 /* compare identical prefixes - they do not need to be fixed up */ 698 if(length1<0 && length2<0) { 699 /* strcmp style, both NUL-terminated */ 700 if(s1==s2) { 701 return 0; 702 } 703 704 for(;;) { 705 c1=*s1; 706 c2=*s2; 707 if(c1!=c2) { 708 break; 709 } 710 if(c1==0) { 711 return 0; 712 } 713 ++s1; 714 ++s2; 715 } 716 717 /* setup for fix-up */ 718 limit1=limit2=nullptr; 719 } else if(strncmpStyle) { 720 /* special handling for strncmp, assume length1==length2>=0 but also check for NUL */ 721 if(s1==s2) { 722 return 0; 723 } 724 725 limit1=start1+length1; 726 727 for(;;) { 728 /* both lengths are same, check only one limit */ 729 if(s1==limit1) { 730 return 0; 731 } 732 733 c1=*s1; 734 c2=*s2; 735 if(c1!=c2) { 736 break; 737 } 738 if(c1==0) { 739 return 0; 740 } 741 ++s1; 742 ++s2; 743 } 744 745 /* setup for fix-up */ 746 limit2=start2+length1; /* use length1 here, too, to enforce assumption */ 747 } else { 748 /* memcmp/UnicodeString style, both length-specified */ 749 int32_t lengthResult; 750 751 if(length1<0) { 752 length1=u_strlen(s1); 753 } 754 if(length2<0) { 755 length2=u_strlen(s2); 756 } 757 758 /* limit1=start1+min(length1, length2) */ 759 if(length1<length2) { 760 lengthResult=-1; 761 limit1=start1+length1; 762 } else if(length1==length2) { 763 lengthResult=0; 764 limit1=start1+length1; 765 } else /* length1>length2 */ { 766 lengthResult=1; 767 limit1=start1+length2; 768 } 769 770 if(s1==s2) { 771 return lengthResult; 772 } 773 774 for(;;) { 775 /* check pseudo-limit */ 776 if(s1==limit1) { 777 return lengthResult; 778 } 779 780 c1=*s1; 781 c2=*s2; 782 if(c1!=c2) { 783 break; 784 } 785 ++s1; 786 ++s2; 787 } 788 789 /* setup for fix-up */ 790 limit1=start1+length1; 791 limit2=start2+length2; 792 } 793 794 /* if both values are in or above the surrogate range, fix them up */ 795 if(c1>=0xd800 && c2>=0xd800 && codePointOrder) { 796 /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */ 797 if( 798 (c1<=0xdbff && (s1+1)!=limit1 && U16_IS_TRAIL(*(s1+1))) || 799 (U16_IS_TRAIL(c1) && start1!=s1 && U16_IS_LEAD(*(s1-1))) 800 ) { 801 /* part of a surrogate pair, leave >=d800 */ 802 } else { 803 /* BMP code point - may be surrogate code point - make <d800 */ 804 c1-=0x2800; 805 } 806 807 if( 808 (c2<=0xdbff && (s2+1)!=limit2 && U16_IS_TRAIL(*(s2+1))) || 809 (U16_IS_TRAIL(c2) && start2!=s2 && U16_IS_LEAD(*(s2-1))) 810 ) { 811 /* part of a surrogate pair, leave >=d800 */ 812 } else { 813 /* BMP code point - may be surrogate code point - make <d800 */ 814 c2-=0x2800; 815 } 816 } 817 818 /* now c1 and c2 are in the requested (code unit or code point) order */ 819 return (int32_t)c1-(int32_t)c2; 820 } 821 822 /* 823 * Compare two strings as presented by UCharIterators. 824 * Use code unit or code point order. 825 * When the function returns, it is undefined where the iterators 826 * have stopped. 827 */ 828 U_CAPI int32_t U_EXPORT2 829 u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder) { 830 UChar32 c1, c2; 831 832 /* argument checking */ 833 if(iter1==nullptr || iter2==nullptr) { 834 return 0; /* bad arguments */ 835 } 836 if(iter1==iter2) { 837 return 0; /* identical iterators */ 838 } 839 840 /* reset iterators to start? */ 841 iter1->move(iter1, 0, UITER_START); 842 iter2->move(iter2, 0, UITER_START); 843 844 /* compare identical prefixes - they do not need to be fixed up */ 845 for(;;) { 846 c1=iter1->next(iter1); 847 c2=iter2->next(iter2); 848 if(c1!=c2) { 849 break; 850 } 851 if(c1==-1) { 852 return 0; 853 } 854 } 855 856 /* if both values are in or above the surrogate range, fix them up */ 857 if(c1>=0xd800 && c2>=0xd800 && codePointOrder) { 858 /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */ 859 if( 860 (c1<=0xdbff && U16_IS_TRAIL(iter1->current(iter1))) || 861 (U16_IS_TRAIL(c1) && (iter1->previous(iter1), U16_IS_LEAD(iter1->previous(iter1)))) 862 ) { 863 /* part of a surrogate pair, leave >=d800 */ 864 } else { 865 /* BMP code point - may be surrogate code point - make <d800 */ 866 c1-=0x2800; 867 } 868 869 if( 870 (c2<=0xdbff && U16_IS_TRAIL(iter2->current(iter2))) || 871 (U16_IS_TRAIL(c2) && (iter2->previous(iter2), U16_IS_LEAD(iter2->previous(iter2)))) 872 ) { 873 /* part of a surrogate pair, leave >=d800 */ 874 } else { 875 /* BMP code point - may be surrogate code point - make <d800 */ 876 c2-=0x2800; 877 } 878 } 879 880 /* now c1 and c2 are in the requested (code unit or code point) order */ 881 return (int32_t)c1-(int32_t)c2; 882 } 883 884 #if 0 885 /* 886 * u_strCompareIter() does not leave the iterators _on_ the different units. 887 * This is possible but would cost a few extra indirect function calls to back 888 * up if the last unit (c1 or c2 respectively) was >=0. 889 * 890 * Consistently leaving them _behind_ the different units is not an option 891 * because the current "unit" is the end of the string if that is reached, 892 * and in such a case the iterator does not move. 893 * For example, when comparing "ab" with "abc", both iterators rest _on_ the end 894 * of their strings. Calling previous() on each does not move them to where 895 * the comparison fails. 896 * 897 * So the simplest semantics is to not define where the iterators end up. 898 * 899 * The following fragment is part of what would need to be done for backing up. 900 */ 901 void fragment { 902 /* iff a surrogate is part of a surrogate pair, leave >=d800 */ 903 if(c1<=0xdbff) { 904 if(!U16_IS_TRAIL(iter1->current(iter1))) { 905 /* lead surrogate code point - make <d800 */ 906 c1-=0x2800; 907 } 908 } else if(c1<=0xdfff) { 909 int32_t idx=iter1->getIndex(iter1, UITER_CURRENT); 910 iter1->previous(iter1); /* ==c1 */ 911 if(!U16_IS_LEAD(iter1->previous(iter1))) { 912 /* trail surrogate code point - make <d800 */ 913 c1-=0x2800; 914 } 915 /* go back to behind where the difference is */ 916 iter1->move(iter1, idx, UITER_ZERO); 917 } else /* 0xe000<=c1<=0xffff */ { 918 /* BMP code point - make <d800 */ 919 c1-=0x2800; 920 } 921 } 922 #endif 923 924 U_CAPI int32_t U_EXPORT2 925 u_strCompare(const char16_t *s1, int32_t length1, 926 const char16_t *s2, int32_t length2, 927 UBool codePointOrder) { 928 /* argument checking */ 929 if(s1==nullptr || length1<-1 || s2==nullptr || length2<-1) { 930 return 0; 931 } 932 return uprv_strCompare(s1, length1, s2, length2, false, codePointOrder); 933 } 934 935 /* String compare in code point order - u_strcmp() compares in code unit order. */ 936 U_CAPI int32_t U_EXPORT2 937 u_strcmpCodePointOrder(const char16_t *s1, const char16_t *s2) { 938 return uprv_strCompare(s1, -1, s2, -1, false, true); 939 } 940 941 U_CAPI int32_t U_EXPORT2 942 u_strncmp(const char16_t *s1, 943 const char16_t *s2, 944 int32_t n) 945 { 946 if(n > 0) { 947 int32_t rc; 948 for(;;) { 949 rc = (int32_t)*s1 - (int32_t)*s2; 950 if(rc != 0 || *s1 == 0 || --n == 0) { 951 return rc; 952 } 953 ++s1; 954 ++s2; 955 } 956 } else { 957 return 0; 958 } 959 } 960 961 U_CAPI int32_t U_EXPORT2 962 u_strncmpCodePointOrder(const char16_t *s1, const char16_t *s2, int32_t n) { 963 return uprv_strCompare(s1, n, s2, n, true, true); 964 } 965 966 U_CAPI char16_t* U_EXPORT2 967 u_strcpy(char16_t *dst, 968 const char16_t *src) 969 { 970 char16_t *anchor = dst; /* save a pointer to start of dst */ 971 972 while((*(dst++) = *(src++)) != 0) { /* copy string 2 over */ 973 } 974 975 return anchor; 976 } 977 978 U_CAPI char16_t* U_EXPORT2 979 u_strncpy(char16_t *dst, 980 const char16_t *src, 981 int32_t n) 982 { 983 char16_t *anchor = dst; /* save a pointer to start of dst */ 984 985 /* copy string 2 over */ 986 while(n > 0 && (*(dst++) = *(src++)) != 0) { 987 --n; 988 } 989 990 return anchor; 991 } 992 993 U_CAPI int32_t U_EXPORT2 994 u_strlen(const char16_t *s) 995 { 996 #if U_SIZEOF_WCHAR_T == U_SIZEOF_UCHAR 997 return (int32_t)uprv_wcslen((const wchar_t *)s); 998 #else 999 const char16_t *t = s; 1000 while(*t != 0) { 1001 ++t; 1002 } 1003 return t - s; 1004 #endif 1005 } 1006 1007 U_CAPI int32_t U_EXPORT2 1008 u_countChar32(const char16_t *s, int32_t length) { 1009 int32_t count; 1010 1011 if(s==nullptr || length<-1) { 1012 return 0; 1013 } 1014 1015 count=0; 1016 if(length>=0) { 1017 while(length>0) { 1018 ++count; 1019 if(U16_IS_LEAD(*s) && length>=2 && U16_IS_TRAIL(*(s+1))) { 1020 s+=2; 1021 length-=2; 1022 } else { 1023 ++s; 1024 --length; 1025 } 1026 } 1027 } else /* length==-1 */ { 1028 char16_t c; 1029 1030 for(;;) { 1031 if((c=*s++)==0) { 1032 break; 1033 } 1034 ++count; 1035 1036 /* 1037 * sufficient to look ahead one because of UTF-16; 1038 * safe to look ahead one because at worst that would be the terminating NUL 1039 */ 1040 if(U16_IS_LEAD(c) && U16_IS_TRAIL(*s)) { 1041 ++s; 1042 } 1043 } 1044 } 1045 return count; 1046 } 1047 1048 U_CAPI UBool U_EXPORT2 1049 u_strHasMoreChar32Than(const char16_t *s, int32_t length, int32_t number) { 1050 1051 if(number<0) { 1052 return true; 1053 } 1054 if(s==nullptr || length<-1) { 1055 return false; 1056 } 1057 1058 if(length==-1) { 1059 /* s is NUL-terminated */ 1060 char16_t c; 1061 1062 /* count code points until they exceed */ 1063 for(;;) { 1064 if((c=*s++)==0) { 1065 return false; 1066 } 1067 if(number==0) { 1068 return true; 1069 } 1070 if(U16_IS_LEAD(c) && U16_IS_TRAIL(*s)) { 1071 ++s; 1072 } 1073 --number; 1074 } 1075 } else { 1076 /* length>=0 known */ 1077 const char16_t *limit; 1078 int32_t maxSupplementary; 1079 1080 /* s contains at least (length+1)/2 code points: <=2 UChars per cp */ 1081 if(((length+1)/2)>number) { 1082 return true; 1083 } 1084 1085 /* check if s does not even contain enough UChars */ 1086 maxSupplementary=length-number; 1087 if(maxSupplementary<=0) { 1088 return false; 1089 } 1090 /* there are maxSupplementary=length-number more UChars than asked-for code points */ 1091 1092 /* 1093 * count code points until they exceed and also check that there are 1094 * no more than maxSupplementary supplementary code points (char16_t pairs) 1095 */ 1096 limit=s+length; 1097 for(;;) { 1098 if(s==limit) { 1099 return false; 1100 } 1101 if(number==0) { 1102 return true; 1103 } 1104 if(U16_IS_LEAD(*s++) && s!=limit && U16_IS_TRAIL(*s)) { 1105 ++s; 1106 if(--maxSupplementary<=0) { 1107 /* too many pairs - too few code points */ 1108 return false; 1109 } 1110 } 1111 --number; 1112 } 1113 } 1114 } 1115 1116 U_CAPI char16_t * U_EXPORT2 1117 u_memcpy(char16_t *dest, const char16_t *src, int32_t count) { 1118 if(count > 0) { 1119 uprv_memcpy(dest, src, (size_t)count*U_SIZEOF_UCHAR); 1120 } 1121 return dest; 1122 } 1123 1124 U_CAPI char16_t * U_EXPORT2 1125 u_memmove(char16_t *dest, const char16_t *src, int32_t count) { 1126 if(count > 0) { 1127 uprv_memmove(dest, src, (size_t)count*U_SIZEOF_UCHAR); 1128 } 1129 return dest; 1130 } 1131 1132 U_CAPI char16_t * U_EXPORT2 1133 u_memset(char16_t *dest, char16_t c, int32_t count) { 1134 if(count > 0) { 1135 char16_t *ptr = dest; 1136 char16_t *limit = dest + count; 1137 1138 while (ptr < limit) { 1139 *(ptr++) = c; 1140 } 1141 } 1142 return dest; 1143 } 1144 1145 U_CAPI int32_t U_EXPORT2 1146 u_memcmp(const char16_t *buf1, const char16_t *buf2, int32_t count) { 1147 if(count > 0) { 1148 const char16_t *limit = buf1 + count; 1149 int32_t result; 1150 1151 while (buf1 < limit) { 1152 result = (int32_t)(uint16_t)*buf1 - (int32_t)(uint16_t)*buf2; 1153 if (result != 0) { 1154 return result; 1155 } 1156 buf1++; 1157 buf2++; 1158 } 1159 } 1160 return 0; 1161 } 1162 1163 U_CAPI int32_t U_EXPORT2 1164 u_memcmpCodePointOrder(const char16_t *s1, const char16_t *s2, int32_t count) { 1165 return uprv_strCompare(s1, count, s2, count, false, true); 1166 } 1167 1168 /* u_unescape & support fns ------------------------------------------------- */ 1169 1170 /* This map must be in ASCENDING ORDER OF THE ESCAPE CODE */ 1171 static const char16_t UNESCAPE_MAP[] = { 1172 /*" 0x22, 0x22 */ 1173 /*' 0x27, 0x27 */ 1174 /*? 0x3F, 0x3F */ 1175 /*\ 0x5C, 0x5C */ 1176 /*a*/ 0x61, 0x07, 1177 /*b*/ 0x62, 0x08, 1178 /*e*/ 0x65, 0x1b, 1179 /*f*/ 0x66, 0x0c, 1180 /*n*/ 0x6E, 0x0a, 1181 /*r*/ 0x72, 0x0d, 1182 /*t*/ 0x74, 0x09, 1183 /*v*/ 0x76, 0x0b 1184 }; 1185 enum { UNESCAPE_MAP_LENGTH = UPRV_LENGTHOF(UNESCAPE_MAP) }; 1186 1187 /* Convert one octal digit to a numeric value 0..7, or -1 on failure */ 1188 static int32_t _digit8(char16_t c) { 1189 if (c >= u'0' && c <= u'7') { 1190 return c - u'0'; 1191 } 1192 return -1; 1193 } 1194 1195 /* Convert one hex digit to a numeric value 0..F, or -1 on failure */ 1196 static int32_t _digit16(char16_t c) { 1197 if (c >= u'0' && c <= u'9') { 1198 return c - u'0'; 1199 } 1200 if (c >= u'A' && c <= u'F') { 1201 return c - (u'A' - 10); 1202 } 1203 if (c >= u'a' && c <= u'f') { 1204 return c - (u'a' - 10); 1205 } 1206 return -1; 1207 } 1208 1209 /* Parse a single escape sequence. Although this method deals in 1210 * UChars, it does not use C++ or UnicodeString. This allows it to 1211 * be used from C contexts. */ 1212 U_CAPI UChar32 U_EXPORT2 1213 u_unescapeAt(UNESCAPE_CHAR_AT charAt, 1214 int32_t *offset, 1215 int32_t length, 1216 void *context) { 1217 1218 int32_t start = *offset; 1219 UChar32 c; 1220 UChar32 result = 0; 1221 int8_t n = 0; 1222 int8_t minDig = 0; 1223 int8_t maxDig = 0; 1224 int8_t bitsPerDigit = 4; 1225 int32_t dig; 1226 UBool braces = false; 1227 1228 /* Check that offset is in range */ 1229 if (*offset < 0 || *offset >= length) { 1230 goto err; 1231 } 1232 1233 /* Fetch first char16_t after '\\' */ 1234 c = charAt((*offset)++, context); 1235 1236 /* Convert hexadecimal and octal escapes */ 1237 switch (c) { 1238 case u'u': 1239 minDig = maxDig = 4; 1240 break; 1241 case u'U': 1242 minDig = maxDig = 8; 1243 break; 1244 case u'x': 1245 minDig = 1; 1246 if (*offset < length && charAt(*offset, context) == u'{') { 1247 ++(*offset); 1248 braces = true; 1249 maxDig = 8; 1250 } else { 1251 maxDig = 2; 1252 } 1253 break; 1254 default: 1255 dig = _digit8(c); 1256 if (dig >= 0) { 1257 minDig = 1; 1258 maxDig = 3; 1259 n = 1; /* Already have first octal digit */ 1260 bitsPerDigit = 3; 1261 result = dig; 1262 } 1263 break; 1264 } 1265 if (minDig != 0) { 1266 while (*offset < length && n < maxDig) { 1267 c = charAt(*offset, context); 1268 dig = (bitsPerDigit == 3) ? _digit8(c) : _digit16(c); 1269 if (dig < 0) { 1270 break; 1271 } 1272 result = (result << bitsPerDigit) | dig; 1273 ++(*offset); 1274 ++n; 1275 } 1276 if (n < minDig) { 1277 goto err; 1278 } 1279 if (braces) { 1280 if (c != u'}') { 1281 goto err; 1282 } 1283 ++(*offset); 1284 } 1285 if (result < 0 || result >= 0x110000) { 1286 goto err; 1287 } 1288 /* If an escape sequence specifies a lead surrogate, see if 1289 * there is a trail surrogate after it, either as an escape or 1290 * as a literal. If so, join them up into a supplementary. 1291 */ 1292 if (*offset < length && U16_IS_LEAD(result)) { 1293 int32_t ahead = *offset + 1; 1294 c = charAt(*offset, context); 1295 if (c == u'\\' && ahead < length) { 1296 // Calling ourselves recursively may cause a stack overflow if 1297 // we have repeated escaped lead surrogates. 1298 // Limit the length to 11 ("x{0000DFFF}") after ahead. 1299 int32_t tailLimit = ahead + 11; 1300 if (tailLimit > length) { 1301 tailLimit = length; 1302 } 1303 c = u_unescapeAt(charAt, &ahead, tailLimit, context); 1304 } 1305 if (U16_IS_TRAIL(c)) { 1306 *offset = ahead; 1307 result = U16_GET_SUPPLEMENTARY(result, c); 1308 } 1309 } 1310 return result; 1311 } 1312 1313 /* Convert C-style escapes in table */ 1314 for (int32_t i=0; i<UNESCAPE_MAP_LENGTH; i+=2) { 1315 if (c == UNESCAPE_MAP[i]) { 1316 return UNESCAPE_MAP[i+1]; 1317 } else if (c < UNESCAPE_MAP[i]) { 1318 break; 1319 } 1320 } 1321 1322 /* Map \cX to control-X: X & 0x1F */ 1323 if (c == u'c' && *offset < length) { 1324 c = charAt((*offset)++, context); 1325 if (U16_IS_LEAD(c) && *offset < length) { 1326 char16_t c2 = charAt(*offset, context); 1327 if (U16_IS_TRAIL(c2)) { 1328 ++(*offset); 1329 c = U16_GET_SUPPLEMENTARY(c, c2); 1330 } 1331 } 1332 return 0x1F & c; 1333 } 1334 1335 /* If no special forms are recognized, then consider 1336 * the backslash to generically escape the next character. 1337 * Deal with surrogate pairs. */ 1338 if (U16_IS_LEAD(c) && *offset < length) { 1339 char16_t c2 = charAt(*offset, context); 1340 if (U16_IS_TRAIL(c2)) { 1341 ++(*offset); 1342 return U16_GET_SUPPLEMENTARY(c, c2); 1343 } 1344 } 1345 return c; 1346 1347 err: 1348 /* Invalid escape sequence */ 1349 *offset = start; /* Reset to initial value */ 1350 return (UChar32)0xFFFFFFFF; 1351 } 1352 1353 /* u_unescapeAt() callback to return a char16_t from a char* */ 1354 static char16_t U_CALLCONV 1355 _charPtr_charAt(int32_t offset, void *context) { 1356 char16_t c16; 1357 /* It would be more efficient to access the invariant tables 1358 * directly but there is no API for that. */ 1359 u_charsToUChars(static_cast<char*>(context) + offset, &c16, 1); 1360 return c16; 1361 } 1362 1363 /* Append an escape-free segment of the text; used by u_unescape() */ 1364 static void _appendUChars(char16_t *dest, int32_t destCapacity, 1365 const char *src, int32_t srcLen) { 1366 if (destCapacity < 0) { 1367 destCapacity = 0; 1368 } 1369 if (srcLen > destCapacity) { 1370 srcLen = destCapacity; 1371 } 1372 u_charsToUChars(src, dest, srcLen); 1373 } 1374 1375 /* Do an invariant conversion of char* -> char16_t*, with escape parsing */ 1376 U_CAPI int32_t U_EXPORT2 1377 u_unescape(const char *src, char16_t *dest, int32_t destCapacity) { 1378 const char *segment = src; 1379 int32_t i = 0; 1380 char c; 1381 1382 while ((c=*src) != 0) { 1383 /* '\\' intentionally written as compiler-specific 1384 * character constant to correspond to compiler-specific 1385 * char* constants. */ 1386 if (c == '\\') { 1387 int32_t lenParsed = 0; 1388 UChar32 c32; 1389 if (src != segment) { 1390 if (dest != nullptr) { 1391 _appendUChars(dest + i, destCapacity - i, 1392 segment, (int32_t)(src - segment)); 1393 } 1394 i += (int32_t)(src - segment); 1395 } 1396 ++src; /* advance past '\\' */ 1397 c32 = u_unescapeAt(_charPtr_charAt, &lenParsed, (int32_t)uprv_strlen(src), const_cast<char*>(src)); 1398 if (lenParsed == 0) { 1399 goto err; 1400 } 1401 src += lenParsed; /* advance past escape seq. */ 1402 if (dest != nullptr && U16_LENGTH(c32) <= (destCapacity - i)) { 1403 U16_APPEND_UNSAFE(dest, i, c32); 1404 } else { 1405 i += U16_LENGTH(c32); 1406 } 1407 segment = src; 1408 } else { 1409 ++src; 1410 } 1411 } 1412 if (src != segment) { 1413 if (dest != nullptr) { 1414 _appendUChars(dest + i, destCapacity - i, 1415 segment, (int32_t)(src - segment)); 1416 } 1417 i += (int32_t)(src - segment); 1418 } 1419 if (dest != nullptr && i < destCapacity) { 1420 dest[i] = 0; 1421 } 1422 return i; 1423 1424 err: 1425 if (dest != nullptr && destCapacity > 0) { 1426 *dest = 0; 1427 } 1428 return 0; 1429 } 1430 1431 /* NUL-termination of strings ----------------------------------------------- */ 1432 1433 /** 1434 * NUL-terminate a string no matter what its type. 1435 * Set warning and error codes accordingly. 1436 */ 1437 #define __TERMINATE_STRING(dest, destCapacity, length, pErrorCode) UPRV_BLOCK_MACRO_BEGIN { \ 1438 if(pErrorCode!=nullptr && U_SUCCESS(*pErrorCode)) { \ 1439 /* not a public function, so no complete argument checking */ \ 1440 \ 1441 if(length<0) { \ 1442 /* assume that the caller handles this */ \ 1443 } else if(length<destCapacity) { \ 1444 /* NUL-terminate the string, the NUL fits */ \ 1445 dest[length]=0; \ 1446 /* unset the not-terminated warning but leave all others */ \ 1447 if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) { \ 1448 *pErrorCode=U_ZERO_ERROR; \ 1449 } \ 1450 } else if(length==destCapacity) { \ 1451 /* unable to NUL-terminate, but the string itself fit - set a warning code */ \ 1452 *pErrorCode=U_STRING_NOT_TERMINATED_WARNING; \ 1453 } else /* length>destCapacity */ { \ 1454 /* even the string itself did not fit - set an error code */ \ 1455 *pErrorCode=U_BUFFER_OVERFLOW_ERROR; \ 1456 } \ 1457 } \ 1458 } UPRV_BLOCK_MACRO_END 1459 1460 U_CAPI char16_t U_EXPORT2 1461 u_asciiToUpper(char16_t c) { 1462 if (u'a' <= c && c <= u'z') { 1463 c = c + u'A' - u'a'; 1464 } 1465 return c; 1466 } 1467 1468 U_CAPI int32_t U_EXPORT2 1469 u_terminateUChars(char16_t *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) { 1470 __TERMINATE_STRING(dest, destCapacity, length, pErrorCode); 1471 return length; 1472 } 1473 1474 U_CAPI int32_t U_EXPORT2 1475 u_terminateChars(char *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) { 1476 __TERMINATE_STRING(dest, destCapacity, length, pErrorCode); 1477 return length; 1478 } 1479 1480 U_CAPI int32_t U_EXPORT2 1481 u_terminateUChar32s(UChar32 *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) { 1482 __TERMINATE_STRING(dest, destCapacity, length, pErrorCode); 1483 return length; 1484 } 1485 1486 U_CAPI int32_t U_EXPORT2 1487 u_terminateWChars(wchar_t *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) { 1488 __TERMINATE_STRING(dest, destCapacity, length, pErrorCode); 1489 return length; 1490 } 1491 1492 // Compute the hash code for a string -------------------------------------- *** 1493 1494 // Moved here from uhash.c so that UnicodeString::hashCode() does not depend 1495 // on UHashtable code. 1496 1497 /* 1498 Compute the hash by iterating sparsely over about 32 (up to 63) 1499 characters spaced evenly through the string. For each character, 1500 multiply the previous hash value by a prime number and add the new 1501 character in, like a linear congruential random number generator, 1502 producing a pseudorandom deterministic value well distributed over 1503 the output range. [LIU] 1504 */ 1505 1506 #define STRING_HASH(TYPE, STR, STRLEN, DEREF) UPRV_BLOCK_MACRO_BEGIN { \ 1507 uint32_t hash = 0; \ 1508 const TYPE *p = (const TYPE*) STR; \ 1509 if (p != nullptr) { \ 1510 int32_t len = (int32_t)(STRLEN); \ 1511 int32_t inc = ((len - 32) / 32) + 1; \ 1512 const TYPE *limit = p + len; \ 1513 while (p<limit) { \ 1514 hash = (hash * 37) + DEREF; \ 1515 p += inc; \ 1516 } \ 1517 } \ 1518 return static_cast<int32_t>(hash); \ 1519 } UPRV_BLOCK_MACRO_END 1520 1521 /* Used by UnicodeString to compute its hashcode - Not public API. */ 1522 U_CAPI int32_t U_EXPORT2 1523 ustr_hashUCharsN(const char16_t *str, int32_t length) { 1524 STRING_HASH(char16_t, str, length, *p); 1525 } 1526 1527 U_CAPI int32_t U_EXPORT2 1528 ustr_hashCharsN(const char *str, int32_t length) { 1529 STRING_HASH(uint8_t, str, length, *p); 1530 } 1531 1532 U_CAPI int32_t U_EXPORT2 1533 ustr_hashICharsN(const char *str, int32_t length) { 1534 STRING_HASH(char, str, length, (uint8_t)uprv_tolower(*p)); 1535 }