uspoof.cpp (30771B)
1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 *************************************************************************** 5 * Copyright (C) 2008-2015, International Business Machines Corporation 6 * and others. All Rights Reserved. 7 *************************************************************************** 8 * file name: uspoof.cpp 9 * encoding: UTF-8 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2008Feb13 14 * created by: Andy Heninger 15 * 16 * Unicode Spoof Detection 17 */ 18 #include "unicode/ubidi.h" 19 #include "unicode/utypes.h" 20 #include "unicode/normalizer2.h" 21 #include "unicode/uspoof.h" 22 #include "unicode/ustring.h" 23 #include "unicode/utf16.h" 24 #include "cmemory.h" 25 #include "cstring.h" 26 #include "mutex.h" 27 #include "scriptset.h" 28 #include "uassert.h" 29 #include "ucln_in.h" 30 #include "uspoof_impl.h" 31 #include "umutex.h" 32 33 34 #if !UCONFIG_NO_NORMALIZATION 35 36 U_NAMESPACE_USE 37 38 39 // 40 // Static Objects used by the spoof impl, their thread safe initialization and their cleanup. 41 // 42 static UnicodeSet *gInclusionSet = nullptr; 43 static UnicodeSet *gRecommendedSet = nullptr; 44 static const Normalizer2 *gNfdNormalizer = nullptr; 45 static UInitOnce gSpoofInitStaticsOnce {}; 46 47 namespace { 48 49 UBool U_CALLCONV 50 uspoof_cleanup() { 51 delete gInclusionSet; 52 gInclusionSet = nullptr; 53 delete gRecommendedSet; 54 gRecommendedSet = nullptr; 55 gNfdNormalizer = nullptr; 56 gSpoofInitStaticsOnce.reset(); 57 return true; 58 } 59 60 void U_CALLCONV initializeStatics(UErrorCode &status) { 61 gInclusionSet = new UnicodeSet(); 62 gRecommendedSet = new UnicodeSet(); 63 if (gInclusionSet == nullptr || gRecommendedSet == nullptr) { 64 status = U_MEMORY_ALLOCATION_ERROR; 65 delete gInclusionSet; 66 gInclusionSet = nullptr; 67 delete gRecommendedSet; 68 gRecommendedSet = nullptr; 69 return; 70 } 71 gInclusionSet->applyIntPropertyValue(UCHAR_IDENTIFIER_TYPE, U_ID_TYPE_INCLUSION, status); 72 gRecommendedSet->applyIntPropertyValue(UCHAR_IDENTIFIER_TYPE, U_ID_TYPE_RECOMMENDED, status); 73 if (U_FAILURE(status)) { 74 delete gInclusionSet; 75 gInclusionSet = nullptr; 76 delete gRecommendedSet; 77 gRecommendedSet = nullptr; 78 return; 79 } 80 gInclusionSet->freeze(); 81 gRecommendedSet->freeze(); 82 gNfdNormalizer = Normalizer2::getNFDInstance(status); 83 ucln_i18n_registerCleanup(UCLN_I18N_SPOOF, uspoof_cleanup); 84 } 85 86 } // namespace 87 88 U_CFUNC void uspoof_internalInitStatics(UErrorCode *status) { 89 umtx_initOnce(gSpoofInitStaticsOnce, &initializeStatics, *status); 90 } 91 92 U_CAPI USpoofChecker * U_EXPORT2 93 uspoof_open(UErrorCode *status) { 94 umtx_initOnce(gSpoofInitStaticsOnce, &initializeStatics, *status); 95 if (U_FAILURE(*status)) { 96 return nullptr; 97 } 98 SpoofImpl *si = new SpoofImpl(*status); 99 if (si == nullptr) { 100 *status = U_MEMORY_ALLOCATION_ERROR; 101 return nullptr; 102 } 103 if (U_FAILURE(*status)) { 104 delete si; 105 return nullptr; 106 } 107 return si->asUSpoofChecker(); 108 } 109 110 111 U_CAPI USpoofChecker * U_EXPORT2 112 uspoof_openFromSerialized(const void *data, int32_t length, int32_t *pActualLength, 113 UErrorCode *status) { 114 if (U_FAILURE(*status)) { 115 return nullptr; 116 } 117 118 if (data == nullptr) { 119 *status = U_ILLEGAL_ARGUMENT_ERROR; 120 return nullptr; 121 } 122 123 umtx_initOnce(gSpoofInitStaticsOnce, &initializeStatics, *status); 124 if (U_FAILURE(*status)) 125 { 126 return nullptr; 127 } 128 129 SpoofData *sd = new SpoofData(data, length, *status); 130 if (sd == nullptr) { 131 *status = U_MEMORY_ALLOCATION_ERROR; 132 return nullptr; 133 } 134 135 if (U_FAILURE(*status)) { 136 delete sd; 137 return nullptr; 138 } 139 140 SpoofImpl *si = new SpoofImpl(sd, *status); 141 if (si == nullptr) { 142 *status = U_MEMORY_ALLOCATION_ERROR; 143 delete sd; // explicit delete as the destructor for si won't be called. 144 return nullptr; 145 } 146 147 if (U_FAILURE(*status)) { 148 delete si; // no delete for sd, as the si destructor will delete it. 149 return nullptr; 150 } 151 152 if (pActualLength != nullptr) { 153 *pActualLength = sd->size(); 154 } 155 return si->asUSpoofChecker(); 156 } 157 158 159 U_CAPI USpoofChecker * U_EXPORT2 160 uspoof_clone(const USpoofChecker *sc, UErrorCode *status) { 161 const SpoofImpl *src = SpoofImpl::validateThis(sc, *status); 162 if (src == nullptr) { 163 return nullptr; 164 } 165 SpoofImpl *result = new SpoofImpl(*src, *status); // copy constructor 166 if (result == nullptr) { 167 *status = U_MEMORY_ALLOCATION_ERROR; 168 return nullptr; 169 } 170 if (U_FAILURE(*status)) { 171 delete result; 172 result = nullptr; 173 } 174 return result->asUSpoofChecker(); 175 } 176 177 178 U_CAPI void U_EXPORT2 179 uspoof_close(USpoofChecker *sc) { 180 UErrorCode status = U_ZERO_ERROR; 181 SpoofImpl *This = SpoofImpl::validateThis(sc, status); 182 delete This; 183 } 184 185 186 U_CAPI void U_EXPORT2 187 uspoof_setChecks(USpoofChecker *sc, int32_t checks, UErrorCode *status) { 188 SpoofImpl *This = SpoofImpl::validateThis(sc, *status); 189 if (This == nullptr) { 190 return; 191 } 192 193 // Verify that the requested checks are all ones (bits) that 194 // are acceptable, known values. 195 if (checks & ~(USPOOF_ALL_CHECKS | USPOOF_AUX_INFO)) { 196 *status = U_ILLEGAL_ARGUMENT_ERROR; 197 return; 198 } 199 200 This->fChecks = checks; 201 } 202 203 204 U_CAPI int32_t U_EXPORT2 205 uspoof_getChecks(const USpoofChecker *sc, UErrorCode *status) { 206 const SpoofImpl *This = SpoofImpl::validateThis(sc, *status); 207 if (This == nullptr) { 208 return 0; 209 } 210 return This->fChecks; 211 } 212 213 U_CAPI void U_EXPORT2 214 uspoof_setRestrictionLevel(USpoofChecker *sc, URestrictionLevel restrictionLevel) { 215 UErrorCode status = U_ZERO_ERROR; 216 SpoofImpl *This = SpoofImpl::validateThis(sc, status); 217 if (This != nullptr) { 218 This->fRestrictionLevel = restrictionLevel; 219 This->fChecks |= USPOOF_RESTRICTION_LEVEL; 220 } 221 } 222 223 U_CAPI URestrictionLevel U_EXPORT2 224 uspoof_getRestrictionLevel(const USpoofChecker *sc) { 225 UErrorCode status = U_ZERO_ERROR; 226 const SpoofImpl *This = SpoofImpl::validateThis(sc, status); 227 if (This == nullptr) { 228 return USPOOF_UNRESTRICTIVE; 229 } 230 return This->fRestrictionLevel; 231 } 232 233 U_CAPI void U_EXPORT2 234 uspoof_setAllowedLocales(USpoofChecker *sc, const char *localesList, UErrorCode *status) { 235 SpoofImpl *This = SpoofImpl::validateThis(sc, *status); 236 if (This == nullptr) { 237 return; 238 } 239 This->setAllowedLocales(localesList, *status); 240 } 241 242 U_CAPI const char * U_EXPORT2 243 uspoof_getAllowedLocales(USpoofChecker *sc, UErrorCode *status) { 244 SpoofImpl *This = SpoofImpl::validateThis(sc, *status); 245 if (This == nullptr) { 246 return nullptr; 247 } 248 return This->getAllowedLocales(*status); 249 } 250 251 252 U_CAPI const USet * U_EXPORT2 253 uspoof_getAllowedChars(const USpoofChecker *sc, UErrorCode *status) { 254 const UnicodeSet *result = uspoof_getAllowedUnicodeSet(sc, status); 255 return result->toUSet(); 256 } 257 258 U_CAPI const UnicodeSet * U_EXPORT2 259 uspoof_getAllowedUnicodeSet(const USpoofChecker *sc, UErrorCode *status) { 260 const SpoofImpl *This = SpoofImpl::validateThis(sc, *status); 261 if (This == nullptr) { 262 return nullptr; 263 } 264 return This->fAllowedCharsSet; 265 } 266 267 268 U_CAPI void U_EXPORT2 269 uspoof_setAllowedChars(USpoofChecker *sc, const USet *chars, UErrorCode *status) { 270 const UnicodeSet *set = UnicodeSet::fromUSet(chars); 271 uspoof_setAllowedUnicodeSet(sc, set, status); 272 } 273 274 275 U_CAPI void U_EXPORT2 276 uspoof_setAllowedUnicodeSet(USpoofChecker *sc, const UnicodeSet *chars, UErrorCode *status) { 277 SpoofImpl *This = SpoofImpl::validateThis(sc, *status); 278 if (This == nullptr) { 279 return; 280 } 281 if (chars->isBogus()) { 282 *status = U_ILLEGAL_ARGUMENT_ERROR; 283 return; 284 } 285 UnicodeSet *clonedSet = chars->clone(); 286 if (clonedSet == nullptr || clonedSet->isBogus()) { 287 *status = U_MEMORY_ALLOCATION_ERROR; 288 return; 289 } 290 clonedSet->freeze(); 291 delete This->fAllowedCharsSet; 292 This->fAllowedCharsSet = clonedSet; 293 This->fChecks |= USPOOF_CHAR_LIMIT; 294 } 295 296 297 U_CAPI int32_t U_EXPORT2 298 uspoof_check(const USpoofChecker *sc, 299 const char16_t *id, int32_t length, 300 int32_t *position, 301 UErrorCode *status) { 302 303 // Backwards compatibility: 304 if (position != nullptr) { 305 *position = 0; 306 } 307 308 // Delegate to uspoof_check2 309 return uspoof_check2(sc, id, length, nullptr, status); 310 } 311 312 313 U_CAPI int32_t U_EXPORT2 314 uspoof_check2(const USpoofChecker *sc, 315 const char16_t* id, int32_t length, 316 USpoofCheckResult* checkResult, 317 UErrorCode *status) { 318 319 const SpoofImpl *This = SpoofImpl::validateThis(sc, *status); 320 if (This == nullptr) { 321 return 0; 322 } 323 if (length < -1) { 324 *status = U_ILLEGAL_ARGUMENT_ERROR; 325 return 0; 326 } 327 UnicodeString idStr((length == -1), id, length); // Aliasing constructor. 328 int32_t result = uspoof_check2UnicodeString(sc, idStr, checkResult, status); 329 return result; 330 } 331 332 333 U_CAPI int32_t U_EXPORT2 334 uspoof_checkUTF8(const USpoofChecker *sc, 335 const char *id, int32_t length, 336 int32_t *position, 337 UErrorCode *status) { 338 339 // Backwards compatibility: 340 if (position != nullptr) { 341 *position = 0; 342 } 343 344 // Delegate to uspoof_check2 345 return uspoof_check2UTF8(sc, id, length, nullptr, status); 346 } 347 348 349 U_CAPI int32_t U_EXPORT2 350 uspoof_check2UTF8(const USpoofChecker *sc, 351 const char *id, int32_t length, 352 USpoofCheckResult* checkResult, 353 UErrorCode *status) { 354 355 if (U_FAILURE(*status)) { 356 return 0; 357 } 358 UnicodeString idStr = UnicodeString::fromUTF8(StringPiece(id, length>=0 ? length : static_cast<int32_t>(uprv_strlen(id)))); 359 int32_t result = uspoof_check2UnicodeString(sc, idStr, checkResult, status); 360 return result; 361 } 362 363 364 U_CAPI int32_t U_EXPORT2 365 uspoof_areConfusable(const USpoofChecker *sc, 366 const char16_t *id1, int32_t length1, 367 const char16_t *id2, int32_t length2, 368 UErrorCode *status) { 369 SpoofImpl::validateThis(sc, *status); 370 if (U_FAILURE(*status)) { 371 return 0; 372 } 373 if (length1 < -1 || length2 < -1) { 374 *status = U_ILLEGAL_ARGUMENT_ERROR; 375 return 0; 376 } 377 378 UnicodeString id1Str((length1==-1), id1, length1); // Aliasing constructor 379 UnicodeString id2Str((length2==-1), id2, length2); // Aliasing constructor 380 return uspoof_areConfusableUnicodeString(sc, id1Str, id2Str, status); 381 } 382 383 384 U_CAPI int32_t U_EXPORT2 385 uspoof_areConfusableUTF8(const USpoofChecker *sc, 386 const char *id1, int32_t length1, 387 const char *id2, int32_t length2, 388 UErrorCode *status) { 389 SpoofImpl::validateThis(sc, *status); 390 if (U_FAILURE(*status)) { 391 return 0; 392 } 393 if (length1 < -1 || length2 < -1) { 394 *status = U_ILLEGAL_ARGUMENT_ERROR; 395 return 0; 396 } 397 UnicodeString id1Str = UnicodeString::fromUTF8(StringPiece(id1, length1>=0? length1 : static_cast<int32_t>(uprv_strlen(id1)))); 398 UnicodeString id2Str = UnicodeString::fromUTF8(StringPiece(id2, length2>=0? length2 : static_cast<int32_t>(uprv_strlen(id2)))); 399 int32_t results = uspoof_areConfusableUnicodeString(sc, id1Str, id2Str, status); 400 return results; 401 } 402 403 404 U_CAPI int32_t U_EXPORT2 405 uspoof_areConfusableUnicodeString(const USpoofChecker *sc, 406 const icu::UnicodeString &id1, 407 const icu::UnicodeString &id2, 408 UErrorCode *status) { 409 const SpoofImpl *This = SpoofImpl::validateThis(sc, *status); 410 if (U_FAILURE(*status)) { 411 return 0; 412 } 413 // 414 // See section 4 of UAX 39 for the algorithm for checking whether two strings are confusable, 415 // and for definitions of the types (single, whole, mixed-script) of confusables. 416 417 // We only care about a few of the check flags. Ignore the others. 418 // If no tests relevant to this function have been specified, return an error. 419 // TODO: is this really the right thing to do? It's probably an error on the caller's part, 420 // but logically we would just return 0 (no error). 421 if ((This->fChecks & USPOOF_CONFUSABLE) == 0) { 422 *status = U_INVALID_STATE_ERROR; 423 return 0; 424 } 425 426 // Compute the skeletons and check for confusability. 427 UnicodeString id1Skeleton; 428 uspoof_getSkeletonUnicodeString(sc, 0 /* deprecated */, id1, id1Skeleton, status); 429 UnicodeString id2Skeleton; 430 uspoof_getSkeletonUnicodeString(sc, 0 /* deprecated */, id2, id2Skeleton, status); 431 if (U_FAILURE(*status)) { return 0; } 432 if (id1Skeleton != id2Skeleton) { 433 return 0; 434 } 435 436 // If we get here, the strings are confusable. Now we just need to set the flags for the appropriate classes 437 // of confusables according to UTS 39 section 4. 438 // Start by computing the resolved script sets of id1 and id2. 439 ScriptSet id1RSS; 440 This->getResolvedScriptSet(id1, id1RSS, *status); 441 ScriptSet id2RSS; 442 This->getResolvedScriptSet(id2, id2RSS, *status); 443 444 // Turn on all applicable flags 445 int32_t result = 0; 446 if (id1RSS.intersects(id2RSS)) { 447 result |= USPOOF_SINGLE_SCRIPT_CONFUSABLE; 448 } else { 449 result |= USPOOF_MIXED_SCRIPT_CONFUSABLE; 450 if (!id1RSS.isEmpty() && !id2RSS.isEmpty()) { 451 result |= USPOOF_WHOLE_SCRIPT_CONFUSABLE; 452 } 453 } 454 455 // Turn off flags that the user doesn't want 456 if ((This->fChecks & USPOOF_SINGLE_SCRIPT_CONFUSABLE) == 0) { 457 result &= ~USPOOF_SINGLE_SCRIPT_CONFUSABLE; 458 } 459 if ((This->fChecks & USPOOF_MIXED_SCRIPT_CONFUSABLE) == 0) { 460 result &= ~USPOOF_MIXED_SCRIPT_CONFUSABLE; 461 } 462 if ((This->fChecks & USPOOF_WHOLE_SCRIPT_CONFUSABLE) == 0) { 463 result &= ~USPOOF_WHOLE_SCRIPT_CONFUSABLE; 464 } 465 466 return result; 467 } 468 469 U_CAPI uint32_t U_EXPORT2 uspoof_areBidiConfusable(const USpoofChecker *sc, UBiDiDirection direction, 470 const char16_t *id1, int32_t length1, 471 const char16_t *id2, int32_t length2, 472 UErrorCode *status) { 473 UnicodeString id1Str((length1 == -1), id1, length1); // Aliasing constructor 474 UnicodeString id2Str((length2 == -1), id2, length2); // Aliasing constructor 475 if (id1Str.isBogus() || id2Str.isBogus()) { 476 *status = U_ILLEGAL_ARGUMENT_ERROR; 477 return 0; 478 } 479 return uspoof_areBidiConfusableUnicodeString(sc, direction, id1Str, id2Str, status); 480 } 481 482 U_CAPI uint32_t U_EXPORT2 uspoof_areBidiConfusableUTF8(const USpoofChecker *sc, UBiDiDirection direction, 483 const char *id1, int32_t length1, const char *id2, 484 int32_t length2, UErrorCode *status) { 485 if (length1 < -1 || length2 < -1) { 486 *status = U_ILLEGAL_ARGUMENT_ERROR; 487 return 0; 488 } 489 UnicodeString id1Str = UnicodeString::fromUTF8( 490 StringPiece(id1, length1 >= 0 ? length1 : static_cast<int32_t>(uprv_strlen(id1)))); 491 UnicodeString id2Str = UnicodeString::fromUTF8( 492 StringPiece(id2, length2 >= 0 ? length2 : static_cast<int32_t>(uprv_strlen(id2)))); 493 return uspoof_areBidiConfusableUnicodeString(sc, direction, id1Str, id2Str, status); 494 } 495 496 U_CAPI uint32_t U_EXPORT2 uspoof_areBidiConfusableUnicodeString(const USpoofChecker *sc, 497 UBiDiDirection direction, 498 const icu::UnicodeString &id1, 499 const icu::UnicodeString &id2, 500 UErrorCode *status) { 501 const SpoofImpl *This = SpoofImpl::validateThis(sc, *status); 502 if (U_FAILURE(*status)) { 503 return 0; 504 } 505 // 506 // See section 4 of UTS 39 for the algorithm for checking whether two strings are confusable, 507 // and for definitions of the types (single, whole, mixed-script) of confusables. 508 509 // We only care about a few of the check flags. Ignore the others. 510 // If no tests relevant to this function have been specified, return an error. 511 // TODO: is this really the right thing to do? It's probably an error on the caller's part, 512 // but logically we would just return 0 (no error). 513 if ((This->fChecks & USPOOF_CONFUSABLE) == 0) { 514 *status = U_INVALID_STATE_ERROR; 515 return 0; 516 } 517 518 // Compute the skeletons and check for confusability. 519 UnicodeString id1Skeleton; 520 uspoof_getBidiSkeletonUnicodeString(sc, direction, id1, id1Skeleton, status); 521 UnicodeString id2Skeleton; 522 uspoof_getBidiSkeletonUnicodeString(sc, direction, id2, id2Skeleton, status); 523 if (U_FAILURE(*status)) { 524 return 0; 525 } 526 if (id1Skeleton != id2Skeleton) { 527 return 0; 528 } 529 530 // If we get here, the strings are confusable. Now we just need to set the flags for the appropriate 531 // classes of confusables according to UTS 39 section 4. Start by computing the resolved script sets 532 // of id1 and id2. 533 ScriptSet id1RSS; 534 This->getResolvedScriptSet(id1, id1RSS, *status); 535 ScriptSet id2RSS; 536 This->getResolvedScriptSet(id2, id2RSS, *status); 537 538 // Turn on all applicable flags 539 uint32_t result = 0; 540 if (id1RSS.intersects(id2RSS)) { 541 result |= USPOOF_SINGLE_SCRIPT_CONFUSABLE; 542 } else { 543 result |= USPOOF_MIXED_SCRIPT_CONFUSABLE; 544 if (!id1RSS.isEmpty() && !id2RSS.isEmpty()) { 545 result |= USPOOF_WHOLE_SCRIPT_CONFUSABLE; 546 } 547 } 548 549 // Turn off flags that the user doesn't want 550 return result & This->fChecks; 551 } 552 553 554 U_CAPI int32_t U_EXPORT2 555 uspoof_checkUnicodeString(const USpoofChecker *sc, 556 const icu::UnicodeString &id, 557 int32_t *position, 558 UErrorCode *status) { 559 560 // Backwards compatibility: 561 if (position != nullptr) { 562 *position = 0; 563 } 564 565 // Delegate to uspoof_check2 566 return uspoof_check2UnicodeString(sc, id, nullptr, status); 567 } 568 569 namespace { 570 571 int32_t checkImpl(const SpoofImpl* This, const UnicodeString& id, CheckResult* checkResult, UErrorCode* status) { 572 U_ASSERT(This != nullptr); 573 U_ASSERT(checkResult != nullptr); 574 checkResult->clear(); 575 int32_t result = 0; 576 577 if (0 != (This->fChecks & USPOOF_RESTRICTION_LEVEL)) { 578 URestrictionLevel idRestrictionLevel = This->getRestrictionLevel(id, *status); 579 if (idRestrictionLevel > This->fRestrictionLevel) { 580 result |= USPOOF_RESTRICTION_LEVEL; 581 } 582 checkResult->fRestrictionLevel = idRestrictionLevel; 583 } 584 585 if (0 != (This->fChecks & USPOOF_MIXED_NUMBERS)) { 586 UnicodeSet numerics; 587 This->getNumerics(id, numerics, *status); 588 if (numerics.size() > 1) { 589 result |= USPOOF_MIXED_NUMBERS; 590 } 591 checkResult->fNumerics = numerics; // UnicodeSet::operator= 592 } 593 594 if (0 != (This->fChecks & USPOOF_HIDDEN_OVERLAY)) { 595 int32_t index = This->findHiddenOverlay(id, *status); 596 if (index != -1) { 597 result |= USPOOF_HIDDEN_OVERLAY; 598 } 599 } 600 601 602 if (0 != (This->fChecks & USPOOF_CHAR_LIMIT)) { 603 int32_t i; 604 UChar32 c; 605 int32_t length = id.length(); 606 for (i=0; i<length ;) { 607 c = id.char32At(i); 608 i += U16_LENGTH(c); 609 if (!This->fAllowedCharsSet->contains(c)) { 610 result |= USPOOF_CHAR_LIMIT; 611 break; 612 } 613 } 614 } 615 616 if (0 != (This->fChecks & USPOOF_INVISIBLE)) { 617 // This check needs to be done on NFD input 618 UnicodeString nfdText; 619 gNfdNormalizer->normalize(id, nfdText, *status); 620 int32_t nfdLength = nfdText.length(); 621 622 // scan for more than one occurrence of the same non-spacing mark 623 // in a sequence of non-spacing marks. 624 int32_t i; 625 UChar32 c; 626 UChar32 firstNonspacingMark = 0; 627 UBool haveMultipleMarks = false; 628 UnicodeSet marksSeenSoFar; // Set of combining marks in a single combining sequence. 629 630 for (i=0; i<nfdLength ;) { 631 c = nfdText.char32At(i); 632 i += U16_LENGTH(c); 633 if (u_charType(c) != U_NON_SPACING_MARK) { 634 firstNonspacingMark = 0; 635 if (haveMultipleMarks) { 636 marksSeenSoFar.clear(); 637 haveMultipleMarks = false; 638 } 639 continue; 640 } 641 if (firstNonspacingMark == 0) { 642 firstNonspacingMark = c; 643 continue; 644 } 645 if (!haveMultipleMarks) { 646 marksSeenSoFar.add(firstNonspacingMark); 647 haveMultipleMarks = true; 648 } 649 if (marksSeenSoFar.contains(c)) { 650 // report the error, and stop scanning. 651 // No need to find more than the first failure. 652 result |= USPOOF_INVISIBLE; 653 break; 654 } 655 marksSeenSoFar.add(c); 656 } 657 } 658 659 checkResult->fChecks = result; 660 return checkResult->toCombinedBitmask(This->fChecks); 661 } 662 663 } // namespace 664 665 U_CAPI int32_t U_EXPORT2 666 uspoof_check2UnicodeString(const USpoofChecker *sc, 667 const icu::UnicodeString &id, 668 USpoofCheckResult* checkResult, 669 UErrorCode *status) { 670 const SpoofImpl *This = SpoofImpl::validateThis(sc, *status); 671 if (This == nullptr) { 672 return false; 673 } 674 675 if (checkResult != nullptr) { 676 CheckResult* ThisCheckResult = CheckResult::validateThis(checkResult, *status); 677 if (ThisCheckResult == nullptr) { 678 return false; 679 } 680 return checkImpl(This, id, ThisCheckResult, status); 681 } else { 682 // Stack-allocate the checkResult since this method doesn't return it 683 CheckResult stackCheckResult; 684 return checkImpl(This, id, &stackCheckResult, status); 685 } 686 } 687 688 689 U_CAPI int32_t U_EXPORT2 690 uspoof_getSkeleton(const USpoofChecker *sc, 691 uint32_t type, 692 const char16_t *id, int32_t length, 693 char16_t *dest, int32_t destCapacity, 694 UErrorCode *status) { 695 696 SpoofImpl::validateThis(sc, *status); 697 if (U_FAILURE(*status)) { 698 return 0; 699 } 700 if (length<-1 || destCapacity<0 || (destCapacity==0 && dest!=nullptr)) { 701 *status = U_ILLEGAL_ARGUMENT_ERROR; 702 return 0; 703 } 704 705 UnicodeString idStr((length==-1), id, length); // Aliasing constructor 706 UnicodeString destStr; 707 uspoof_getSkeletonUnicodeString(sc, type, idStr, destStr, status); 708 destStr.extract(dest, destCapacity, *status); 709 return destStr.length(); 710 } 711 712 U_CAPI int32_t U_EXPORT2 uspoof_getBidiSkeleton(const USpoofChecker *sc, UBiDiDirection direction, 713 const UChar *id, int32_t length, UChar *dest, 714 int32_t destCapacity, UErrorCode *status) { 715 UnicodeString idStr((length == -1), id, length); // Aliasing constructor 716 if (idStr.isBogus()) { 717 *status = U_ILLEGAL_ARGUMENT_ERROR; 718 return 0; 719 } 720 UnicodeString destStr; 721 uspoof_getBidiSkeletonUnicodeString(sc, direction, idStr, destStr, status); 722 return destStr.extract(dest, destCapacity, *status); 723 } 724 725 726 727 U_I18N_API UnicodeString &U_EXPORT2 uspoof_getBidiSkeletonUnicodeString(const USpoofChecker *sc, 728 UBiDiDirection direction, 729 const UnicodeString &id, 730 UnicodeString &dest, 731 UErrorCode *status) { 732 dest.remove(); 733 if (direction != UBIDI_LTR && direction != UBIDI_RTL) { 734 *status = U_ILLEGAL_ARGUMENT_ERROR; 735 return dest; 736 } 737 UBiDi *bidi = ubidi_open(); 738 ubidi_setPara(bidi, id.getBuffer(), id.length(), direction, 739 /*embeddingLevels*/ nullptr, status); 740 if (U_FAILURE(*status)) { 741 ubidi_close(bidi); 742 return dest; 743 } 744 UnicodeString reordered; 745 int32_t const size = ubidi_getProcessedLength(bidi); 746 UChar* const reorderedBuffer = reordered.getBuffer(size); 747 if (reorderedBuffer == nullptr) { 748 *status = U_MEMORY_ALLOCATION_ERROR; 749 ubidi_close(bidi); 750 return dest; 751 } 752 ubidi_writeReordered(bidi, reorderedBuffer, size, 753 UBIDI_KEEP_BASE_COMBINING | UBIDI_DO_MIRRORING, status); 754 reordered.releaseBuffer(size); 755 ubidi_close(bidi); 756 757 if (U_FAILURE(*status)) { 758 return dest; 759 } 760 761 // The type parameter is deprecated since ICU 58; any number may be passed. 762 constexpr uint32_t deprecatedType = 58; 763 return uspoof_getSkeletonUnicodeString(sc, deprecatedType, reordered, dest, status); 764 } 765 766 767 768 U_I18N_API UnicodeString & U_EXPORT2 769 uspoof_getSkeletonUnicodeString(const USpoofChecker *sc, 770 uint32_t /*type*/, 771 const UnicodeString &id, 772 UnicodeString &dest, 773 UErrorCode *status) { 774 const SpoofImpl *This = SpoofImpl::validateThis(sc, *status); 775 if (U_FAILURE(*status)) { 776 return dest; 777 } 778 779 UnicodeString nfdId; 780 gNfdNormalizer->normalize(id, nfdId, *status); 781 782 // Apply the skeleton mapping to the NFD normalized input string 783 // Accumulate the skeleton, possibly unnormalized, in a UnicodeString. 784 int32_t inputIndex = 0; 785 UnicodeString skelStr; 786 int32_t normalizedLen = nfdId.length(); 787 for (inputIndex=0; inputIndex < normalizedLen; ) { 788 UChar32 c = nfdId.char32At(inputIndex); 789 inputIndex += U16_LENGTH(c); 790 if (!u_hasBinaryProperty(c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT)) { 791 This->fSpoofData->confusableLookup(c, skelStr); 792 } 793 } 794 795 gNfdNormalizer->normalize(skelStr, dest, *status); 796 return dest; 797 } 798 799 U_CAPI int32_t U_EXPORT2 uspoof_getSkeletonUTF8(const USpoofChecker *sc, uint32_t type, const char *id, 800 int32_t length, char *dest, int32_t destCapacity, 801 UErrorCode *status) { 802 SpoofImpl::validateThis(sc, *status); 803 if (U_FAILURE(*status)) { 804 return 0; 805 } 806 if (length<-1 || destCapacity<0 || (destCapacity==0 && dest!=nullptr)) { 807 *status = U_ILLEGAL_ARGUMENT_ERROR; 808 return 0; 809 } 810 811 UnicodeString srcStr = UnicodeString::fromUTF8( 812 StringPiece(id, length >= 0 ? length : static_cast<int32_t>(uprv_strlen(id)))); 813 UnicodeString destStr; 814 uspoof_getSkeletonUnicodeString(sc, type, srcStr, destStr, status); 815 if (U_FAILURE(*status)) { 816 return 0; 817 } 818 819 int32_t lengthInUTF8 = 0; 820 u_strToUTF8(dest, destCapacity, &lengthInUTF8, destStr.getBuffer(), destStr.length(), status); 821 return lengthInUTF8; 822 } 823 824 U_CAPI int32_t U_EXPORT2 uspoof_getBidiSkeletonUTF8(const USpoofChecker *sc, UBiDiDirection direction, 825 const char *id, int32_t length, char *dest, 826 int32_t destCapacity, UErrorCode *status) { 827 if (length < -1) { 828 *status = U_ILLEGAL_ARGUMENT_ERROR; 829 return 0; 830 } 831 832 UnicodeString srcStr = UnicodeString::fromUTF8( 833 StringPiece(id, length >= 0 ? length : static_cast<int32_t>(uprv_strlen(id)))); 834 UnicodeString destStr; 835 uspoof_getBidiSkeletonUnicodeString(sc, direction, srcStr, destStr, status); 836 if (U_FAILURE(*status)) { 837 return 0; 838 } 839 840 int32_t lengthInUTF8 = 0; 841 u_strToUTF8(dest, destCapacity, &lengthInUTF8, destStr.getBuffer(), destStr.length(), status); 842 return lengthInUTF8; 843 } 844 845 846 U_CAPI int32_t U_EXPORT2 847 uspoof_serialize(USpoofChecker *sc,void *buf, int32_t capacity, UErrorCode *status) { 848 SpoofImpl *This = SpoofImpl::validateThis(sc, *status); 849 if (This == nullptr) { 850 U_ASSERT(U_FAILURE(*status)); 851 return 0; 852 } 853 854 return This->fSpoofData->serialize(buf, capacity, *status); 855 } 856 857 U_CAPI const USet * U_EXPORT2 858 uspoof_getInclusionSet(UErrorCode *status) { 859 umtx_initOnce(gSpoofInitStaticsOnce, &initializeStatics, *status); 860 return gInclusionSet->toUSet(); 861 } 862 863 U_CAPI const USet * U_EXPORT2 864 uspoof_getRecommendedSet(UErrorCode *status) { 865 umtx_initOnce(gSpoofInitStaticsOnce, &initializeStatics, *status); 866 return gRecommendedSet->toUSet(); 867 } 868 869 U_I18N_API const UnicodeSet * U_EXPORT2 870 uspoof_getInclusionUnicodeSet(UErrorCode *status) { 871 umtx_initOnce(gSpoofInitStaticsOnce, &initializeStatics, *status); 872 return gInclusionSet; 873 } 874 875 U_I18N_API const UnicodeSet * U_EXPORT2 876 uspoof_getRecommendedUnicodeSet(UErrorCode *status) { 877 umtx_initOnce(gSpoofInitStaticsOnce, &initializeStatics, *status); 878 return gRecommendedSet; 879 } 880 881 //------------------ 882 // CheckResult APIs 883 //------------------ 884 885 U_CAPI USpoofCheckResult* U_EXPORT2 886 uspoof_openCheckResult(UErrorCode *status) { 887 CheckResult* checkResult = new CheckResult(); 888 if (checkResult == nullptr) { 889 *status = U_MEMORY_ALLOCATION_ERROR; 890 return nullptr; 891 } 892 return checkResult->asUSpoofCheckResult(); 893 } 894 895 U_CAPI void U_EXPORT2 896 uspoof_closeCheckResult(USpoofCheckResult* checkResult) { 897 UErrorCode status = U_ZERO_ERROR; 898 CheckResult* This = CheckResult::validateThis(checkResult, status); 899 delete This; 900 } 901 902 U_CAPI int32_t U_EXPORT2 903 uspoof_getCheckResultChecks(const USpoofCheckResult *checkResult, UErrorCode *status) { 904 const CheckResult* This = CheckResult::validateThis(checkResult, *status); 905 if (U_FAILURE(*status)) { return 0; } 906 return This->fChecks; 907 } 908 909 U_CAPI URestrictionLevel U_EXPORT2 910 uspoof_getCheckResultRestrictionLevel(const USpoofCheckResult *checkResult, UErrorCode *status) { 911 const CheckResult* This = CheckResult::validateThis(checkResult, *status); 912 if (U_FAILURE(*status)) { return USPOOF_UNRESTRICTIVE; } 913 return This->fRestrictionLevel; 914 } 915 916 U_CAPI const USet* U_EXPORT2 917 uspoof_getCheckResultNumerics(const USpoofCheckResult *checkResult, UErrorCode *status) { 918 const CheckResult* This = CheckResult::validateThis(checkResult, *status); 919 if (U_FAILURE(*status)) { return nullptr; } 920 return This->fNumerics.toUSet(); 921 } 922 923 924 925 #endif // !UCONFIG_NO_NORMALIZATION