tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

uspoof.cpp (30771B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 ***************************************************************************
      5 * Copyright (C) 2008-2015, International Business Machines Corporation
      6 * and others. All Rights Reserved.
      7 ***************************************************************************
      8 *   file name:  uspoof.cpp
      9 *   encoding:   UTF-8
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 2008Feb13
     14 *   created by: Andy Heninger
     15 *
     16 *   Unicode Spoof Detection
     17 */
     18 #include "unicode/ubidi.h"
     19 #include "unicode/utypes.h"
     20 #include "unicode/normalizer2.h"
     21 #include "unicode/uspoof.h"
     22 #include "unicode/ustring.h"
     23 #include "unicode/utf16.h"
     24 #include "cmemory.h"
     25 #include "cstring.h"
     26 #include "mutex.h"
     27 #include "scriptset.h"
     28 #include "uassert.h"
     29 #include "ucln_in.h"
     30 #include "uspoof_impl.h"
     31 #include "umutex.h"
     32 
     33 
     34 #if !UCONFIG_NO_NORMALIZATION
     35 
     36 U_NAMESPACE_USE
     37 
     38 
//
// Static objects used by the spoof impl, their thread-safe initialization and their cleanup.
//
// All four are set up by initializeStatics() (run through umtx_initOnce) and
// torn down by uspoof_cleanup().
//

// Frozen set built from UCHAR_IDENTIFIER_TYPE == U_ID_TYPE_INCLUSION; owned here.
static UnicodeSet *gInclusionSet = nullptr;
// Frozen set built from UCHAR_IDENTIFIER_TYPE == U_ID_TYPE_RECOMMENDED; owned here.
static UnicodeSet *gRecommendedSet = nullptr;
// NFD normalizer obtained from Normalizer2::getNFDInstance(). uspoof_cleanup()
// only nulls this pointer; it never deletes it.
static const Normalizer2 *gNfdNormalizer = nullptr;
// Init-once guard passed to umtx_initOnce() for the statics above.
static UInitOnce gSpoofInitStaticsOnce {};
     46 
     47 namespace {
     48 
     49 UBool U_CALLCONV
     50 uspoof_cleanup() {
     51    delete gInclusionSet;
     52    gInclusionSet = nullptr;
     53    delete gRecommendedSet;
     54    gRecommendedSet = nullptr;
     55    gNfdNormalizer = nullptr;
     56    gSpoofInitStaticsOnce.reset();
     57    return true;
     58 }
     59 
     60 void U_CALLCONV initializeStatics(UErrorCode &status) {
     61    gInclusionSet = new UnicodeSet();
     62    gRecommendedSet = new UnicodeSet();
     63    if (gInclusionSet == nullptr || gRecommendedSet == nullptr) {
     64        status = U_MEMORY_ALLOCATION_ERROR;
     65        delete gInclusionSet;
     66        gInclusionSet = nullptr;
     67        delete gRecommendedSet;
     68        gRecommendedSet = nullptr;
     69        return;
     70    }
     71    gInclusionSet->applyIntPropertyValue(UCHAR_IDENTIFIER_TYPE, U_ID_TYPE_INCLUSION, status);
     72    gRecommendedSet->applyIntPropertyValue(UCHAR_IDENTIFIER_TYPE, U_ID_TYPE_RECOMMENDED, status);
     73    if (U_FAILURE(status)) {
     74        delete gInclusionSet;
     75        gInclusionSet = nullptr;
     76        delete gRecommendedSet;
     77        gRecommendedSet = nullptr;
     78        return;
     79    }
     80    gInclusionSet->freeze();
     81    gRecommendedSet->freeze();
     82    gNfdNormalizer = Normalizer2::getNFDInstance(status);
     83    ucln_i18n_registerCleanup(UCLN_I18N_SPOOF, uspoof_cleanup);
     84 }
     85 
     86 }  // namespace
     87 
     88 U_CFUNC void uspoof_internalInitStatics(UErrorCode *status) {
     89    umtx_initOnce(gSpoofInitStaticsOnce, &initializeStatics, *status);
     90 }
     91 
     92 U_CAPI USpoofChecker * U_EXPORT2
     93 uspoof_open(UErrorCode *status) {
     94    umtx_initOnce(gSpoofInitStaticsOnce, &initializeStatics, *status);
     95    if (U_FAILURE(*status)) {
     96        return nullptr;
     97    }
     98    SpoofImpl *si = new SpoofImpl(*status);
     99    if (si == nullptr) {
    100        *status = U_MEMORY_ALLOCATION_ERROR;
    101        return nullptr;
    102    }
    103    if (U_FAILURE(*status)) {
    104        delete si;
    105        return nullptr;
    106    }
    107    return si->asUSpoofChecker();
    108 }
    109 
    110 
    111 U_CAPI USpoofChecker * U_EXPORT2
    112 uspoof_openFromSerialized(const void *data, int32_t length, int32_t *pActualLength,
    113                          UErrorCode *status) {
    114    if (U_FAILURE(*status)) {
    115        return nullptr;
    116    }
    117 
    118    if (data == nullptr) {
    119        *status = U_ILLEGAL_ARGUMENT_ERROR;
    120        return nullptr;
    121    }
    122 
    123    umtx_initOnce(gSpoofInitStaticsOnce, &initializeStatics, *status);
    124    if (U_FAILURE(*status))
    125    {
    126        return nullptr;
    127    }
    128 
    129    SpoofData *sd = new SpoofData(data, length, *status);
    130    if (sd == nullptr) {
    131        *status = U_MEMORY_ALLOCATION_ERROR;
    132        return nullptr;
    133    }
    134 
    135    if (U_FAILURE(*status)) {
    136        delete sd;
    137        return nullptr;
    138    }
    139 
    140    SpoofImpl *si = new SpoofImpl(sd, *status);
    141    if (si == nullptr) {
    142        *status = U_MEMORY_ALLOCATION_ERROR;
    143        delete sd; // explicit delete as the destructor for si won't be called.
    144        return nullptr;
    145    }
    146 
    147    if (U_FAILURE(*status)) {
    148        delete si; // no delete for sd, as the si destructor will delete it.
    149        return nullptr;
    150    }
    151 
    152    if (pActualLength != nullptr) {
    153        *pActualLength = sd->size();
    154    }
    155    return si->asUSpoofChecker();
    156 }
    157 
    158 
    159 U_CAPI USpoofChecker * U_EXPORT2
    160 uspoof_clone(const USpoofChecker *sc, UErrorCode *status) {
    161    const SpoofImpl *src = SpoofImpl::validateThis(sc, *status);
    162    if (src == nullptr) {
    163        return nullptr;
    164    }
    165    SpoofImpl *result = new SpoofImpl(*src, *status);   // copy constructor
    166    if (result == nullptr) {
    167        *status = U_MEMORY_ALLOCATION_ERROR;
    168        return nullptr;
    169    }
    170    if (U_FAILURE(*status)) {
    171        delete result;
    172        result = nullptr;
    173    }
    174    return result->asUSpoofChecker();
    175 }
    176 
    177 
    178 U_CAPI void U_EXPORT2
    179 uspoof_close(USpoofChecker *sc) {
    180    UErrorCode status = U_ZERO_ERROR;
    181    SpoofImpl *This = SpoofImpl::validateThis(sc, status);
    182    delete This;
    183 }
    184 
    185 
    186 U_CAPI void U_EXPORT2
    187 uspoof_setChecks(USpoofChecker *sc, int32_t checks, UErrorCode *status) {
    188    SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
    189    if (This == nullptr) {
    190        return;
    191    }
    192 
    193    // Verify that the requested checks are all ones (bits) that 
    194    //   are acceptable, known values.
    195    if (checks & ~(USPOOF_ALL_CHECKS | USPOOF_AUX_INFO)) {
    196        *status = U_ILLEGAL_ARGUMENT_ERROR; 
    197        return;
    198    }
    199 
    200    This->fChecks = checks;
    201 }
    202 
    203 
    204 U_CAPI int32_t U_EXPORT2
    205 uspoof_getChecks(const USpoofChecker *sc, UErrorCode *status) {
    206    const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
    207    if (This == nullptr) {
    208        return 0;
    209    }
    210    return This->fChecks;
    211 }
    212 
    213 U_CAPI void U_EXPORT2
    214 uspoof_setRestrictionLevel(USpoofChecker *sc, URestrictionLevel restrictionLevel) {
    215    UErrorCode status = U_ZERO_ERROR;
    216    SpoofImpl *This = SpoofImpl::validateThis(sc, status);
    217    if (This != nullptr) {
    218        This->fRestrictionLevel = restrictionLevel;
    219        This->fChecks |= USPOOF_RESTRICTION_LEVEL;
    220    }
    221 }
    222 
    223 U_CAPI URestrictionLevel U_EXPORT2
    224 uspoof_getRestrictionLevel(const USpoofChecker *sc) {
    225    UErrorCode status = U_ZERO_ERROR;
    226    const SpoofImpl *This = SpoofImpl::validateThis(sc, status);
    227    if (This == nullptr) {
    228        return USPOOF_UNRESTRICTIVE;
    229    }
    230    return This->fRestrictionLevel;
    231 }
    232 
    233 U_CAPI void U_EXPORT2
    234 uspoof_setAllowedLocales(USpoofChecker *sc, const char *localesList, UErrorCode *status) {
    235    SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
    236    if (This == nullptr) {
    237        return;
    238    }
    239    This->setAllowedLocales(localesList, *status);
    240 }
    241 
    242 U_CAPI const char * U_EXPORT2
    243 uspoof_getAllowedLocales(USpoofChecker *sc, UErrorCode *status) {
    244    SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
    245    if (This == nullptr) {
    246        return nullptr;
    247    }
    248    return This->getAllowedLocales(*status);
    249 }
    250 
    251 
    252 U_CAPI const USet * U_EXPORT2
    253 uspoof_getAllowedChars(const USpoofChecker *sc, UErrorCode *status) {
    254    const UnicodeSet *result = uspoof_getAllowedUnicodeSet(sc, status);
    255    return result->toUSet();
    256 }
    257 
    258 U_CAPI const UnicodeSet * U_EXPORT2
    259 uspoof_getAllowedUnicodeSet(const USpoofChecker *sc, UErrorCode *status) {
    260    const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
    261    if (This == nullptr) {
    262        return nullptr;
    263    }
    264    return This->fAllowedCharsSet;
    265 }
    266 
    267 
    268 U_CAPI void U_EXPORT2
    269 uspoof_setAllowedChars(USpoofChecker *sc, const USet *chars, UErrorCode *status) {
    270    const UnicodeSet *set = UnicodeSet::fromUSet(chars);
    271    uspoof_setAllowedUnicodeSet(sc, set, status);
    272 }
    273 
    274 
    275 U_CAPI void U_EXPORT2
    276 uspoof_setAllowedUnicodeSet(USpoofChecker *sc, const UnicodeSet *chars, UErrorCode *status) {
    277    SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
    278    if (This == nullptr) {
    279        return;
    280    }
    281    if (chars->isBogus()) {
    282        *status = U_ILLEGAL_ARGUMENT_ERROR;
    283        return;
    284    }
    285    UnicodeSet *clonedSet = chars->clone();
    286    if (clonedSet == nullptr || clonedSet->isBogus()) {
    287        *status = U_MEMORY_ALLOCATION_ERROR;
    288        return;
    289    }
    290    clonedSet->freeze();
    291    delete This->fAllowedCharsSet;
    292    This->fAllowedCharsSet = clonedSet;
    293    This->fChecks |= USPOOF_CHAR_LIMIT;
    294 }
    295 
    296 
    297 U_CAPI int32_t U_EXPORT2
    298 uspoof_check(const USpoofChecker *sc,
    299             const char16_t *id, int32_t length,
    300             int32_t *position,
    301             UErrorCode *status) {
    302 
    303    // Backwards compatibility:
    304    if (position != nullptr) {
    305        *position = 0;
    306    }
    307 
    308    // Delegate to uspoof_check2
    309    return uspoof_check2(sc, id, length, nullptr, status);
    310 }
    311 
    312 
    313 U_CAPI int32_t U_EXPORT2
    314 uspoof_check2(const USpoofChecker *sc,
    315    const char16_t* id, int32_t length,
    316    USpoofCheckResult* checkResult,
    317    UErrorCode *status) {
    318 
    319    const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
    320    if (This == nullptr) {
    321        return 0;
    322    }
    323    if (length < -1) {
    324        *status = U_ILLEGAL_ARGUMENT_ERROR;
    325        return 0;
    326    }
    327    UnicodeString idStr((length == -1), id, length);  // Aliasing constructor.
    328    int32_t result = uspoof_check2UnicodeString(sc, idStr, checkResult, status);
    329    return result;
    330 }
    331 
    332 
    333 U_CAPI int32_t U_EXPORT2
    334 uspoof_checkUTF8(const USpoofChecker *sc,
    335                 const char *id, int32_t length,
    336                 int32_t *position,
    337                 UErrorCode *status) {
    338 
    339    // Backwards compatibility:
    340    if (position != nullptr) {
    341        *position = 0;
    342    }
    343 
    344    // Delegate to uspoof_check2
    345    return uspoof_check2UTF8(sc, id, length, nullptr, status);
    346 }
    347 
    348 
    349 U_CAPI int32_t U_EXPORT2
    350 uspoof_check2UTF8(const USpoofChecker *sc,
    351    const char *id, int32_t length,
    352    USpoofCheckResult* checkResult,
    353    UErrorCode *status) {
    354 
    355    if (U_FAILURE(*status)) {
    356        return 0;
    357    }
    358    UnicodeString idStr = UnicodeString::fromUTF8(StringPiece(id, length>=0 ? length : static_cast<int32_t>(uprv_strlen(id))));
    359    int32_t result = uspoof_check2UnicodeString(sc, idStr, checkResult, status);
    360    return result;
    361 }
    362 
    363 
    364 U_CAPI int32_t U_EXPORT2
    365 uspoof_areConfusable(const USpoofChecker *sc,
    366                     const char16_t *id1, int32_t length1,
    367                     const char16_t *id2, int32_t length2,
    368                     UErrorCode *status) {
    369    SpoofImpl::validateThis(sc, *status);
    370    if (U_FAILURE(*status)) {
    371        return 0;
    372    }
    373    if (length1 < -1 || length2 < -1) {
    374        *status = U_ILLEGAL_ARGUMENT_ERROR;
    375        return 0;
    376    }
    377        
    378    UnicodeString id1Str((length1==-1), id1, length1);  // Aliasing constructor
    379    UnicodeString id2Str((length2==-1), id2, length2);  // Aliasing constructor
    380    return uspoof_areConfusableUnicodeString(sc, id1Str, id2Str, status);
    381 }
    382 
    383 
    384 U_CAPI int32_t U_EXPORT2
    385 uspoof_areConfusableUTF8(const USpoofChecker *sc,
    386                         const char *id1, int32_t length1,
    387                         const char *id2, int32_t length2,
    388                         UErrorCode *status) {
    389    SpoofImpl::validateThis(sc, *status);
    390    if (U_FAILURE(*status)) {
    391        return 0;
    392    }
    393    if (length1 < -1 || length2 < -1) {
    394        *status = U_ILLEGAL_ARGUMENT_ERROR;
    395        return 0;
    396    }
    397    UnicodeString id1Str = UnicodeString::fromUTF8(StringPiece(id1, length1>=0? length1 : static_cast<int32_t>(uprv_strlen(id1))));
    398    UnicodeString id2Str = UnicodeString::fromUTF8(StringPiece(id2, length2>=0? length2 : static_cast<int32_t>(uprv_strlen(id2))));
    399    int32_t results = uspoof_areConfusableUnicodeString(sc, id1Str, id2Str, status);
    400    return results;
    401 }
    402 
    403 
    404 U_CAPI int32_t U_EXPORT2
    405 uspoof_areConfusableUnicodeString(const USpoofChecker *sc,
    406                                  const icu::UnicodeString &id1,
    407                                  const icu::UnicodeString &id2,
    408                                  UErrorCode *status) {
    409    const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
    410    if (U_FAILURE(*status)) {
    411        return 0;
    412    }
    413    //
    414    // See section 4 of UAX 39 for the algorithm for checking whether two strings are confusable,
    415    //   and for definitions of the types (single, whole, mixed-script) of confusables.
    416    
    417    // We only care about a few of the check flags.  Ignore the others.
    418    // If no tests relevant to this function have been specified, return an error.
    419    // TODO:  is this really the right thing to do?  It's probably an error on the caller's part,
    420    //        but logically we would just return 0 (no error).
    421    if ((This->fChecks & USPOOF_CONFUSABLE) == 0) {
    422        *status = U_INVALID_STATE_ERROR;
    423        return 0;
    424    }
    425 
    426    // Compute the skeletons and check for confusability.
    427    UnicodeString id1Skeleton;
    428    uspoof_getSkeletonUnicodeString(sc, 0 /* deprecated */, id1, id1Skeleton, status);
    429    UnicodeString id2Skeleton;
    430    uspoof_getSkeletonUnicodeString(sc, 0 /* deprecated */, id2, id2Skeleton, status);
    431    if (U_FAILURE(*status)) { return 0; }
    432    if (id1Skeleton != id2Skeleton) {
    433        return 0;
    434    }
    435 
    436    // If we get here, the strings are confusable.  Now we just need to set the flags for the appropriate classes
    437    // of confusables according to UTS 39 section 4.
    438    // Start by computing the resolved script sets of id1 and id2.
    439    ScriptSet id1RSS;
    440    This->getResolvedScriptSet(id1, id1RSS, *status);
    441    ScriptSet id2RSS;
    442    This->getResolvedScriptSet(id2, id2RSS, *status);
    443 
    444    // Turn on all applicable flags
    445    int32_t result = 0;
    446    if (id1RSS.intersects(id2RSS)) {
    447        result |= USPOOF_SINGLE_SCRIPT_CONFUSABLE;
    448    } else {
    449        result |= USPOOF_MIXED_SCRIPT_CONFUSABLE;
    450        if (!id1RSS.isEmpty() && !id2RSS.isEmpty()) {
    451            result |= USPOOF_WHOLE_SCRIPT_CONFUSABLE;
    452        }
    453    }
    454 
    455    // Turn off flags that the user doesn't want
    456    if ((This->fChecks & USPOOF_SINGLE_SCRIPT_CONFUSABLE) == 0) {
    457        result &= ~USPOOF_SINGLE_SCRIPT_CONFUSABLE;
    458    }
    459    if ((This->fChecks & USPOOF_MIXED_SCRIPT_CONFUSABLE) == 0) {
    460        result &= ~USPOOF_MIXED_SCRIPT_CONFUSABLE;
    461    }
    462    if ((This->fChecks & USPOOF_WHOLE_SCRIPT_CONFUSABLE) == 0) {
    463        result &= ~USPOOF_WHOLE_SCRIPT_CONFUSABLE;
    464    }
    465 
    466    return result;
    467 }
    468 
    469 U_CAPI uint32_t U_EXPORT2 uspoof_areBidiConfusable(const USpoofChecker *sc, UBiDiDirection direction,
    470                                                  const char16_t *id1, int32_t length1,
    471                                                  const char16_t *id2, int32_t length2,
    472                                                   UErrorCode *status) {
    473    UnicodeString id1Str((length1 == -1), id1, length1); // Aliasing constructor
    474    UnicodeString id2Str((length2 == -1), id2, length2); // Aliasing constructor
    475    if (id1Str.isBogus() || id2Str.isBogus()) {
    476        *status = U_ILLEGAL_ARGUMENT_ERROR;
    477        return 0;
    478    }
    479    return uspoof_areBidiConfusableUnicodeString(sc, direction, id1Str, id2Str, status);
    480 }
    481 
    482 U_CAPI uint32_t U_EXPORT2 uspoof_areBidiConfusableUTF8(const USpoofChecker *sc, UBiDiDirection direction,
    483                                                      const char *id1, int32_t length1, const char *id2,
    484                                                      int32_t length2, UErrorCode *status) {
    485    if (length1 < -1 || length2 < -1) {
    486        *status = U_ILLEGAL_ARGUMENT_ERROR;
    487        return 0;
    488    }
    489    UnicodeString id1Str = UnicodeString::fromUTF8(
    490        StringPiece(id1, length1 >= 0 ? length1 : static_cast<int32_t>(uprv_strlen(id1))));
    491    UnicodeString id2Str = UnicodeString::fromUTF8(
    492        StringPiece(id2, length2 >= 0 ? length2 : static_cast<int32_t>(uprv_strlen(id2))));
    493    return uspoof_areBidiConfusableUnicodeString(sc, direction, id1Str, id2Str, status);
    494 }
    495 
    496 U_CAPI uint32_t U_EXPORT2 uspoof_areBidiConfusableUnicodeString(const USpoofChecker *sc,
    497                                                               UBiDiDirection direction,
    498                                                               const icu::UnicodeString &id1,
    499                                                               const icu::UnicodeString &id2,
    500                                                               UErrorCode *status) {
    501    const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
    502    if (U_FAILURE(*status)) {
    503        return 0;
    504    }
    505    //
    506    // See section 4 of UTS 39 for the algorithm for checking whether two strings are confusable,
    507    //   and for definitions of the types (single, whole, mixed-script) of confusables.
    508 
    509    // We only care about a few of the check flags.  Ignore the others.
    510    // If no tests relevant to this function have been specified, return an error.
    511    // TODO:  is this really the right thing to do?  It's probably an error on the caller's part,
    512    //        but logically we would just return 0 (no error).
    513    if ((This->fChecks & USPOOF_CONFUSABLE) == 0) {
    514        *status = U_INVALID_STATE_ERROR;
    515        return 0;
    516    }
    517 
    518    // Compute the skeletons and check for confusability.
    519    UnicodeString id1Skeleton;
    520    uspoof_getBidiSkeletonUnicodeString(sc, direction, id1, id1Skeleton, status);
    521    UnicodeString id2Skeleton;
    522    uspoof_getBidiSkeletonUnicodeString(sc, direction, id2, id2Skeleton, status);
    523    if (U_FAILURE(*status)) {
    524        return 0;
    525    }
    526    if (id1Skeleton != id2Skeleton) {
    527        return 0;
    528    }
    529 
    530    // If we get here, the strings are confusable.  Now we just need to set the flags for the appropriate
    531    // classes of confusables according to UTS 39 section 4. Start by computing the resolved script sets
    532    // of id1 and id2.
    533    ScriptSet id1RSS;
    534    This->getResolvedScriptSet(id1, id1RSS, *status);
    535    ScriptSet id2RSS;
    536    This->getResolvedScriptSet(id2, id2RSS, *status);
    537 
    538    // Turn on all applicable flags
    539    uint32_t result = 0;
    540    if (id1RSS.intersects(id2RSS)) {
    541        result |= USPOOF_SINGLE_SCRIPT_CONFUSABLE;
    542    } else {
    543        result |= USPOOF_MIXED_SCRIPT_CONFUSABLE;
    544        if (!id1RSS.isEmpty() && !id2RSS.isEmpty()) {
    545            result |= USPOOF_WHOLE_SCRIPT_CONFUSABLE;
    546        }
    547    }
    548 
    549    // Turn off flags that the user doesn't want
    550    return result & This->fChecks;
    551 }
    552 
    553 
    554 U_CAPI int32_t U_EXPORT2
    555 uspoof_checkUnicodeString(const USpoofChecker *sc,
    556                          const icu::UnicodeString &id,
    557                          int32_t *position,
    558                          UErrorCode *status) {
    559 
    560    // Backwards compatibility:
    561    if (position != nullptr) {
    562        *position = 0;
    563    }
    564 
    565    // Delegate to uspoof_check2
    566    return uspoof_check2UnicodeString(sc, id, nullptr, status);
    567 }
    568 
    569 namespace {
    570 
    571 int32_t checkImpl(const SpoofImpl* This, const UnicodeString& id, CheckResult* checkResult, UErrorCode* status) {
    572    U_ASSERT(This != nullptr);
    573    U_ASSERT(checkResult != nullptr);
    574    checkResult->clear();
    575    int32_t result = 0;
    576 
    577    if (0 != (This->fChecks & USPOOF_RESTRICTION_LEVEL)) {
    578        URestrictionLevel idRestrictionLevel = This->getRestrictionLevel(id, *status);
    579        if (idRestrictionLevel > This->fRestrictionLevel) {
    580            result |= USPOOF_RESTRICTION_LEVEL;
    581        }
    582        checkResult->fRestrictionLevel = idRestrictionLevel;
    583    }
    584 
    585    if (0 != (This->fChecks & USPOOF_MIXED_NUMBERS)) {
    586        UnicodeSet numerics;
    587        This->getNumerics(id, numerics, *status);
    588        if (numerics.size() > 1) {
    589            result |= USPOOF_MIXED_NUMBERS;
    590        }
    591        checkResult->fNumerics = numerics;  // UnicodeSet::operator=
    592    }
    593 
    594    if (0 != (This->fChecks & USPOOF_HIDDEN_OVERLAY)) {
    595        int32_t index = This->findHiddenOverlay(id, *status);
    596        if (index != -1) {
    597            result |= USPOOF_HIDDEN_OVERLAY;
    598        }
    599    }
    600 
    601 
    602    if (0 != (This->fChecks & USPOOF_CHAR_LIMIT)) {
    603        int32_t i;
    604        UChar32 c;
    605        int32_t length = id.length();
    606        for (i=0; i<length ;) {
    607            c = id.char32At(i);
    608            i += U16_LENGTH(c);
    609            if (!This->fAllowedCharsSet->contains(c)) {
    610                result |= USPOOF_CHAR_LIMIT;
    611                break;
    612            }
    613        }
    614    }
    615 
    616    if (0 != (This->fChecks & USPOOF_INVISIBLE)) {
    617        // This check needs to be done on NFD input
    618        UnicodeString nfdText;
    619        gNfdNormalizer->normalize(id, nfdText, *status);
    620        int32_t nfdLength = nfdText.length();
    621 
    622        // scan for more than one occurrence of the same non-spacing mark
    623        // in a sequence of non-spacing marks.
    624        int32_t     i;
    625        UChar32     c;
    626        UChar32     firstNonspacingMark = 0;
    627        UBool       haveMultipleMarks = false;  
    628        UnicodeSet  marksSeenSoFar;   // Set of combining marks in a single combining sequence.
    629        
    630        for (i=0; i<nfdLength ;) {
    631            c = nfdText.char32At(i);
    632            i += U16_LENGTH(c);
    633            if (u_charType(c) != U_NON_SPACING_MARK) {
    634                firstNonspacingMark = 0;
    635                if (haveMultipleMarks) {
    636                    marksSeenSoFar.clear();
    637                    haveMultipleMarks = false;
    638                }
    639                continue;
    640            }
    641            if (firstNonspacingMark == 0) {
    642                firstNonspacingMark = c;
    643                continue;
    644            }
    645            if (!haveMultipleMarks) {
    646                marksSeenSoFar.add(firstNonspacingMark);
    647                haveMultipleMarks = true;
    648            }
    649            if (marksSeenSoFar.contains(c)) {
    650                // report the error, and stop scanning.
    651                // No need to find more than the first failure.
    652                result |= USPOOF_INVISIBLE;
    653                break;
    654            }
    655            marksSeenSoFar.add(c);
    656        }
    657    }
    658 
    659    checkResult->fChecks = result;
    660    return checkResult->toCombinedBitmask(This->fChecks);
    661 }
    662 
    663 }  // namespace
    664 
    665 U_CAPI int32_t U_EXPORT2
    666 uspoof_check2UnicodeString(const USpoofChecker *sc,
    667                          const icu::UnicodeString &id,
    668                          USpoofCheckResult* checkResult,
    669                          UErrorCode *status) {
    670    const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
    671    if (This == nullptr) {
    672        return false;
    673    }
    674 
    675    if (checkResult != nullptr) {
    676        CheckResult* ThisCheckResult = CheckResult::validateThis(checkResult, *status);
    677        if (ThisCheckResult == nullptr) {
    678            return false;
    679        }
    680        return checkImpl(This, id, ThisCheckResult, status);
    681    } else {
    682        // Stack-allocate the checkResult since this method doesn't return it
    683        CheckResult stackCheckResult;
    684        return checkImpl(This, id, &stackCheckResult, status);
    685    }
    686 }
    687 
    688 
    689 U_CAPI int32_t U_EXPORT2
    690 uspoof_getSkeleton(const USpoofChecker *sc,
    691                   uint32_t type,
    692                   const char16_t *id,  int32_t length,
    693                   char16_t *dest, int32_t destCapacity,
    694                   UErrorCode *status) {
    695 
    696    SpoofImpl::validateThis(sc, *status);
    697    if (U_FAILURE(*status)) {
    698        return 0;
    699    }
    700    if (length<-1 || destCapacity<0 || (destCapacity==0 && dest!=nullptr)) {
    701        *status = U_ILLEGAL_ARGUMENT_ERROR;
    702        return 0;
    703    }
    704 
    705    UnicodeString idStr((length==-1), id, length);  // Aliasing constructor
    706    UnicodeString destStr;
    707    uspoof_getSkeletonUnicodeString(sc, type, idStr, destStr, status);
    708    destStr.extract(dest, destCapacity, *status);
    709    return destStr.length();
    710 }
    711 
    712 U_CAPI int32_t U_EXPORT2 uspoof_getBidiSkeleton(const USpoofChecker *sc, UBiDiDirection direction,
    713                                                const UChar *id, int32_t length, UChar *dest,
    714                                                int32_t destCapacity, UErrorCode *status) {
    715    UnicodeString idStr((length == -1), id, length); // Aliasing constructor
    716    if (idStr.isBogus()) {
    717        *status = U_ILLEGAL_ARGUMENT_ERROR;
    718        return 0;
    719    }
    720    UnicodeString destStr;
    721    uspoof_getBidiSkeletonUnicodeString(sc, direction, idStr, destStr, status);
    722    return destStr.extract(dest, destCapacity, *status);
    723 }
    724 
    725 
    726 
    727 U_I18N_API UnicodeString &U_EXPORT2 uspoof_getBidiSkeletonUnicodeString(const USpoofChecker *sc,
    728                                                                        UBiDiDirection direction,
    729                                                                        const UnicodeString &id,
    730                                                                        UnicodeString &dest,
    731                                                                        UErrorCode *status) {
    732    dest.remove();
    733    if (direction != UBIDI_LTR && direction != UBIDI_RTL) {
    734      *status = U_ILLEGAL_ARGUMENT_ERROR;
    735      return dest;
    736    }
    737    UBiDi *bidi = ubidi_open();
    738    ubidi_setPara(bidi, id.getBuffer(), id.length(), direction,
    739                  /*embeddingLevels*/ nullptr, status);
    740    if (U_FAILURE(*status)) {
    741        ubidi_close(bidi);
    742        return dest;
    743    }
    744    UnicodeString reordered;
    745    int32_t const size = ubidi_getProcessedLength(bidi);
    746    UChar* const reorderedBuffer = reordered.getBuffer(size);
    747    if (reorderedBuffer == nullptr) {
    748        *status = U_MEMORY_ALLOCATION_ERROR;
    749        ubidi_close(bidi);
    750        return dest;
    751    }
    752    ubidi_writeReordered(bidi, reorderedBuffer, size,
    753                         UBIDI_KEEP_BASE_COMBINING | UBIDI_DO_MIRRORING, status);
    754    reordered.releaseBuffer(size);
    755    ubidi_close(bidi);
    756 
    757    if (U_FAILURE(*status)) {
    758        return dest;
    759    }
    760 
    761    // The type parameter is deprecated since ICU 58; any number may be passed.
    762    constexpr uint32_t deprecatedType = 58;
    763    return uspoof_getSkeletonUnicodeString(sc, deprecatedType, reordered, dest, status);
    764 }
    765 
    766 
    767 
    768 U_I18N_API UnicodeString &  U_EXPORT2
    769 uspoof_getSkeletonUnicodeString(const USpoofChecker *sc,
    770                                uint32_t /*type*/,
    771                                const UnicodeString &id,
    772                                UnicodeString &dest,
    773                                UErrorCode *status) {
    774    const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
    775    if (U_FAILURE(*status)) {
    776        return dest;
    777    }
    778 
    779    UnicodeString nfdId;
    780    gNfdNormalizer->normalize(id, nfdId, *status);
    781 
    782    // Apply the skeleton mapping to the NFD normalized input string
    783    // Accumulate the skeleton, possibly unnormalized, in a UnicodeString.
    784    int32_t inputIndex = 0;
    785    UnicodeString skelStr;
    786    int32_t normalizedLen = nfdId.length();
    787    for (inputIndex=0; inputIndex < normalizedLen; ) {
    788        UChar32 c = nfdId.char32At(inputIndex);
    789        inputIndex += U16_LENGTH(c);
    790        if (!u_hasBinaryProperty(c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT)) {
    791            This->fSpoofData->confusableLookup(c, skelStr);
    792        }
    793    }
    794 
    795    gNfdNormalizer->normalize(skelStr, dest, *status);
    796    return dest;
    797 }
    798 
    799 U_CAPI int32_t U_EXPORT2 uspoof_getSkeletonUTF8(const USpoofChecker *sc, uint32_t type, const char *id,
    800                                                int32_t length, char *dest, int32_t destCapacity,
    801                       UErrorCode *status) {
    802    SpoofImpl::validateThis(sc, *status);
    803    if (U_FAILURE(*status)) {
    804        return 0;
    805    }
    806    if (length<-1 || destCapacity<0 || (destCapacity==0 && dest!=nullptr)) {
    807        *status = U_ILLEGAL_ARGUMENT_ERROR;
    808        return 0;
    809    }
    810 
    811    UnicodeString srcStr = UnicodeString::fromUTF8(
    812        StringPiece(id, length >= 0 ? length : static_cast<int32_t>(uprv_strlen(id))));
    813    UnicodeString destStr;
    814    uspoof_getSkeletonUnicodeString(sc, type, srcStr, destStr, status);
    815    if (U_FAILURE(*status)) {
    816        return 0;
    817    }
    818 
    819    int32_t lengthInUTF8 = 0;
    820    u_strToUTF8(dest, destCapacity, &lengthInUTF8, destStr.getBuffer(), destStr.length(), status);
    821    return lengthInUTF8;
    822 }
    823 
    824 U_CAPI int32_t U_EXPORT2 uspoof_getBidiSkeletonUTF8(const USpoofChecker *sc, UBiDiDirection direction,
    825                                                    const char *id, int32_t length, char *dest,
    826                                                    int32_t destCapacity, UErrorCode *status) {
    827    if (length < -1) {
    828        *status = U_ILLEGAL_ARGUMENT_ERROR;
    829        return 0;
    830    }
    831 
    832    UnicodeString srcStr = UnicodeString::fromUTF8(
    833        StringPiece(id, length >= 0 ? length : static_cast<int32_t>(uprv_strlen(id))));
    834    UnicodeString destStr;
    835    uspoof_getBidiSkeletonUnicodeString(sc, direction, srcStr, destStr, status);
    836    if (U_FAILURE(*status)) {
    837        return 0;
    838    }
    839 
    840    int32_t lengthInUTF8 = 0;
    841    u_strToUTF8(dest, destCapacity, &lengthInUTF8, destStr.getBuffer(), destStr.length(), status);
    842    return lengthInUTF8;
    843 }
    844 
    845 
    846 U_CAPI int32_t U_EXPORT2
    847 uspoof_serialize(USpoofChecker *sc,void *buf, int32_t capacity, UErrorCode *status) {
    848    SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
    849    if (This == nullptr) {
    850        U_ASSERT(U_FAILURE(*status));
    851        return 0;
    852    }
    853 
    854    return This->fSpoofData->serialize(buf, capacity, *status);
    855 }
    856 
    857 U_CAPI const USet * U_EXPORT2
    858 uspoof_getInclusionSet(UErrorCode *status) {
    859    umtx_initOnce(gSpoofInitStaticsOnce, &initializeStatics, *status);
    860    return gInclusionSet->toUSet();
    861 }
    862 
    863 U_CAPI const USet * U_EXPORT2
    864 uspoof_getRecommendedSet(UErrorCode *status) {
    865    umtx_initOnce(gSpoofInitStaticsOnce, &initializeStatics, *status);
    866    return gRecommendedSet->toUSet();
    867 }
    868 
    869 U_I18N_API const UnicodeSet * U_EXPORT2
    870 uspoof_getInclusionUnicodeSet(UErrorCode *status) {
    871    umtx_initOnce(gSpoofInitStaticsOnce, &initializeStatics, *status);
    872    return gInclusionSet;
    873 }
    874 
    875 U_I18N_API const UnicodeSet * U_EXPORT2
    876 uspoof_getRecommendedUnicodeSet(UErrorCode *status) {
    877    umtx_initOnce(gSpoofInitStaticsOnce, &initializeStatics, *status);
    878    return gRecommendedSet;
    879 }
    880 
    881 //------------------
    882 // CheckResult APIs
    883 //------------------
    884 
    885 U_CAPI USpoofCheckResult* U_EXPORT2
    886 uspoof_openCheckResult(UErrorCode *status) {
    887    CheckResult* checkResult = new CheckResult();
    888    if (checkResult == nullptr) {
    889        *status = U_MEMORY_ALLOCATION_ERROR;
    890        return nullptr;
    891    }
    892    return checkResult->asUSpoofCheckResult();
    893 }
    894 
    895 U_CAPI void U_EXPORT2
    896 uspoof_closeCheckResult(USpoofCheckResult* checkResult) {
    897    UErrorCode status = U_ZERO_ERROR;
    898    CheckResult* This = CheckResult::validateThis(checkResult, status);
    899    delete This;
    900 }
    901 
    902 U_CAPI int32_t U_EXPORT2
    903 uspoof_getCheckResultChecks(const USpoofCheckResult *checkResult, UErrorCode *status) {
    904    const CheckResult* This = CheckResult::validateThis(checkResult, *status);
    905    if (U_FAILURE(*status)) { return 0; }
    906    return This->fChecks;
    907 }
    908 
    909 U_CAPI URestrictionLevel U_EXPORT2
    910 uspoof_getCheckResultRestrictionLevel(const USpoofCheckResult *checkResult, UErrorCode *status) {
    911    const CheckResult* This = CheckResult::validateThis(checkResult, *status);
    912    if (U_FAILURE(*status)) { return USPOOF_UNRESTRICTIVE; }
    913    return This->fRestrictionLevel;
    914 }
    915 
    916 U_CAPI const USet* U_EXPORT2
    917 uspoof_getCheckResultNumerics(const USpoofCheckResult *checkResult, UErrorCode *status) {
    918    const CheckResult* This = CheckResult::validateThis(checkResult, *status);
    919    if (U_FAILURE(*status)) { return nullptr; }
    920    return This->fNumerics.toUSet();
    921 }
    922 
    923 
    924 
    925 #endif // !UCONFIG_NO_NORMALIZATION