tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

uprops.cpp (40492B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 *
      6 *   Copyright (C) 2002-2016, International Business Machines
      7 *   Corporation and others.  All Rights Reserved.
      8 *
      9 *******************************************************************************
     10 *   file name:  uprops.cpp
     11 *   encoding:   UTF-8
     12 *   tab size:   8 (not used)
     13 *   indentation:4
     14 *
     15 *   created on: 2002feb24
     16 *   created by: Markus W. Scherer
     17 *
     18 *   Implementations for mostly non-core Unicode character properties
     19 *   stored in uprops.icu.
     20 *
     21 *   With the APIs implemented here, almost all properties files and
     22 *   their associated implementation files are used from this file,
     23 *   including those for normalization and case mappings.
     24 */
     25 
     26 #include "unicode/utypes.h"
     27 #include "unicode/uchar.h"
     28 #include "unicode/ucptrie.h"
     29 #include "unicode/udata.h"
     30 #include "unicode/unorm2.h"
     31 #include "unicode/uscript.h"
     32 #include "unicode/ustring.h"
     33 #include "unicode/utf16.h"
     34 #include "cstring.h"
     35 #include "emojiprops.h"
     36 #include "mutex.h"
     37 #include "normalizer2impl.h"
     38 #include "umutex.h"
     39 #include "ubidi_props.h"
     40 #include "uprops.h"
     41 #include "ucase.h"
     42 #include "ucln_cmn.h"
     43 #include "ulayout_props.h"
     44 #include "ustr_imp.h"
     45 
     46 U_NAMESPACE_USE
     47 
     48 // Unicode text layout properties data -----------------------------------------
     49 
     50 namespace {
     51 
     52 icu::UInitOnce gLayoutInitOnce {};
     53 UDataMemory *gLayoutMemory = nullptr;
     54 
     55 UCPTrie *gInpcTrie = nullptr;  // Indic_Positional_Category
     56 UCPTrie *gInscTrie = nullptr;  // Indic_Syllabic_Category
     57 UCPTrie *gVoTrie = nullptr;  // Vertical_Orientation
     58 
     59 int32_t gMaxInpcValue = 0;
     60 int32_t gMaxInscValue = 0;
     61 int32_t gMaxVoValue = 0;
     62 
     63 UBool U_CALLCONV uprops_cleanup() {
     64    udata_close(gLayoutMemory);
     65    gLayoutMemory = nullptr;
     66 
     67    ucptrie_close(gInpcTrie);
     68    gInpcTrie = nullptr;
     69    ucptrie_close(gInscTrie);
     70    gInscTrie = nullptr;
     71    ucptrie_close(gVoTrie);
     72    gVoTrie = nullptr;
     73 
     74    gMaxInpcValue = 0;
     75    gMaxInscValue = 0;
     76    gMaxVoValue = 0;
     77 
     78    gLayoutInitOnce.reset();
     79    return true;
     80 }
     81 
     82 UBool U_CALLCONV
     83 ulayout_isAcceptable(void * /*context*/,
     84                     const char * /* type */, const char * /*name*/,
     85                     const UDataInfo *pInfo) {
     86    return pInfo->size >= 20 &&
     87        pInfo->isBigEndian == U_IS_BIG_ENDIAN &&
     88        pInfo->charsetFamily == U_CHARSET_FAMILY &&
     89        pInfo->dataFormat[0] == ULAYOUT_FMT_0 &&
     90        pInfo->dataFormat[1] == ULAYOUT_FMT_1 &&
     91        pInfo->dataFormat[2] == ULAYOUT_FMT_2 &&
     92        pInfo->dataFormat[3] == ULAYOUT_FMT_3 &&
     93        pInfo->formatVersion[0] == 1;
     94 }
     95 
     96 // UInitOnce singleton initialization function
     97 void U_CALLCONV ulayout_load(UErrorCode &errorCode) {
     98    gLayoutMemory = udata_openChoice(
     99        nullptr, ULAYOUT_DATA_TYPE, ULAYOUT_DATA_NAME,
    100        ulayout_isAcceptable, nullptr, &errorCode);
    101    if (U_FAILURE(errorCode)) { return; }
    102 
    103    const uint8_t* inBytes = static_cast<const uint8_t*>(udata_getMemory(gLayoutMemory));
    104    const int32_t* inIndexes = reinterpret_cast<const int32_t*>(inBytes);
    105    int32_t indexesLength = inIndexes[ULAYOUT_IX_INDEXES_LENGTH];
    106    if (indexesLength < 12) {
    107        errorCode = U_INVALID_FORMAT_ERROR;  // Not enough indexes.
    108        return;
    109    }
    110    int32_t offset = indexesLength * 4;
    111    int32_t top = inIndexes[ULAYOUT_IX_INPC_TRIE_TOP];
    112    int32_t trieSize = top - offset;
    113    if (trieSize >= 16) {
    114        gInpcTrie = ucptrie_openFromBinary(
    115            UCPTRIE_TYPE_ANY, UCPTRIE_VALUE_BITS_ANY,
    116            inBytes + offset, trieSize, nullptr, &errorCode);
    117    }
    118    offset = top;
    119    top = inIndexes[ULAYOUT_IX_INSC_TRIE_TOP];
    120    trieSize = top - offset;
    121    if (trieSize >= 16) {
    122        gInscTrie = ucptrie_openFromBinary(
    123            UCPTRIE_TYPE_ANY, UCPTRIE_VALUE_BITS_ANY,
    124            inBytes + offset, trieSize, nullptr, &errorCode);
    125    }
    126    offset = top;
    127    top = inIndexes[ULAYOUT_IX_VO_TRIE_TOP];
    128    trieSize = top - offset;
    129    if (trieSize >= 16) {
    130        gVoTrie = ucptrie_openFromBinary(
    131            UCPTRIE_TYPE_ANY, UCPTRIE_VALUE_BITS_ANY,
    132            inBytes + offset, trieSize, nullptr, &errorCode);
    133    }
    134 
    135    uint32_t maxValues = inIndexes[ULAYOUT_IX_MAX_VALUES];
    136    gMaxInpcValue = maxValues >> ULAYOUT_MAX_INPC_SHIFT;
    137    gMaxInscValue = (maxValues >> ULAYOUT_MAX_INSC_SHIFT) & 0xff;
    138    gMaxVoValue = (maxValues >> ULAYOUT_MAX_VO_SHIFT) & 0xff;
    139 
    140    ucln_common_registerCleanup(UCLN_COMMON_UPROPS, uprops_cleanup);
    141 }
    142 
    143 UBool ulayout_ensureData(UErrorCode &errorCode) {
    144    if (U_FAILURE(errorCode)) { return false; }
    145    umtx_initOnce(gLayoutInitOnce, &ulayout_load, errorCode);
    146    return U_SUCCESS(errorCode);
    147 }
    148 
    149 UBool ulayout_ensureData() {
    150    UErrorCode errorCode = U_ZERO_ERROR;
    151    return ulayout_ensureData(errorCode);
    152 }
    153 
    154 }  // namespace
    155 
    156 /* general properties API functions ----------------------------------------- */
    157 
    158 struct BinaryProperty;
    159 
    160 typedef UBool BinaryPropertyContains(const BinaryProperty &prop, UChar32 c, UProperty which);
    161 
    162 struct BinaryProperty {
    163    int32_t column;  // SRC_PROPSVEC column, or "source" if mask==0
    164    uint32_t mask;
    165    BinaryPropertyContains *contains;
    166 };
    167 
    168 static UBool defaultContains(const BinaryProperty &prop, UChar32 c, UProperty /*which*/) {
    169    /* systematic, directly stored properties */
    170    return (u_getUnicodeProperties(c, prop.column)&prop.mask)!=0;
    171 }
    172 
    173 static UBool caseBinaryPropertyContains(const BinaryProperty &/*prop*/, UChar32 c, UProperty which) {
    174    return ucase_hasBinaryProperty(c, which);
    175 }
    176 
    177 static UBool isBidiControl(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    178    return ubidi_isBidiControl(c);
    179 }
    180 
    181 static UBool isMirrored(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    182    return ubidi_isMirrored(c);
    183 }
    184 
    185 static UBool isJoinControl(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    186    return ubidi_isJoinControl(c);
    187 }
    188 
    189 #if UCONFIG_NO_NORMALIZATION
    190 static UBool hasFullCompositionExclusion(const BinaryProperty &, UChar32, UProperty) {
    191    return false;
    192 }
    193 #else
    194 static UBool hasFullCompositionExclusion(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    195    // By definition, Full_Composition_Exclusion is the same as NFC_QC=No.
    196    UErrorCode errorCode=U_ZERO_ERROR;
    197    const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode);
    198    return U_SUCCESS(errorCode) && impl->isCompNo(impl->getNorm16(c));
    199 }
    200 #endif
    201 
    202 // UCHAR_NF*_INERT properties
    203 #if UCONFIG_NO_NORMALIZATION
    204 static UBool isNormInert(const BinaryProperty &, UChar32, UProperty) {
    205    return false;
    206 }
    207 #else
    208 static UBool isNormInert(const BinaryProperty &/*prop*/, UChar32 c, UProperty which) {
    209    UErrorCode errorCode=U_ZERO_ERROR;
    210    const Normalizer2 *norm2=Normalizer2Factory::getInstance(
    211        static_cast<UNormalizationMode>(which - UCHAR_NFD_INERT + UNORM_NFD), errorCode);
    212    return U_SUCCESS(errorCode) && norm2->isInert(c);
    213 }
    214 #endif
    215 
    216 #if UCONFIG_NO_NORMALIZATION
    217 static UBool changesWhenCasefolded(const BinaryProperty &, UChar32, UProperty) {
    218    return false;
    219 }
    220 #else
    221 static UBool changesWhenCasefolded(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    222    UnicodeString nfd;
    223    UErrorCode errorCode=U_ZERO_ERROR;
    224    const Normalizer2 *nfcNorm2=Normalizer2::getNFCInstance(errorCode);
    225    if(U_FAILURE(errorCode)) {
    226        return false;
    227    }
    228    if(nfcNorm2->getDecomposition(c, nfd)) {
    229        /* c has a decomposition */
    230        if(nfd.length()==1) {
    231            c=nfd[0];  /* single BMP code point */
    232        } else if(nfd.length()<=U16_MAX_LENGTH &&
    233                  nfd.length()==U16_LENGTH(c=nfd.char32At(0))
    234        ) {
    235            /* single supplementary code point */
    236        } else {
    237            c=U_SENTINEL;
    238        }
    239    } else if(c<0) {
    240        return false;  /* protect against bad input */
    241    }
    242    if(c>=0) {
    243        /* single code point */
    244        const char16_t *resultString;
    245        return ucase_toFullFolding(c, &resultString, U_FOLD_CASE_DEFAULT) >= 0;
    246    } else {
    247        /* guess some large but stack-friendly capacity */
    248        char16_t dest[2*UCASE_MAX_STRING_LENGTH];
    249        int32_t destLength;
    250        destLength=u_strFoldCase(dest, UPRV_LENGTHOF(dest),
    251                                  nfd.getBuffer(), nfd.length(),
    252                                  U_FOLD_CASE_DEFAULT, &errorCode);
    253        return U_SUCCESS(errorCode) &&
    254                       0!=u_strCompare(nfd.getBuffer(), nfd.length(),
    255                                       dest, destLength, false);
    256    }
    257 }
    258 #endif
    259 
    260 #if UCONFIG_NO_NORMALIZATION
    261 static UBool changesWhenNFKC_Casefolded(const BinaryProperty &, UChar32, UProperty) {
    262    return false;
    263 }
    264 #else
    265 static UBool changesWhenNFKC_Casefolded(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    266    UErrorCode errorCode=U_ZERO_ERROR;
    267    const Normalizer2Impl *kcf=Normalizer2Factory::getNFKC_CFImpl(errorCode);
    268    if(U_FAILURE(errorCode)) {
    269        return false;
    270    }
    271    UnicodeString src(c);
    272    UnicodeString dest;
    273    {
    274        // The ReorderingBuffer must be in a block because its destructor
    275        // needs to release dest's buffer before we look at its contents.
    276        ReorderingBuffer buffer(*kcf, dest);
    277        // Small destCapacity for NFKC_CF(c).
    278        if(buffer.init(5, errorCode)) {
    279            const char16_t *srcArray=src.getBuffer();
    280            kcf->compose(srcArray, srcArray+src.length(), false,
    281                          true, buffer, errorCode);
    282        }
    283    }
    284    return U_SUCCESS(errorCode) && dest!=src;
    285 }
    286 #endif
    287 
    288 #if UCONFIG_NO_NORMALIZATION
    289 static UBool isCanonSegmentStarter(const BinaryProperty &, UChar32, UProperty) {
    290    return false;
    291 }
    292 #else
    293 static UBool isCanonSegmentStarter(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    294    UErrorCode errorCode=U_ZERO_ERROR;
    295    const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode);
    296    return
    297        U_SUCCESS(errorCode) && impl->ensureCanonIterData(errorCode) &&
    298        impl->isCanonSegmentStarter(c);
    299 }
    300 #endif
    301 
    302 static UBool isPOSIX_alnum(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    303    return u_isalnumPOSIX(c);
    304 }
    305 
    306 static UBool isPOSIX_blank(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    307    return u_isblank(c);
    308 }
    309 
    310 static UBool isPOSIX_graph(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    311    return u_isgraphPOSIX(c);
    312 }
    313 
    314 static UBool isPOSIX_print(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    315    return u_isprintPOSIX(c);
    316 }
    317 
    318 static UBool isPOSIX_xdigit(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    319    return u_isxdigit(c);
    320 }
    321 
    322 static UBool isRegionalIndicator(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    323    // Property starts are a subset of lb=RI etc.
    324    return 0x1F1E6<=c && c<=0x1F1FF;
    325 }
    326 
    327 static UBool hasEmojiProperty(const BinaryProperty &/*prop*/, UChar32 c, UProperty which) {
    328    return EmojiProps::hasBinaryProperty(c, which);
    329 }
    330 
    331 static UBool isIDSUnaryOperator(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    332    // New in Unicode 15.1 for just two characters.
    333    return 0x2FFE<=c && c<=0x2FFF;
    334 }
    335 
    336 /** Ranges (start/limit pairs) of ID_Compat_Math_Continue (only), from UCD PropList.txt. */
    337 static constexpr UChar32 ID_COMPAT_MATH_CONTINUE[] = {
    338    0x00B2, 0x00B3 + 1,
    339    0x00B9, 0x00B9 + 1,
    340    0x2070, 0x2070 + 1,
    341    0x2074, 0x207E + 1,
    342    0x2080, 0x208E + 1
    343 };
    344 
    345 /** ID_Compat_Math_Start characters, from UCD PropList.txt. */
    346 static constexpr UChar32 ID_COMPAT_MATH_START[] = {
    347    0x2202,
    348    0x2207,
    349    0x221E,
    350    0x1D6C1,
    351    0x1D6DB,
    352    0x1D6FB,
    353    0x1D715,
    354    0x1D735,
    355    0x1D74F,
    356    0x1D76F,
    357    0x1D789,
    358    0x1D7A9,
    359    0x1D7C3
    360 };
    361 
    362 /** Ranges (start/limit pairs) of Modifier_Combining_mark (only), from UCD PropList.txt. */
    363 static constexpr UChar32 MODIFIER_COMBINING_MARK[] = {
    364    0x0654, 0x0655 + 1,
    365    0x0658, 0x0658 + 1, // U+0658
    366    0x06DC, 0x06DC + 1, // U+06DC
    367    0x06E3, 0x06E3 + 1, // U+06E3
    368    0x06E7, 0x06E8 + 1,
    369    0x08CA, 0x08CB + 1,
    370    0x08CD, 0x08CF + 1,
    371    0x08D3, 0x08D3 + 1, // U+08D3
    372    0x08F3, 0x08F3 + 1  // U+08F3
    373 };
    374 
    375 static UBool isIDCompatMathStart(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    376    if (c < ID_COMPAT_MATH_START[0]) { return false; }  // fastpath for common scripts
    377    for (UChar32 startChar : ID_COMPAT_MATH_START) {
    378        if (c == startChar) { return true; }
    379    }
    380    return false;
    381 }
    382 
    383 static UBool isIDCompatMathContinue(const BinaryProperty &prop, UChar32 c, UProperty /*which*/) {
    384    for (int32_t i = 0; i < UPRV_LENGTHOF(ID_COMPAT_MATH_CONTINUE); i += 2) {
    385        if (c < ID_COMPAT_MATH_CONTINUE[i]) { return false; }  // below range start
    386        if (c < ID_COMPAT_MATH_CONTINUE[i + 1]) { return true; }  // below range limit
    387    }
    388    return isIDCompatMathStart(prop, c, UCHAR_ID_COMPAT_MATH_START);
    389 }
    390 
    391 static UBool isModifierCombiningMark(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    392    for (int32_t i = 0; i < UPRV_LENGTHOF(MODIFIER_COMBINING_MARK); i += 2) {
    393        if (c < MODIFIER_COMBINING_MARK[i]) { return false; }  // below range start
    394        if (c < MODIFIER_COMBINING_MARK[i + 1]) { return true; }  // below range limit
    395    }
    396    return false;
    397 }
    398 
    399 static const BinaryProperty binProps[UCHAR_BINARY_LIMIT]={
    400    /*
    401     * column and mask values for binary properties from u_getUnicodeProperties().
    402     * Must be in order of corresponding UProperty,
    403     * and there must be exactly one entry per binary UProperty.
    404     *
    405     * Properties with mask==0 are handled in code.
    406     * For them, column is the UPropertySource value.
    407     */
    408    { 1,                U_MASK(UPROPS_ALPHABETIC), defaultContains },
    409    { 1,                U_MASK(UPROPS_ASCII_HEX_DIGIT), defaultContains },
    410    { UPROPS_SRC_BIDI,  0, isBidiControl },
    411    { UPROPS_SRC_BIDI,  0, isMirrored },
    412    { 1,                U_MASK(UPROPS_DASH), defaultContains },
    413    { 1,                U_MASK(UPROPS_DEFAULT_IGNORABLE_CODE_POINT), defaultContains },
    414    { 1,                U_MASK(UPROPS_DEPRECATED), defaultContains },
    415    { 1,                U_MASK(UPROPS_DIACRITIC), defaultContains },
    416    { 1,                U_MASK(UPROPS_EXTENDER), defaultContains },
    417    { UPROPS_SRC_NFC,   0, hasFullCompositionExclusion },
    418    { 1,                U_MASK(UPROPS_GRAPHEME_BASE), defaultContains },
    419    { 1,                U_MASK(UPROPS_GRAPHEME_EXTEND), defaultContains },
    420    { 1,                U_MASK(UPROPS_GRAPHEME_LINK), defaultContains },
    421    { 1,                U_MASK(UPROPS_HEX_DIGIT), defaultContains },
    422    { 1,                U_MASK(UPROPS_HYPHEN), defaultContains },
    423    { 1,                U_MASK(UPROPS_ID_CONTINUE), defaultContains },
    424    { 1,                U_MASK(UPROPS_ID_START), defaultContains },
    425    { 1,                U_MASK(UPROPS_IDEOGRAPHIC), defaultContains },
    426    { 1,                U_MASK(UPROPS_IDS_BINARY_OPERATOR), defaultContains },
    427    { 1,                U_MASK(UPROPS_IDS_TRINARY_OPERATOR), defaultContains },
    428    { UPROPS_SRC_BIDI,  0, isJoinControl },
    429    { 1,                U_MASK(UPROPS_LOGICAL_ORDER_EXCEPTION), defaultContains },
    430    { UPROPS_SRC_CASE,  0, caseBinaryPropertyContains },  // UCHAR_LOWERCASE
    431    { 1,                U_MASK(UPROPS_MATH), defaultContains },
    432    { 1,                U_MASK(UPROPS_NONCHARACTER_CODE_POINT), defaultContains },
    433    { 1,                U_MASK(UPROPS_QUOTATION_MARK), defaultContains },
    434    { 1,                U_MASK(UPROPS_RADICAL), defaultContains },
    435    { UPROPS_SRC_CASE,  0, caseBinaryPropertyContains },  // UCHAR_SOFT_DOTTED
    436    { 1,                U_MASK(UPROPS_TERMINAL_PUNCTUATION), defaultContains },
    437    { 1,                U_MASK(UPROPS_UNIFIED_IDEOGRAPH), defaultContains },
    438    { UPROPS_SRC_CASE,  0, caseBinaryPropertyContains },  // UCHAR_UPPERCASE
    439    { 1,                U_MASK(UPROPS_WHITE_SPACE), defaultContains },
    440    { 1,                U_MASK(UPROPS_XID_CONTINUE), defaultContains },
    441    { 1,                U_MASK(UPROPS_XID_START), defaultContains },
    442    { UPROPS_SRC_CASE,  0, caseBinaryPropertyContains },  // UCHAR_CASE_SENSITIVE
    443    { 1,                U_MASK(UPROPS_S_TERM), defaultContains },
    444    { 1,                U_MASK(UPROPS_VARIATION_SELECTOR), defaultContains },
    445    { UPROPS_SRC_NFC,   0, isNormInert },  // UCHAR_NFD_INERT
    446    { UPROPS_SRC_NFKC,  0, isNormInert },  // UCHAR_NFKD_INERT
    447    { UPROPS_SRC_NFC,   0, isNormInert },  // UCHAR_NFC_INERT
    448    { UPROPS_SRC_NFKC,  0, isNormInert },  // UCHAR_NFKC_INERT
    449    { UPROPS_SRC_NFC_CANON_ITER, 0, isCanonSegmentStarter },
    450    { 1,                U_MASK(UPROPS_PATTERN_SYNTAX), defaultContains },
    451    { 1,                U_MASK(UPROPS_PATTERN_WHITE_SPACE), defaultContains },
    452    { UPROPS_SRC_CHAR_AND_PROPSVEC,  0, isPOSIX_alnum },
    453    { UPROPS_SRC_CHAR,  0, isPOSIX_blank },
    454    { UPROPS_SRC_CHAR,  0, isPOSIX_graph },
    455    { UPROPS_SRC_CHAR,  0, isPOSIX_print },
    456    { UPROPS_SRC_CHAR,  0, isPOSIX_xdigit },
    457    { UPROPS_SRC_CASE,  0, caseBinaryPropertyContains },  // UCHAR_CASED
    458    { UPROPS_SRC_CASE,  0, caseBinaryPropertyContains },  // UCHAR_CASE_IGNORABLE
    459    { UPROPS_SRC_CASE,  0, caseBinaryPropertyContains },  // UCHAR_CHANGES_WHEN_LOWERCASED
    460    { UPROPS_SRC_CASE,  0, caseBinaryPropertyContains },  // UCHAR_CHANGES_WHEN_UPPERCASED
    461    { UPROPS_SRC_CASE,  0, caseBinaryPropertyContains },  // UCHAR_CHANGES_WHEN_TITLECASED
    462    { UPROPS_SRC_CASE_AND_NORM,  0, changesWhenCasefolded },
    463    { UPROPS_SRC_CASE,  0, caseBinaryPropertyContains },  // UCHAR_CHANGES_WHEN_CASEMAPPED
    464    { UPROPS_SRC_NFKC_CF, 0, changesWhenNFKC_Casefolded },
    465    { UPROPS_SRC_EMOJI, 0, hasEmojiProperty },  // UCHAR_EMOJI
    466    { UPROPS_SRC_EMOJI, 0, hasEmojiProperty },  // UCHAR_EMOJI_PRESENTATION
    467    { UPROPS_SRC_EMOJI, 0, hasEmojiProperty },  // UCHAR_EMOJI_MODIFIER
    468    { UPROPS_SRC_EMOJI, 0, hasEmojiProperty },  // UCHAR_EMOJI_MODIFIER_BASE
    469    { UPROPS_SRC_EMOJI, 0, hasEmojiProperty },  // UCHAR_EMOJI_COMPONENT
    470    { 2,                0, isRegionalIndicator },
    471    { 1,                U_MASK(UPROPS_PREPENDED_CONCATENATION_MARK), defaultContains },
    472    { UPROPS_SRC_EMOJI, 0, hasEmojiProperty },  // UCHAR_EXTENDED_PICTOGRAPHIC
    473    { UPROPS_SRC_EMOJI, 0, hasEmojiProperty },  // UCHAR_BASIC_EMOJI
    474    { UPROPS_SRC_EMOJI, 0, hasEmojiProperty },  // UCHAR_EMOJI_KEYCAP_SEQUENCE
    475    { UPROPS_SRC_EMOJI, 0, hasEmojiProperty },  // UCHAR_RGI_EMOJI_MODIFIER_SEQUENCE
    476    { UPROPS_SRC_EMOJI, 0, hasEmojiProperty },  // UCHAR_RGI_EMOJI_FLAG_SEQUENCE
    477    { UPROPS_SRC_EMOJI, 0, hasEmojiProperty },  // UCHAR_RGI_EMOJI_TAG_SEQUENCE
    478    { UPROPS_SRC_EMOJI, 0, hasEmojiProperty },  // UCHAR_RGI_EMOJI_ZWJ_SEQUENCE
    479    { UPROPS_SRC_EMOJI, 0, hasEmojiProperty },  // UCHAR_RGI_EMOJI
    480    { UPROPS_SRC_IDSU, 0, isIDSUnaryOperator }, // UCHAR_IDS_UNARY_OPERATOR
    481    { UPROPS_SRC_ID_COMPAT_MATH, 0, isIDCompatMathStart }, // UCHAR_ID_COMPAT_MATH_START
    482    { UPROPS_SRC_ID_COMPAT_MATH, 0, isIDCompatMathContinue }, // UCHAR_ID_COMPAT_MATH_CONTINUE
    483    { UPROPS_SRC_MCM, 0 , isModifierCombiningMark }, // UCHAR_MODIFIER_COMBINING_MARK
    484 };
    485 
    486 U_CAPI UBool U_EXPORT2
    487 u_hasBinaryProperty(UChar32 c, UProperty which) {
    488    /* c is range-checked in the functions that are called from here */
    489    if(which<UCHAR_BINARY_START || UCHAR_BINARY_LIMIT<=which) {
    490        /* not a known binary property */
    491        return false;
    492    } else {
    493        const BinaryProperty &prop=binProps[which];
    494        return prop.contains(prop, c, which);
    495    }
    496 }
    497 
    498 /* Checks if the Unicode character can start a Unicode identifier.*/
    499 U_CAPI UBool U_EXPORT2
    500 u_isIDStart(UChar32 c) {
    501    return u_hasBinaryProperty(c, UCHAR_ID_START);
    502 }
    503 
    504 /* Checks if the Unicode character can be a Unicode identifier part other than starting the
    505 identifier.*/
    506 U_CAPI UBool U_EXPORT2
    507 u_isIDPart(UChar32 c) {
    508    return u_hasBinaryProperty(c, UCHAR_ID_CONTINUE);
    509 }
    510 
    511 U_CAPI UBool U_EXPORT2
    512 u_stringHasBinaryProperty(const char16_t *s, int32_t length, UProperty which) {
    513    if (s == nullptr && length != 0) { return false; }
    514    if (length == 1) {
    515        return u_hasBinaryProperty(s[0], which);  // single code point
    516    } else if (length == 2 || (length < 0 && *s != 0)) {  // not empty string
    517        // first code point
    518        int32_t i = 0;
    519        UChar32 c;
    520        U16_NEXT(s, i, length, c);
    521        if (length > 0 ? i == length : s[i] == 0) {
    522            return u_hasBinaryProperty(c, which);  // single code point
    523        }
    524    }
    525    // Only call into EmojiProps for a relevant property,
    526    // so that we not unnecessarily try to load its data file.
    527    return UCHAR_BASIC_EMOJI <= which && which <= UCHAR_RGI_EMOJI &&
    528        EmojiProps::hasBinaryProperty(s, length, which);
    529 }
    530 
    531 struct IntProperty;
    532 
    533 typedef int32_t IntPropertyGetValue(const IntProperty &prop, UChar32 c, UProperty which);
    534 typedef int32_t IntPropertyGetMaxValue(const IntProperty &prop, UProperty which);
    535 
    536 struct IntProperty {
    537    int32_t column;  // SRC_PROPSVEC column, or "source" if mask==0
    538    uint32_t mask;
    539    int32_t shift;  // =maxValue if getMaxValueFromShift() is used
    540    IntPropertyGetValue *getValue;
    541    IntPropertyGetMaxValue *getMaxValue;
    542 };
    543 
    544 static int32_t defaultGetValue(const IntProperty &prop, UChar32 c, UProperty /*which*/) {
    545    /* systematic, directly stored properties */
    546    return static_cast<int32_t>(u_getUnicodeProperties(c, prop.column) & prop.mask) >> prop.shift;
    547 }
    548 
    549 static int32_t defaultGetMaxValue(const IntProperty &prop, UProperty /*which*/) {
    550    return (uprv_getMaxValues(prop.column)&prop.mask)>>prop.shift;
    551 }
    552 
    553 static int32_t getMaxValueFromShift(const IntProperty &prop, UProperty /*which*/) {
    554    return prop.shift;
    555 }
    556 
    557 static int32_t getBiDiClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    558    return static_cast<int32_t>(u_charDirection(c));
    559 }
    560 
    561 static int32_t getBiDiPairedBracketType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    562    return static_cast<int32_t>(ubidi_getPairedBracketType(c));
    563 }
    564 
    565 static int32_t biDiGetMaxValue(const IntProperty &/*prop*/, UProperty which) {
    566    return ubidi_getMaxValue(which);
    567 }
    568 
    569 static int32_t getBlock(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    570    return static_cast<int32_t>(ublock_getCode(c));
    571 }
    572 
    573 static int32_t blockGetMaxValue(const IntProperty &/*prop*/, UProperty /*which*/) {
    574    return uprv_getMaxValues(UPROPS_MAX_VALUES_OTHER_INDEX) & UPROPS_MAX_BLOCK;
    575 }
    576 
    577 #if UCONFIG_NO_NORMALIZATION
    578 static int32_t getCombiningClass(const IntProperty &, UChar32, UProperty) {
    579    return 0;
    580 }
    581 #else
    582 static int32_t getCombiningClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    583    return u_getCombiningClass(c);
    584 }
    585 #endif
    586 
    587 static int32_t getGeneralCategory(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    588    return static_cast<int32_t>(u_charType(c));
    589 }
    590 
    591 static int32_t getJoiningGroup(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    592    return ubidi_getJoiningGroup(c);
    593 }
    594 
    595 static int32_t getJoiningType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    596    return ubidi_getJoiningType(c);
    597 }
    598 
    599 static int32_t getNumericType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    600    int32_t ntv = static_cast<int32_t>(GET_NUMERIC_TYPE_VALUE(u_getMainProperties(c)));
    601    return UPROPS_NTV_GET_TYPE(ntv);
    602 }
    603 
    604 static int32_t getScript(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    605    UErrorCode errorCode=U_ZERO_ERROR;
    606    return static_cast<int32_t>(uscript_getScript(c, &errorCode));
    607 }
    608 
    609 static int32_t scriptGetMaxValue(const IntProperty &/*prop*/, UProperty /*which*/) {
    610    return uprv_getMaxValues(0)&UPROPS_MAX_SCRIPT;
    611 }
    612 
    613 /*
    614 * Map some of the Grapheme Cluster Break values to Hangul Syllable Types.
    615 * Hangul_Syllable_Type used to be fully redundant with a subset of Grapheme_Cluster_Break.
    616 *
    617 * Starting with Unicode 16, this is no longer true for HST=V vs. GCB=V in some cases:
    618 * Some Kirat Rai vowels are given GCB=V for proper grapheme clustering, but
    619 * they are of course not related to Hangul syllables.
    620 */
    621 static const UHangulSyllableType gcbToHst[]={
    622    U_HST_NOT_APPLICABLE,   /* U_GCB_OTHER */
    623    U_HST_NOT_APPLICABLE,   /* U_GCB_CONTROL */
    624    U_HST_NOT_APPLICABLE,   /* U_GCB_CR */
    625    U_HST_NOT_APPLICABLE,   /* U_GCB_EXTEND */
    626    U_HST_LEADING_JAMO,     /* U_GCB_L */
    627    U_HST_NOT_APPLICABLE,   /* U_GCB_LF */
    628    U_HST_LV_SYLLABLE,      /* U_GCB_LV */
    629    U_HST_LVT_SYLLABLE,     /* U_GCB_LVT */
    630    U_HST_TRAILING_JAMO,    /* U_GCB_T */
    631    U_HST_VOWEL_JAMO        /* U_GCB_V */
    632    /*
    633     * Omit GCB values beyond what we need for hst.
    634     * The code below checks for the array length.
    635     */
    636 };
    637 
    638 static int32_t getHangulSyllableType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    639    // Ignore supplementary code points: They all have HST=NA.
    640    // This is a simple way to handle the GCB!=hst cases since Unicode 16 (Kirat Rai vowels).
    641    if(c>0xffff) {
    642        return U_HST_NOT_APPLICABLE;
    643    }
    644    /* see comments on gcbToHst[] above */
    645    int32_t gcb = static_cast<int32_t>(u_getUnicodeProperties(c, 2) & UPROPS_GCB_MASK) >> UPROPS_GCB_SHIFT;
    646    if(gcb<UPRV_LENGTHOF(gcbToHst)) {
    647        return gcbToHst[gcb];
    648    } else {
    649        return U_HST_NOT_APPLICABLE;
    650    }
    651 }
    652 
    653 #if UCONFIG_NO_NORMALIZATION
    654 static int32_t getNormQuickCheck(const IntProperty &, UChar32, UProperty) {
    655    return 0;
    656 }
    657 #else
    658 static int32_t getNormQuickCheck(const IntProperty &/*prop*/, UChar32 c, UProperty which) {
    659    return static_cast<int32_t>(unorm_getQuickCheck(c, static_cast<UNormalizationMode>(which - UCHAR_NFD_QUICK_CHECK + UNORM_NFD)));
    660 }
    661 #endif
    662 
    663 #if UCONFIG_NO_NORMALIZATION
    664 static int32_t getLeadCombiningClass(const IntProperty &, UChar32, UProperty) {
    665    return 0;
    666 }
    667 #else
    668 static int32_t getLeadCombiningClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    669    return unorm_getFCD16(c)>>8;
    670 }
    671 #endif
    672 
    673 #if UCONFIG_NO_NORMALIZATION
    674 static int32_t getTrailCombiningClass(const IntProperty &, UChar32, UProperty) {
    675    return 0;
    676 }
    677 #else
    678 static int32_t getTrailCombiningClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
    679    return unorm_getFCD16(c)&0xff;
    680 }
    681 #endif
    682 
    683 static int32_t getInPC(const IntProperty &, UChar32 c, UProperty) {
    684    return ulayout_ensureData() && gInpcTrie != nullptr ? ucptrie_get(gInpcTrie, c) : 0;
    685 }
    686 
    687 static int32_t getInSC(const IntProperty &, UChar32 c, UProperty) {
    688    return ulayout_ensureData() && gInscTrie != nullptr ? ucptrie_get(gInscTrie, c) : 0;
    689 }
    690 
    691 static int32_t getVo(const IntProperty &, UChar32 c, UProperty) {
    692    return ulayout_ensureData() && gVoTrie != nullptr ? ucptrie_get(gVoTrie, c) : 0;
    693 }
    694 
    695 static int32_t layoutGetMaxValue(const IntProperty &/*prop*/, UProperty which) {
    696    if (!ulayout_ensureData()) { return 0; }
    697    switch (which) {
    698    case UCHAR_INDIC_POSITIONAL_CATEGORY:
    699        return gMaxInpcValue;
    700    case UCHAR_INDIC_SYLLABIC_CATEGORY:
    701        return gMaxInscValue;
    702    case UCHAR_VERTICAL_ORIENTATION:
    703        return gMaxVoValue;
    704    default:
    705        return 0;
    706    }
    707 }
    708 
    709 static int32_t getIDStatusValue(const IntProperty & /*prop*/, UChar32 c, UProperty /*which*/) {
    710    uint32_t value = u_getUnicodeProperties(c, 2) >> UPROPS_2_ID_TYPE_SHIFT;
    711    return value >= UPROPS_ID_TYPE_ALLOWED_MIN ? U_ID_STATUS_ALLOWED : U_ID_STATUS_RESTRICTED;
    712 }
    713 
    714 static const IntProperty intProps[UCHAR_INT_LIMIT-UCHAR_INT_START]={
    715    /*
    716     * column, mask and shift values for int-value properties from u_getUnicodeProperties().
    717     * Must be in order of corresponding UProperty,
    718     * and there must be exactly one entry per int UProperty.
    719     *
    720     * Properties with mask==0 are handled in code.
    721     * For them, column is the UPropertySource value.
    722     */
    723    { UPROPS_SRC_BIDI,  0, 0,                               getBiDiClass, biDiGetMaxValue },
    724    { UPROPS_SRC_BLOCK, 0, 0,                               getBlock, blockGetMaxValue },
    725    { UPROPS_SRC_NFC,   0, 0xff,                            getCombiningClass, getMaxValueFromShift },
    726    { 2,                UPROPS_DT_MASK, 0,                  defaultGetValue, defaultGetMaxValue },
    727    { 0,                UPROPS_EA_MASK, UPROPS_EA_SHIFT,    defaultGetValue, defaultGetMaxValue },
    728    { UPROPS_SRC_CHAR, 0, static_cast<int32_t>(U_CHAR_CATEGORY_COUNT) - 1, getGeneralCategory, getMaxValueFromShift },
    729    { UPROPS_SRC_BIDI,  0, 0,                               getJoiningGroup, biDiGetMaxValue },
    730    { UPROPS_SRC_BIDI,  0, 0,                               getJoiningType, biDiGetMaxValue },
    731    { 2,                UPROPS_LB_MASK, UPROPS_LB_SHIFT,    defaultGetValue, defaultGetMaxValue },
    732    { UPROPS_SRC_CHAR, 0, static_cast<int32_t>(U_NT_COUNT) - 1, getNumericType, getMaxValueFromShift },
    733    { UPROPS_SRC_PROPSVEC, 0, 0,                            getScript, scriptGetMaxValue },
    734    { UPROPS_SRC_PROPSVEC, 0, static_cast<int32_t>(U_HST_COUNT) - 1, getHangulSyllableType, getMaxValueFromShift },
    735    // UCHAR_NFD_QUICK_CHECK: max=1=YES -- never "maybe", only "no" or "yes"
    736    { UPROPS_SRC_NFC, 0, static_cast<int32_t>(UNORM_YES), getNormQuickCheck, getMaxValueFromShift },
    737    // UCHAR_NFKD_QUICK_CHECK: max=1=YES -- never "maybe", only "no" or "yes"
    738    { UPROPS_SRC_NFKC, 0, static_cast<int32_t>(UNORM_YES), getNormQuickCheck, getMaxValueFromShift },
    739    // UCHAR_NFC_QUICK_CHECK: max=2=MAYBE
    740    { UPROPS_SRC_NFC, 0, static_cast<int32_t>(UNORM_MAYBE), getNormQuickCheck, getMaxValueFromShift },
    741    // UCHAR_NFKC_QUICK_CHECK: max=2=MAYBE
    742    { UPROPS_SRC_NFKC, 0, static_cast<int32_t>(UNORM_MAYBE), getNormQuickCheck, getMaxValueFromShift },
    743    { UPROPS_SRC_NFC,   0, 0xff,                            getLeadCombiningClass, getMaxValueFromShift },
    744    { UPROPS_SRC_NFC,   0, 0xff,                            getTrailCombiningClass, getMaxValueFromShift },
    745    { 2,                UPROPS_GCB_MASK, UPROPS_GCB_SHIFT,  defaultGetValue, defaultGetMaxValue },
    746    { 2,                UPROPS_SB_MASK, UPROPS_SB_SHIFT,    defaultGetValue, defaultGetMaxValue },
    747    { 2,                UPROPS_WB_MASK, UPROPS_WB_SHIFT,    defaultGetValue, defaultGetMaxValue },
    748    { UPROPS_SRC_BIDI,  0, 0,                               getBiDiPairedBracketType, biDiGetMaxValue },
    749    { UPROPS_SRC_INPC,  0, 0,                               getInPC, layoutGetMaxValue },
    750    { UPROPS_SRC_INSC,  0, 0,                               getInSC, layoutGetMaxValue },
    751    { UPROPS_SRC_VO,    0, 0,                               getVo, layoutGetMaxValue },
    752    { UPROPS_SRC_PROPSVEC, 0, static_cast<int32_t>(U_ID_STATUS_ALLOWED), getIDStatusValue, getMaxValueFromShift },
    753    { 0,                UPROPS_INCB_MASK, UPROPS_INCB_SHIFT,defaultGetValue, defaultGetMaxValue },
    754 };
    755 
    756 U_CAPI int32_t U_EXPORT2
    757 u_getIntPropertyValue(UChar32 c, UProperty which) {
    758    if(which<UCHAR_INT_START) {
    759        if(UCHAR_BINARY_START<=which && which<UCHAR_BINARY_LIMIT) {
    760            const BinaryProperty &prop=binProps[which];
    761            return prop.contains(prop, c, which);
    762        }
    763    } else if(which<UCHAR_INT_LIMIT) {
    764        const IntProperty &prop=intProps[which-UCHAR_INT_START];
    765        return prop.getValue(prop, c, which);
    766    } else if(which==UCHAR_GENERAL_CATEGORY_MASK) {
    767        return U_MASK(u_charType(c));
    768    }
    769    return 0;  // undefined
    770 }
    771 
    772 U_CAPI int32_t U_EXPORT2
    773 u_getIntPropertyMinValue(UProperty /*which*/) {
    774    return 0; /* all binary/enum/int properties have a minimum value of 0 */
    775 }
    776 
    777 U_CAPI int32_t U_EXPORT2
    778 u_getIntPropertyMaxValue(UProperty which) {
    779    if(which<UCHAR_INT_START) {
    780        if(UCHAR_BINARY_START<=which && which<UCHAR_BINARY_LIMIT) {
    781            return 1;  // maximum true for all binary properties
    782        }
    783    } else if(which<UCHAR_INT_LIMIT) {
    784        const IntProperty &prop=intProps[which-UCHAR_INT_START];
    785        return prop.getMaxValue(prop, which);
    786    }
    787    return -1;  // undefined
    788 }
    789 
    790 U_CFUNC UPropertySource U_EXPORT2
    791 uprops_getSource(UProperty which) {
    792    if(which<UCHAR_BINARY_START) {
    793        return UPROPS_SRC_NONE; /* undefined */
    794    } else if(which<UCHAR_BINARY_LIMIT) {
    795        const BinaryProperty &prop=binProps[which];
    796        if(prop.mask!=0) {
    797            return UPROPS_SRC_PROPSVEC;
    798        } else {
    799            return (UPropertySource)prop.column;
    800        }
    801    } else if(which<UCHAR_INT_START) {
    802        return UPROPS_SRC_NONE; /* undefined */
    803    } else if(which<UCHAR_INT_LIMIT) {
    804        const IntProperty &prop=intProps[which-UCHAR_INT_START];
    805        if(prop.mask!=0) {
    806            return UPROPS_SRC_PROPSVEC;
    807        } else {
    808            return (UPropertySource)prop.column;
    809        }
    810    } else if(which<UCHAR_STRING_START) {
    811        switch(which) {
    812        case UCHAR_GENERAL_CATEGORY_MASK:
    813        case UCHAR_NUMERIC_VALUE:
    814            return UPROPS_SRC_CHAR;
    815 
    816        default:
    817            return UPROPS_SRC_NONE;
    818        }
    819    } else if(which<UCHAR_STRING_LIMIT) {
    820        switch(which) {
    821        case UCHAR_AGE:
    822            return UPROPS_SRC_PROPSVEC;
    823 
    824        case UCHAR_BIDI_MIRRORING_GLYPH:
    825            return UPROPS_SRC_BIDI;
    826 
    827        case UCHAR_CASE_FOLDING:
    828        case UCHAR_LOWERCASE_MAPPING:
    829        case UCHAR_SIMPLE_CASE_FOLDING:
    830        case UCHAR_SIMPLE_LOWERCASE_MAPPING:
    831        case UCHAR_SIMPLE_TITLECASE_MAPPING:
    832        case UCHAR_SIMPLE_UPPERCASE_MAPPING:
    833        case UCHAR_TITLECASE_MAPPING:
    834        case UCHAR_UPPERCASE_MAPPING:
    835            return UPROPS_SRC_CASE;
    836 
    837        case UCHAR_ISO_COMMENT:
    838        case UCHAR_NAME:
    839        case UCHAR_UNICODE_1_NAME:
    840            return UPROPS_SRC_NAMES;
    841 
    842        default:
    843            return UPROPS_SRC_NONE;
    844        }
    845    } else {
    846        switch(which) {
    847        case UCHAR_SCRIPT_EXTENSIONS:
    848        case UCHAR_IDENTIFIER_TYPE:
    849            return UPROPS_SRC_PROPSVEC;
    850        default:
    851            return UPROPS_SRC_NONE; /* undefined */
    852        }
    853    }
    854 }
    855 
    856 U_CFUNC void U_EXPORT2
    857 uprops_addPropertyStarts(UPropertySource src, const USetAdder *sa, UErrorCode *pErrorCode) {
    858    if (U_FAILURE(*pErrorCode)) { return; }
    859    if (src == UPROPS_SRC_ID_COMPAT_MATH) {
    860        // range limits
    861        for (UChar32 c : ID_COMPAT_MATH_CONTINUE) {
    862            sa->add(sa->set, c);
    863        }
    864        // single characters
    865        for (UChar32 c : ID_COMPAT_MATH_START) {
    866            sa->add(sa->set, c);
    867            sa->add(sa->set, c + 1);
    868        }
    869        return;
    870    }
    871    if (src == UPROPS_SRC_MCM) {
    872        // range limits
    873        for (UChar32 c : MODIFIER_COMBINING_MARK) {
    874            sa->add(sa->set, c);
    875        }
    876        return;
    877    }
    878    if (!ulayout_ensureData(*pErrorCode)) { return; }
    879    const UCPTrie *trie;
    880    switch (src) {
    881    case UPROPS_SRC_INPC:
    882        trie = gInpcTrie;
    883        break;
    884    case UPROPS_SRC_INSC:
    885        trie = gInscTrie;
    886        break;
    887    case UPROPS_SRC_VO:
    888        trie = gVoTrie;
    889        break;
    890    default:
    891        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
    892        return;
    893    }
    894 
    895    if (trie == nullptr) {
    896        *pErrorCode = U_MISSING_RESOURCE_ERROR;
    897        return;
    898    }
    899 
    900    // Add the start code point of each same-value range of the trie.
    901    UChar32 start = 0, end;
    902    while ((end = ucptrie_getRange(trie, start, UCPMAP_RANGE_NORMAL, 0,
    903                                   nullptr, nullptr, nullptr)) >= 0) {
    904        sa->add(sa->set, start);
    905        start = end + 1;
    906    }
    907 }
    908 
    909 U_CAPI bool U_EXPORT2
    910 u_hasIDType(UChar32 c, UIdentifierType type) {
    911    uint32_t typeIndex = type;  // also guards against negative type integers
    912    if (typeIndex >= UPRV_LENGTHOF(uprops_idTypeToEncoded)) {
    913        return false;
    914    }
    915    uint32_t encodedType = uprops_idTypeToEncoded[typeIndex];
    916    uint32_t value = u_getUnicodeProperties(c, 2) >> UPROPS_2_ID_TYPE_SHIFT;
    917    if ((encodedType & UPROPS_ID_TYPE_BIT) != 0) {
    918        return value < UPROPS_ID_TYPE_FORBIDDEN && (value & encodedType) != 0;
    919    } else {
    920        return value == encodedType;
    921    }
    922 }
    923 
    924 namespace {
    925 
    926 void maybeAppendType(uint32_t value, uint32_t bit, UIdentifierType t,
    927                     UIdentifierType *types, int32_t &length, int32_t capacity) {
    928    if ((value & bit) != 0) {
    929        if (length < capacity) {
    930            types[length] = t;
    931        }
    932        ++length;
    933    }
    934 }
    935 
    936 }  // namespace
    937 
    938 U_CAPI int32_t U_EXPORT2
    939 u_getIDTypes(UChar32 c, UIdentifierType *types, int32_t capacity, UErrorCode *pErrorCode) {
    940    if (U_FAILURE(*pErrorCode)) { return 0; }
    941    if (capacity < 0 || (capacity > 0 && types == nullptr)) {
    942        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
    943        return 0;
    944    }
    945    uint32_t value = u_getUnicodeProperties(c, 2) >> UPROPS_2_ID_TYPE_SHIFT;
    946    if ((value & UPROPS_ID_TYPE_FORBIDDEN) == UPROPS_ID_TYPE_FORBIDDEN ||
    947            value == UPROPS_ID_TYPE_NOT_CHARACTER) {
    948        // single value
    949        if (capacity > 0) {
    950            UIdentifierType t;
    951            switch (value) {
    952                case UPROPS_ID_TYPE_NOT_CHARACTER: t = U_ID_TYPE_NOT_CHARACTER; break;
    953                case UPROPS_ID_TYPE_DEPRECATED: t = U_ID_TYPE_DEPRECATED; break;
    954                case UPROPS_ID_TYPE_DEFAULT_IGNORABLE: t = U_ID_TYPE_DEFAULT_IGNORABLE; break;
    955                case UPROPS_ID_TYPE_NOT_NFKC: t = U_ID_TYPE_NOT_NFKC; break;
    956                case UPROPS_ID_TYPE_INCLUSION: t = U_ID_TYPE_INCLUSION; break;
    957                case UPROPS_ID_TYPE_RECOMMENDED: t = U_ID_TYPE_RECOMMENDED; break;
    958                default:
    959                    *pErrorCode = U_INVALID_FORMAT_ERROR;
    960                    return 0;
    961            }
    962            types[0] = t;
    963        } else {
    964            *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
    965        }
    966        return 1;
    967    } else {
    968        // one or more combinable bits
    969        int32_t length = 0;
    970        maybeAppendType(value, UPROPS_ID_TYPE_NOT_XID, U_ID_TYPE_NOT_XID,
    971                        types, length, capacity);
    972        maybeAppendType(value, UPROPS_ID_TYPE_EXCLUSION, U_ID_TYPE_EXCLUSION,
    973                        types, length, capacity);
    974        maybeAppendType(value, UPROPS_ID_TYPE_OBSOLETE, U_ID_TYPE_OBSOLETE,
    975                        types, length, capacity);
    976        maybeAppendType(value, UPROPS_ID_TYPE_TECHNICAL, U_ID_TYPE_TECHNICAL,
    977                        types, length, capacity);
    978        maybeAppendType(value, UPROPS_ID_TYPE_UNCOMMON_USE, U_ID_TYPE_UNCOMMON_USE,
    979                        types, length, capacity);
    980        maybeAppendType(value, UPROPS_ID_TYPE_LIMITED_USE, U_ID_TYPE_LIMITED_USE,
    981                        types, length, capacity);
    982        if (length >= capacity) {
    983            *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
    984        }
    985        return length;
    986    }
    987 }
    988 
    989 #if !UCONFIG_NO_NORMALIZATION
    990 
    991 U_CAPI int32_t U_EXPORT2
    992 u_getFC_NFKC_Closure(UChar32 c, char16_t *dest, int32_t destCapacity, UErrorCode *pErrorCode) {
    993    if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) {
    994        return 0;
    995    }
    996    if(destCapacity<0 || (dest==nullptr && destCapacity>0)) {
    997        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    998        return 0;
    999    }
   1000    // Compute the FC_NFKC_Closure on the fly:
   1001    // We have the API for complete coverage of Unicode properties, although
   1002    // this value by itself is not useful via API.
   1003    // (What could be useful is a custom normalization table that combines
   1004    // case folding and NFKC.)
   1005    // For the derivation, see Unicode's DerivedNormalizationProps.txt.
   1006    const Normalizer2 *nfkc=Normalizer2::getNFKCInstance(*pErrorCode);
   1007    if(U_FAILURE(*pErrorCode)) {
   1008        return 0;
   1009    }
   1010    // first: b = NFKC(Fold(a))
   1011    UnicodeString folded1String;
   1012    const char16_t *folded1;
   1013    int32_t folded1Length=ucase_toFullFolding(c, &folded1, U_FOLD_CASE_DEFAULT);
   1014    if(folded1Length<0) {
   1015        const Normalizer2Impl *nfkcImpl=Normalizer2Factory::getImpl(nfkc);
   1016        if(nfkcImpl->getCompQuickCheck(nfkcImpl->getNorm16(c))!=UNORM_NO) {
   1017            return u_terminateUChars(dest, destCapacity, 0, pErrorCode);  // c does not change at all under CaseFolding+NFKC
   1018        }
   1019        folded1String.setTo(c);
   1020    } else {
   1021        if(folded1Length>UCASE_MAX_STRING_LENGTH) {
   1022            folded1String.setTo(folded1Length);
   1023        } else {
   1024            folded1String.setTo(false, folded1, folded1Length);
   1025        }
   1026    }
   1027    UnicodeString kc1=nfkc->normalize(folded1String, *pErrorCode);
   1028    // second: c = NFKC(Fold(b))
   1029    UnicodeString folded2String(kc1);
   1030    UnicodeString kc2=nfkc->normalize(folded2String.foldCase(), *pErrorCode);
   1031    // if (c != b) add the mapping from a to c
   1032    if(U_FAILURE(*pErrorCode) || kc1==kc2) {
   1033        return u_terminateUChars(dest, destCapacity, 0, pErrorCode);
   1034    } else {
   1035        return kc2.extract(dest, destCapacity, *pErrorCode);
   1036    }
   1037 }
   1038 
   1039 #endif