[ tor-browser ].git.dasho

uspoof.h (81921B)
      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 ***************************************************************************
      5 * Copyright (C) 2008-2016, International Business Machines Corporation
      6 * and others. All Rights Reserved.
      7 ***************************************************************************
      8 *   file name:  uspoof.h
      9 *   encoding:   UTF-8
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 2008Feb13
     14 *   created by: Andy Heninger
     15 *
     16 *   Unicode Spoof Detection
     17 */
     18 
     19 #ifndef USPOOF_H
     20 #define USPOOF_H
     21 
     22 #include "unicode/ubidi.h"
     23 #include "unicode/utypes.h"
     24 #include "unicode/uset.h"
     25 #include "unicode/parseerr.h"
     26 
     27 #if !UCONFIG_NO_NORMALIZATION
     28 
     29 
     30 #if U_SHOW_CPLUSPLUS_API
     31 #include "unicode/localpointer.h"
     32 #include "unicode/unistr.h"
     33 #include "unicode/uniset.h"
     34 #endif
     35 
     36 
     37 /**
     38 * \file
     39 * \brief C API: Unicode Security and Spoofing Detection
     40 *
     41 * <p>
     42 * This class, based on <a href="http://unicode.org/reports/tr36">Unicode Technical Report #36</a> and
     43 * <a href="http://unicode.org/reports/tr39">Unicode Technical Standard #39</a>, has two main functions:
     44 *
     45 * <ol>
     46 * <li>Checking whether two strings are visually <em>confusable</em> with each other, such as "Harvest" and
     47 * &quot;&Eta;arvest&quot;, where the second string starts with the Greek capital letter Eta.</li>
     48 * <li>Checking whether an individual string is likely to be an attempt at confusing the reader (<em>spoof
     49 * detection</em>), such as "paypal" with some Latin characters substituted with Cyrillic look-alikes.</li>
     50 * </ol>
     51 *
     52 * <p>
     53 * Although originally designed as a method for flagging suspicious identifier strings such as URLs,
     54 * <code>USpoofChecker</code> has a number of other practical use cases, such as preventing attempts to evade bad-word
     55 * content filters.
     56 *
     57 * <p>
     58 * The functions of this class are exposed as C API, with a handful of syntactical conveniences for C++.
     59 *
     60 * <h2>Confusables</h2>
     61 *
     62 * <p>
     63 * The following example shows how to use <code>USpoofChecker</code> to check for confusability between two strings:
     64 *
     65 * \code{.c}
     66 * UErrorCode status = U_ZERO_ERROR;
     67 * UChar* str1 = (UChar*) u"Harvest";
     68 * UChar* str2 = (UChar*) u"\u0397arvest";  // with U+0397 GREEK CAPITAL LETTER ETA
     69 *
     70 * USpoofChecker* sc = uspoof_open(&status);
     71 * uspoof_setChecks(sc, USPOOF_CONFUSABLE, &status);
     72 *
     73 * int32_t bitmask = uspoof_areConfusable(sc, str1, -1, str2, -1, &status);
     74 * UBool result = bitmask != 0;
     75 * // areConfusable: 1 (status: U_ZERO_ERROR)
     76 * printf("areConfusable: %d (status: %s)\n", result, u_errorName(status));
     77 * uspoof_close(sc);
     78 * \endcode
     79 *
     80 * <p>
     81 * The call to {@link uspoof_open} creates a <code>USpoofChecker</code> object; the call to {@link uspoof_setChecks}
     82 * enables confusable checking and disables all other checks; the call to {@link uspoof_areConfusable} performs the
     83 * confusability test; and the following line extracts the result out of the return value. For best performance,
     84 * the instance should be created once (e.g., upon application startup), and the efficient
     85 * {@link uspoof_areConfusable} method can be used at runtime.
     86 *
     87 * If the paragraph direction used to display the strings is known, the bidi function should be used instead:
     88 *
     89 * \code{.c}
     90 * UErrorCode status = U_ZERO_ERROR;
     91 * // These strings look identical when rendered in a left-to-right context.
     92 * // They look distinct in a right-to-left context.
     93 * UChar* str1 = (UChar*) u"A1\u05D0";  // A1א
     94 * UChar* str2 = (UChar*) u"A\u05D01";  // Aא1
     95 *
     96 * USpoofChecker* sc = uspoof_open(&status);
     97 * uspoof_setChecks(sc, USPOOF_CONFUSABLE, &status);
     98 *
     99 * int32_t bitmask = uspoof_areBidiConfusable(sc, UBIDI_LTR, str1, -1, str2, -1, &status);
    100 * UBool result = bitmask != 0;
    101 * // areBidiConfusable: 1 (status: U_ZERO_ERROR)
    102 * printf("areBidiConfusable: %d (status: %s)\n", result, u_errorName(status));
    103 * uspoof_close(sc);
    104 * \endcode
    105 *
    106 * <p>
    107 * The type {@link LocalUSpoofCheckerPointer} is exposed for C++ programmers.  It will automatically call
    108 * {@link uspoof_close} when the object goes out of scope:
    109 *
    110 * \code{.cpp}
    111 * UErrorCode status = U_ZERO_ERROR;
    112 * LocalUSpoofCheckerPointer sc(uspoof_open(&status));
    113 * uspoof_setChecks(sc.getAlias(), USPOOF_CONFUSABLE, &status);
    114 * // ...
    115 * \endcode
    116 *
    117 * UTS 39 defines two strings to be <em>confusable</em> if they map to the same <em>skeleton string</em>. A skeleton can
    118 * be thought of as a "hash code". {@link uspoof_getSkeleton} computes the skeleton for a particular string, so
    119 * the following snippet is equivalent to the example above:
    120 *
    121 * \code{.c}
    122 * UErrorCode status = U_ZERO_ERROR;
    123 * UChar* str1 = (UChar*) u"Harvest";
    124 * UChar* str2 = (UChar*) u"\u0397arvest";  // with U+0397 GREEK CAPITAL LETTER ETA
    125 *
    126 * USpoofChecker* sc = uspoof_open(&status);
    127 * uspoof_setChecks(sc, USPOOF_CONFUSABLE, &status);
    128 *
    129 * // Get skeleton 1
    130 * int32_t skel1Len = uspoof_getSkeleton(sc, 0, str1, -1, NULL, 0, &status);
    131 * UChar* skel1 = (UChar*) malloc(++skel1Len * sizeof(UChar));
    132 * status = U_ZERO_ERROR;
    133 * uspoof_getSkeleton(sc, 0, str1, -1, skel1, skel1Len, &status);
    134 *
    135 * // Get skeleton 2
    136 * int32_t skel2Len = uspoof_getSkeleton(sc, 0, str2, -1, NULL, 0, &status);
    137 * UChar* skel2 = (UChar*) malloc(++skel2Len * sizeof(UChar));
    138 * status = U_ZERO_ERROR;
    139 * uspoof_getSkeleton(sc, 0, str2, -1, skel2, skel2Len, &status);
    140 *
    141 * // Are the skeletons the same?
    142 * UBool result = u_strcmp(skel1, skel2) == 0;
    143 * // areConfusable: 1 (status: U_ZERO_ERROR)
    144 * printf("areConfusable: %d (status: %s)\n", result, u_errorName(status));
    145 * uspoof_close(sc);
    146 * free(skel1);
    147 * free(skel2);
    148 * \endcode
    149 *
    150 * If you need to check if a string is confusable with any string in a dictionary of many strings, rather than calling
    151 * {@link uspoof_areConfusable} many times in a loop, {@link uspoof_getSkeleton} can be used instead, as shown below:
    152 *
    153 * \code{.c}
    154 * UErrorCode status = U_ZERO_ERROR;
    155 * #define DICTIONARY_LENGTH 2
    156 * UChar* dictionary[DICTIONARY_LENGTH] = { (UChar*) u"lorem", (UChar*) u"ipsum" };
    157 * UChar* skeletons[DICTIONARY_LENGTH];
    158 * UChar* str = (UChar*) u"1orern";
    159 *
    160 * // Setup:
    161 * USpoofChecker* sc = uspoof_open(&status);
    162 * uspoof_setChecks(sc, USPOOF_CONFUSABLE, &status);
    163 * for (size_t i=0; i<DICTIONARY_LENGTH; i++) {
    164 *     UChar* word = dictionary[i];
    165 *     int32_t len = uspoof_getSkeleton(sc, 0, word, -1, NULL, 0, &status);
    166 *     skeletons[i] = (UChar*) malloc(++len * sizeof(UChar));
    167 *     status = U_ZERO_ERROR;
    168 *     uspoof_getSkeleton(sc, 0, word, -1, skeletons[i], len, &status);
    169 * }
    170 *
    171 * // Live Check:
    172 * {
    173 *     int32_t len = uspoof_getSkeleton(sc, 0, str, -1, NULL, 0, &status);
    174 *     UChar* skel = (UChar*) malloc(++len * sizeof(UChar));
    175 *     status = U_ZERO_ERROR;
    176 *     uspoof_getSkeleton(sc, 0, str, -1, skel, len, &status);
    177 *     UBool result = false;
    178 *     for (size_t i=0; i<DICTIONARY_LENGTH; i++) {
    179 *         result = u_strcmp(skel, skeletons[i]) == 0;
    180 *         if (result == true) { break; }
    181 *     }
    182 *     // Has confusable in dictionary: 1 (status: U_ZERO_ERROR)
    183 *     printf("Has confusable in dictionary: %d (status: %s)\n", result, u_errorName(status));
    184 *     free(skel);
    185 * }
    186 *
    187 * for (size_t i=0; i<DICTIONARY_LENGTH; i++) {
    188 *     free(skeletons[i]);
    189 * }
    190 * uspoof_close(sc);
    191 * \endcode
    192 *
    193 * <b>Note:</b> Since the Unicode confusables mapping table is frequently updated, confusable skeletons are <em>not</em>
    194 * guaranteed to be the same between ICU releases. We therefore recommend that you always compute confusable skeletons
    195 * at runtime and do not rely on creating a permanent, or difficult to update, database of skeletons.
    196 *
    197 * <h2>Spoof Detection</h2>
    198 *
    199 * The following snippet shows a minimal example of using <code>USpoofChecker</code> to perform spoof detection on a
    200 * string:
    201 *
    202 * \code{.c}
    203 * UErrorCode status = U_ZERO_ERROR;
    204 * UChar* str = (UChar*) u"p\u0430ypal";  // with U+0430 CYRILLIC SMALL LETTER A
    205 *
    206 * // Get the default set of allowable characters:
    207 * USet* allowed = uset_openEmpty();
    208 * uset_addAll(allowed, uspoof_getRecommendedSet(&status));
    209 * uset_addAll(allowed, uspoof_getInclusionSet(&status));
    210 *
    211 * USpoofChecker* sc = uspoof_open(&status);
    212 * uspoof_setAllowedChars(sc, allowed, &status);
    213 * uspoof_setRestrictionLevel(sc, USPOOF_MODERATELY_RESTRICTIVE);
    214 *
    215 * int32_t bitmask = uspoof_check(sc, str, -1, NULL, &status);
    216 * UBool result = bitmask != 0;
    217 * // fails checks: 1 (status: U_ZERO_ERROR)
    218 * printf("fails checks: %d (status: %s)\n", result, u_errorName(status));
    219 * uspoof_close(sc);
    220 * uset_close(allowed);
    221 * \endcode
    222 *
    223 * As in the case for confusability checking, it is good practice to create one <code>USpoofChecker</code> instance at
    224 * startup, and call the cheaper {@link uspoof_check} online. We specify the set of
    225 * allowed characters to be those with type RECOMMENDED or INCLUSION, according to the recommendation in UTS 39.
    226 *
    227 * In addition to {@link uspoof_check}, the function {@link uspoof_checkUTF8} is exposed for UTF8-encoded char* strings,
    228 * and {@link uspoof_checkUnicodeString} is exposed for C++ programmers.
    229 *
    230 * If the {@link USPOOF_AUX_INFO} check is enabled, a limited amount of information on why a string failed the checks
    231 * is available in the returned bitmask.  For complete information, use the {@link uspoof_check2} class of functions
    232 * with a {@link USpoofCheckResult} parameter:
    233 *
    234 * \code{.c}
    235 * UErrorCode status = U_ZERO_ERROR;
    236 * UChar* str = (UChar*) u"p\u0430ypal";  // with U+0430 CYRILLIC SMALL LETTER A
    237 *
    238 * // Get the default set of allowable characters:
    239 * USet* allowed = uset_openEmpty();
    240 * uset_addAll(allowed, uspoof_getRecommendedSet(&status));
    241 * uset_addAll(allowed, uspoof_getInclusionSet(&status));
    242 *
    243 * USpoofChecker* sc = uspoof_open(&status);
    244 * uspoof_setAllowedChars(sc, allowed, &status);
    245 * uspoof_setRestrictionLevel(sc, USPOOF_MODERATELY_RESTRICTIVE);
    246 *
    247 * USpoofCheckResult* checkResult = uspoof_openCheckResult(&status);
    248 * int32_t bitmask = uspoof_check2(sc, str, -1, checkResult, &status);
    249 *
    250 * int32_t failures1 = bitmask;
    251 * int32_t failures2 = uspoof_getCheckResultChecks(checkResult, &status);
    252 * assert(failures1 == failures2);
    253 * // checks that failed: 0x00000010 (status: U_ZERO_ERROR)
    254 * printf("checks that failed: %#010x (status: %s)\n", failures1, u_errorName(status));
    255 *
    256 * // Cleanup:
    257 * uspoof_close(sc);
    258 * uset_close(allowed);
    259 * uspoof_closeCheckResult(checkResult);
    260 * \endcode
    261 *
    262 * C++ users can take advantage of a few syntactical conveniences.  The following snippet is functionally
    263 * equivalent to the one above:
    264 *
    265 * \code{.cpp}
    266 * UErrorCode status = U_ZERO_ERROR;
    267 * UnicodeString str((UChar*) u"p\u0430ypal");  // with U+0430 CYRILLIC SMALL LETTER A
    268 *
    269 * // Get the default set of allowable characters:
    270 * UnicodeSet allowed;
    271 * allowed.addAll(*uspoof_getRecommendedUnicodeSet(&status));
    272 * allowed.addAll(*uspoof_getInclusionUnicodeSet(&status));
    273 *
    274 * LocalUSpoofCheckerPointer sc(uspoof_open(&status));
    275 * uspoof_setAllowedChars(sc.getAlias(), allowed.toUSet(), &status);
    276 * uspoof_setRestrictionLevel(sc.getAlias(), USPOOF_MODERATELY_RESTRICTIVE);
    277 *
    278 * LocalUSpoofCheckResultPointer checkResult(uspoof_openCheckResult(&status));
    279 * int32_t bitmask = uspoof_check2UnicodeString(sc.getAlias(), str, checkResult.getAlias(), &status);
    280 *
    281 * int32_t failures1 = bitmask;
    282 * int32_t failures2 = uspoof_getCheckResultChecks(checkResult.getAlias(), &status);
    283 * assert(failures1 == failures2);
    284 * // checks that failed: 0x00000010 (status: U_ZERO_ERROR)
    285 * printf("checks that failed: %#010x (status: %s)\n", failures1, u_errorName(status));
    286 *
    287 * // Explicit cleanup not necessary.
    288 * \endcode
    289 *
    290 * The return value is a bitmask of the checks that failed. In this case, there was one check that failed:
    291 * {@link USPOOF_RESTRICTION_LEVEL}, corresponding to the fifth bit (16). The possible checks are:
    292 *
    293 * <ul>
    294 * <li><code>RESTRICTION_LEVEL</code>: flags strings that violate the
    295 * <a href="http://unicode.org/reports/tr39/#Restriction_Level_Detection">Restriction Level</a> test as specified in UTS
    296 * 39; in most cases, this means flagging strings that contain characters from multiple different scripts.</li>
    297 * <li><code>INVISIBLE</code>: flags strings that contain invisible characters, such as zero-width spaces, or character
    298 * sequences that are likely not to display, such as multiple occurrences of the same non-spacing mark.</li>
    299 * <li><code>CHAR_LIMIT</code>: flags strings that contain characters outside of a specified set of acceptable
    300 * characters. See {@link uspoof_setAllowedChars} and {@link uspoof_setAllowedLocales}.</li>
    301 * <li><code>MIXED_NUMBERS</code>: flags strings that contain digits from multiple different numbering systems.</li>
    302 * </ul>
    303 *
    304 * <p>
    305 * These checks can be enabled independently of each other. For example, if you were interested in checking for only the
    306 * INVISIBLE and MIXED_NUMBERS conditions, you could do:
    307 *
    308 * \code{.c}
    309 * UErrorCode status = U_ZERO_ERROR;
    310 * UChar* str = (UChar*) u"8\u09EA";  // 8 mixed with U+09EA BENGALI DIGIT FOUR
    311 *
    312 * USpoofChecker* sc = uspoof_open(&status);
    313 * uspoof_setChecks(sc, USPOOF_INVISIBLE | USPOOF_MIXED_NUMBERS, &status);
    314 *
    315 * int32_t bitmask = uspoof_check2(sc, str, -1, NULL, &status);
    316 * UBool result = bitmask != 0;
    317 * // fails checks: 1 (status: U_ZERO_ERROR)
    318 * printf("fails checks: %d (status: %s)\n", result, u_errorName(status));
    319 * uspoof_close(sc);
    320 * \endcode
    321 *
    322 * Here is an example in C++ showing how to compute the restriction level of a string:
    323 *
    324 * \code{.cpp}
    325 * UErrorCode status = U_ZERO_ERROR;
    326 * UnicodeString str((UChar*) u"p\u0430ypal");  // with U+0430 CYRILLIC SMALL LETTER A
    327 *
    328 * // Get the default set of allowable characters:
    329 * UnicodeSet allowed;
    330 * allowed.addAll(*uspoof_getRecommendedUnicodeSet(&status));
    331 * allowed.addAll(*uspoof_getInclusionUnicodeSet(&status));
    332 *
    333 * LocalUSpoofCheckerPointer sc(uspoof_open(&status));
    334 * uspoof_setAllowedChars(sc.getAlias(), allowed.toUSet(), &status);
    335 * uspoof_setRestrictionLevel(sc.getAlias(), USPOOF_MODERATELY_RESTRICTIVE);
    336 * uspoof_setChecks(sc.getAlias(), USPOOF_RESTRICTION_LEVEL | USPOOF_AUX_INFO, &status);
    337 *
    338 * LocalUSpoofCheckResultPointer checkResult(uspoof_openCheckResult(&status));
    339 * int32_t bitmask = uspoof_check2UnicodeString(sc.getAlias(), str, checkResult.getAlias(), &status);
    340 *
    341 * URestrictionLevel restrictionLevel = uspoof_getCheckResultRestrictionLevel(checkResult.getAlias(), &status);
    342 * // Since USPOOF_AUX_INFO was enabled, the restriction level is also available in the upper bits of the bitmask:
    343 * assert((restrictionLevel & bitmask) == restrictionLevel);
    344 * // Restriction level: 0x50000000 (status: U_ZERO_ERROR)
    345 * printf("Restriction level: %#010x (status: %s)\n", restrictionLevel, u_errorName(status));
    346 * \endcode
    347 *
    348 * The code '0x50000000' corresponds to the restriction level USPOOF_MINIMALLY_RESTRICTIVE.  Since
    349 * USPOOF_MINIMALLY_RESTRICTIVE is weaker than USPOOF_MODERATELY_RESTRICTIVE, the string fails the check.
    350 *
    351 * <b>Note:</b> The Restriction Level is the most powerful of the checks. The full logic is documented in
    352 * <a href="http://unicode.org/reports/tr39/#Restriction_Level_Detection">UTS 39</a>, but the basic idea is that strings
    353 * are restricted to contain characters from only a single script, <em>except</em> that most scripts are allowed to have
    354 * Latin characters interspersed. Although the default restriction level is <code>HIGHLY_RESTRICTIVE</code>, it is
    355 * recommended that users set their restriction level to <code>MODERATELY_RESTRICTIVE</code>, which allows Latin mixed
    356 * with all other scripts except Cyrillic, Greek, and Cherokee, with which it is often confusable. For more details on
    357 * the levels, see UTS 39 or {@link URestrictionLevel}. The Restriction Level test is aware of the set of
    358 * allowed characters set in {@link uspoof_setAllowedChars}. Note that characters which have script code
    359 * COMMON or INHERITED, such as numbers and punctuation, are ignored when computing whether a string has multiple
    360 * scripts.
    361 *
    362 * <h2>Advanced bidirectional usage</h2>
    363 * If the paragraph direction with which the identifiers will be displayed is not known, there are
    364 * multiple options for confusable detection depending on the circumstances.
    365 *
    366 * <p>
    367 * In some circumstances, the only concern is confusion between identifiers displayed with the same
    368 * paragraph direction.
    369 *
    370 * <p>
    371 * An example is the case where identifiers are usernames prefixed with the @ symbol.
    372 * That symbol will appear to the left in a left-to-right context, and to the right in a
    373 * right-to-left context, so that an identifier displayed in a left-to-right context can never be
    374 * confused with an identifier displayed in a right-to-left context:
    375 * <ul>
    376 * <li>
    377 * The usernames "A1א" (A one aleph) and "Aא1" (A aleph 1)
    378 * would be considered confusable, since they both appear as \@A1א in a left-to-right context, and the
    379 * usernames "אA_1" (aleph A underscore one) and "א1_A" (aleph one underscore A) would be considered
    380 * confusable, since they both appear as A_1א@ in a right-to-left context.
    381 * </li>
    382 * <li>
    383 * The username "Mark_" would not be considered confusable with the username "_Mark",
    384 * even though the latter would appear as Mark_@ in a right-to-left context, and the
    385 * former as \@Mark_ in a left-to-right context.
    386 * </li>
    387 * </ul>
    388 * <p>
    389 * In that case, the caller should check for both LTR-confusability and RTL-confusability:
    390 *
    391 * \code{.cpp}
    392 * bool confusableInEitherDirection =
    393 *     uspoof_areBidiConfusableUnicodeString(sc, UBIDI_LTR, id1, id2, &status) ||
    394 *     uspoof_areBidiConfusableUnicodeString(sc, UBIDI_RTL, id1, id2, &status);
    395 * \endcode
    396 *
    397 * If the bidiSkeleton is used, the LTR and RTL skeleta should be kept separately and compared, LTR
    398 * with LTR and RTL with RTL.
    399 *
    400 * <p>
    401 * In cases where confusability between the visual appearances of an identifier displayed in a
    402 * left-to-right context with another identifier displayed in a right-to-left context is a concern,
    403 * the LTR skeleton of one can be compared with the RTL skeleton of the other.  However, this
    404 * very broad definition of confusability may have unexpected results; for instance, it treats the
    405 * ASCII identifiers "Mark_" and "_Mark" as confusable.
    406 *
    407 * <h2>Additional Information</h2>
    408 *
    409 * A <code>USpoofChecker</code> instance may be used repeatedly to perform checks on any number of identifiers.
    410 *
    411 * <b>Thread Safety:</b> The test functions for checking a single identifier, or for testing whether
    412 * two identifiers are possibly confusable, are thread safe. They may be called concurrently, from multiple threads,
    413 * using the same USpoofChecker instance.
    414 *
    415 * More generally, the standard ICU thread safety rules apply: functions that take a const USpoofChecker parameter are
    416 * thread safe. Those that take a non-const USpoofChecker are not thread safe.
    417 *
    418 * @stable ICU 4.6
    419 */
    420 
    421 U_CDECL_BEGIN
    422 
    423 struct USpoofChecker;
    424 /** Handle for a spoof checker; obtained from uspoof_open() and released with uspoof_close().
    425 * @stable ICU 4.2
    426 */
    427 typedef struct USpoofChecker USpoofChecker; /**< typedef for C of USpoofChecker */
    428 
    429 struct USpoofCheckResult;
    430 /** Handle for detailed check results, passed to the uspoof_check2() family of functions.
    431 * @see uspoof_openCheckResult
    432 * @stable ICU 58
    433 */
    434 typedef struct USpoofCheckResult USpoofCheckResult;
    435 
    436 /**
    437 * Enum for the kinds of checks that USpoofChecker can perform.
    438 * These enum values are used both to select the set of checks that
    439 * will be performed, and to report results from the check function.
    440 *
    441 * @stable ICU 4.2
    442 */
    443 typedef enum USpoofChecks {
    444    /**
    445     * When performing the two-string {@link uspoof_areConfusable} test, this flag in the return value indicates
    446     * that the two strings are visually confusable and that they are from the same script, according to UTS 39 section
    447     * 4.
    448     *
    449     * @see uspoof_areConfusable
    450     * @stable ICU 4.2
    451     */
    452    USPOOF_SINGLE_SCRIPT_CONFUSABLE =   1,
    453 
    454    /**
    455     * When performing the two-string {@link uspoof_areConfusable} test, this flag in the return value indicates
    456     * that the two strings are visually confusable and that they are <b>not</b> from the same script, according to UTS
    457     * 39 section 4.
    458     *
    459     * @see uspoof_areConfusable
    460     * @stable ICU 4.2
    461     */
    462    USPOOF_MIXED_SCRIPT_CONFUSABLE  =   2,
    463 
    464    /**
    465     * When performing the two-string {@link uspoof_areConfusable} test, this flag in the return value indicates
    466     * that the two strings are visually confusable and that they are not from the same script but both of them are
    467     * single-script strings, according to UTS 39 section 4.
    468     *
    469     * @see uspoof_areConfusable
    470     * @stable ICU 4.2
    471     */
    472    USPOOF_WHOLE_SCRIPT_CONFUSABLE  =   4,
    473 
    474    /**
    475     * Enable this flag in {@link uspoof_setChecks} to turn on all types of confusables.  You may set
    476     * the checks to some subset of SINGLE_SCRIPT_CONFUSABLE, MIXED_SCRIPT_CONFUSABLE, or WHOLE_SCRIPT_CONFUSABLE to
    477     * make {@link uspoof_areConfusable} return only those types of confusables.
    478     *
    479     * @see uspoof_areConfusable
    480     * @see uspoof_getSkeleton
    481     * @stable ICU 58
    482     */
    483    USPOOF_CONFUSABLE               =   USPOOF_SINGLE_SCRIPT_CONFUSABLE | USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_WHOLE_SCRIPT_CONFUSABLE,
    484 
    485 #ifndef U_HIDE_DEPRECATED_API
    486    /**
    487      * This flag is deprecated and no longer affects the behavior of SpoofChecker.
    488      *
    489      * @deprecated ICU 58  Any case confusable mappings were removed from UTS 39; the corresponding ICU API was deprecated.
    490      */
    491    USPOOF_ANY_CASE                 =   8,
    492 #endif  /* U_HIDE_DEPRECATED_API */
    493 
    494    /**
    495      * Check that an identifier is no looser than the specified RestrictionLevel.
    496      * The default if {@link uspoof_setRestrictionLevel} is not called is HIGHLY_RESTRICTIVE.
    497      *
    498      * If USPOOF_AUX_INFO is enabled the actual restriction level of the
    499      * identifier being tested will also be returned by uspoof_check().
    500      *
    501      * @see URestrictionLevel
    502      * @see uspoof_setRestrictionLevel
    503      * @see USPOOF_AUX_INFO
    504      *
    505      * @stable ICU 51
    506      */
    507    USPOOF_RESTRICTION_LEVEL        = 16,
    508 
    509 #ifndef U_HIDE_DEPRECATED_API
    510    /** Check that an identifier contains only characters from a
    511      * single script (plus chars from the common and inherited scripts.)
    512      * Applies to checks of a single identifier only.
    513      * @deprecated ICU 51  Use RESTRICTION_LEVEL instead.
    514      */
    515    USPOOF_SINGLE_SCRIPT            =  USPOOF_RESTRICTION_LEVEL,
    516 #endif  /* U_HIDE_DEPRECATED_API */
    517 
    518    /** Check an identifier for the presence of invisible characters,
    519      * such as zero-width spaces, or character sequences that are
    520      * likely not to display, such as multiple occurrences of the same
    521      * non-spacing mark.  This check does not test the input string as a whole
    522      * for conformance to any particular syntax for identifiers.
    523      */
    524    USPOOF_INVISIBLE                =  32,
    525 
    526    /** Check that an identifier contains only characters from a specified set
    527      * of acceptable characters.  See {@link uspoof_setAllowedChars} and
    528      * {@link uspoof_setAllowedLocales}.  Note that a string that fails this check
    529      * will also fail the {@link USPOOF_RESTRICTION_LEVEL} check.
    530      */
    531    USPOOF_CHAR_LIMIT               =  64,
    532 
    533    /**
    534     * Check that an identifier does not mix numbers from different numbering systems.
    535     * For more information, see UTS 39 section 5.3.
    536     *
    537     * @stable ICU 51
    538     */
    539    USPOOF_MIXED_NUMBERS            = 128,
    540 
    541    /**
    542     * Check that an identifier does not have a combining character following a character in which that
    543     * combining character would be hidden; for example 'i' followed by a U+0307 combining dot.
    544     *
    545     * More specifically, the following characters are forbidden from preceding a U+0307:
    546     * <ul>
    547     * <li>Those with the Soft_Dotted Unicode property (which includes 'i' and 'j')</li>
    548     * <li>Latin lowercase letter 'l'</li>
    549     * <li>Dotless 'i' and 'j' ('ı' and 'ȷ', U+0131 and U+0237)</li>
    550     * <li>Any character whose confusable prototype ends with such a character
    551     * (Soft_Dotted, 'l', 'ı', or 'ȷ')</li>
    552     * </ul>
    553     * In addition, combining characters are allowed between the above characters and U+0307 except those
    554     * with combining class 0 or combining class "Above" (230, same class as U+0307).
    555     *
    556     * This list and the number of combining characters considered by this check may grow over time.
    557     *
    558     * @stable ICU 62
    559     */
    560    USPOOF_HIDDEN_OVERLAY            = 256,
    561 
    562   /**
    563     * Enable all spoof checks.
    564     *
    565     * @stable ICU 4.6
    566     */
    567    USPOOF_ALL_CHECKS               = 0xFFFF,
    568 
    569    /**
    570      * Enable the return of auxiliary (non-error) information in the
    571      * upper bits of the check results value.
    572      *
    573      * If this "check" is not enabled, the results of {@link uspoof_check} will be
    574      * zero when an identifier passes all of the enabled checks.
    575      *
    576      * If this "check" is enabled, (uspoof_check() & {@link USPOOF_ALL_CHECKS}) will
    577      * be zero when an identifier passes all checks.
    578      *
    579      * @stable ICU 51
    580      */
    581    USPOOF_AUX_INFO                  = 0x40000000
    582 
    583    } USpoofChecks;
    584 
    585 
    586    /**
    587     * Constants from UTS #39 for use in {@link uspoof_setRestrictionLevel}, and
    588     * for returned identifier restriction levels in check results.
    589     *
    590     * @stable ICU 51
    591     *
    592     * @see uspoof_setRestrictionLevel
    593     * @see uspoof_check
    594     */
    595    typedef enum URestrictionLevel {
    596        /**
    597         * All characters in the string are in the identifier profile and all characters in the string are in the
    598         * ASCII range.
    599         *
    600         * @stable ICU 51
    601         */
    602        USPOOF_ASCII = 0x10000000,
    603        /**
    604         * The string classifies as ASCII-Only, or all characters in the string are in the identifier profile and
    605         * the string is single-script, according to the definition in UTS 39 section 5.1.
    606         *
    607         * @stable ICU 53
    608         */
    609        USPOOF_SINGLE_SCRIPT_RESTRICTIVE = 0x20000000,
    610        /**
    611         * The string classifies as Single Script, or all characters in the string are in the identifier profile and
    612         * the string is covered by any of the following sets of scripts, according to the definition in UTS 39
    613         * section 5.1:
    614         * <ul>
    615         *   <li>Latin + Han + Bopomofo (or equivalently: Latn + Hanb)</li>
    616         *   <li>Latin + Han + Hiragana + Katakana (or equivalently: Latn + Jpan)</li>
    617         *   <li>Latin + Han + Hangul (or equivalently: Latn + Kore)</li>
    618         * </ul>
    619         * This is the default restriction in ICU.
    620         *
    621         * @stable ICU 51
    622         */
    623        USPOOF_HIGHLY_RESTRICTIVE = 0x30000000,
    624        /**
    625         * The string classifies as Highly Restrictive, or all characters in the string are in the identifier profile
    626         * and the string is covered by Latin and any one other Recommended or Aspirational script, except Cyrillic,
    627         * Greek, and Cherokee.
    628         *
    629         * @stable ICU 51
    630         */
    631        USPOOF_MODERATELY_RESTRICTIVE = 0x40000000,
    632        /**
    633         * All characters in the string are in the identifier profile.  Allow arbitrary mixtures of scripts.
    634         *
    635         * @stable ICU 51
    636         */
    637        USPOOF_MINIMALLY_RESTRICTIVE = 0x50000000,
    638        /**
    639         * Any valid identifiers, including characters outside of the Identifier Profile.
    640         *
    641         * @stable ICU 51
    642         */
    643        USPOOF_UNRESTRICTIVE = 0x60000000,
    644        /**
    645         * Mask for selecting the Restriction Level bits from the return value of {@link uspoof_check}.
    646         *
    647         * @stable ICU 53
    648         */
    649        USPOOF_RESTRICTION_LEVEL_MASK = 0x7F000000,
    650 #ifndef U_HIDE_INTERNAL_API
    651        /**
    652         * An undefined restriction level.
    653         * @internal
    654         */
    655        USPOOF_UNDEFINED_RESTRICTIVE = -1
    656 #endif  /* U_HIDE_INTERNAL_API */
    657    } URestrictionLevel;
    658 
    659 /**
    660 *  Create a Unicode Spoof Checker, configured to perform all
    661 *  checks except for USPOOF_LOCALE_LIMIT and USPOOF_CHAR_LIMIT.
    662 *  Note that additional checks may be added in the future,
    663 *  resulting in changes to the default checking behavior.
    664 *
    665 *  @param status  The error code, set if this function encounters a problem.
    666 *  @return        the newly created Spoof Checker; release it with uspoof_close()
    667 *  @stable ICU 4.2
    668 */
    669 U_CAPI USpoofChecker * U_EXPORT2
    670 uspoof_open(UErrorCode *status);
    671 
    672 
    673 /**
    674 * Open a Spoof Checker from its serialized form, stored in 32-bit-aligned memory.
    675 * Inverse of uspoof_serialize().
    676 * The memory containing the serialized data must remain valid and unchanged
    677 * as long as the spoof checker, or any cloned copies of the spoof checker,
    678 * are in use.  Ownership of the memory remains with the caller.
    679 * The spoof checker (and any clones) must be closed prior to deleting the
    680 * serialized data.
    681 *
    682 * @param data a pointer to 32-bit-aligned memory containing the serialized form of spoof data
    683 * @param length the number of bytes available at data;
    684 *               can be more than necessary
    685 * @param pActualLength receives the actual number of bytes at data taken up by the data;
    686 *                      can be NULL
    687 * @param pErrorCode ICU error code
    688 * @return the spoof checker; close it with uspoof_close() before deleting the serialized data.
    689 *
    690 * @see uspoof_open
    691 * @see uspoof_serialize
    692 * @stable ICU 4.2
    693 */
    694 U_CAPI USpoofChecker * U_EXPORT2
    695 uspoof_openFromSerialized(const void *data, int32_t length, int32_t *pActualLength,
    696                          UErrorCode *pErrorCode);
    697 
    698 /**
    699  * Open a Spoof Checker from the source form of the spoof data.
    700  * The input corresponds to the Unicode data file confusables.txt
    701  * as described in Unicode Technical Standard #39.  The syntax of the source data
    702  * is as described in UTS #39 for this file, and the content of
    703  * this file is acceptable input.
    704  *
    705  * The character encoding of the (char *) input text is UTF-8.
    706  *
    707  * @param confusables a pointer to the confusable characters definitions,
    708  *                    as found in file confusables.txt from unicode.org.
    709  * @param confusablesLen The length of the confusables text, or -1 if the
    710  *                    input string is zero terminated.
    711  * @param confusablesWholeScript
    712  *                    Deprecated in ICU 58.  No longer used.
    713  * @param confusablesWholeScriptLen
    714  *                    Deprecated in ICU 58.  No longer used.
    715  * @param errType     In the event of an error in the input, indicates
    716  *                    which of the input files contains the error.
    717  *                    The value is one of USPOOF_SINGLE_SCRIPT_CONFUSABLE or
    718  *                    USPOOF_WHOLE_SCRIPT_CONFUSABLE, or
    719  *                    zero if no errors are found.
    720  * @param pe          In the event of an error in the input, receives the position
    721  *                    in the input text (line, offset) of the error.
    722  * @param status      an in/out ICU UErrorCode.  Among the possible errors is
    723  *                    U_PARSE_ERROR, which is used to report syntax errors
    724  *                    in the input.
    725  * @return            A spoof checker that uses the rules from the input files; release it with uspoof_close().
    726  * @stable ICU 4.2
    727  */
    728 U_CAPI USpoofChecker * U_EXPORT2
    729 uspoof_openFromSource(const char *confusables,  int32_t confusablesLen,
    730                      const char *confusablesWholeScript, int32_t confusablesWholeScriptLen,
    731                      int32_t *errType, UParseError *pe, UErrorCode *status);
    732 
    733 
    734 /**
    735  * Close a Spoof Checker, freeing any memory that was being held by its implementation.
    736  * @param sc  The USpoofChecker to close.
    737  * @stable ICU 4.2
    738  */
    739 U_CAPI void U_EXPORT2
    740 uspoof_close(USpoofChecker *sc);
    741 
    742 /**
    743 * Clone a Spoof Checker.  The clone will be set to perform the same checks
    744 *   as the original source.
    745 *
    746 * @param sc       The source USpoofChecker
    747 * @param status   The error code, set if this function encounters a problem.
    748 * @return         The newly created clone; release it with uspoof_close().
    749 * @stable ICU 4.2
    750 */
    751 U_CAPI USpoofChecker * U_EXPORT2
    752 uspoof_clone(const USpoofChecker *sc, UErrorCode *status);
    753 
    754 
    755 /**
    756 * Specify the bitmask of checks that will be performed by {@link uspoof_check}. Calling this method
    757 * overwrites any checks that may have already been enabled. By default, all checks are enabled.
    758 *
    759 * To enable specific checks and disable all others,
    760 * OR together only the bit constants for the desired checks.
    761 * For example, to fail strings containing characters outside of
    762 * the set specified by {@link uspoof_setAllowedChars} and
    763 * also strings that contain digits from mixed numbering systems:
    764 *
    765 * <pre>
    766 * {@code
    767 * uspoof_setChecks(sc, USPOOF_CHAR_LIMIT | USPOOF_MIXED_NUMBERS, &status);
    768 * }
    769 * </pre>
    770 *
    771 * To disable specific checks and enable all others,
    772 * start with USPOOF_ALL_CHECKS and "AND away" the not-desired checks.
    773 * For example, if you are not planning to use the {@link uspoof_areConfusable} functionality,
    774 * it is good practice to disable the CONFUSABLE check:
    775 *
    776 * <pre>
    777 * {@code
    778 * uspoof_setChecks(sc, USPOOF_ALL_CHECKS & ~USPOOF_CONFUSABLE, &status);
    779 * }
    780 * </pre>
    781 *
    782 * Note that methods such as {@link uspoof_setAllowedChars}, {@link uspoof_setAllowedLocales}, and
    783 * {@link uspoof_setRestrictionLevel} will enable certain checks when called. Those methods will OR the check they
    784 * enable onto the existing bitmask specified by this method. For more details, see the documentation of those
    785 * methods.
    786 *
    787 * @param sc       The USpoofChecker
    788 * @param checks   The set of checks that this spoof checker will perform.
    789 *                 The value is a bit set, obtained by OR-ing together
    790 *                 values from enum USpoofChecks.
    791 * @param status   The error code, set if this function encounters a problem.
    792 * @see uspoof_getChecks
    793 * @stable ICU 4.2
    794 */
    795 U_CAPI void U_EXPORT2
    796 uspoof_setChecks(USpoofChecker *sc, int32_t checks, UErrorCode *status);
    797 
    798 /**
    799 * Get the set of checks that this Spoof Checker has been configured to perform.
    800 *
    801 * @param sc       The USpoofChecker
    802 * @param status   The error code, set if this function encounters a problem.
    803 * @return         The set of checks that this spoof checker will perform.
    804 *                 The value is a bit set, obtained by OR-ing together
    805 *                 values from enum USpoofChecks.
    806 * @see uspoof_setChecks
    807 * @stable ICU 4.2
    808 */
    809 U_CAPI int32_t U_EXPORT2
    810 uspoof_getChecks(const USpoofChecker *sc, UErrorCode *status);
    811 
    812 /**
    813 * Set the loosest restriction level allowed for strings. If this function is not called, the default is
    814 * {@link USPOOF_HIGHLY_RESTRICTIVE}. Calling this function enables the {@link USPOOF_RESTRICTION_LEVEL} and
    815 * {@link USPOOF_MIXED_NUMBERS} checks, corresponding to Sections 5.1 and 5.2 of UTS 39. To customize which checks are
    816 * to be performed by {@link uspoof_check}, see {@link uspoof_setChecks}.
    817 *
    818 * @param sc               The USpoofChecker
    819 * @param restrictionLevel The loosest restriction level allowed, as a URestrictionLevel value.
    820 * @see URestrictionLevel
    821 * @stable ICU 51
    822 */
    823 U_CAPI void U_EXPORT2
    824 uspoof_setRestrictionLevel(USpoofChecker *sc, URestrictionLevel restrictionLevel);
    825 
    826 
    827 /**
    828  * Get the Restriction Level that will be tested if the checks include {@link USPOOF_RESTRICTION_LEVEL}.
    829  * @param sc The USpoofChecker
    830  * @return The restriction level
    831  * @see URestrictionLevel
    832  * @stable ICU 51
    833  */
    834 U_CAPI URestrictionLevel U_EXPORT2
    835 uspoof_getRestrictionLevel(const USpoofChecker *sc);
    836 
    837 /**
    838 * Limit characters that are acceptable in identifiers being checked to those
    839 * normally used with the languages associated with the specified locales.
    840 * Any previously specified list of locales is replaced by the new settings.
    841 *
    842 * A set of languages is determined from the locale(s), and
    843 * from those a set of acceptable Unicode scripts is determined.
    844 * Characters from this set of scripts, along with characters from
    845 * the "common" and "inherited" Unicode Script categories
    846 * will be permitted.
    847 *
    848 * Supplying an empty string removes all restrictions;
    849 * characters from any script will be allowed.
    850 *
    851 * The {@link USPOOF_CHAR_LIMIT} test is automatically enabled for this
    852 * USpoofChecker when calling this function with a non-empty list
    853 * of locales.
    854 *
    855 * The Unicode Set of characters that will be allowed is accessible
    856 * via the uspoof_getAllowedChars() function.  uspoof_setAllowedLocales()
    857 * will <i>replace</i> any previously applied set of allowed characters.
    858 *
    859 * Adjustments, such as additions or deletions of certain classes of characters,
    860 * can be made to the result of uspoof_setAllowedLocales() by
    861 * fetching the resulting set with uspoof_getAllowedChars(),
    862 * manipulating it with the Unicode Set API, then resetting the
    863 * spoof detector's limits with uspoof_setAllowedChars().
    864 *
    865 * @param sc           The USpoofChecker
    866 * @param localesList  A list of locales, from which the language
    867 *                     and associated script are extracted.  The locales
    868 *                     are comma-separated if there is more than one.
    869 *                     White space may not appear within an individual locale,
    870 *                     but is ignored otherwise.
    871 *                     The locales are syntactically like those from the
    872 *                     HTTP Accept-Language header.
    873 *                     If the localesList is empty, no restrictions will be placed on
    874 *                     the allowed characters.
    875 *
    876 * @param status       The error code, set if this function encounters a problem.
    877 * @stable ICU 4.2
    878 */
    879 U_CAPI void U_EXPORT2
    880 uspoof_setAllowedLocales(USpoofChecker *sc, const char *localesList, UErrorCode *status);
    881 
    882 /**
    883 * Get a list of locales for the scripts that are acceptable in strings
    884 *  to be checked.  If no limitations on scripts have been specified,
    885 *  an empty string will be returned.
    886 *
    887 *  uspoof_setAllowedChars() will reset the list of allowed locales to be empty.
    888 *
    889 *  The format of the returned list is the same as that supplied to
    890 *  uspoof_setAllowedLocales(), but the returned list may not be identical
    891 *  to the originally specified string; the string may be reformatted,
    892 *  and information other than languages from
    893 *  the originally specified locales may be omitted.
    894 *
    895 * @param sc           The USpoofChecker
    896 * @param status       The error code, set if this function encounters a problem.
    897 * @return             A string containing a list of locales corresponding
    898 *                     to the acceptable scripts, formatted like an
    899 *                     HTTP Accept-Language value.
    900 *
    901 * @stable ICU 4.2
    902 */
    903 U_CAPI const char * U_EXPORT2
    904 uspoof_getAllowedLocales(USpoofChecker *sc, UErrorCode *status);
    905 
    906 
    907 /**
    908 * Limit the acceptable characters to those specified by a Unicode Set.
    909 *   Any previously specified character limit
    910 *   is replaced by the new settings.  This includes limits on
    911 *   characters that were set with the uspoof_setAllowedLocales() function.
    912 *
    913 * The USPOOF_CHAR_LIMIT test is automatically enabled for this
    914 * USpoofChecker by this function.
    915 *
    916 * @param sc       The USpoofChecker
    917 * @param chars    A Unicode Set containing the list of
    918 *                 characters that are permitted.  Ownership of the set
    919 *                 remains with the caller.  The incoming set is cloned by
    920 *                 this function, so there are no restrictions on modifying
    921 *                 or deleting the USet after calling this function.
    922 * @param status   The error code, set if this function encounters a problem.
    923 * @stable ICU 4.2
    924 */
    925 U_CAPI void U_EXPORT2
    926 uspoof_setAllowedChars(USpoofChecker *sc, const USet *chars, UErrorCode *status);
    927 
    928 
    929 /**
    930 * Get a USet for the characters permitted in an identifier.
    931 * This corresponds to the limits imposed by uspoof_setAllowedChars()
    932 * and uspoof_setAllowedLocales(). Limitations imposed by other checks will not be
    933 * reflected in the set returned by this function.
    934 *
    935 * The returned set will be frozen, meaning that it cannot be modified
    936 * by the caller.
    937 *
    938 * Ownership of the returned set remains with the Spoof Detector.  The
    939 * returned set will become invalid if the spoof detector is closed,
    940 * or if a new set of allowed characters is specified.
    941 *
    942 * @param sc       The USpoofChecker
    943 * @param status   The error code, set if this function encounters a problem.
    944 * @return         A USet containing the characters that are permitted by
    945 *                 the USPOOF_CHAR_LIMIT test.
    946 * @see uspoof_setAllowedChars
    947 * @stable ICU 4.2
    948 */
    949 U_CAPI const USet * U_EXPORT2
    950 uspoof_getAllowedChars(const USpoofChecker *sc, UErrorCode *status);
    951 
    952 
    953 /**
    954 * Check the specified string for possible security issues.
    955 * The text to be checked will typically be an identifier of some sort.
    956 * The set of checks to be performed is specified with uspoof_setChecks().
    957 *
    958 * \note
    959 *   Consider using the newer API, {@link uspoof_check2}, instead.
    960 *   The newer API exposes additional information from the check procedure
    961 *   and is otherwise identical to this method.
    962 *
    963 * @param sc      The USpoofChecker
    964 * @param id      The identifier to be checked for possible security issues,
    965 *                in UTF-16 format.
    966 * @param length  the length of the string to be checked, expressed in
    967 *                16-bit UTF-16 code units, or -1 if the string is
    968 *                zero terminated.
    969 * @param position  Deprecated in ICU 51.  Always returns zero.
    970 *                Originally, an out parameter for the index of the first
    971 *                string position that failed a check.
    972 *                This parameter may be NULL.
    973 * @param status  The error code, set if an error occurred while attempting to
    974 *                perform the check.
    975 *                Spoofing or security issues detected with the input string are
    976 *                not reported here, but through the function's return value.
    977 * @return        An integer value with bits set for any potential security
    978 *                or spoofing issues detected.  The bits are defined by
    979 *                enum USpoofChecks.  (returned_value & USPOOF_ALL_CHECKS)
    980 *                will be zero if the input string passes all of the
    981 *                enabled checks.
    982 * @see uspoof_check2
    983 * @stable ICU 4.2
    984 */
    985 U_CAPI int32_t U_EXPORT2
    986 uspoof_check(const USpoofChecker *sc,
    987                         const UChar *id, int32_t length,
    988                         int32_t *position,
    989                         UErrorCode *status);
    990 
    991 
    992 /**
    993 * Check the specified string for possible security issues.
    994 * The text to be checked will typically be an identifier of some sort.
    995 * The set of checks to be performed is specified with uspoof_setChecks().
    996 *
    997 * \note
    998 *   Consider using the newer API, {@link uspoof_check2UTF8}, instead.
    999 *   The newer API exposes additional information from the check procedure
   1000 *   and is otherwise identical to this method.
   1001 *
   1002 * @param sc      The USpoofChecker
   1003 * @param id      The identifier to be checked for possible security issues, in UTF-8 format.
   1004 * @param length  the length of the string to be checked, or -1 if the string is
   1005 *                zero terminated.
   1006 * @param position  Deprecated in ICU 51.  Always returns zero.
   1007 *                Originally, an out parameter for the index of the first
   1008 *                string position that failed a check.
   1009 *                This parameter may be NULL.
   1010 * @param status  The error code, set if an error occurred while attempting to
   1011 *                perform the check.
   1012 *                Spoofing or security issues detected with the input string are
   1013 *                not reported here, but through the function's return value.
   1014 *                If the input contains invalid UTF-8 sequences,
   1015 *                a status of U_INVALID_CHAR_FOUND will be returned.
   1016 * @return        An integer value with bits set for any potential security
   1017 *                or spoofing issues detected.  The bits are defined by
   1018 *                enum USpoofChecks.  (returned_value & USPOOF_ALL_CHECKS)
   1019 *                will be zero if the input string passes all of the
   1020 *                enabled checks.
   1021 * @see uspoof_check2UTF8
   1022 * @stable ICU 4.2
   1023 */
   1024 U_CAPI int32_t U_EXPORT2
   1025 uspoof_checkUTF8(const USpoofChecker *sc,
   1026                 const char *id, int32_t length,
   1027                 int32_t *position,
   1028                 UErrorCode *status);
   1029 
   1030 
   1031 /**
   1032 * Check the specified string for possible security issues.
   1033 * The text to be checked will typically be an identifier of some sort.
   1034 * The set of checks to be performed is specified with uspoof_setChecks().
   1035 *
   1036 * @param sc      The USpoofChecker
   1037 * @param id      The identifier to be checked for possible security issues,
   1038 *                in UTF-16 format.
   1039 * @param length  the length of the string to be checked, expressed in 16-bit
   1040 *                UTF-16 code units, or -1 if the string is zero terminated.
   1041 * @param checkResult  An instance of USpoofCheckResult to be filled with
   1042 *                details about the identifier.  Can be NULL.
   1043 * @param status  The error code, set if an error occurred while attempting to
   1044 *                perform the check.
   1045 *                Spoofing or security issues detected with the input string are
   1046 *                not reported here, but through the function's return value.
   1047 * @return        An integer value with bits set for any potential security
   1048 *                or spoofing issues detected.  The bits are defined by
   1049 *                enum USpoofChecks.  (returned_value & USPOOF_ALL_CHECKS)
   1050 *                will be zero if the input string passes all of the
   1051 *                enabled checks.  Any information in this bitmask will be
   1052 *                consistent with the information saved in the optional
   1053 *                checkResult parameter.
   1054 * @see uspoof_openCheckResult
   1055 * @see uspoof_check2UTF8
   1056 * @see uspoof_check2UnicodeString
   1057 * @stable ICU 58
   1058 */
   1059 U_CAPI int32_t U_EXPORT2
   1060 uspoof_check2(const USpoofChecker *sc,
   1061    const UChar* id, int32_t length,
   1062    USpoofCheckResult* checkResult,
   1063    UErrorCode *status);
   1064 
   1065 /**
   1066 * Check the specified string for possible security issues.
   1067 * The text to be checked will typically be an identifier of some sort.
   1068 * The set of checks to be performed is specified with uspoof_setChecks().
   1069 *
   1070 * This version of {@link uspoof_check} accepts a USpoofCheckResult, which
   1071 * returns additional information about the identifier.  For more
   1072 * information, see {@link uspoof_openCheckResult}.
   1073 *
   1074 * @param sc      The USpoofChecker
   1075 * @param id      The identifier to be checked for possible security issues, in UTF-8 format.
   1076 * @param length  the length of the string to be checked, or -1 if the string is
   1077 *                zero terminated.
   1078 * @param checkResult  An instance of USpoofCheckResult to be filled with
   1079 *                details about the identifier.  Can be NULL.
   1080 * @param status  The error code, set if an error occurred while attempting to
   1081 *                perform the check.
   1082 *                Spoofing or security issues detected with the input string are
   1083 *                not reported here, but through the function's return value.
   1084 * @return        An integer value with bits set for any potential security
   1085 *                or spoofing issues detected.  The bits are defined by
   1086 *                enum USpoofChecks.  (returned_value & USPOOF_ALL_CHECKS)
   1087 *                will be zero if the input string passes all of the
   1088 *                enabled checks.  Any information in this bitmask will be
   1089 *                consistent with the information saved in the optional
   1090 *                checkResult parameter.
   1091 * @see uspoof_openCheckResult
   1092 * @see uspoof_check2
   1093 * @see uspoof_check2UnicodeString
   1094 * @stable ICU 58
   1095 */
   1096 U_CAPI int32_t U_EXPORT2
   1097 uspoof_check2UTF8(const USpoofChecker *sc,
   1098    const char *id, int32_t length,
   1099    USpoofCheckResult* checkResult,
   1100    UErrorCode *status);
   1101 
   1102 /**
   1103 * Create a USpoofCheckResult, used by the {@link uspoof_check2} class of functions to return
   1104 * information about the identifier.  Information includes:
   1105 * <ul>
   1106 *   <li>A bitmask of the checks that failed</li>
   1107 *   <li>The identifier's restriction level (UTS 39 section 5.2)</li>
   1108 *   <li>The set of numerics in the string (UTS 39 section 5.3)</li>
   1109 * </ul>
   1110 * The data held in a USpoofCheckResult is cleared whenever it is passed into a new call
   1111 * of {@link uspoof_check2}.
   1112 *
   1113 * @param status  The error code, set if this function encounters a problem.
   1114 * @return        the newly created USpoofCheckResult; release it with uspoof_closeCheckResult()
   1115 * @see uspoof_check2
   1116 * @see uspoof_check2UTF8
   1117 * @see uspoof_check2UnicodeString
   1118 * @stable ICU 58
   1119 */
   1120 U_CAPI USpoofCheckResult* U_EXPORT2
   1121 uspoof_openCheckResult(UErrorCode *status);
   1122 
   1123 /**
   1124 * Close a USpoofCheckResult, freeing any memory that was being held by its implementation.
   1125 *
   1126 * @param checkResult  The instance of USpoofCheckResult to close
   1127 * @see uspoof_openCheckResult
   1128 * @stable ICU 58
   1129 */
   1130 U_CAPI void U_EXPORT2
   1131 uspoof_closeCheckResult(USpoofCheckResult *checkResult);
   1132 
   1133 /**
   1134 * Indicates which of the spoof checks have failed. The value is a bitwise OR of the constants for the tests
   1135 * in question: {@link USPOOF_RESTRICTION_LEVEL}, {@link USPOOF_CHAR_LIMIT}, and so on.
   1136 *
   1137 * @param checkResult  The instance of USpoofCheckResult created by {@link uspoof_openCheckResult}
   1138 * @param status       The error code, set if an error occurred.
   1139 * @return        An integer value with bits set for any potential security
   1140 *                or spoofing issues detected.  The bits are defined by
   1141 *                enum USpoofChecks.  (returned_value & USPOOF_ALL_CHECKS)
   1142 *                will be zero if the input string passes all of the
   1143 *                enabled checks.
   1144 * @see uspoof_setChecks
   1145 * @stable ICU 58
   1146 */
   1147 U_CAPI int32_t U_EXPORT2
   1148 uspoof_getCheckResultChecks(const USpoofCheckResult *checkResult, UErrorCode *status);
   1149 
   1150 /**
   1151 * Gets the restriction level that the text meets, if the {@link USPOOF_RESTRICTION_LEVEL} check
   1152 * was enabled; otherwise, undefined.
   1153 *
   1154 * @param checkResult  The instance of USpoofCheckResult created by {@link uspoof_openCheckResult}
   1155 * @param status       The error code, set if an error occurred.
   1156 * @return             The restriction level contained in the USpoofCheckResult
   1157 * @see uspoof_setRestrictionLevel
   1158 * @stable ICU 58
   1159 */
   1160 U_CAPI URestrictionLevel U_EXPORT2
   1161 uspoof_getCheckResultRestrictionLevel(const USpoofCheckResult *checkResult, UErrorCode *status);
   1162 
   1163 /**
   1164 * Gets the set of numerics found in the string, if the USPOOF_MIXED_NUMBERS check was enabled;
   1165 * otherwise, undefined.  The set will contain the zero digit from each decimal number system found
   1166 * in the input string.  Ownership of the returned USet remains with the USpoofCheckResult.
   1167 * The USet will be freed when {@link uspoof_closeCheckResult} is called.
   1168 *
   1169 * @param checkResult  The instance of USpoofCheckResult created by {@link uspoof_openCheckResult}
   1170 * @param status       The error code, set if an error occurred.
   1171 * @return             The set of numerics contained in the USpoofCheckResult
   1172 * @stable ICU 58
   1173 */
   1174 U_CAPI const USet* U_EXPORT2
   1175 uspoof_getCheckResultNumerics(const USpoofCheckResult *checkResult, UErrorCode *status);
   1176 
   1177 
   1178 /**
   1179 * Check whether two specified strings are visually confusable.
   1180 *
   1181 * If the strings are confusable, the return value will be nonzero, as long as
   1182 * {@link USPOOF_CONFUSABLE} was enabled in uspoof_setChecks().
   1183 *
   1184 * The bits in the return value correspond to flags for each of the classes of
   1185 * confusables applicable to the two input strings.  According to UTS 39
   1186 * section 4, the possible flags are:
   1187 *
   1188 * <ul>
   1189 *   <li>{@link USPOOF_SINGLE_SCRIPT_CONFUSABLE}</li>
   1190 *   <li>{@link USPOOF_MIXED_SCRIPT_CONFUSABLE}</li>
   1191 *   <li>{@link USPOOF_WHOLE_SCRIPT_CONFUSABLE}</li>
   1192 * </ul>
   1193 *
   1194 * If one or more of the above flags were not listed in uspoof_setChecks(), this
   1195 * function will never report that class of confusable.  The check
   1196 * {@link USPOOF_CONFUSABLE} enables all three flags.
   1197 *
   1198 *
   1199 * @param sc      The USpoofChecker
   1200 * @param id1     The first of the two identifiers to be compared for
   1201 *                confusability.  The identifiers are in UTF-16 format.
   1202 * @param length1 The length of the first identifier, expressed in
   1203 *                16-bit UTF-16 code units, or -1 if the string is
   1204 *                nul terminated.
   1205 * @param id2     The second of the two identifiers to be compared for
   1206 *                confusability.  The identifiers are in UTF-16 format.
   1207 * @param length2 The length of the second identifier, expressed in
   1208 *                16-bit UTF-16 code units, or -1 if the string is
   1209 *                nul terminated.
   1210 * @param status  The error code, set if an error occurred while attempting to
   1211 *                perform the check.
   1212 *                Confusability of the identifiers is not reported here,
   1213 *                but through this function's return value.
   1214 * @return        An integer value with bit(s) set corresponding to
   1215 *                the type of confusability found, as defined by
   1216 *                enum USpoofChecks.  Zero is returned if the identifiers
   1217 *                are not confusable.
   1218 *
   1219 * @stable ICU 4.2
   1220 */
   1221 U_CAPI int32_t U_EXPORT2
   1222 uspoof_areConfusable(const USpoofChecker *sc,
   1223                     const UChar *id1, int32_t length1,
   1224                     const UChar *id2, int32_t length2,
   1225                     UErrorCode *status);
   1226 
   1227 /**
   1228 * Check whether two specified strings are visually confusable when
   1229 * displayed in a context with the given paragraph direction.
   1230 *
   1231 * If the strings are confusable, the return value will be nonzero, as long as
   1232 * {@link USPOOF_CONFUSABLE} was enabled in uspoof_setChecks().
   1233 *
   1234 * The bits in the return value correspond to flags for each of the classes of
   1235 * confusables applicable to the two input strings.  According to UTS 39
   1236 * section 4, the possible flags are:
   1237 *
   1238 * <ul>
   1239 *   <li>{@link USPOOF_SINGLE_SCRIPT_CONFUSABLE}</li>
   1240 *   <li>{@link USPOOF_MIXED_SCRIPT_CONFUSABLE}</li>
   1241 *   <li>{@link USPOOF_WHOLE_SCRIPT_CONFUSABLE}</li>
   1242 * </ul>
   1243 *
   1244 * If one or more of the above flags were not listed in uspoof_setChecks(), this
   1245 * function will never report that class of confusable.  The check
   1246 * {@link USPOOF_CONFUSABLE} enables all three flags.
   1247 *
   1248 *
   1249 * @param sc      The USpoofChecker
   1250 * @param direction The paragraph direction with which the identifiers are
   1251 *                displayed.  Must be either UBIDI_LTR or UBIDI_RTL.
   1252 * @param id1     The first of the two identifiers to be compared for
   1253 *                confusability.  The identifiers are in UTF-16 format.
   1254 * @param length1 The length of the first identifier, expressed in
   1255 *                16-bit UTF-16 code units, or -1 if the string is
   1256 *                nul terminated.
   1257 * @param id2     The second of the two identifiers to be compared for
   1258 *                confusability.  The identifiers are in UTF-16 format.
   1259 * @param length2 The length of the second identifier, expressed in
   1260 *                16-bit UTF-16 code units, or -1 if the string is
   1261 *                nul terminated.
   1262 * @param status  The error code, set if an error occurred while attempting to
   1263 *                perform the check.
   1264 *                Confusability of the identifiers is not reported here,
   1265 *                but through this function's return value.
   1266 * @return        An integer value with bit(s) set corresponding to
   1267 *                the type of confusability found, as defined by
   1268 *                enum USpoofChecks.  Zero is returned if the identifiers
   1269 *                are not confusable.
   1270 *
   1271 * @stable ICU 74
   1272 */
   1273 U_CAPI uint32_t U_EXPORT2 uspoof_areBidiConfusable(const USpoofChecker *sc, UBiDiDirection direction,
   1274                                                  const UChar *id1, int32_t length1,
   1275                                                  const UChar *id2, int32_t length2,
   1276                                                  UErrorCode *status);
   1277 
   1278 /**
   1279 * A version of {@link uspoof_areConfusable} accepting strings in UTF-8 format.
   1280 *
   1281 * @param sc      The USpoofChecker
   1282 * @param id1     The first of the two identifiers to be compared for
   1283 *                confusability.  The strings are in UTF-8 format.
   1284 * @param length1 The length of the first identifier, in bytes, or -1
   1285 *                if the string is nul terminated.
   1286 * @param id2     The second of the two identifiers to be compared for
   1287 *                confusability.  The strings are in UTF-8 format.
   1288 * @param length2 The length of the second string in bytes, or -1
   1289 *                if the string is nul terminated.
   1290 * @param status  The error code, set if an error occurred while attempting to
   1291 *                perform the check.
   1292 *                Confusability of the strings is not reported here,
   1293 *                but through this function's return value.
   1294 * @return        An integer value with bit(s) set corresponding to
   1295 *                the type of confusability found, as defined by
   1296 *                enum USpoofChecks.  Zero is returned if the strings
   1297 *                are not confusable.
   1298 *
   1299 * @stable ICU 4.2
   1300 *
   1301 * @see uspoof_areConfusable
   1302 */
   1303 U_CAPI int32_t U_EXPORT2
   1304 uspoof_areConfusableUTF8(const USpoofChecker *sc,
   1305                         const char *id1, int32_t length1,
   1306                         const char *id2, int32_t length2,
   1307                         UErrorCode *status);
   1308 
   1309 /**
   1310 * A version of {@link uspoof_areBidiConfusable} accepting strings in UTF-8 format.
   1311 *
   1312 * @param sc      The USpoofChecker
   1313 * @param direction The paragraph direction with which the identifiers are
   1314 *                displayed.  Must be either UBIDI_LTR or UBIDI_RTL.
   1315 * @param id1     The first of the two identifiers to be compared for
   1316 *                confusability.  The strings are in UTF-8 format.
   1317 * @param length1 The length of the first identifier, in bytes, or -1
   1318 *                if the string is nul terminated.
   1319 * @param id2     The second of the two identifiers to be compared for
   1320 *                confusability.  The strings are in UTF-8 format.
   1321 * @param length2 The length of the second string in bytes, or -1
   1322 *                if the string is nul terminated.
   1323 * @param status  The error code, set if an error occurred while attempting to
   1324 *                perform the check.
   1325 *                Confusability of the strings is not reported here,
   1326 *                but through this function's return value.
   1327 * @return        An integer value with bit(s) set corresponding to
   1328 *                the type of confusability found, as defined by
   1329 *                enum USpoofChecks.  Zero is returned if the strings
   1330 *                are not confusable.
   1331 *
   1332 * @stable ICU 74
   1333 *
   1334 * @see uspoof_areBidiConfusable
   1335 */
   1336 U_CAPI uint32_t U_EXPORT2 uspoof_areBidiConfusableUTF8(const USpoofChecker *sc, UBiDiDirection direction,
   1337                                                      const char *id1, int32_t length1,
   1338                                                      const char *id2, int32_t length2,
   1339                                                      UErrorCode *status);
   1340 
   1341 /**
   1342 *  Get the "skeleton" for an identifier.
   1343 *  Skeletons are a transformation of the input identifier;
   1344 * Two identifiers are confusable if their skeletons are identical.
   1345 *  See Unicode Technical Standard #39 for additional information.
   1346 *
   1347 *  Using skeletons directly makes it possible to quickly check
   1348 *  whether an identifier is confusable with any of some large
   1349 *  set of existing identifiers, by creating an efficiently
   1350 *  searchable collection of the skeletons.
   1351 *
   1352 * @param sc      The USpoofChecker
   1353 * @param type    Deprecated in ICU 58.  You may pass any number.
   1354 *                Originally, controlled which of the Unicode confusable data
   1355 *                tables to use.
   1356 * @param id      The input identifier whose skeleton will be computed.
   1357 * @param length  The length of the input identifier, expressed in 16 bit
   1358 *                UTF-16 code units, or -1 if the string is zero terminated.
   1359 * @param dest    The output buffer, to receive the skeleton string.
   1360 * @param destCapacity  The length of the output buffer, in 16 bit units.
   1361 *                The destCapacity may be zero, in which case the function will
   1362 *                return the actual length of the skeleton.
   1363 * @param status  The error code, set if an error occurred while attempting to
   1364 *                perform the check.
   1365 * @return        The length of the skeleton string.  The returned length
   1366 *                is always that of the complete skeleton, even when the
   1367 *                supplied buffer is too small (or of zero length)
   1368 *
   1369 * @stable ICU 4.2
   1370 * @see uspoof_areConfusable
   1371 */
   1372 U_CAPI int32_t U_EXPORT2
   1373 uspoof_getSkeleton(const USpoofChecker *sc,
   1374                   uint32_t type,
   1375                   const UChar *id,  int32_t length,
   1376                   UChar *dest, int32_t destCapacity,
   1377                   UErrorCode *status);
   1378 
   1379 /**
   1380 *  Get the "bidiSkeleton" for an identifier and a direction.
   1381 *  Skeletons are a transformation of the input identifier;
   1382 *  Two identifiers are LTR-confusable if their LTR bidiSkeletons are identical;
   1383 *  they are RTL-confusable if their RTL bidiSkeletons are identical.
   1384 *  See Unicode Technical Standard #39 for additional information:
   1385 *  https://www.unicode.org/reports/tr39/#Confusable_Detection.
   1386 *
   1387 *  Using skeletons directly makes it possible to quickly check
   1388 *  whether an identifier is confusable with any of some large
   1389 *  set of existing identifiers, by creating an efficiently
   1390 *  searchable collection of the skeletons.
   1391 *
   1392 * @param sc      The USpoofChecker.
   1393 * @param direction The context direction with which the identifier will be
   1394 *                displayed.  Must be either UBIDI_LTR or UBIDI_RTL.
   1395 * @param id      The input identifier whose skeleton will be computed.
   1396 * @param length  The length of the input identifier, expressed in 16 bit
   1397 *                UTF-16 code units, or -1 if the string is zero terminated.
   1398 * @param dest    The output buffer, to receive the skeleton string.
   1399 * @param destCapacity  The length of the output buffer, in 16 bit units.
   1400 *                The destCapacity may be zero, in which case the function will
   1401 *                return the actual length of the skeleton.
   1402 * @param status  The error code, set if an error occurred while attempting to
   1403 *                perform the check.
   1404 * @return        The length of the skeleton string.  The returned length
   1405 *                is always that of the complete skeleton, even when the
   1406 *                supplied buffer is too small (or of zero length)
   1407 *
   1408 * @stable ICU 74
   1409 * @see uspoof_areBidiConfusable
   1410 */
   1411 U_CAPI int32_t U_EXPORT2 uspoof_getBidiSkeleton(const USpoofChecker *sc,
   1412                                                UBiDiDirection direction,
   1413                                                const UChar *id, int32_t length,
   1414                                                UChar *dest, int32_t destCapacity, UErrorCode *status);
   1415 
   1416 /**
   1417 *  Get the "skeleton" for an identifier.
   1418 *  Skeletons are a transformation of the input identifier;
   1419 *  Two identifiers are confusable if their skeletons are identical.
   1420 *  See Unicode Technical Standard #39 for additional information.
   1421 *
   1422 *  Using skeletons directly makes it possible to quickly check
   1423 *  whether an identifier is confusable with any of some large
   1424 *  set of existing identifiers, by creating an efficiently
   1425 *  searchable collection of the skeletons.
   1426 *
   1427 * @param sc      The USpoofChecker
   1428 * @param type    Deprecated in ICU 58.  You may pass any number.
   1429 *                Originally, controlled which of the Unicode confusable data
   1430 *                tables to use.
   1431 * @param id      The UTF-8 format identifier whose skeleton will be computed.
   1432 * @param length  The length of the input string, in bytes,
   1433 *                or -1 if the string is zero terminated.
   1434 * @param dest    The output buffer, to receive the skeleton string.
   1435 * @param destCapacity  The length of the output buffer, in bytes.
   1436 *                The destCapacity may be zero, in which case the function will
   1437 *                return the actual length of the skeleton.
   1438 * @param status  The error code, set if an error occurred while attempting to
   1439 *                perform the check.  Possible Errors include U_INVALID_CHAR_FOUND
   1440 *                   for invalid UTF-8 sequences, and
   1441 *                   U_BUFFER_OVERFLOW_ERROR if the destination buffer is too small
   1442 *                   to hold the complete skeleton.
   1443 * @return        The length of the skeleton string, in bytes.  The returned length
   1444 *                is always that of the complete skeleton, even when the
   1445 *                supplied buffer is too small (or of zero length)
   1446 *
   1447 * @stable ICU 4.2
   1448 */
   1449 U_CAPI int32_t U_EXPORT2
   1450 uspoof_getSkeletonUTF8(const USpoofChecker *sc,
   1451                       uint32_t type,
   1452                       const char *id,  int32_t length,
   1453                       char *dest, int32_t destCapacity,
   1454                       UErrorCode *status);
   1455 
   1456 /**
   1457 *  Get the "bidiSkeleton" for an identifier and a direction.
   1458 *  Skeletons are a transformation of the input identifier;
   1459 *  Two identifiers are LTR-confusable if their LTR bidiSkeletons are identical;
   1460 *  they are RTL-confusable if their RTL bidiSkeletons are identical.
   1461 *  See Unicode Technical Standard #39 for additional information:
   1462 *  https://www.unicode.org/reports/tr39/#Confusable_Detection.
   1463 *
   1464 *  Using skeletons directly makes it possible to quickly check
   1465 *  whether an identifier is confusable with any of some large
   1466 *  set of existing identifiers, by creating an efficiently
   1467 *  searchable collection of the skeletons.
   1468 *
   1469 * @param sc      The USpoofChecker
   1470 * @param direction The context direction with which the identifier will be
   1471 *                displayed.  Must be either UBIDI_LTR or UBIDI_RTL.
   1472 * @param id      The UTF-8 format identifier whose skeleton will be computed.
   1473 * @param length  The length of the input string, in bytes,
   1474 *                or -1 if the string is zero terminated.
   1475 * @param dest    The output buffer, to receive the skeleton string.
   1476 * @param destCapacity  The length of the output buffer, in bytes.
   1477 *                The destCapacity may be zero, in which case the function will
   1478 *                return the actual length of the skeleton.
   1479 * @param status  The error code, set if an error occurred while attempting to
   1480 *                perform the check.  Possible Errors include U_INVALID_CHAR_FOUND
   1481 *                for invalid UTF-8 sequences, and
   1482 *                U_BUFFER_OVERFLOW_ERROR if the destination buffer is too small
   1483 *                to hold the complete skeleton.
   1484 * @return        The length of the skeleton string, in bytes.  The returned length
   1485 *                is always that of the complete skeleton, even when the
   1486 *                supplied buffer is too small (or of zero length)
   1487 *
   1488 * @stable ICU 74
   1489 */
   1490 U_CAPI int32_t U_EXPORT2 uspoof_getBidiSkeletonUTF8(const USpoofChecker *sc, UBiDiDirection direction,
   1491                                                    const char *id, int32_t length, char *dest,
   1492                                                    int32_t destCapacity, UErrorCode *status);
   1493 
   1494 /**
   1495  * Get the set of Candidate Characters for Inclusion in Identifiers, as defined
   1496  * in http://unicode.org/Public/security/latest/xidmodifications.txt
   1497  * and documented in http://www.unicode.org/reports/tr39/, Unicode Security Mechanisms.
   1498  *
   1499  * The returned set is frozen. Ownership of the set remains with the ICU library; it must not
   1500  * be deleted by the caller.
   1501  *
   1502  * @param status The error code, set if a problem occurs while creating the set.
   1503  *
   1504  * @stable ICU 51
   1505  */
   1506 U_CAPI const USet * U_EXPORT2
   1507 uspoof_getInclusionSet(UErrorCode *status);
   1508 
   1509 /**
   1510  * Get the set of characters from Recommended Scripts for Inclusion in Identifiers, as defined
   1511  * in http://unicode.org/Public/security/latest/xidmodifications.txt
   1512  * and documented in http://www.unicode.org/reports/tr39/, Unicode Security Mechanisms.
   1513  *
   1514  * The returned set is frozen. Ownership of the set remains with the ICU library; it must not
   1515  * be deleted by the caller.
   1516  *
   1517  * @param status The error code, set if a problem occurs while creating the set.
   1518  *
   1519  * @stable ICU 51
   1520  */
   1521 U_CAPI const USet * U_EXPORT2
   1522 uspoof_getRecommendedSet(UErrorCode *status);
   1523 
   1524 /**
   1525 * Serialize the data for a spoof detector into a chunk of memory.
   1526 * The flattened spoof detection tables can later be used to efficiently
   1527 * instantiate a new Spoof Detector.
   1528 *
   1529 * The serialized spoof checker includes only the data compiled from the
   1530 * Unicode data tables by uspoof_openFromSource(); it does not include
   1531 * any other state or configuration that may have been set.
   1532 *
   1533 * @param sc   the Spoof Detector whose data is to be serialized.
   1534 * @param data a pointer to 32-bit-aligned memory to be filled with the data,
   1535 *             can be NULL if capacity==0
   1536 * @param capacity the number of bytes available at data,
   1537 *                 or 0 for preflighting
   1538 * @param status an in/out ICU UErrorCode; possible errors include:
   1539 * - U_BUFFER_OVERFLOW_ERROR if the data storage block is too small for serialization
   1540 * - U_ILLEGAL_ARGUMENT_ERROR  the data or capacity parameters are bad
   1541 * @return the number of bytes written or needed for the spoof data
   1542 *
   1543 * @see uspoof_openFromSerialized()
   1544 * @stable ICU 4.2
   1545 */
   1546 U_CAPI int32_t U_EXPORT2
   1547 uspoof_serialize(USpoofChecker *sc,
   1548                 void *data, int32_t capacity,
   1549                 UErrorCode *status);
   1550 
   1551 U_CDECL_END
   1552 
   1553 #if U_SHOW_CPLUSPLUS_API
   1554 
   1555 U_NAMESPACE_BEGIN
   1556 
   1557 /**
   1558 * \class LocalUSpoofCheckerPointer
   1559 * "Smart pointer" class, closes a USpoofChecker via uspoof_close().
   1560 * For most methods see the LocalPointerBase base class.
   1561 *
   1562 * @see LocalPointerBase
   1563 * @see LocalPointer
   1564 * @stable ICU 4.4
   1565 */
   1566 /**
   1567 * \cond
   1568 * Note: Doxygen is giving a bogus warning on this U_DEFINE_LOCAL_OPEN_POINTER.
   1569 *       For now, suppress with a Doxygen cond
   1570 */
   1571 U_DEFINE_LOCAL_OPEN_POINTER(LocalUSpoofCheckerPointer, USpoofChecker, uspoof_close);
   1572 /** \endcond */
   1573 
   1574 /**
   1575 * \class LocalUSpoofCheckResultPointer
   1576 * "Smart pointer" class, closes a USpoofCheckResult via `uspoof_closeCheckResult()`.
   1577 * For most methods see the LocalPointerBase base class.
   1578 *
   1579 * @see LocalPointerBase
   1580 * @see LocalPointer
   1581 * @stable ICU 58
   1582 */
   1583 
   1584 /**
   1585 * \cond
   1586 * Note: Doxygen is giving a bogus warning on this U_DEFINE_LOCAL_OPEN_POINTER.
   1587 *       For now, suppress with a Doxygen cond
   1588 */
   1589 U_DEFINE_LOCAL_OPEN_POINTER(LocalUSpoofCheckResultPointer, USpoofCheckResult, uspoof_closeCheckResult);
   1590 /** \endcond */
   1591 
   1592 U_NAMESPACE_END
   1593 
   1594 /**
   1595 * Limit the acceptable characters to those specified by a Unicode Set.
   1596 *   Any previously specified character limit is
   1597 *   replaced by the new settings.    This includes limits on
   1598 *   characters that were set with the uspoof_setAllowedLocales() function.
   1599 *
   1600 * The USPOOF_CHAR_LIMIT test is automatically enabled for this
   1601 * USpoofChecker by this function.
   1602 *
   1603 * @param sc       The USpoofChecker
   1604 * @param chars    A Unicode Set containing the list of
   1605 *                 characters that are permitted.  Ownership of the set
   1606 *                 remains with the caller.  The incoming set is cloned by
   1607 *                 this function, so there are no restrictions on modifying
   1608 *                 or deleting the UnicodeSet after calling this function.
   1609 * @param status   The error code, set if this function encounters a problem.
   1610 * @stable ICU 4.2
   1611 */
   1612 U_CAPI void U_EXPORT2
   1613 uspoof_setAllowedUnicodeSet(USpoofChecker *sc, const icu::UnicodeSet *chars, UErrorCode *status);
   1614 
   1615 
   1616 /**
   1617 * Get a UnicodeSet for the characters permitted in an identifier.
   1618 * This corresponds to the limits imposed by the Set Allowed Characters /
   1619 * UnicodeSet functions. Limitations imposed by other checks will not be
   1620 * reflected in the set returned by this function.
   1621 *
   1622 * The returned set will be frozen, meaning that it cannot be modified
   1623 * by the caller.
   1624 *
   1625 * Ownership of the returned set remains with the Spoof Detector.  The
   1626 * returned set will become invalid if the spoof detector is closed,
   1627 * or if a new set of allowed characters is specified.
   1628 *
   1629 *
   1630 * @param sc       The USpoofChecker
   1631 * @param status   The error code, set if this function encounters a problem.
   1632 * @return         A UnicodeSet containing the characters that are permitted by
   1633 *                 the USPOOF_CHAR_LIMIT test.
   1634 * @stable ICU 4.2
   1635 */
   1636 U_CAPI const icu::UnicodeSet * U_EXPORT2
   1637 uspoof_getAllowedUnicodeSet(const USpoofChecker *sc, UErrorCode *status);
   1638 
   1639 /**
   1640 * Check the specified string for possible security issues.
   1641 * The text to be checked will typically be an identifier of some sort.
   1642 * The set of checks to be performed is specified with uspoof_setChecks().
   1643 *
   1644 * \note
   1645 *   Consider using the newer API, {@link uspoof_check2UnicodeString}, instead.
   1646 *   The newer API exposes additional information from the check procedure
   1647 *   and is otherwise identical to this method.
   1648 *
   1649 * @param sc      The USpoofChecker
   1650 * @param id      An identifier to be checked for possible security issues.
   1651 * @param position  Deprecated in ICU 51.  Always returns zero.
   1652 *                Originally, an out parameter for the index of the first
   1653 *                string position that failed a check.
   1654 *                This parameter may be nullptr.
   1655 * @param status  The error code, set if an error occurred while attempting to
   1656 *                perform the check.
   1657 *                Spoofing or security issues detected with the input string are
   1658 *                not reported here, but through the function's return value.
   1659 * @return        An integer value with bits set for any potential security
   1660 *                or spoofing issues detected.  The bits are defined by
   1661 *                enum USpoofChecks.  (returned_value & USPOOF_ALL_CHECKS)
   1662 *                will be zero if the input string passes all of the
   1663 *                enabled checks.
   1664 * @see uspoof_check2UnicodeString
   1665 * @stable ICU 4.2
   1666 */
   1667 U_CAPI int32_t U_EXPORT2
   1668 uspoof_checkUnicodeString(const USpoofChecker *sc,
   1669                          const icu::UnicodeString &id,
   1670                          int32_t *position,
   1671                          UErrorCode *status);
   1672 
   1673 /**
   1674 * Check the specified string for possible security issues.
   1675 * The text to be checked will typically be an identifier of some sort.
   1676 * The set of checks to be performed is specified with uspoof_setChecks().
   1677 *
   1678 * @param sc      The USpoofChecker
   1679 * @param id      An identifier to be checked for possible security issues.
   1680 * @param checkResult  An instance of USpoofCheckResult to be filled with
   1681 *                details about the identifier.  Can be nullptr.
   1682 * @param status  The error code, set if an error occurred while attempting to
   1683 *                perform the check.
   1684 *                Spoofing or security issues detected with the input string are
   1685 *                not reported here, but through the function's return value.
   1686 * @return        An integer value with bits set for any potential security
   1687 *                or spoofing issues detected.  The bits are defined by
   1688 *                enum USpoofChecks.  (returned_value & USPOOF_ALL_CHECKS)
   1689 *                will be zero if the input string passes all of the
   1690 *                enabled checks.  Any information in this bitmask will be
   1691 *                consistent with the information saved in the optional
   1692 *                checkResult parameter.
   1693 * @see uspoof_openCheckResult
   1694 * @see uspoof_check2
   1695 * @see uspoof_check2UTF8
   1696 * @stable ICU 58
   1697 */
   1698 U_CAPI int32_t U_EXPORT2
   1699 uspoof_check2UnicodeString(const USpoofChecker *sc,
   1700    const icu::UnicodeString &id,
   1701    USpoofCheckResult* checkResult,
   1702    UErrorCode *status);
   1703 
   1704 /**
   1705 * A version of {@link uspoof_areConfusable} accepting UnicodeStrings.
   1706 *
   1707 * @param sc      The USpoofChecker
   1708 * @param s1     The first of the two identifiers to be compared for
   1709 *                confusability.  The strings are UnicodeStrings (UTF-16).
   1710 * @param s2     The second of the two identifiers to be compared for
   1711 *                confusability.  The strings are UnicodeStrings (UTF-16).
   1712 * @param status  The error code, set if an error occurred while attempting to
   1713 *                perform the check.
   1714 *                Confusability of the identifiers is not reported here,
   1715 *                but through this function's return value.
   1716 * @return        An integer value with bit(s) set corresponding to
   1717 *                the type of confusability found, as defined by
   1718 *                enum USpoofChecks.  Zero is returned if the identifiers
   1719 *                are not confusable.
   1720 *
   1721 * @stable ICU 4.2
   1722 *
   1723 * @see uspoof_areConfusable
   1724 */
   1725 U_CAPI int32_t U_EXPORT2
   1726 uspoof_areConfusableUnicodeString(const USpoofChecker *sc,
   1727                                  const icu::UnicodeString &s1,
   1728                                  const icu::UnicodeString &s2,
   1729                                  UErrorCode *status);
   1730 
   1731 /**
   1732 * A version of {@link uspoof_areBidiConfusable} accepting UnicodeStrings.
   1733 *
   1734 * @param sc      The USpoofChecker
   1735 * @param direction The paragraph direction with which the identifiers are
   1736 *                displayed.  Must be either UBIDI_LTR or UBIDI_RTL.
   1737 * @param s1     The first of the two identifiers to be compared for
   1738 *                confusability.  The strings are UnicodeStrings (UTF-16).
   1739 * @param s2     The second of the two identifiers to be compared for
   1740 *                confusability.  The strings are UnicodeStrings (UTF-16).
   1741 * @param status  The error code, set if an error occurred while attempting to
   1742 *                perform the check.
   1743 *                Confusability of the identifiers is not reported here,
   1744 *                but through this function's return value.
   1745 * @return        An integer value with bit(s) set corresponding to
   1746 *                the type of confusability found, as defined by
   1747 *                enum USpoofChecks.  Zero is returned if the identifiers
   1748 *                are not confusable.
   1749 *
   1750 * @stable ICU 74
   1751 *
   1752 * @see uspoof_areBidiConfusable
   1753 */
   1754 U_CAPI uint32_t U_EXPORT2 uspoof_areBidiConfusableUnicodeString(const USpoofChecker *sc,
   1755                                                               UBiDiDirection direction,
   1756                                                               const icu::UnicodeString &s1,
   1757                                                               const icu::UnicodeString &s2,
   1758                                                               UErrorCode *status);
   1759 
   1760 /**
   1761 *  Get the "skeleton" for an identifier.
   1762 *  Skeletons are a transformation of the input identifier;
   1763 *  Two identifiers are confusable if their skeletons are identical.
   1764 *  See Unicode Technical Standard #39 for additional information.
   1765 *
   1766 *  Using skeletons directly makes it possible to quickly check
   1767 *  whether an identifier is confusable with any of some large
   1768 *  set of existing identifiers, by creating an efficiently
   1769 *  searchable collection of the skeletons.
   1770 *
   1771 * @param sc      The USpoofChecker.
   1772 * @param type    Deprecated in ICU 58.  You may pass any number.
   1773 *                Originally, controlled which of the Unicode confusable data
   1774 *                tables to use.
   1775 * @param id      The input identifier whose skeleton will be computed.
   1776 * @param dest    The output identifier, to receive the skeleton string.
   1777 * @param status  The error code, set if an error occurred while attempting to
   1778 *                perform the check.
   1779 * @return        A reference to the destination (skeleton) string.
   1780 *
   1781 * @stable ICU 4.2
   1782 */
   1783 U_I18N_API icu::UnicodeString & U_EXPORT2
   1784 uspoof_getSkeletonUnicodeString(const USpoofChecker *sc,
   1785                                uint32_t type,
   1786                                const icu::UnicodeString &id,
   1787                                icu::UnicodeString &dest,
   1788                                UErrorCode *status);
   1789 
   1790 /**
   1791 *  Get the "bidiSkeleton" for an identifier and a direction.
   1792 *  Skeletons are a transformation of the input identifier;
   1793 *  Two identifiers are LTR-confusable if their LTR bidiSkeletons are identical;
   1794 *  they are RTL-confusable if their RTL bidiSkeletons are identical.
   1795 *  See Unicode Technical Standard #39 for additional information:
   1796 *  https://www.unicode.org/reports/tr39/#Confusable_Detection.
   1797 *
   1798 *  Using skeletons directly makes it possible to quickly check
   1799 *  whether an identifier is confusable with any of some large
   1800 *  set of existing identifiers, by creating an efficiently
   1801 *  searchable collection of the skeletons.
   1802 *
   1803 * @param sc      The USpoofChecker.
   1804 * @param direction The context direction with which the identifier will be
   1805 *                displayed.  Must be either UBIDI_LTR or UBIDI_RTL.
   1806 * @param id      The input identifier whose bidiSkeleton will be computed.
   1807 * @param dest    The output identifier, to receive the skeleton string.
   1808 * @param status  The error code, set if an error occurred while attempting to
   1809 *                perform the check.
   1810 * @return        A reference to the destination (skeleton) string.
   1811 *
   1812 * @stable ICU 74
   1813 */
   1814 U_I18N_API icu::UnicodeString &U_EXPORT2 uspoof_getBidiSkeletonUnicodeString(
   1815    const USpoofChecker *sc, UBiDiDirection direction, const icu::UnicodeString &id,
   1816    icu::UnicodeString &dest, UErrorCode *status);
   1817 
   1818 /**
   1819  * Get the set of Candidate Characters for Inclusion in Identifiers, as defined
   1820  * in http://unicode.org/Public/security/latest/xidmodifications.txt
   1821  * and documented in http://www.unicode.org/reports/tr39/, Unicode Security Mechanisms.
   1822  *
   1823  * The returned set is frozen. Ownership of the set remains with the ICU library; it must not
   1824  * be deleted by the caller.
   1825  *
   1826  * @param status The error code, set if a problem occurs while creating the set.
   1827  *
   1828  * @stable ICU 51
   1829  */
   1830 U_CAPI const icu::UnicodeSet * U_EXPORT2
   1831 uspoof_getInclusionUnicodeSet(UErrorCode *status);
   1832 
   1833 /**
   1834  * Get the set of characters from Recommended Scripts for Inclusion in Identifiers, as defined
   1835  * in http://unicode.org/Public/security/latest/xidmodifications.txt
   1836  * and documented in http://www.unicode.org/reports/tr39/, Unicode Security Mechanisms.
   1837  *
   1838  * The returned set is frozen. Ownership of the set remains with the ICU library; it must not
   1839  * be deleted by the caller.
   1840  *
   1841  * @param status The error code, set if a problem occurs while creating the set.
   1842  *
   1843  * @stable ICU 51
   1844  */
   1845 U_CAPI const icu::UnicodeSet * U_EXPORT2
   1846 uspoof_getRecommendedUnicodeSet(UErrorCode *status);
   1847 
   1848 #endif /* U_SHOW_CPLUSPLUS_API */
   1849 
   1850 #endif /* UCONFIG_NO_NORMALIZATION */
   1851 
   1852 #endif   /* USPOOF_H */
	tor-browser The Tor Browser
	git clone https://git.dasho.dev/tor-browser.git
	Log \| Files \| Refs \| README \| LICENSE